# 感知器（单层感知器） 

In [6]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd

In [3]:
from collections import OrderedDict

In [2]:
class Perceptron:
    def __init__(self, n_inputs=2):
        self.W = np.random.randn(n_inputs,1)
        self.b = np.random.randn(1)
    def forward(self, x):
        # 实际上相当于使用了阶跃函数作为激活函数
        return ((np.matmul(x.reshape((1,-1)),self.W.reshape((-1,1)))+self.b) > 0).astype(np.int32)
    
    def backward(self, x, label, pre, alpha=0.05):
        self.W = self.W + alpha * (label - pre) * x.reshape(*self.W.shape)
        self.b -= 0

In [6]:
data_x = np.array([[0,0],[0,1],[1,0],[1,1]])

## 与门 

In [3]:
class Andgate(Perceptron):
    def __init__(self):
        self.W = np.array([[0.5],[0.5]])
        self.b = -0.6

In [8]:
and_gate = Andgate()
for x in data_x:
    print x,and_gate.forward(x).squeeze()

[0 0] 0
[0 1] 0
[1 0] 0
[1 1] 1


## 或门 

In [4]:
class Orgate(Perceptron):
    def __init__(self):
        self.W = np.array([[0.5],[0.5]])
        self.b = -0.4

In [9]:
or_gate = Orgate()
for x in data_x:
    print x,or_gate.forward(x).squeeze()

[0 0] 0
[0 1] 1
[1 0] 1
[1 1] 1


## 与非门 

In [5]:
class Nandgate(Perceptron):
    def __init__(self):
        self.W = np.array([[-0.5],[-0.5]])
        self.b = 0.6

In [10]:
nand_gate = Nandgate()
for x in data_x:
    print x,nand_gate.forward(x).squeeze()

[0 0] 1
[0 1] 1
[1 0] 1
[1 1] 0


## 异或门 

In [13]:
class Xorgate(Perceptron):
    def __init__(self):
        pass
    def forward(self, x):
        and_gate = Andgate()
        or_gate = Orgate()
        nand_gate = Nandgate()
        
        or_res = or_gate.forward(x)
        nand_res = nand_gate.forward(x)
        return and_gate.forward(np.array([or_res, nand_res]))




In [14]:
xor_gate = Xorgate()
for x in data_x:
    print x,xor_gate.forward(x).squeeze()

[0 0] 0
[0 1] 1
[1 0] 1
[1 1] 0


## 感知器学习 

感知器学习的策略是最小化如下的损失函数
$$L_{(W,b)} = -\sum_{X_{i} \in M}y_{i}(WX_{i}+b)$$
其中M表示错误分类的实例集合，$WX_{i}+b$表示$X_{i}$到超平面$WX+b=0$的距离，且该距离与$y_{i}$符号相反，所以需要添加一个`-`号；为了最小化这个损失函数为0，即所有实例均被正确分类，我们采用SGD来进行最小化。


### 感知器学习算法的原始形式 

In [23]:
class Perceptron_single(object):
    def __init__(self, n_inputs=2):
        self.W = np.random.randn(n_inputs, 1)
        self.b = np.random.randn(1)

    def forward(self, x):
        out = np.dot(x, self.W) + self.b
        return out

    def loss(self, x, y):
        t = self.forward(x)
        p = np.where(t > 0, 1, -1)
        mid = np.abs((y - p))*y / 2.0
        losses = -np.dot(mid.T, t)
        return losses

    def accuracy(self, x, y):
        t = np.where(self.forward(x) > 0, 1, -1)
        return (1.0 * np.sum(t == y)) / y.shape[0]

    def gradient(self, x, y):
        t = self.forward(x)
        p = np.where(t > 0, 1, -1)
        mid = np.abs((p - y))*y / 2.0
        dW = -np.dot(x.T, mid)
        db = -np.sum(mid)
        return (dW, db)

def train(datas_x, datas_y, alpha=0.01, epochs=100, tests_x=None, tests_y=None, batch_size=100):
    net = Perceptron_single(n_inputs=datas_x.shape[1])
    train_loss_list = []
    train_size = datas_x.shape[0]
    for i in range(epochs):
        batch_mask = np.random.choice(train_size, batch_size)
        x = datas_x[batch_mask]
        y = datas_y[batch_mask] # 抽样
        grad = net.gradient(x, y)
        
        net.W -= grad[0]
        net.b -= grad[1]
        loss = net.loss(x, y)
        print "has trained %d times, the train's loss %f" % (i+1, loss)
        if tests_x is not None and tests_y is not None:
            print net.accuracy(tests_x, tests_y)
        train_loss_list.append(loss)
    return net, train_loss_list

In [28]:
def load_pers_data():
    W0 = 2.5
    W1 = -0.5
    b = -7.5
    
    x0 = np.random.randint(-200, 100, 10000)
    x1 = np.random.randint(-67, 209, 10000)
    y = np.where(((W0 * x0 + W1 * x1 + b) - 3.6 * np.random.randn(10000) + 13) > 0, 1, -1)
    return np.array([x0, x1]).T, y.reshape(1, -1).T

In [29]:
x, y = load_pers_data()
print x.shape
print x[:5]
print y[:10]

(10000, 2)
[[-37 155]
 [-87 -29]
 [ 98 122]
 [-97 170]
 [-19 175]]
[[-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [ 1]
 [-1]
 [-1]]


In [30]:
y_1 = y[y == 1]
y_0 = y[y == -1]
print y_1.shape
print y_0.shape

(2923,)
(7077,)


In [31]:
tests_x = x[8000:]
tests_y = y[8000:]

In [32]:
trains_x = x[:8000]
trains_y = y[:8000]

In [36]:
train(trains_x, trains_y, alpha=0.01, epochs=100, tests_x=tests_x, tests_y=tests_y, batch_size=100)

has trained 1 times, the train's loss 5713990.670348
0.838
has trained 2 times, the train's loss 2537047.171514
0.869
has trained 3 times, the train's loss 987054.216512
0.906
has trained 4 times, the train's loss 502597.914359
0.9475
has trained 5 times, the train's loss -0.000000
0.957
has trained 6 times, the train's loss 15131.235517
0.988
has trained 7 times, the train's loss -0.000000
0.9935
has trained 8 times, the train's loss -0.000000
0.9935
has trained 9 times, the train's loss 37805.765933
0.991
has trained 10 times, the train's loss -0.000000
0.991
has trained 11 times, the train's loss -0.000000
0.991
has trained 12 times, the train's loss -0.000000
0.991
has trained 13 times, the train's loss -0.000000
0.991
has trained 14 times, the train's loss -0.000000
0.991
has trained 15 times, the train's loss -0.000000
0.991
has trained 16 times, the train's loss -0.000000
0.991
has trained 17 times, the train's loss 870.301162
0.9935
has trained 18 times, the train's loss -0.000

(<__main__.Perceptron_single at 0x7fbc2218bc10>,
 [array([[ 5713990.67034782]]),
  array([[ 2537047.17151396]]),
  array([[ 987054.21651171]]),
  array([[ 502597.9143593]]),
  array([[-0.]]),
  array([[ 15131.23551739]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[ 37805.76593256]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[ 870.30116222]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[ 5057.53294815]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[ 4539.12563472]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[ 9517.69019159]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[ 1881.41669983]]),
  array([[ 5449.35756114]]),
  array([[ 10572.02527893]]),
  array([[ 26273.31705475]]),
  array([[-0.]]),
  array([[-0.]]),
  array([[ 

In [4]:
a = np.array([1,2,3])
b = np.array([2,3,4])
np.dot(a, b) + np.random.randn(1,)

array([ 19.20443274])

In [71]:
def accuracy_per(x_data, y_data, weight):
    y_pre = np.where(np.dot(x_data, weight)>0, 1, -1)
    return np.sum(y_pre == y_data)*1. / y_data.shape

In [85]:
def perceptron_train(x_train, y_train, epochs=100, is_pocket=False, init_weight_radio=0.1, alpha=1):
    M , N = x_train.shape
    W = init_weight_radio * np.random.randn(N, )
#     b = init_weight_radio * np.random.randn(1, )
    count = 0
    while True:
        for i in range(M):
            if accuracy_per(x_train, y_train, W) == 1:
                return (W, b, count)
            while True:
                y_pre = np.where(np.dot(x_train[i], W) > 0, 1, -1)
                if y_pre == y_train[i]:
                    break
                else:
                    W += alpha * y_train[i] * x_train[i]
                    # b += alpha * y_train[i]
                    count += 1

In [63]:
def read_csv(path='../data/coursera_mlf.csv'):
    data = pd.read_csv(filepath_or_buffer=path, delim_whitespace=True, header=None)
    return data.values

In [64]:
data = read_csv()

In [79]:
x_train = data[:, :-1]
y_train = data[:, -1].astype(np.int)

In [80]:
print x_train.shape
print y_train.shape

(400, 4)
(400,)


In [86]:
W, b, count = perceptron_train(x_train, y_train, init_weight_radio=0)

KeyboardInterrupt: 