In [1]:
from sklearn.datasets import make_classification
from sklearn import preprocessing,model_selection
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
import numpy as np

In [2]:
x,y=make_classification(n_samples=200,n_features=10,n_clusters_per_class=1,n_redundant=0,n_informative=1)
def changes(x):
    if x==0:
        return -1
    else:
        return 1
y=np.array(list(map(changes,y)))
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3,shuffle=True,random_state=2)

In [4]:
# 感知机的原始算法
def perceptron_original(x,y,eta):
    # input : x,y,eta
    # output : omega,b,f(x)=omega*x+b
    m,n = np.shape(x)
    omega=np.zeros(n)
    b=0.0
    tag = True
    while tag:
        for i in range(m):
            if y[i]*(omega.T*x[i]).sum()+b <= 0:
                omega += eta*y[i]*x[i]
                b += eta*y[i]
                break
        tag=False
        
    return omega,b

def sign(x):
    if x>=0.0:
        return 1
    else:
        return -1

def perceptron_test(x,y,omega,b):
    m,n=np.shape(x)
    y_test= np.dot(x,omega) + b
    error_num=np.array([1 for i in range(m) if sign(y_test[i])==y[i]]).sum()
    print('error_rate: ',1.0-error_num/m)    

In [5]:
omega,b = perceptron_original(x_train,y_train,0.2)
print(omega)
print(b)
perceptron_test(x_test,y_test,omega,b)

[-0.11587695 -0.23555703  0.09030393 -0.08758801 -0.21071945 -0.18070192
  0.06767885  0.30269262 -0.04529923 -0.1559973 ]
-0.2
error_rate:  0.3666666666666667


In [23]:
# 感知机的对偶算法
def dual(x,y,x1,y1,eta):
    # input:x,y,eta
    # output: alpha,b,f(x)
    m,n=np.shape(x)
    alpha=np.zeros(m)
    b=0.0
    Gram = np.dot(x,x.T)
    tag = True
    while tag:
        for i in range(m):
            if y[i]*(alpha*y*Gram[:,i]+b).sum()<=0:
                alpha[i]+=eta
                b+=y[i]*eta
        tag=False
    
    Gram_t=np.dot(x,x1.T)
    # sign_v = np.vectorize(sign)
    y_test=np.array([(alpha*y*Gram_t[:,i]).sum() for i in range(np.shape(x1)[0])])
    right_num = np.array([1 for i in range(np.shape(x1)[0]) if sign(y_test[i])==y1[i]]).sum()
    print(alpha)
    print(b)
    print(1-right_num/np.shape(x1)[0])

In [24]:
dual(x_train,y_train,x_test,y_test,0.2)

[0.2 0.  0.  0.  0.2 0.  0.  0.  0.2 0.  0.  0.  0.2 0.  0.  0.  0.  0.
 0.  0.2 0.  0.2 0.  0.  0.2 0.  0.  0.2 0.  0.2 0.2 0.  0.  0.  0.  0.
 0.  0.  0.2 0.  0.2 0.2 0.  0.  0.  0.  0.  0.2 0.  0.2 0.  0.2 0.  0.
 0.  0.  0.  0.  0.  0.  0.2 0.2 0.  0.2 0.  0.2 0.  0.  0.2 0.  0.2 0.
 0.2 0.2 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.2 0.2 0.  0.
 0.  0.2 0.2 0.  0.  0.  0.  0.2 0.2 0.  0.  0.  0.  0.  0.  0.  0.  0.
 0.  0.  0.  0.  0.  0.  0.  0.2 0.2 0.  0.  0.  0.2 0.  0.2 0.  0.  0.2
 0.2 0.  0.  0.  0.  0.  0.  0.  0.2 0.2 0.  0.  0.  0. ]
0.0
0.1333333333333333


In [42]:
# 使用sklearn 训练感知机
ppt = Perceptron(penalty='l2',alpha=0.001,eta0=0.9,max_iter=2000,shuffle=True)
xx=[[1,0],[0,1],[0,0],[1,1]]
yy=[1,1,0,0]
ppt.fit(x_train,y_train)
print(ppt.coef_,ppt.intercept_)
print("params:\n")
print(Perceptron.get_params(ppt))
print("\n-----")
y_pred=ppt.predict(x_test)
score=ppt.score(x_test,y_test)
print(y_pred,score)

[[ 1.89502521 -0.95597044  1.91998953  0.68107992  1.32319038 -2.03689198
  -0.65694462  9.07081723 -2.40390577  0.5651715 ]] [-1.8]
params:

{'alpha': 0.001, 'class_weight': None, 'early_stopping': False, 'eta0': 0.9, 'fit_intercept': True, 'max_iter': 2000, 'n_iter_no_change': 5, 'n_jobs': None, 'penalty': 'l2', 'random_state': 0, 'shuffle': True, 'tol': 0.001, 'validation_fraction': 0.1, 'verbose': 0, 'warm_start': False}

-----
[-1 -1 -1  1  1  1 -1 -1  1 -1 -1  1  1 -1  1 -1 -1  1  1 -1 -1  1  1  1
  1  1 -1  1 -1 -1  1  1  1  1 -1 -1 -1 -1  1 -1 -1  1  1 -1  1 -1 -1 -1
 -1  1  1 -1 -1 -1 -1 -1 -1  1  1  1] 0.8833333333333333


In [43]:
# k折交叉验证
scores = model_selection.cross_validate(ppt,x,y,cv=20)
print(scores)

{'fit_time': array([0.00199342, 0.00199437, 0.00102186, 0.00099683, 0.00099921,
       0.00099754, 0.0019908 , 0.00099659, 0.00099778, 0.00099683,
       0.00099683, 0.0009973 , 0.00099754, 0.        , 0.00099707,
       0.00099754, 0.00099707, 0.0010097 , 0.        , 0.0009923 ]), 'score_time': array([0.00102091, 0.        , 0.        , 0.        , 0.        ,
       0.00101066, 0.00098825, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.0009973 , 0.        ,
       0.        , 0.        , 0.        , 0.0010078 , 0.        ]), 'test_score': array([0.8, 1. , 1. , 0.8, 0.9, 0.7, 0.8, 0.9, 0.7, 0.7, 0.8, 0.9, 0.9,
       0.7, 0.9, 0.9, 0.7, 0.8, 0.8, 0.8])}


In [50]:
ppt.fit(xx,yy)
ppt.predict([[0,1]])

array([1])