In [1]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier,KNeighborsRegressor

In [2]:
X_train = np.array([
    [158, 64],
    [170, 86],
    [183, 84],
    [191, 80],
    [155, 49],
    [163, 59],
    [180, 67],
    [158, 54],
    [170, 67]
])
y_train = ['male', 'male', 'male', 'male', 'female', 'female', 'female', 'female', 'female']

### K-近邻算法  
KNN模型假设 : 互相接近的实例拥有类似的响应变量值  
近邻 : 度量空间中的训练实例  
度量空间 : 定义了集合中所有成员之间距离的特征空间  
惰性学习模型(基于实例的学习模型) : 会对训练数据集进行少量的处理或者完全不处理  
非参数模型 : 模型参数的个数并不固定,可能随着训练实例数量的增加而增加  
超参K : 设为一个奇数防止出现平局现象

#### 分类

In [3]:
x=np.array([[155,70]])
distances=np.sqrt(np.sum((X_train-x)**2,axis=1))  #欧氏距离
from collections import Counter
b=Counter(np.take(y_train,distances.argsort()[:3]))
b.most_common(1)[0][0]

'female'

In [4]:
from sklearn.preprocessing import LabelBinarizer  #转换器接口

In [5]:
lb=LabelBinarizer()
y_binarized=lb.fit_transform(y_train)
knnc=KNeighborsClassifier(n_neighbors=3)
knnc.fit(X_train,y_binarized.reshape(-1))
lb.inverse_transform(knnc.predict(np.array([155,70]).reshape(1,-1))[0])

array(['female'], dtype='<U6')

In [6]:
X_test = np.array([
    [168, 65],
    [180, 96],
    [160, 52],
    [169, 67]
])
y_test = ['male', 'male', 'female', 'female']
y_test_binarized=lb.transform(y_test)
y_=knnc.predict(X_test)
print(y_test_binarized.T[0],'Ture')
print(y_,"Predict")
lb.inverse_transform(y_)

[1 1 0 0] Ture
[0 1 0 0] Predict


array(['female', 'male', 'female', 'female'], dtype='<U6')

In [7]:
from sklearn.metrics import accuracy_score #准确率

In [8]:
accuracy_score(y_test_binarized,y_)

0.75

In [9]:
from sklearn.metrics import precision_score #精准率

In [10]:
precision_score(y_test_binarized,y_)

1.0

In [11]:
from sklearn.metrics import recall_score #召回率

In [12]:
recall_score(y_test_binarized,y_)

0.5

In [13]:
from sklearn.metrics import f1_score #f1得分

In [14]:
f1_score(y_test_binarized,y_)

0.6666666666666666

In [15]:
from sklearn.metrics import matthews_corrcoef #马修斯相关系数

In [16]:
matthews_corrcoef(y_test_binarized,y_)

0.5773502691896258

In [17]:
from sklearn.metrics import classification_report #精准率 召回率 F1得分

In [18]:
classification_report(y_test_binarized,y_,target_names=['Male'],labels=[1])

'              precision    recall  f1-score   support\n\n        Male       1.00      0.50      0.67         2\n\n   micro avg       1.00      0.50      0.67         2\n   macro avg       1.00      0.50      0.67         2\nweighted avg       1.00      0.50      0.67         2\n'

#### 回归

In [19]:
from sklearn.metrics import mean_absolute_error,mean_squared_error # 平均绝对误差,均方误差

In [20]:
X_train = np.array([
    [158, 1],
    [170, 1],
    [183, 1],
    [191, 1],
    [155, 0],
    [163, 0],
    [180, 0],
    [158, 0],
    [170, 0]
])
y_train = [64, 86, 84, 80, 49, 59, 67, 54, 67]

X_test = np.array([
    [160, 1],
    [196, 1],
    [168, 0],
    [177, 0]
])
y_test = [66, 87, 68, 74]
knnr=KNeighborsRegressor(n_neighbors=3)
knnr.fit(X_train,y_train)
y_=knnr.predict(X_test)

In [21]:
from sklearn.metrics import r2_score #决定系数

In [22]:
print(y_)
print(r2_score(y_test,y_))
print(mean_absolute_error(y_test,y_))
mean_squared_error(y_test,y_)

[59.         77.         70.66666667 72.66666667]
0.41250645994832036
5.25


39.47222222222223

In [23]:
from sklearn.preprocessing import StandardScaler #特征缩放的转换器
# 
ss=StandardScaler()

In [24]:
X_ss_train=ss.fit_transform(X_train)
X_ss_train

array([[-0.9908706 ,  1.11803399],
       [ 0.01869567,  1.11803399],
       [ 1.11239246,  1.11803399],
       [ 1.78543664,  1.11803399],
       [-1.24326216, -0.89442719],
       [-0.57021798, -0.89442719],
       [ 0.86000089, -0.89442719],
       [-0.9908706 , -0.89442719],
       [ 0.01869567, -0.89442719]])

In [25]:
knnr.fit(X_ss_train,y_train)
y_=knnr.predict(X_test)

In [26]:
print(y_)
print(r2_score(y_test,y_))
print(mean_absolute_error(y_test,y_))
mean_squared_error(y_test,y_)

[77. 77. 77. 77.]
-0.1572093023255814
8.25


77.75