
# 模型验证

In [2]:
# Accuracy section: build a small synthetic dataset to evaluate a classifier on.
import numpy as np

# Seed the legacy global RNG so that a fresh kernel regenerates the same matrix;
# without a seed every run produces different data and different metrics.
RANDOM_SEED = 2021
np.random.seed(RANDOM_SEED)

# 10 samples x 5 features drawn uniformly from [0, 1).
X_raw = np.random.random((10, 5))
# One string class label ('M' or 'F') per row of X.
y = np.array(['M', 'M', 'F', 'F', 'M', 'F', 'M', 'M', 'F', 'F'])
# Zero out every entry below 0.7. A new array is built instead of mutating in
# place, so re-running this cell always yields the same X.
X = np.where(X_raw < 0.7, 0.0, X_raw)
X

array([[0.        , 0.        , 0.86089451, 0.        , 0.82379675],
       [0.        , 0.7225362 , 0.81335889, 0.        , 0.87131057],
       [0.        , 0.        , 0.        , 0.        , 0.8363595 ],
       [0.7978362 , 0.        , 0.74741815, 0.96998286, 0.        ],
       [0.        , 0.84816995, 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.87410696, 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.90354473],
       [0.        , 0.99263962, 0.        , 0.        , 0.7454347 ],
       [0.        , 0.        , 0.        , 0.        , 0.85255938],
       [0.        , 0.86538734, 0.97190933, 0.        , 0.85000801]])

In [4]:
# Binarize the features: entries above the threshold (0.0) become 1, the rest 0.
from sklearn.preprocessing import Binarizer

# Keep the transformer object around under `binarizer`; fit_transform fits it
# on X and returns the transformed matrix in one call.
binarizer = Binarizer(threshold=0.0)
binarizer_X = binarizer.fit_transform(X)
binarizer_X

array([[0., 0., 1., 0., 1.],
       [0., 1., 1., 0., 1.],
       [0., 0., 0., 0., 1.],
       [1., 0., 1., 1., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1.],
       [0., 1., 0., 0., 1.],
       [0., 0., 0., 0., 1.],
       [0., 1., 1., 0., 1.]])

In [6]:
# Label-encode y: map the string class labels to integer codes.
from sklearn.preprocessing import LabelEncoder

# fit() returns the encoder itself, so `enc` is the fitted encoder.
enc = LabelEncoder().fit(y)
enc_y = enc.transform(y)
enc_y

array([1, 1, 0, 0, 1, 0, 1, 1, 0, 0], dtype=int64)

In [8]:
# Split the binarized features and encoded labels into training and test subsets.
from sklearn.model_selection import train_test_split

# random_state pins the shuffle so the same rows land in each subset on every run.
(train_data, test_data,
 train_target, test_target) = train_test_split(
    binarizer_X,
    enc_y,
    random_state=2021,
)
train_data, train_target


(array([[1., 0., 1., 1., 0.],
        [0., 1., 1., 0., 1.],
        [0., 0., 0., 0., 1.],
        [0., 0., 1., 0., 1.],
        [0., 1., 1., 0., 1.],
        [0., 0., 1., 0., 0.],
        [0., 1., 0., 0., 0.]]),
 array([0, 0, 0, 1, 1, 0, 1], dtype=int64))

In [11]:
# Predict with a k-nearest-neighbours classifier and report test-set accuracy.
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# fit() returns the estimator itself, so construction and training are chained.
knn = KNeighborsClassifier(n_neighbors=5).fit(train_data, train_target)
y_pred = knn.predict(test_data)

# Show predicted labels next to the true labels, then the fraction that match.
print('预测标签：', y_pred)
print('实际标签：', test_target)
print('准确率 Accuracy: ', accuracy_score(test_target, y_pred))

预测标签： [1 1 1]
实际标签： [1 1 0]
准确率 Accuracy:  0.6666666666666666


In [12]:
# Precision and recall of the KNN predictions on the test set.
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

# Precision: share of predicted positives that are truly positive.
print('查准率： ', precision_score(y_true=test_target, y_pred=y_pred))
# Recall: share of true positives that were predicted positive.
print('查全率： ', recall_score(y_true=test_target, y_pred=y_pred))


查准率：  0.6666666666666666
查全率：  1.0


In [13]:
# F1 score: harmonic mean of precision and recall for the test-set predictions.
from sklearn.metrics import f1_score

print('F1： ', f1_score(y_true=test_target, y_pred=y_pred))

F1：  0.8


In [24]:
# Confusion matrix: rows are the true classes, columns the predicted classes.
from sklearn.metrics import confusion_matrix

print(confusion_matrix(y_true=test_target, y_pred=y_pred))

# ROC curve and AUC for the KNN classifier on the test set.
from sklearn.metrics import roc_curve
from sklearn.metrics import auc

# roc_curve sweeps a decision threshold over continuous scores. Feeding it the
# hard 0/1 predictions collapses the curve to a single operating point and makes
# the AUC uninformative, so use the predicted probability of the positive class.
# Column 1 of predict_proba corresponds to knn.classes_[1], i.e. encoded label 1.
y_score = knn.predict_proba(test_data)[:, 1]

fpr, tpr, threshold = roc_curve(test_target, y_score)
print('AUC: \n', auc(fpr, tpr))

[[0 1]
 [0 2]]
AUC: 
 0.5
