# 实现混淆矩阵,召回率和精准率

In [39]:
import numpy as np
from sklearn import datasets

In [41]:
# Load the digits dataset bundled with scikit-learn.
digits = datasets.load_digits()
X = digits.data
# Work on a copy of the labels: y is overwritten in place further down,
# while digits.target is still needed unchanged to build the masks.
y = digits.target.copy()

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
# Hold out a test split; no random_state is passed, so the split differs per run.
X_train, X_test, y_train, y_test = train_test_split(X, y)
# Degree-2 polynomial features -> standardisation -> logistic regression,
# built and fitted in one expression (fit returns the fitted pipeline).
poly_log = Pipeline(
    [
        ("Poly", PolynomialFeatures(2)),
        ("Standard", StandardScaler()),
        ("log", LogisticRegression()),
    ]
).fit(X_train, y_train)
# Accuracy on the held-out split.
poly_log.score(X_test, y_test)



0.9866666666666667

In [4]:
# Plain logistic regression on the raw pixel features, for comparison.
# fit() returns the fitted estimator, so construction and training are chained.
log_reg = LogisticRegression().fit(X_train, y_train)
log_reg.score(X_test, y_test)



0.9644444444444444

In [5]:
from sklearn.neighbors import KNeighborsClassifier
# k-nearest-neighbours classifier with default settings, same split as above.
# fit() returns the fitted estimator, so construction and training are chained.
knn = KNeighborsClassifier().fit(X_train, y_train)
knn.score(X_test, y_test)

0.9911111111111112

In [43]:
# Turn the multi-class digit labels into a binary problem: 1 for digit 9, else 0.
# The mask comes from the untouched digits.target and is written into y in
# place (True/False are stored as 1/0 in y's existing integer dtype).
y[:] = digits.target == 9
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)
# Retrain logistic regression on the binary labels and report its accuracy.
log_reg = LogisticRegression().fit(X_train, y_train)
log_reg.score(X_test, y_test)



0.9755555555555555

In [44]:
# Predicted binary labels for the test split, used by the metric cells below.
y_predict = log_reg.predict(X_test)

In [55]:
# False positives: samples predicted as class 1 whose true label is not 1.
# The two boolean masks are combined and counted inside NumPy instead of
# comparing against False and iterating with the built-in sum().
FP = np.sum((y_predict == 1) & (y_test != 1))

In [56]:
# True positives: samples predicted as class 1 whose true label is also 1.
# Counted with a single combined mask inside NumPy rather than the built-in sum().
TP = np.sum((y_predict == 1) & (y_test == 1))

In [57]:
# Precision = TP / (TP + FP): of the samples predicted as class 1,
# the fraction whose true label is 1.
precision = TP / (TP+FP)
precision

0.9473684210526315

In [58]:
# False negatives: samples predicted as class 0 whose true label is not 0.
# The two boolean masks are combined and counted inside NumPy instead of
# comparing against False and iterating with the built-in sum().
FN = np.sum((y_predict == 0) & (y_test != 0))

In [59]:
# Recall = TP / (TP + FN): of the samples whose true label is 1,
# the fraction that were predicted as 1.
recall = TP / (TP + FN)
recall

0.8

In [73]:
def TP(y_true, y_predict):
    """Count true positives: positions labelled 1 that were also predicted 1.

    Args:
        y_true: Sequence or array of ground-truth labels.
        y_predict: Sequence or array of predicted labels, same length.

    Returns:
        The number of positions where both inputs equal 1 (a NumPy integer).

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # A plain list compared with 1 yields a single False instead of an
    # element-wise mask, so convert to arrays before comparing.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 1) & (y_predict == 1))
def TN(y_true, y_predict):
    """Count true negatives: positions labelled 0 that were also predicted 0.

    Args:
        y_true: Sequence or array of ground-truth labels.
        y_predict: Sequence or array of predicted labels, same length.

    Returns:
        The number of positions where both inputs equal 0 (a NumPy integer).

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # A plain list compared with 0 yields a single False instead of an
    # element-wise mask, so convert to arrays before comparing.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 0) & (y_predict == 0))
def FP(y_true, y_predict):
    """Count false positives: positions labelled 0 that were predicted 1.

    Args:
        y_true: Sequence or array of ground-truth labels.
        y_predict: Sequence or array of predicted labels, same length.

    Returns:
        The number of positions where ``y_true`` is 0 and ``y_predict`` is 1
        (a NumPy integer).

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # A plain list compared with a scalar yields a single False instead of an
    # element-wise mask, so convert to arrays before comparing.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 0) & (y_predict == 1))
def FN(y_true, y_predict):
    """Count false negatives: positions labelled 1 that were predicted 0.

    Args:
        y_true: Sequence or array of ground-truth labels.
        y_predict: Sequence or array of predicted labels, same length.

    Returns:
        The number of positions where ``y_true`` is 1 and ``y_predict`` is 0
        (a NumPy integer).

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # A plain list compared with a scalar yields a single False instead of an
    # element-wise mask, so convert to arrays before comparing.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 1) & (y_predict == 0))

In [74]:
def confusion_matrix(y_true, y_predict):
    """Build the 2x2 confusion matrix for labels encoded as 0 and 1.

    Rows follow the true class (0, then 1) and columns the predicted class
    (0, then 1), giving the layout ``[[TN, FP], [FN, TP]]``.
    """
    true_zero_row = [TN(y_true, y_predict), FP(y_true, y_predict)]
    true_one_row = [FN(y_true, y_predict), TP(y_true, y_predict)]
    return np.array([true_zero_row, true_one_row])

In [75]:
# Confusion matrix of the logistic-regression predictions, laid out as [[TN, FP], [FN, TP]].
confusion_matrix(y_test, y_predict)

array([[403,   2],
       [  9,  36]])

In [76]:
def precision_score(y_true, y_predict):
    """Return the precision TP / (TP + FP) for labels encoded as 0 and 1.

    Args:
        y_true: Ground-truth labels.
        y_predict: Predicted labels, same length as ``y_true``.

    Returns:
        The fraction of predicted positives that are truly positive, or 0.0
        when nothing was predicted positive (the ratio is undefined there).

    Raises:
        AssertionError: Propagated from the counting helpers when the inputs
            differ in length, instead of being reported as a score of 0.
    """
    true_pos = TP(y_true, y_predict)
    false_pos = FP(y_true, y_predict)
    predicted_pos = true_pos + false_pos
    # The helpers return NumPy integers, for which 0 / 0 produces nan plus a
    # warning rather than an exception, so the empty case is tested explicitly.
    if predicted_pos == 0:
        return 0.0
    return true_pos / predicted_pos

In [77]:
# Precision of the logistic-regression predictions, using the function defined above.
precision_score(y_test, y_predict)

0.9473684210526315

In [86]:
def recall_score(y_true, y_predict):
    """Return the recall TP / (TP + FN) for labels encoded as 0 and 1.

    Args:
        y_true: Ground-truth labels.
        y_predict: Predicted labels, same length as ``y_true``.

    Returns:
        The fraction of truly positive samples that were predicted positive,
        or 0.0 when there are no positive samples (the ratio is undefined there).

    Raises:
        AssertionError: Propagated from the counting helpers when the inputs
            differ in length, instead of being reported as a score of 0.
    """
    true_pos = TP(y_true, y_predict)
    false_neg = FN(y_true, y_predict)
    actual_pos = true_pos + false_neg
    # The helpers return NumPy integers, for which 0 / 0 produces nan plus a
    # warning rather than an exception, so the empty case is tested explicitly.
    if actual_pos == 0:
        return 0.0
    return true_pos / actual_pos

In [87]:
# Recall of the logistic-regression predictions, using the function defined above.
recall_score(y_test, y_predict)

0.8

In [88]:
# Refit k-nearest neighbours on the binary (9 vs. not-9) split.
# fit() returns the fitted estimator, so construction and training are chained.
knn = KNeighborsClassifier().fit(X_train, y_train)
print(knn.score(X_test, y_test))
# Keep the predictions for the precision/recall cells that follow.
knn_y_predict = knn.predict(X_test)

0.9977777777777778


In [89]:
# Precision of the kNN predictions on the binary task.
precision_score(y_test, knn_y_predict)

1.0

In [90]:
# Recall of the kNN predictions on the binary task.
recall_score(y_test, knn_y_predict)

0.9777777777777777

In [91]:
# Degree-2 polynomial features -> standardisation -> logistic regression,
# refitted on the binary (9 vs. not-9) split; fit() returns the fitted pipeline.
poly_log = Pipeline(
    [
        ("Poly", PolynomialFeatures(2)),
        ("Standard", StandardScaler()),
        ("log", LogisticRegression()),
    ]
).fit(X_train, y_train)
# Keep the predictions for the metric cells that follow.
poly_y_predict = poly_log.predict(X_test)
precision_score(y_test, poly_y_predict)



0.8775510204081632

In [92]:
# Recall of the polynomial pipeline's predictions on the binary task.
recall_score(y_test, poly_y_predict)

0.9555555555555556

In [93]:
# This import rebinds `confusion_matrix` to scikit-learn's implementation,
# replacing the hand-written function of the same name defined earlier in this file.
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_predict)

array([[403,   2],
       [  9,  36]], dtype=int64)

In [94]:
# This import rebinds `precision_score` to scikit-learn's implementation,
# replacing the hand-written function of the same name defined earlier in this file.
from sklearn.metrics import precision_score
precision_score(y_test, y_predict)

0.9473684210526315

In [95]:
# This import rebinds `recall_score` to scikit-learn's implementation,
# replacing the hand-written function of the same name defined earlier in this file.
from sklearn.metrics import recall_score
recall_score(y_test, y_predict)

0.8

In [98]:
# F1 score of the polynomial pipeline's predictions, computed by scikit-learn.
from sklearn.metrics import f1_score
f1_score(y_test, poly_y_predict)

0.9148936170212767

In [101]:
def f1_score(y_true, y_predict):
    """Return the F1 score: the harmonic mean of precision and recall.

    ``precision_score`` and ``recall_score`` are looked up when the function
    is called, so whichever implementations are bound to those names at that
    time are used. Defining this function rebinds ``f1_score``, replacing the
    scikit-learn function imported earlier in this file.

    Args:
        y_true: Ground-truth labels.
        y_predict: Predicted labels.

    Returns:
        ``2 * precision * recall / (precision + recall)``, or 0.0 when
        precision and recall are both zero (the ratio is undefined there).
    """
    preci = precision_score(y_true, y_predict)
    recall = recall_score(y_true, y_predict)
    total = preci + recall
    # Test the zero case explicitly: with NumPy floats 0 / 0 gives nan plus a
    # warning instead of raising, so an exception handler would not catch it.
    if total == 0:
        return 0.0
    return 2 * preci * recall / total

In [102]:
# F1 score of the logistic-regression predictions, using the function defined above.
f1_score(y_test, y_predict)

0.8674698795180723