In [1]:
import numpy as np
from sklearn import datasets

In [4]:
# Load the bundled digits dataset and expose its feature matrix and label vector.
digits = datasets.load_digits()
X, y = digits.data, digits.target

### 将digits数据集变成极度偏斜的：

In [6]:
# Binary, heavily skewed target: 1 where the digit is 9, 0 everywhere else.
# The comparison produces a fresh array, so digits.target itself is never modified;
# casting back to the original dtype keeps y an integer label vector.
y = (digits.target == 9).astype(digits.target.dtype)

In [9]:
from sklearn.model_selection import train_test_split

# Hold out a test split; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=666,
)

In [10]:
from sklearn.linear_model import LogisticRegression

# Fit a default-configured logistic regression (fit returns the estimator itself),
# then display its score on the held-out split.
log_reg = LogisticRegression().fit(X_train, y_train)
log_reg.score(X_test, y_test)



0.9755555555555555

In [11]:
# Predicted 0/1 labels for the test split; every metric below is computed from these.
y_log_predict = log_reg.predict(X_test)

### TN: true是主对角线的格子，是预测neg，是左上角的格子

In [13]:
def TN(y_true, y_predict):
    """Count true negatives: samples whose true label is 0 and predicted label is 0.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 = negative, 1 = positive).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    numpy integer
        Number of positions where both labels are 0.

    Raises
    ------
    AssertionError
        If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # Coerce to arrays so plain lists/tuples compare element-wise; without this,
    # ``[0, 1] == 0`` is a single False and the count silently collapses to 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 0) & (y_predict == 0))

# True negatives for the logistic-regression predictions on the test split.
TN(y_test, y_log_predict)

403

### FP: false是副对角线的格子，是预测pos，是右上角的格子

In [14]:
def FP(y_true, y_predict):
    """Count false positives: samples whose true label is 0 but predicted label is 1.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 = negative, 1 = positive).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    numpy integer
        Number of positions where the true label is 0 and the prediction is 1.

    Raises
    ------
    AssertionError
        If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # Coerce to arrays so plain lists/tuples compare element-wise; without this,
    # ``[0, 1] == 0`` is a single False and the count silently collapses to 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 0) & (y_predict == 1))

# False positives for the logistic-regression predictions on the test split.
FP(y_test, y_log_predict)

2

### FN: false是副对角线的格子，是预测neg，是左下角的格子

In [16]:
def FN(y_true, y_predict):
    """Count false negatives: samples whose true label is 1 but predicted label is 0.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 = negative, 1 = positive).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    numpy integer
        Number of positions where the true label is 1 and the prediction is 0.

    Raises
    ------
    AssertionError
        If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # Coerce to arrays so plain lists/tuples compare element-wise; without this,
    # ``[0, 1] == 1`` is a single False and the count silently collapses to 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 1) & (y_predict == 0))

# False negatives for the logistic-regression predictions on the test split.
FN(y_test, y_log_predict)

9

### TP: true是主对角线的格子，是预测pos，是右下角的格子

In [17]:
def TP(y_true, y_predict):
    """Count true positives: samples whose true label is 1 and predicted label is 1.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 = negative, 1 = positive).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    numpy integer
        Number of positions where both labels are 1.

    Raises
    ------
    AssertionError
        If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # Coerce to arrays so plain lists/tuples compare element-wise; without this,
    # ``[0, 1] == 1`` is a single False and the count silently collapses to 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 1) & (y_predict == 1))

# True positives for the logistic-regression predictions on the test split.
TP(y_test, y_log_predict)

36

### Confusion matrix

In [20]:
def confusion_matrix(y_true, y_predict):
    """Assemble the 2x2 binary confusion matrix from the four count helpers.

    Rows index the true label (0, then 1) and columns index the predicted
    label (0, then 1), so the layout is ``[[TN, FP], [FN, TP]]``.
    """
    true_negative_row = [TN(y_true, y_predict), FP(y_true, y_predict)]
    true_positive_row = [FN(y_true, y_predict), TP(y_true, y_predict)]
    return np.array([true_negative_row, true_positive_row])

# Hand-written confusion matrix for the test split, laid out as [[TN, FP], [FN, TP]].
confusion_matrix(y_test, y_log_predict)

array([[403,   2],
       [  9,  36]])

### Precision score

In [22]:
def precision_score(y_true, y_predict):
    """Return precision, TP / (TP + FP): the share of predicted positives that are correct.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 = negative, 1 = positive).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    float
        The precision, or 0.0 when nothing was predicted positive
        (TP + FP == 0).
    """
    tp = TP(y_true, y_predict)
    fp = FP(y_true, y_predict)
    predicted_positives = tp + fp
    # Check the denominator explicitly: NumPy integer division by zero only
    # emits a RuntimeWarning and yields nan, so a try/except never fires.
    if predicted_positives == 0:
        return 0.0
    return tp / predicted_positives
    
# Precision of the logistic-regression predictions on the test split.
precision_score(y_test, y_log_predict)

0.9473684210526315

### Recall score

In [23]:
def recall_score(y_true, y_predict):
    """Return recall, TP / (TP + FN): the share of actual positives that were found.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 = negative, 1 = positive).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    float
        The recall, or 0.0 when there are no actual positives
        (TP + FN == 0).
    """
    tp = TP(y_true, y_predict)
    fn = FN(y_true, y_predict)
    actual_positives = tp + fn
    # Check the denominator explicitly: NumPy integer division by zero only
    # emits a RuntimeWarning and yields nan, so a try/except never fires.
    if actual_positives == 0:
        return 0.0
    return tp / actual_positives
    
# Recall of the logistic-regression predictions on the test split.
recall_score(y_test, y_log_predict)

0.8

### Confusion matrix, precision score and recall score in sklearn

In [24]:
from sklearn.metrics import confusion_matrix

# After the import above, confusion_matrix is scikit-learn's implementation
# (it shadows the hand-written function of the same name).
confusion_matrix(y_test, y_log_predict)

array([[403,   2],
       [  9,  36]])

In [25]:
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

# These names now refer to the scikit-learn functions imported above,
# not the hand-written versions defined earlier in the notebook.
print(precision_score(y_test, y_log_predict))
print(recall_score(y_test, y_log_predict))

0.9473684210526315
0.8
