In [1]:
import numpy as np
from sklearn import datasets

In [2]:
# Use the handwritten-digits dataset as the sample data.
digits = datasets.load_digits()
# The targets are copied so that relabelling `y` leaves digits.target intact.
X, y = digits.data, digits.target.copy()

In [5]:
X

array([[  0.,   0.,   5., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,  10.,   0.,   0.],
       [  0.,   0.,   0., ...,  16.,   9.,   0.],
       ..., 
       [  0.,   0.,   1., ...,   6.,   0.,   0.],
       [  0.,   0.,   2., ...,  12.,   0.,   0.],
       [  0.,   0.,  10., ...,  12.,   1.,   0.]])

In [6]:
# Turn the multi-class problem into a binary one: digit 9 becomes the
# positive class (1) and every other digit the negative class (0).
# This makes the labels heavily skewed -- the class we care about is only
# about 1/10 of the samples.
# Written in place (y[:]) so `y` keeps its identity and dtype.
y[:] = np.where(digits.target == 9, 1, 0)

In [8]:
from sklearn.model_selection import train_test_split
# A fixed random_state keeps the train/test split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

In [9]:
from sklearn.linear_model import LogisticRegression
# Fit a logistic-regression classifier with default settings on the training split.
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
# Score on the held-out test split. With labels this skewed a high score
# alone says little, which is why the confusion-matrix metrics follow.
log_reg.score(X_test, y_test)

0.97555555555555551

In [10]:
y_predict = log_reg.predict(X_test)

In [12]:
def TN(y_true, y_predict):
    """Count true negatives: samples labelled 0 that were also predicted 0.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 or 1).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    numpy integer
        Number of positions where both ``y_true`` and ``y_predict`` are 0.

    Raises
    ------
    AssertionError
        If the two inputs differ in length.
    """
    # Coerce to arrays so plain Python lists are compared element-wise; with a
    # raw list, ``y_true == 0`` is a single bool and the count is silently 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    assert len(y_true) == len(y_predict)
    return np.sum((y_true == 0) & (y_predict == 0))

TN(y_test, y_predict)

403

In [13]:
def FP(y_true, y_predict):
    """Count false positives: samples labelled 0 that were predicted 1.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 or 1).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    numpy integer
        Number of positions where ``y_true`` is 0 and ``y_predict`` is 1.

    Raises
    ------
    AssertionError
        If the two inputs differ in length.
    """
    # Coerce to arrays so plain Python lists are compared element-wise; with a
    # raw list, ``y_true == 0`` is a single bool and the count is silently 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    assert len(y_true) == len(y_predict)
    return np.sum((y_true == 0) & (y_predict == 1))

FP(y_test, y_predict)

2

In [14]:
def FN(y_true, y_predict):
    """Count false negatives: samples labelled 1 that were predicted 0.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 or 1).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    numpy integer
        Number of positions where ``y_true`` is 1 and ``y_predict`` is 0.

    Raises
    ------
    AssertionError
        If the two inputs differ in length.
    """
    # Coerce to arrays so plain Python lists are compared element-wise; with a
    # raw list, ``y_true == 1`` is a single bool and the count is silently 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    assert len(y_true) == len(y_predict)
    return np.sum((y_true == 1) & (y_predict == 0))

FN(y_test, y_predict)

9

In [15]:
def TP(y_true, y_predict):
    """Count true positives: samples labelled 1 that were also predicted 1.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 or 1).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    numpy integer
        Number of positions where both ``y_true`` and ``y_predict`` are 1.

    Raises
    ------
    AssertionError
        If the two inputs differ in length.
    """
    # Coerce to arrays so plain Python lists are compared element-wise; with a
    # raw list, ``y_true == 1`` is a single bool and the count is silently 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    assert len(y_true) == len(y_predict)
    return np.sum((y_true == 1) & (y_predict == 1))

TP(y_test, y_predict)

36

In [17]:
def confusion_matrix(y_true, y_predict):
    """Build the 2x2 confusion matrix for a binary classification result.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 or 1).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    numpy.ndarray
        ``[[TN, FP], [FN, TP]]`` -- rows are the true class (0, then 1),
        columns are the predicted class (0, then 1).
    """
    # Use the y_true argument; the previous version read the notebook-global
    # y_test and therefore ignored whatever labels the caller passed in.
    return np.array([
        [TN(y_true, y_predict), FP(y_true, y_predict)],
        [FN(y_true, y_predict), TP(y_true, y_predict)]
    ])

confusion_matrix(y_test, y_predict)

array([[403,   2],
       [  9,  36]])

In [18]:
def precision_score(y_true, y_predict):
    """Precision of a binary prediction: TP / (TP + FP).

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 or 1).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    float
        Fraction of predicted positives that are truly positive, or 0.0 when
        nothing was predicted positive (the denominator would be zero).
    """
    # Use the y_true argument; the previous version read the notebook-global
    # y_test and therefore ignored whatever labels the caller passed in.
    tp = TP(y_true, y_predict)
    fp = FP(y_true, y_predict)
    predicted_positive = tp + fp
    # Check the denominator explicitly: dividing NumPy integers by zero gives
    # nan with a warning rather than raising, so a try/except never fires.
    if predicted_positive == 0:
        return 0.0
    return tp / predicted_positive
    
precision_score(y_test, y_predict)

0.94736842105263153

In [19]:
def recall_score(y_true, y_predict):
    """Recall of a binary prediction: TP / (TP + FN).

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 or 1).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    float
        Fraction of truly positive samples that were predicted positive, or
        0.0 when there are no positive samples (the denominator would be zero).
    """
    # Use the y_true argument; the previous version read the notebook-global
    # y_test and therefore ignored whatever labels the caller passed in.
    tp = TP(y_true, y_predict)
    fn = FN(y_true, y_predict)
    actual_positive = tp + fn
    # Check the denominator explicitly: dividing NumPy integers by zero gives
    # nan with a warning rather than raising, so a try/except never fires.
    if actual_positive == 0:
        return 0.0
    return tp / actual_positive
    
recall_score(y_test, y_predict)

0.80000000000000004

In [20]:
# Cross-check with scikit-learn. NOTE: this import rebinds the name
# `confusion_matrix`, shadowing the hand-written function defined earlier.
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_predict)

array([[403,   2],
       [  9,  36]])

In [21]:
# Cross-check with scikit-learn. NOTE: this import rebinds the name
# `precision_score`, shadowing the hand-written function defined earlier.
from sklearn.metrics import precision_score
precision_score(y_test, y_predict)

0.94736842105263153

In [22]:
# Cross-check with scikit-learn. NOTE: this import rebinds the name
# `recall_score`, shadowing the hand-written function defined earlier.
from sklearn.metrics import recall_score
recall_score(y_test, y_predict)

0.80000000000000004