# 混淆矩阵，精准率和召回率

## 混淆矩阵

|  真实\预测| 0  | 1| 
|  ----  | ----  | ----|
| 0  | TN | FP|
| 1  | FN | TP|

## 精准率

$$\text{precision} = \frac{TP}{TP+FP}$$

## 召回率

$$\text{recall} = \frac{TP}{TP+FN}$$

In [1]:
import numpy as np
from sklearn import datasets

In [3]:
# Load the digits dataset and recast the 10-class target as a binary one:
# label 1 where the digit is 9, label 0 for every other digit.
digits = datasets.load_digits()
X = digits.data
# Boolean mask -> 0/1 integers, keeping the dtype of the original target array.
y = (digits.target == 9).astype(digits.target.dtype)

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as the test set; the fixed random_state keeps
# the split (and therefore every number reported below) reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)

In [5]:
from sklearn.linear_model import LogisticRegression


# Fit a logistic-regression classifier with default hyperparameters.
# NOTE(review): the captured output of this cell reports that the solver
# reached its iteration limit. Raising max_iter or scaling the features (as
# the warning suggests) would address that, but may change the scores
# recorded in the cells below.
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression()

In [6]:
log_reg.score(X_test, y_test)

0.9638888888888889

In [7]:
y_log_predict = log_reg.predict(X_test)

In [9]:
def TN(y_true, y_predict):
    """Count true negatives: true label 0 and predicted label 0.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 or 1).
    y_predict : array-like
        Predicted binary labels; must have the same length as ``y_true``.

    Returns
    -------
    NumPy integer
        Number of positions where both labels are 0.
    """
    assert len(y_true) == len(y_predict)
    # Coerce to arrays so the comparisons are element-wise even for plain
    # Python lists (``[0, 1] == 0`` is a single ``False``, which would
    # silently make the count 0).
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 0) & (y_predict == 0))
TN(y_test, y_log_predict)

318

In [10]:
def FP(y_true, y_predict):
    """Count false positives: true label 0 but predicted label 1.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 or 1).
    y_predict : array-like
        Predicted binary labels; must have the same length as ``y_true``.

    Returns
    -------
    NumPy integer
        Number of positions where the true label is 0 and the prediction is 1.
    """
    assert len(y_true) == len(y_predict)
    # Coerce to arrays so the comparisons are element-wise even for plain
    # Python lists (a list compared to an int is a single ``False``).
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 0) & (y_predict == 1))
FP(y_test, y_log_predict)

6

In [11]:
def FN(y_true, y_predict):
    """Count false negatives: true label 1 but predicted label 0.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 or 1).
    y_predict : array-like
        Predicted binary labels; must have the same length as ``y_true``.

    Returns
    -------
    NumPy integer
        Number of positions where the true label is 1 and the prediction is 0.
    """
    assert len(y_true) == len(y_predict)
    # Coerce to arrays so the comparisons are element-wise even for plain
    # Python lists (a list compared to an int is a single ``False``).
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 1) & (y_predict == 0))
FN(y_test, y_log_predict)

7

In [12]:
def TP(y_true, y_predict):
    """Count true positives: true label 1 and predicted label 1.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 or 1).
    y_predict : array-like
        Predicted binary labels; must have the same length as ``y_true``.

    Returns
    -------
    NumPy integer
        Number of positions where both labels are 1.
    """
    assert len(y_true) == len(y_predict)
    # Coerce to arrays so the comparisons are element-wise even for plain
    # Python lists (a list compared to an int is a single ``False``).
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 1) & (y_predict == 1))
TP(y_test, y_log_predict)

29

In [13]:
def confusion_matrix(y_true, y_predict):
    """Build the 2x2 confusion matrix for binary labels.

    Rows are indexed by the true label and columns by the predicted label,
    so the layout is ``[[TN, FP], [FN, TP]]``.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 or 1).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    numpy.ndarray
        A 2x2 array of counts.
    """
    # Use the function's own arguments; reading the notebook-level
    # y_test / y_log_predict here would ignore whatever the caller passed.
    return np.array([
        [TN(y_true, y_predict), FP(y_true, y_predict)],
        [FN(y_true, y_predict), TP(y_true, y_predict)]
    ])
confusion_matrix(y_test, y_log_predict)

array([[318,   6],
       [  7,  29]])

In [14]:
def precision_score(y_true, y_predict):
    """Compute precision = TP / (TP + FP) for binary labels.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 or 1).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    float
        The precision, or ``0.0`` when nothing was predicted positive
        (TP + FP == 0).
    """
    tp = TP(y_true, y_predict)
    fp = FP(y_true, y_predict)
    predicted_positive = tp + fp
    # The counts are NumPy integers, for which 0 / 0 evaluates to nan with a
    # RuntimeWarning rather than raising, so a try/except around the division
    # never triggers. Test the denominator explicitly instead.
    if predicted_positive == 0:
        return 0.0
    return tp / predicted_positive
    
precision_score(y_test, y_log_predict)

0.8285714285714286

In [15]:
def recall_score(y_true, y_predict):
    """Compute recall = TP / (TP + FN) for binary labels.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 or 1).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    float
        The recall, or ``0.0`` when there are no actual positives
        (TP + FN == 0).
    """
    tp = TP(y_true, y_predict)
    fn = FN(y_true, y_predict)
    actual_positive = tp + fn
    # The counts are NumPy integers, for which 0 / 0 evaluates to nan with a
    # RuntimeWarning rather than raising, so a try/except around the division
    # never triggers. Test the denominator explicitly instead.
    if actual_positive == 0:
        return 0.0
    return tp / actual_positive
    
recall_score(y_test, y_log_predict)

0.8055555555555556

## scikit-learn 中的混淆矩阵、精准率和召回率

In [16]:
from sklearn.metrics import confusion_matrix

confusion_matrix(y_test, y_log_predict)

array([[318,   6],
       [  7,  29]], dtype=int64)

In [17]:
from sklearn.metrics import precision_score

precision_score(y_test, y_log_predict)

0.8285714285714286

In [18]:
from sklearn.metrics import recall_score

recall_score(y_test, y_log_predict)

0.8055555555555556