# Precision and Recall

[![LOR10.png](https://i.postimg.cc/c1QhzCmN/LOR10.png)](https://postimg.cc/HjkbcpRS)

[![LOR11.png](https://i.postimg.cc/1tDGb7DK/LOR11.png)](https://postimg.cc/7GYJ2VsC)

### 1. 实现混淆矩阵，精准率和召回率

In [1]:
import numpy as np
from sklearn import datasets

In [2]:
digits = datasets.load_digits()

X = digits.data

# Collapse the ten-digit classification task into a binary one: the digit 9
# becomes the positive class (1) and every other digit the negative class (0).
# This deliberately produces a heavily skewed dataset.
y = (digits.target == 9).astype(digits.target.dtype)

In [3]:
np.sum(y == 1)

180

In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

In [5]:
from sklearn.linear_model import LogisticRegression

# The target built above is binary, so the `multi_class` option has no effect
# here. It is omitted because recent scikit-learn releases deprecate it and
# emit a FutureWarning when it is passed.
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
# Score on the held-out split; displayed as the cell output.
log_reg.score(X_test, y_test)



0.9755555555555555

In [6]:
y_log_predict = log_reg.predict(X_test)

In [7]:
def TN(y_true, y_predict):
    """Count true negatives: samples labelled 0 that were also predicted 0.

    Args:
        y_true: array-like of ground-truth labels (0 or 1).
        y_predict: array-like of predicted labels, same length as ``y_true``.

    Returns:
        The number of positions where both ``y_true`` and ``y_predict`` are 0.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # Coerce to arrays so the comparisons are element-wise even for plain
    # Python lists (a list compared with 0 is just a single False).
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 0) & (y_predict == 0))
TN(y_test, y_log_predict)

403

In [8]:
def FN(y_true, y_predict):
    """Count false negatives: samples labelled 1 that were predicted 0.

    Args:
        y_true: array-like of ground-truth labels (0 or 1).
        y_predict: array-like of predicted labels, same length as ``y_true``.

    Returns:
        The number of positions where ``y_true`` is 1 and ``y_predict`` is 0.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # Coerce to arrays so the comparisons are element-wise even for plain
    # Python lists (a list compared with a scalar is just a single False).
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 1) & (y_predict == 0))
FN(y_test, y_log_predict)

9

In [9]:
def FP(y_true, y_predict):
    """Count false positives: samples labelled 0 that were predicted 1.

    Args:
        y_true: array-like of ground-truth labels (0 or 1).
        y_predict: array-like of predicted labels, same length as ``y_true``.

    Returns:
        The number of positions where ``y_true`` is 0 and ``y_predict`` is 1.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # Coerce to arrays so the comparisons are element-wise even for plain
    # Python lists (a list compared with a scalar is just a single False).
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 0) & (y_predict == 1))

FP(y_test, y_log_predict)

2

In [10]:
def TP(y_true, y_predict):
    """Count true positives: samples labelled 1 that were also predicted 1.

    Args:
        y_true: array-like of ground-truth labels (0 or 1).
        y_predict: array-like of predicted labels, same length as ``y_true``.

    Returns:
        The number of positions where both ``y_true`` and ``y_predict`` are 1.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # Coerce to arrays so the comparisons are element-wise even for plain
    # Python lists (a list compared with a scalar is just a single False).
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 1) & (y_predict == 1))

TP(y_test, y_log_predict)

36

In [12]:
def confusion_matrix(y_true, y_predict):
    """Build the 2x2 confusion matrix of a binary classification result.

    Rows correspond to the true class (0, then 1) and columns to the
    predicted class (0, then 1), i.e. ``[[TN, FP], [FN, TP]]``.

    Args:
        y_true: ground-truth labels (0 or 1).
        y_predict: predicted labels, same length as ``y_true``.

    Returns:
        A 2x2 ``numpy`` array of counts.
    """
    # Compute from the arguments rather than the notebook-level
    # y_test / y_log_predict, so the function works for any label pair.
    return np.array([
        [TN(y_true, y_predict), FP(y_true, y_predict)],
        [FN(y_true, y_predict), TP(y_true, y_predict)]
    ])

confusion_matrix(y_test, y_log_predict)

array([[403,   2],
       [  9,  36]])

In [13]:
def precision_score(y_true, y_predict):
    """Compute precision: TP / (TP + FP).

    Args:
        y_true: ground-truth labels (0 or 1).
        y_predict: predicted labels, same length as ``y_true``.

    Returns:
        The fraction of predicted positives that are truly positive, or
        ``0.0`` when nothing was predicted positive.
    """
    # Compute from the arguments rather than the notebook-level
    # y_test / y_log_predict, so the function works for any label pair.
    tp = TP(y_true, y_predict)
    fp = FP(y_true, y_predict)
    predicted_positive = tp + fp
    # Check the denominator explicitly: the counts come from np.sum, and
    # NumPy integer division by zero yields nan with a warning instead of
    # raising, so a try/except around the division would never trigger.
    if predicted_positive == 0:
        return 0.0
    return tp / predicted_positive
    
precision_score(y_test, y_log_predict)

0.9473684210526315

In [16]:
def recall_score(y_true, y_predict):
    """Compute recall: TP / (TP + FN).

    Args:
        y_true: ground-truth labels (0 or 1).
        y_predict: predicted labels, same length as ``y_true``.

    Returns:
        The fraction of truly positive samples that were predicted positive,
        or ``0.0`` when there are no positive samples.
    """
    # Compute from the arguments rather than the notebook-level
    # y_test / y_log_predict, so the function works for any label pair.
    tp = TP(y_true, y_predict)
    fn = FN(y_true, y_predict)
    actual_positive = tp + fn
    # Check the denominator explicitly: the counts come from np.sum, and
    # NumPy integer division by zero yields nan with a warning instead of
    # raising, so a try/except around the division would never trigger.
    if actual_positive == 0:
        return 0.0
    return tp / actual_positive
    
recall_score(y_test, y_log_predict)

0.8

### 2. scikit-learn 中的混淆矩阵，精准率和召回率

In [17]:
from sklearn.metrics import confusion_matrix

confusion_matrix(y_test, y_log_predict)

array([[403,   2],
       [  9,  36]], dtype=int64)

In [18]:
from sklearn.metrics import precision_score

precision_score(y_test, y_log_predict)

0.9473684210526315

In [None]:
from sklearn.metrics import re