# 实现混淆矩阵，精准率与召回率

In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets

In [2]:
digits = datasets.load_digits()
x = digits.data

# Turn the ten-class digits task into a deliberately skewed binary one:
# digit 9 becomes the positive class (1), every other digit becomes 0.
# The boolean mask is cast back to the original label dtype, producing a
# fresh array so digits.target itself is left untouched.
y = (digits.target == 9).astype(digits.target.dtype)

In [3]:
from sklearn.model_selection import train_test_split
# Hold out a test split; random_state is fixed so the split (and therefore
# every metric shown in the cells below) is reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state = 666)

In [4]:
from sklearn.linear_model import LogisticRegression

# Build a default logistic-regression classifier and train it in one step;
# fit() hands back the fitted estimator, which is kept as log_reg.
log_reg = LogisticRegression().fit(x_train, y_train)

# Plain accuracy on the held-out split. On a skewed target like this one a
# high accuracy says little, which is why precision/recall are computed below.
log_reg.score(x_test, y_test)



0.9755555555555555

In [5]:
# Predicted class labels for the test split; reused by every metric cell below.
y_log_pre = log_reg.predict(x_test)

In [6]:
def TN(y_true, y_predict):
    """Count true negatives: samples labelled 0 that were also predicted 0.

    Args:
        y_true: array-like of ground-truth binary labels (0 or 1).
        y_predict: array-like of predicted binary labels, same length.

    Returns:
        NumPy integer count of positions where both labels are 0.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    # Coerce to ndarrays so lists/tuples compare element-wise; a bare
    # `list == 0` is a single False and would silently give a count of 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    assert len(y_true) == len(y_predict)
    return np.sum((y_true == 0) & (y_predict == 0))

def FP(y_true, y_predict):
    """Count false positives: samples labelled 0 that were predicted 1.

    Args:
        y_true: array-like of ground-truth binary labels (0 or 1).
        y_predict: array-like of predicted binary labels, same length.

    Returns:
        NumPy integer count of positions with true label 0 and prediction 1.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    # Coerce to ndarrays so lists/tuples compare element-wise; a bare
    # `list == 0` is a single False and would silently give a count of 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    assert len(y_true) == len(y_predict)
    return np.sum((y_true == 0) & (y_predict == 1))

def FN(y_true, y_predict):
    """Count false negatives: samples labelled 1 that were predicted 0.

    Args:
        y_true: array-like of ground-truth binary labels (0 or 1).
        y_predict: array-like of predicted binary labels, same length.

    Returns:
        NumPy integer count of positions with true label 1 and prediction 0.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    # Coerce to ndarrays so lists/tuples compare element-wise; a bare
    # `list == 1` is a single False and would silently give a count of 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    assert len(y_true) == len(y_predict)
    return np.sum((y_true == 1) & (y_predict == 0))

def TP(y_true, y_predict):
    """Count true positives: samples labelled 1 that were also predicted 1.

    Args:
        y_true: array-like of ground-truth binary labels (0 or 1).
        y_predict: array-like of predicted binary labels, same length.

    Returns:
        NumPy integer count of positions where both labels are 1.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    # Coerce to ndarrays so lists/tuples compare element-wise; a bare
    # `list == 1` is a single False and would silently give a count of 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    assert len(y_true) == len(y_predict)
    return np.sum((y_true == 1) & (y_predict == 1))

In [7]:
def confusion_matrix(y_true, y_predict):
    """Assemble the 2x2 confusion matrix for a binary 0/1 problem.

    Rows are indexed by the true label and columns by the predicted label,
    so the layout is [[TN, FP], [FN, TP]].

    Args:
        y_true: ground-truth binary labels.
        y_predict: predicted binary labels, same length as y_true.

    Returns:
        A 2x2 NumPy array of counts.
    """
    actual_negative_row = [TN(y_true, y_predict), FP(y_true, y_predict)]
    actual_positive_row = [FN(y_true, y_predict), TP(y_true, y_predict)]
    return np.array([actual_negative_row, actual_positive_row])

def precision(y_true, y_predict):
    """Precision of the positive class: TP / (TP + FP).

    Args:
        y_true: ground-truth binary labels.
        y_predict: predicted binary labels, same length as y_true.

    Returns:
        The fraction of positive predictions that are correct, or 0. when
        nothing was predicted positive (the ratio is undefined there).

    Raises:
        AssertionError: propagated from TP/FP when the lengths differ.
    """
    true_pos = TP(y_true, y_predict)
    predicted_pos = true_pos + FP(y_true, y_predict)
    # NumPy integer division by zero yields nan (with a RuntimeWarning)
    # instead of raising, so the empty-denominator case is tested explicitly.
    if predicted_pos == 0:
        return 0.
    return true_pos / predicted_pos

def recall(y_true, y_predict):
    """Recall of the positive class: TP / (TP + FN).

    Args:
        y_true: ground-truth binary labels.
        y_predict: predicted binary labels, same length as y_true.

    Returns:
        The fraction of truly positive samples that were found, or 0. when
        there are no positive samples (the ratio is undefined there).

    Raises:
        AssertionError: propagated from TP/FN when the lengths differ.
    """
    true_pos = TP(y_true, y_predict)
    actual_pos = true_pos + FN(y_true, y_predict)
    # NumPy integer division by zero yields nan (with a RuntimeWarning)
    # instead of raising, so the empty-denominator case is tested explicitly.
    if actual_pos == 0:
        return 0.
    return true_pos / actual_pos

In [8]:
# Layout: rows are true labels, columns are predicted labels -> [[TN, FP], [FN, TP]].
confusion_matrix(y_test, y_log_pre)

array([[403,   2],
       [  9,  36]])

In [9]:
# Of the samples predicted as 1, the fraction that really are 1: TP / (TP + FP).
precision(y_test, y_log_pre)

0.9473684210526315

In [10]:
# Of the samples that really are 1, the fraction predicted as 1: TP / (TP + FN).
recall(y_test, y_log_pre)

0.8

## sklearn中的混淆矩阵，精准率与召回率

In [12]:
# NOTE: this import rebinds the name `confusion_matrix`; from this cell on it
# refers to scikit-learn's implementation, not the hand-written function
# defined earlier in this notebook.
from sklearn.metrics import confusion_matrix

confusion_matrix(y_test, y_log_pre)

array([[403,   2],
       [  9,  36]], dtype=int64)

In [14]:
from sklearn.metrics import precision_score

# scikit-learn's precision; the displayed value matches the hand-written
# precision() result shown earlier.
precision_score(y_test, y_log_pre)

0.9473684210526315

In [15]:
from sklearn.metrics import recall_score

# scikit-learn's recall; the displayed value matches the hand-written
# recall() result shown earlier.
recall_score(y_test, y_log_pre)

0.8

# 总结：

* 1、在预测一组偏斜数据集时，准确率（accuracy）容易虚高，需要使用precision、recall来评价模型的好坏
* 2、precision与recall侧重不同，需根据具体应用场景选择更看重哪一个指标