# 混淆矩阵

In [None]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Build a synthetic binary-classification dataset (1000 samples, 5 features).
X, y = make_classification(
    n_samples=1000,
    n_features=5,
    n_classes=2,
    random_state=42,
)

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a logistic-regression classifier; fit() returns the estimator itself.
model = LogisticRegression().fit(X_train, y_train)

# Per-class probabilities and hard class labels for the held-out samples.
y_prob = model.predict_proba(X_test)
y_pred = model.predict(X_test)

# Count each confusion-matrix cell by hand from boolean masks over the test set.
pred_is_pos, pred_is_neg = (y_pred == 1), (y_pred == 0)
true_is_pos, true_is_neg = (y_test == 1), (y_test == 0)

tpc = np.sum(pred_is_pos & true_is_pos)  # true positives
tnc = np.sum(pred_is_neg & true_is_neg)  # true negatives
fpc = np.sum(pred_is_pos & true_is_neg)  # false positives
fnc = np.sum(pred_is_neg & true_is_pos)  # false negatives
print(tpc, tnc, fpc, fnc)


# Compute the confusion matrix from the true and predicted test labels.
conf_mat = confusion_matrix(y_test, y_pred)
print(conf_mat)

# Draw the confusion matrix as an annotated Seaborn heatmap and label it
# through the Axes object that heatmap() returns.
heatmap_ax = sns.heatmap(conf_mat, annot=True, fmt='d', cmap='Blues')
heatmap_ax.set_xlabel('Predicted Label')
heatmap_ax.set_ylabel('True Label')
heatmap_ax.set_title('Confusion Matrix')
plt.show()


# Histogram of the first predict_proba column over the test set.
# NOTE(review): column 0 is presumably the probability of class 0 — confirm
# that this, rather than the positive-class column, is the intended plot.
first_col_prob = y_prob[:, 0]
plt.hist(first_col_prob, edgecolor='black', alpha=0.5)
plt.show()

# 相关指标

In [None]:
# Unpack the 2x2 confusion matrix. ravel() flattens it row by row; with rows
# as true labels and columns as predicted labels (sklearn's layout) this
# yields the order (tn, fp, fn, tp).
tn, fp, fn, tp = conf_mat.ravel()

# Accuracy: share of all samples that were classified correctly.
print("准确率:", (tp + tn) / (tp + tn + fp + fn))
# Recall (true positive rate): share of actual positives that were detected.
# The previous formula, fp / (tn + fp), was the false positive rate.
print("召回率:", tp / (tp + fn))
# False positive rate: share of actual negatives predicted as positive.
print("假正率:", fp / (fp + tn))
# Precision: share of predicted positives that are actually positive.
print("精确率:", tp / (tp + fp))

# ROC

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix,roc_curve, auc
import seaborn as sns

# Decision threshold applied to the scores, and the spread of each score
# distribution.
threshold, stddev = 0.5, 0.2

# Draw scores for the two classes: one distribution centred at 0 and one at 1.
# The seed makes the draws reproducible; the draw order must stay data1, data2.
np.random.seed(0)
data1 = np.random.normal(0, stddev, 2000)
data2 = np.random.normal(1, stddev, 2000)

# Keep only the samples that fall inside the closed interval [0, 1].
data1 = data1[np.logical_and(data1 >= 0, data1 <= 1)]
data2 = data2[np.logical_and(data2 >= 0, data2 <= 1)]

# Ground-truth labels: 0 for every data1 sample, then 1 for every data2 sample.
true_data = np.concatenate((np.zeros_like(data1), np.ones_like(data2)))

# Hard 0/1 predictions: a score at or above the threshold is predicted as 1.
pred1 = np.greater_equal(data1, threshold).astype(int)
pred2 = np.greater_equal(data2, threshold).astype(int)
pred_data = np.concatenate((pred1, pred2))

# Overlay the two score distributions (data1 in green, data2 in red) and mark
# the decision threshold with a dashed vertical line.
for samples, shade in ((data1, 'green'), (data2, 'red')):
    plt.hist(samples, bins=100, alpha=0.5, color=shade)
plt.axvline(threshold, linestyle='--', color='blue')
plt.show()


# Confusion matrix of the thresholded predictions against the true labels,
# drawn as an annotated heatmap and labelled through the returned Axes.
conf_mat = confusion_matrix(true_data, pred_data)
heatmap_ax = sns.heatmap(conf_mat, annot=True, fmt='d', cmap='Blues')
heatmap_ax.set_xlabel('Predicted Label')
heatmap_ax.set_ylabel('True Label')
heatmap_ax.set_title('Confusion Matrix')
plt.show()


# ROC analysis needs the continuous scores, not the labels already thresholded
# at `threshold`: hard 0/1 predictions collapse the curve to a single operating
# point. The scores are concatenated in the same order as true_data
# (data1 samples first, then data2 samples).
score_data = np.concatenate((data1, data2))
fpr, tpr, thresholds = roc_curve(true_data, score_data)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
# Diagonal reference line where the true positive rate equals the false
# positive rate.
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
# Leave a little headroom above TPR = 1 so the curve is not clipped by the frame.
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC')
plt.legend(loc="lower right")
plt.show()