In [8]:
from sklearn import metrics, datasets
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
import numpy as np
%matplotlib inline

In [9]:
#回歸問題
#生成資料
X,y = datasets.make_regression(n_features = 1, random_state = 42, noise = 4)
model = LinearRegression()
# 將資料放進模型訓練
model.fit(X, y)
#進行預測
prediction = model.predict(X)
#使用mse評估
mae = metrics.mean_absolute_error(prediction, y)
#使用msa評估
mse = metrics.mean_squared_error(prediction, y)
#使用r-square評估
r2 = metrics.r2_score(prediction, y)
print("mse : ", mae)
print("msa : ", mse)
print("r+square : ", r2)

mse :  2.841797252565566
msa :  12.48868006739824
r+square :  0.9916581036260311


In [10]:
#分解問題
# 使用 sklearn 內含的乳癌資料集
cancer = datasets.load_breast_cancer() 
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=50, random_state=0)

In [11]:
# 測試集中的 label
print(y_test)

[0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 0 1
 0 1 0 0 1 0 1 1 0 1 1 1 0]


In [12]:
# 我們先隨機生成 50 筆預測值，範圍都在 0~1 之間，代表機率值
y_pred = np.random.random((50,))

In [13]:
print(y_pred)

[0.49045999 0.64081435 0.37786969 0.81631016 0.45100652 0.26651602
 0.98193758 0.30308256 0.69139528 0.39873434 0.3652403  0.58129142
 0.75594864 0.79805753 0.94963733 0.38997701 0.03614678 0.42823222
 0.88407421 0.57370017 0.73810747 0.78679127 0.40056299 0.42434015
 0.77531047 0.76267427 0.68507471 0.07891992 0.66979056 0.23516996
 0.47376302 0.56192084 0.49607036 0.79079419 0.60419522 0.46667303
 0.12015167 0.94882927 0.25068631 0.66359704 0.6075676  0.3639139
 0.51649405 0.79471991 0.58642591 0.1804026  0.93207299 0.6345341
 0.73956135 0.98580952]


In [14]:
# 使用 roc_auc_score 來評估。 **這邊特別注意 y_pred 必須要放機率值進去!**
auc = metrics.roc_auc_score(y_test, y_pred)
# 得到結果約 0.5，與亂猜的結果相近，因為我們的預測值是用隨機生成的
print("AUC : ", auc)

AUC :  0.4702886247877759


In [15]:
#F1-Score
threshold = 0.5
 # 使用 np.where 函數, 將 y_pred > 0.5 的值變為 1，小於 0.5 的為 0
y_pred_binarized = np.where(y_pred > threshold,1, 0)
# 使用 F1-Score 評估
f1 = metrics.f1_score(y_test, y_pred_binarized)
# 使用 Precision 評估
precision = metrics.precision_score(y_test, y_pred_binarized)
# 使用 recall 評估
recall = metrics.recall_score(y_test, y_pred_binarized)
print("F1-Score : ", precision)
print("precision : ", precision)
print("Recall :", recall)

F1-Score :  0.5862068965517241
precision :  0.5862068965517241
Recall : 0.5483870967741935


In [16]:
#練習時間
# 生成 100 個隨機的 0 / 1 prediction
y_pred = np.random.randint(2, size = 100)
# 生成 100 個隨機的 0 / 1 ground truth
y_true = np.random.randint(2, size = 100)

In [17]:
y_pred

array([1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0,
       0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0,
       1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0])

In [18]:

import os
 
os.environ["PATH"] += os.pathsep + 'C:/Users/User/Graphviz2.38/bin'

In [19]:
precision =  metrics.precision_score(y_true, y_pred)
recall = metrics.recall_score(y_true, y_pred)
#F2-score 公式
f2 = 5*(precision * recall) / (4* precision + recall)
print("F2-Score: ", f2) 
print("Precision: ", precision)
print("Recall: ", recall)

F2-Score:  0.480349344978166
Precision:  0.4489795918367347
Recall:  0.4888888888888889
