In [20]:
import numpy as np
def _safe_ratio(numerator, denominator) -> float:
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero.

    A zero denominator means the metric is undefined (e.g. no positive
    predictions at all); 0.0 is returned instead of NaN so callers get a
    plain finite float.
    """
    if denominator == 0:
        return 0.0
    return float(numerator / denominator)


def precision(y_pred: np.array, y_true: np.array) -> float:
    """Precision of binary predictions: TP / (TP + FP).

    The counts are built from element-wise products, so both inputs must be
    0/1-encoded and have matching (broadcastable) shapes.

    Args:
        y_pred: predicted labels (1 = positive, 0 = negative).
        y_true: ground-truth labels (1 = positive, 0 = negative).

    Returns:
        Fraction of predicted positives that are truly positive, or 0.0 when
        nothing was predicted positive.
    """
    y_pred, y_true = np.asarray(y_pred), np.asarray(y_true)
    true_pos = (y_pred * y_true).sum()
    # False positive: predicted 1 while the true label is 0.
    false_pos = (y_pred * (1 - y_true)).sum()
    return _safe_ratio(true_pos, true_pos + false_pos)


def recall(y_pred: np.array, y_true: np.array) -> float:
    """Recall of binary predictions: TP / (TP + FN).

    The counts are built from element-wise products, so both inputs must be
    0/1-encoded and have matching (broadcastable) shapes.

    Args:
        y_pred: predicted labels (1 = positive, 0 = negative).
        y_true: ground-truth labels (1 = positive, 0 = negative).

    Returns:
        Fraction of true positives that were predicted positive, or 0.0 when
        there are no positive ground-truth labels.
    """
    y_pred, y_true = np.asarray(y_pred), np.asarray(y_true)
    true_pos = (y_pred * y_true).sum()
    # False negative: predicted 0 while the true label is 1.
    false_neg = ((1 - y_pred) * y_true).sum()
    return _safe_ratio(true_pos, true_pos + false_neg)


def f1(y_pred: np.array, y_true: np.array) -> float:
    """F1 score: harmonic mean of precision and recall.

    Args:
        y_pred: predicted labels (1 = positive, 0 = negative).
        y_true: ground-truth labels (1 = positive, 0 = negative).

    Returns:
        ``2 * P * R / (P + R)``, or 0.0 when both precision and recall are 0.
    """
    pr = precision(y_pred, y_true)
    re = recall(y_pred, y_true)
    return _safe_ratio(2 * pr * re, pr + re)

In [21]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc, roc_curve,precision_score,recall_score,f1_score
import plotly.graph_objs as go




# Download the dataset from OpenML; the 'Outcome' column is the integer label,
# every other column is a feature.
data = fetch_openml(data_id=42608)
X, y = data['data'].drop(columns='Outcome').values, data['data']['Outcome'].astype(int).values
scaler = StandardScaler()

tree = DecisionTreeClassifier(max_depth= 9, random_state= 42)
knn = KNeighborsClassifier(n_neighbors=5)
lr = LogisticRegression()
# probability=True enables predict_proba via an internal randomized calibration;
# seed it so the SVC probabilities (and the AUC scores below) are reproducible.
svm = SVC(probability=True, random_state=42)
models =[tree,knn,lr,svm]

x_train, x_test, y_train, y_test = train_test_split(X,y,random_state= 42, test_size= 0.3)
# Fit the scaler on the training split only, then reuse it for the test split.
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

predictions = {}  # model name -> positive-class probabilities on the test split
rocScores = {}    # model name -> ROC AUC
prScores = {}     # model name -> area under the precision-recall curve
preds = {}        # model name -> hard class predictions on the test split
# Both figures start from the same layout; fig_2's axis titles are overridden after the loop.
layout = go.Layout(autosize = False, width = 1200, height= 600, xaxis_title='recall', yaxis_title='precision')
fig_1 = go.Figure(layout= layout)
fig_2 = go.Figure(layout=layout)
for model in models:
    # For each model: fit it, compute predictions and metrics, and add its
    # precision-recall and ROC curves to the corresponding figures.
    model_name = type(model).__name__
    model.fit(x_train,y_train)
    predictions[model_name] = model.predict_proba(x_test)[:,1]
    preds[model_name] = model.predict(x_test)
    prec,rec,thresholds = precision_recall_curve(y_test,predictions[model_name])

    # Sanity check: sklearn's f1/recall next to the hand-written ones.
    # The hand-written helpers are declared as (y_pred, y_true) -- the reverse of
    # sklearn's (y_true, y_pred) -- so pass their arguments by keyword.
    print(
        f1_score(y_test, preds[model_name]),
        recall_score(y_test, preds[model_name]),
        f1(y_pred=preds[model_name], y_true=y_test),
        recall(y_pred=preds[model_name], y_true=y_test),
    )

    fig_1.add_trace(go.Scatter(x = rec, y = prec, mode= 'lines', name = model_name))

    prScores[model_name] = auc(rec,prec)
    rocScores[model_name] = roc_auc_score(y_test,predictions[model_name])
    fpr, tpr, thresholds = roc_curve(y_test, predictions[model_name])

    fig_2.add_trace(go.Scatter(x =fpr, y = tpr, mode= 'lines', name= str(model)  ))
fig_1.show()
fig_2.update_layout(xaxis_title = 'fpr', yaxis_title = 'tpr' )
fig_2.show()
# Rank the models from best to worst by each AUC.
prScores = dict(sorted(prScores.items(), key = lambda x: x[1],reverse=True))
rocScores = dict(sorted(rocScores.items(), key = lambda x: x[1], reverse=True))
print(prScores,'\n'*2,rocScores)



0.6292134831460674 0.7 0.6292134831460674 0.7
0.5548387096774193 0.5375 0.5548387096774193 0.5375
0.6211180124223602 0.625 0.6211180124223602 0.625
0.6143790849673203 0.5875 0.6143790849673203 0.5875


{'DecisionTreeClassifier': 0.6814708197980257, 'LogisticRegression': 0.6639162986466747, 'SVC': 0.646433465148704, 'KNeighborsClassifier': 0.5876288276242685} 

 {'LogisticRegression': 0.7980132450331127, 'SVC': 0.7973509933774834, 'KNeighborsClassifier': 0.7504552980132451, 'DecisionTreeClassifier': 0.728725165562914}
