In [41]:
import warnings

import pandas as pd
import plotly.express as px
from imblearn.over_sampling import RandomOverSampler
from sklearn.datasets import make_gaussian_quantiles
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
warnings.filterwarnings('ignore')

In [42]:
# Synthetic two-feature, two-class dataset. The seed is fixed so that a
# fresh-kernel "Run All" regenerates exactly the same points; without it every
# run produced different data and therefore different scores and plots.
X_1, y_1 = make_gaussian_quantiles(
    n_samples=100,
    n_features=2,
    n_classes=2,
    random_state=1,
)

# Randomly oversample the minority class so both classes have equal counts.
# NOTE(review): resampling happens before the train/test split below; if the
# classes were ever imbalanced, duplicated rows could end up on both sides of
# the split. Consider resampling only the training portion.
ros = RandomOverSampler(random_state=1)
X_resampled, y_resampled = ros.fit_resample(X_1, y_1)

# Wrap the resampled arrays in labelled pandas containers for the later cells.
X1 = pd.DataFrame(X_resampled, columns=['x', 'y'])
y1 = pd.Series(y_resampled)

In [43]:
# Hold out 23% of the rows for evaluation; the fixed seed keeps the partition
# identical between runs.
split_parts = train_test_split(X1, y1, test_size=0.23, random_state=1)
X_train, X_test, y_train, y_test = split_parts

In [44]:
# Candidate classifiers to compare. Every estimator that accepts a seed gets
# random_state=1 so the whole comparison is reproducible:
#   - SVC shuffles the data internally when probability=True, so it needs a
#     seed for stable predict_proba output.
#   - GradientBoostingClassifier uses the seed for its per-stage trees.
# KNeighborsClassifier takes no random_state.
model1 = LogisticRegression(random_state=1)
model2 = SVC(kernel="rbf", probability=True, random_state=1)
model3 = DecisionTreeClassifier(random_state=1)
model4 = GradientBoostingClassifier(
    max_depth=2,
    n_estimators=3,
    learning_rate=1.0,
    random_state=1,
)
model5 = KNeighborsClassifier(n_neighbors=5)
model6 = RandomForestClassifier(random_state=1)

In [45]:
# --- Fit phase -------------------------------------------------------------
# Train each classifier on the training split. The value returned by `fit`
# is bound to a descriptive `model_*` name used by the scoring step below.
model_lr = model1.fit(X_train, y_train)
model_SVC = model2.fit(X_train, y_train)
model_dt = model3.fit(X_train, y_train)
model_gb = model4.fit(X_train, y_train)
model_KNN = model5.fit(X_train, y_train)
model_rf = model6.fit(X_train, y_train)

# --- Scoring phase -----------------------------------------------------------
# Keep only the second column of predict_proba for every held-out row; these
# scores feed the ROC/AUC computation and the histograms in later cells.
probs_lr = model_lr.predict_proba(X_test)[:, 1]
probs_SVC = model_SVC.predict_proba(X_test)[:, 1]
probs_dt = model_dt.predict_proba(X_test)[:, 1]
probs_gb = model_gb.predict_proba(X_test)[:, 1]
probs_KNN = model_KNN.predict_proba(X_test)[:, 1]
probs_rf = model_rf.predict_proba(X_test)[:, 1]

In [46]:
# The labels come from make_gaussian_quantiles (n_classes=2), i.e. integer
# class ids, so the former 'Good'/'Bad' string mapping never matched anything.
# An explicit integer cast states the intent and keeps the `y_test_int` name
# that the rest of the notebook relies on.
y_test_int = y_test.astype(int)

# For every model: area under the ROC curve plus the curve itself. Each model
# keeps its own thresholds array (previously a single `thresholds` name was
# overwritten six times, leaving only the last model's values available).
auc_lr = roc_auc_score(y_test_int, probs_lr)
fpr_lr, tpr_lr, thresholds_lr = roc_curve(y_test_int, probs_lr)

auc_SVC = roc_auc_score(y_test_int, probs_SVC)
fpr_SVC, tpr_SVC, thresholds_SVC = roc_curve(y_test_int, probs_SVC)

auc_dt = roc_auc_score(y_test_int, probs_dt)
fpr_dt, tpr_dt, thresholds_dt = roc_curve(y_test_int, probs_dt)

auc_gb = roc_auc_score(y_test_int, probs_gb)
fpr_gb, tpr_gb, thresholds_gb = roc_curve(y_test_int, probs_gb)

auc_KNN = roc_auc_score(y_test_int, probs_KNN)
fpr_KNN, tpr_KNN, thresholds_KNN = roc_curve(y_test_int, probs_KNN)

auc_rf = roc_auc_score(y_test_int, probs_rf)
fpr_rf, tpr_rf, thresholds_rf = roc_curve(y_test_int, probs_rf)

# Backward compatibility: the original cell left `thresholds` bound to the
# random-forest values, so keep that name for any cell that still reads it.
thresholds = thresholds_rf


In [47]:
# Histogram of predicted scores, coloured by the true label.
# LogisticRegression
fig_hist = px.histogram(
    x=probs_lr,
    title='Logistic Regression',
    color=y_test,
    nbins=50,
    labels=dict(color='True Labels', x='Score'),
)
fig_hist.show()

# roc_curve yields one (FPR, TPR) pair per decision threshold. The rates are
# indexed by those threshold values so the x-axis really shows thresholds;
# previously the frame had a default 0..n-1 index, so the [0, 1] x-range below
# only displayed the first two rows. The curve is recomputed here so this cell
# has the matching thresholds array at hand.
fpr_lr, tpr_lr, thresholds_lr = roc_curve(y_test_int, probs_lr)
df = pd.DataFrame(
    {
        'False Positive Rate': fpr_lr,
        'True Positive Rate': tpr_lr,
    },
    index=thresholds_lr,
)
df.index.name = "Thresholds"
df.columns.name = "Rate"

fig_thresh = px.line(
    df, title='TPR and FPR at every threshold of Logistic Regression',
    width=700, height=500
)

# Equal axis scaling, x restricted to the probability range [0, 1].
fig_thresh.update_yaxes(scaleanchor="x", scaleratio=1)
fig_thresh.update_xaxes(range=[0, 1], constrain='domain')
fig_thresh.show()


In [48]:
# Histogram of the SVC scores, coloured by the true label.
fig_hist = px.histogram(
    x=probs_SVC,
    title='Support-vector machines',
    color=y_test,
    nbins=50,
    labels=dict(color='True Labels', x='Score'),
)
fig_hist.show()

# Index the rates by the actual decision thresholds returned by roc_curve.
# With the former default 0..n-1 index, the [0, 1] x-range below showed only
# the first two rows instead of the threshold sweep.
fpr_SVC, tpr_SVC, thresholds_SVC = roc_curve(y_test_int, probs_SVC)
df = pd.DataFrame(
    {
        'False Positive Rate': fpr_SVC,
        'True Positive Rate': tpr_SVC,
    },
    index=thresholds_SVC,
)
df.index.name = "Thresholds"
df.columns.name = "Rate"

fig_thresh = px.line(
    df, title='TPR and FPR at every threshold of support-vector machines',
    width=700, height=500
)

# Equal axis scaling, x restricted to the probability range [0, 1].
fig_thresh.update_yaxes(scaleanchor="x", scaleratio=1)
fig_thresh.update_xaxes(range=[0, 1], constrain='domain')
fig_thresh.show()

In [49]:
# Histogram of the decision-tree scores, coloured by the true label.
fig_hist = px.histogram(
    x=probs_dt,
    title='Decision Tree',
    color=y_test,
    nbins=50,
    labels=dict(color='True Labels', x='Score'),
)
fig_hist.show()

# Index the rates by the actual decision thresholds returned by roc_curve.
# With the former default 0..n-1 index, the [0, 1] x-range below showed only
# the first two rows instead of the threshold sweep.
fpr_dt, tpr_dt, thresholds_dt = roc_curve(y_test_int, probs_dt)
df = pd.DataFrame(
    {
        'False Positive Rate': fpr_dt,
        'True Positive Rate': tpr_dt,
    },
    index=thresholds_dt,
)
df.index.name = "Thresholds"
df.columns.name = "Rate"

fig_thresh = px.line(
    df, title='TPR and FPR at every threshold of Decision Tree',
    width=700, height=500
)

# Equal axis scaling, x restricted to the probability range [0, 1].
fig_thresh.update_yaxes(scaleanchor="x", scaleratio=1)
fig_thresh.update_xaxes(range=[0, 1], constrain='domain')
fig_thresh.show()

In [50]:
# Histogram of the gradient-boosting scores, coloured by the true label.
fig_hist = px.histogram(
    x=probs_gb,
    title='Gradient Boosting',
    color=y_test,
    nbins=50,
    labels=dict(color='True Labels', x='Score'),
)
fig_hist.show()

# Index the rates by the actual decision thresholds returned by roc_curve.
# With the former default 0..n-1 index, the [0, 1] x-range below showed only
# the first two rows instead of the threshold sweep.
fpr_gb, tpr_gb, thresholds_gb = roc_curve(y_test_int, probs_gb)
df = pd.DataFrame(
    {
        'False Positive Rate': fpr_gb,
        'True Positive Rate': tpr_gb,
    },
    index=thresholds_gb,
)
df.index.name = "Thresholds"
df.columns.name = "Rate"

fig_thresh = px.line(
    df, title='TPR and FPR at every threshold of Gradient Boosting ',
    width=700, height=500
)

# Equal axis scaling, x restricted to the probability range [0, 1].
fig_thresh.update_yaxes(scaleanchor="x", scaleratio=1)
fig_thresh.update_xaxes(range=[0, 1], constrain='domain')
fig_thresh.show()

In [51]:
# Histogram of the k-nearest-neighbours scores, coloured by the true label.
fig_hist = px.histogram(
    x=probs_KNN,
    title='K-Nearest Neighbors',
    color=y_test,
    nbins=50,
    labels=dict(color='True Labels', x='Score'),
)
fig_hist.show()

# Index the rates by the actual decision thresholds returned by roc_curve.
# With the former default 0..n-1 index, the [0, 1] x-range below showed only
# the first two rows instead of the threshold sweep.
fpr_KNN, tpr_KNN, thresholds_KNN = roc_curve(y_test_int, probs_KNN)
df = pd.DataFrame(
    {
        'False Positive Rate': fpr_KNN,
        'True Positive Rate': tpr_KNN,
    },
    index=thresholds_KNN,
)
df.index.name = "Thresholds"
df.columns.name = "Rate"

fig_thresh = px.line(
    df, title='TPR and FPR at every threshold of K-Nearest Neighbors ',
    width=700, height=500
)

# Equal axis scaling, x restricted to the probability range [0, 1].
fig_thresh.update_yaxes(scaleanchor="x", scaleratio=1)
fig_thresh.update_xaxes(range=[0, 1], constrain='domain')
fig_thresh.show()

In [52]:
# Histogram of the random-forest scores, coloured by the true label.
fig_hist = px.histogram(
    x=probs_rf,
    title='Random Forest',
    color=y_test,
    nbins=50,
    labels=dict(color='True Labels', x='Score'),
)
fig_hist.show()

# Index the rates by the actual decision thresholds returned by roc_curve.
# With the former default 0..n-1 index, the [0, 1] x-range below showed only
# the first two rows instead of the threshold sweep.
fpr_rf, tpr_rf, thresholds_rf = roc_curve(y_test_int, probs_rf)
df = pd.DataFrame(
    {
        'False Positive Rate': fpr_rf,
        'True Positive Rate': tpr_rf,
    },
    index=thresholds_rf,
)
df.index.name = "Thresholds"
df.columns.name = "Rate"

fig_thresh = px.line(
    df, title='TPR and FPR at every threshold of Random Forest ',
    width=700, height=500
)

# Equal axis scaling, x restricted to the probability range [0, 1].
fig_thresh.update_yaxes(scaleanchor="x", scaleratio=1)
fig_thresh.update_xaxes(range=[0, 1], constrain='domain')
fig_thresh.show()