In [1]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_curve, roc_auc_score
import joblib

# Load the iris table, hold out 20% of the rows, fit a random forest on the
# remaining 80% and persist the fitted estimator with joblib.
# NOTE(review): no noise is added to the data anywhere in this cell.
df = pd.read_csv("../datasets/iris.csv")

features = df.drop(columns="species")
# Species names are turned into integer class ids; the encoder itself is not kept.
labels = LabelEncoder().fit_transform(df["species"])

X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=123,
)

x = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=123)
x.fit(X_train, y_train)

joblib.dump(x, "../models/irisFores.joblib")


['../models/irisFores.joblib']

In [None]:
# Ground-truth species label for every row of `df`.
y = df.loc[:, "species"]
print(y)

In [None]:
# Load a previously fitted classifier and compute class probabilities for
# every row of the dataset (nothing is fitted in this cell).
# NOTE(review): this reads "../models/iris.joblib", whereas the training cell
# above writes "../models/irisFores.joblib" — confirm which artifact is intended.
model = joblib.load("../models/iris.joblib")

# `X` was never defined in this notebook, so this cell raised NameError on a
# fresh kernel. Use all feature columns of `df` so the rows line up one-to-one
# with `y = df['species']`.
# NOTE(review): if the loaded model was trained on some of these rows, the
# resulting ROC/AUC values are optimistic — consider scoring held-out rows only.
X = df.drop(columns="species")

# One probability column per entry of `model.classes_`.
y_scores = model.predict_proba(X)
print(y_scores.shape)


In [None]:
# One-hot encode the true labels: one indicator column per distinct value of
# `y`, so each class can be scored one-vs-rest.
# `pd.get_dummies` ignores its `columns=` argument when the input is a Series,
# so the former `columns=model.classes_` had no effect and is dropped. The
# column order comes from the sorted distinct labels in `y`.
y_onehot = pd.get_dummies(y)

# The ROC loop pairs column i of `y_onehot` with column i of the model's
# probability output, so both must describe the same number of classes.
assert y_onehot.shape[1] == len(model.classes_), (
    f"{y_onehot.shape[1]} label columns vs {len(model.classes_)} model classes"
)
print(y_onehot)

In [None]:
# Start an empty figure and draw the dashed diagonal from (0, 0) to (1, 1),
# the reference line a ROC curve is compared against.
fig = go.Figure()
chance_line = dict(
    type='line',
    line={'dash': 'dash'},
    x0=0, x1=1,
    y0=0, y1=1,
)
fig.add_shape(**chance_line)

In [None]:
# Add one ROC curve per probability column: column k of `y_scores` is scored
# against indicator column k of `y_onehot`, and the AUC goes into the legend.
n_classes = y_scores.shape[1]
for class_idx in range(n_classes):
    truth = y_onehot.iloc[:, class_idx]
    scores = y_scores[:, class_idx]

    false_pos, true_pos, _ = roc_curve(truth, scores)
    area = roc_auc_score(truth, scores)

    legend_label = f"{y_onehot.columns[class_idx]} (AUC={area:.2f})"
    curve = go.Scatter(x=false_pos, y=true_pos, name=legend_label, mode='lines')
    fig.add_trace(curve)

In [None]:
# Label the axes, lock the y axis to the x axis at a 1:1 scale so the plot
# area stays square, fix the figure size, then render the figure.
roc_layout = {
    "xaxis_title": 'False Positive Rate',
    "yaxis_title": 'True Positive Rate',
    "yaxis": {"scaleanchor": "x", "scaleratio": 1},
    "xaxis": {"constrain": 'domain'},
    "width": 700,
    "height": 500,
}
fig.update_layout(**roc_layout)
fig.show()