https://plotly.com/python/roc-and-pr-curves/

In [103]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    roc_curve,              # 根据 y_true, y_score 得到roc曲线
                            # x: 假正类率 (false postive rate, FPR); y: Recall(true postive rate, TPR)
    auc,                    # 根据 x,y 坐标得到roc曲线下的面积
    roc_auc_score,          # 根据 y_true,y_score 得到roc曲线下的面积
    precision_recall_curve,
    average_precision_score,
)
from sklearn.datasets import make_classification

# Preliminary plots

Before diving into the receiver operating characteristic (ROC) curve, we will look at two plots that will give some context to the thresholds mechanism behind the ROC and PR curves.

In the histogram, we observe that the scores are spread such that most of the positive labels are binned near 1, and a lot of the negative labels are close to 0. When we set a threshold on the score, all of the bins to its left will be classified as 0's, and everything to the right will be 1's. There are obviously a few outliers, such as negative samples that our model gave a high score, and positive samples with a low score. If we set a threshold right in the middle, those outliers will respectively become false positives and false negatives.

As we adjust thresholds, the number of false positives will increase or decrease, and at the same time the number of true positives will also change; this is shown in the second plot. As you can see, the model seems to perform fairly well, because the true positive rate decreases slowly, whereas the false positive rate decreases sharply as we increase the threshold. Those two lines each represent a dimension of the ROC curve.

In [60]:
# Synthetic classification data set; the fixed random_state keeps it repeatable
X, y = make_classification(
    n_samples=500,
    random_state=0,
)

In [61]:
# Train a default logistic-regression classifier on the synthetic data;
# the trailing expression displays the fitted estimator
model = LogisticRegression().fit(X, y)
model

In [62]:
# Use column 1 of the predicted probabilities as the score for each sample
y_score = model.predict_proba(X)[:, 1]
# Preview the first ten scores
y_score[:10]

array([0.69570219, 0.93001569, 0.99212056, 0.93671581, 0.93177138,
       0.02689803, 0.12747884, 0.07889723, 0.04034918, 0.14588297])

In [63]:
# roc_curve yields three aligned arrays:
#   fpr        - False Positive Rate
#   tpr        - True Positive Rate
#   thresholds - the score thresholds the rates were computed at
fpr, tpr, thresholds = roc_curve(y_true=y, y_score=y_score)
(fpr[:5], tpr[:5], thresholds[:5])

(array([0.        , 0.        , 0.        , 0.00398406, 0.00398406]),
 array([0.        , 0.00401606, 0.44578313, 0.44578313, 0.51405622]),
 array([       inf, 0.99968821, 0.91396378, 0.91108317, 0.88448622]))

In [64]:
# Area under the ROC curve, computed from the (fpr, tpr) curve coordinates
auc_score = auc(x=fpr, y=tpr)
auc_score

0.909902558440935

In [65]:
# Shortcut: roc_auc_score does roc_curve + auc in one call, straight from
# the labels and scores
roc_auc_score(y_true=y, y_score=y_score)

0.909902558440935

In [66]:
# Histogram of the predicted scores, colored by the true label
fig_hist = px.histogram(
    x=y_score,
    color=y,
    labels={"x": "Score", "color": "True Labels"},
    nbins=50,
)
fig_hist.show()

In [67]:
# Model performance at the various thresholds: one row per threshold,
# one column per rate
df = pd.DataFrame(
    {'False Positive Rate': fpr, 'True Positive Rate': tpr},
    index=thresholds,
).rename_axis(index="Thresholds", columns="Rate")

In [68]:
# Line plot of both rate columns against the threshold index
fig_thresh = px.line(
    df,
    title='TPR and FPR at every threshold',
    width=700, height=500,
)
# Show only thresholds in [0, 1] and keep the two axes on the same scale
fig_thresh.update_xaxes(range=[0, 1], constrain='domain')
fig_thresh.update_yaxes(scaleanchor="x", scaleratio=1)
fig_thresh.show()

# Basic binary ROC curve

Notice how this ROC curve looks similar to the True Positive Rate curve from the previous plot. This is because they are the same curve, except the x-axis consists of increasing values of FPR instead of threshold, which is why the line is flipped and distorted.

We also display the area under the ROC curve (ROC AUC), which is fairly high, thus consistent with our interpretation of the previous plots.

In [69]:
# First five points of the ROC curve, as an (fpr, tpr) pair of arrays
(fpr[:5], tpr[:5])

(array([0.        , 0.        , 0.        , 0.00398406, 0.00398406]),
 array([0.        , 0.00401606, 0.44578313, 0.44578313, 0.51405622]))

In [70]:
# Filled ROC curve with the AUC reported in the title
fig = px.area(
    x=fpr, y=tpr,
    labels={"x": 'False Positive Rate', "y": 'True Positive Rate'},
    title=f'ROC Curve (AUC={auc_score:.4f})',
    width=500, height=500,
)
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line
fig.add_shape(
    type="line",
    x0=0, y0=0,
    x1=1, y1=1,
    line={"dash": "dash"},
)
fig.show()

# Multiclass ROC Curve

When you have more than 2 classes, you will need to plot the ROC curve for each class separately. Make sure that you use a one-versus-rest model, or make sure that your problem has a multi-label format; otherwise, your ROC curve might not return the expected results.

In [71]:
# Load the iris data set bundled with plotly express and preview it
# (the label noise that makes the task harder is added in the cells below)
iris = px.data.iris()
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id
0,5.1,3.5,1.4,0.2,setosa,1
1,4.9,3.0,1.4,0.2,setosa,1
2,4.7,3.2,1.3,0.2,setosa,1
3,4.6,3.1,1.5,0.2,setosa,1
4,5.0,3.6,1.4,0.2,setosa,1


In [72]:
# Draw 50 species labels at random (fixed seed); their index identifies
# the rows the labels came from
samples = iris["species"].sample(n=50, random_state=0)
samples

114     virginica
62     versicolor
33         setosa
107     virginica
7          setosa
100     virginica
40         setosa
86     versicolor
76     versicolor
71     versicolor
134     virginica
51     versicolor
73     versicolor
54     versicolor
63     versicolor
37         setosa
78     versicolor
90     versicolor
45         setosa
16         setosa
121     virginica
66     versicolor
24         setosa
8          setosa
126     virginica
22         setosa
44         setosa
97     versicolor
93     versicolor
26         setosa
137     virginica
84     versicolor
27         setosa
127     virginica
132     virginica
59     versicolor
18         setosa
83     versicolor
61     versicolor
92     versicolor
112     virginica
2          setosa
141     virginica
43         setosa
10         setosa
60     versicolor
116     virginica
144     virginica
119     virginica
108     virginica
Name: species, dtype: object

In [73]:
# Shuffle the sampled labels among the sampled rows to create label noise.
# A seeded Generator makes the noise reproducible, and a new Series is built
# instead of shuffling `samples.values` in place (that only works when
# `.values` happens to be a writable view of the Series data).
rng = np.random.default_rng(0)
samples = pd.Series(
    rng.permutation(samples.to_numpy()),
    index=samples.index,
    name=samples.name,
)

In [74]:
# Write the sampled labels back onto their rows by position (a plain array
# is assigned, so no index alignment takes place), then preview the frame
noisy_labels = samples.to_numpy()
iris.loc[samples.index, "species"] = noisy_labels
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id
0,5.1,3.5,1.4,0.2,setosa,1
1,4.9,3.0,1.4,0.2,setosa,1
2,4.7,3.2,1.3,0.2,versicolor,1
3,4.6,3.1,1.5,0.2,setosa,1
4,5.0,3.6,1.4,0.2,setosa,1


In [75]:
# Target: the species label. Features: every column except the two
# label columns.
y = iris['species']
X = iris.drop(['species', 'species_id'], axis=1)

In [76]:
# Train the classifier on the iris data (max_iter set to 200);
# the trailing expression displays the fitted estimator
model = LogisticRegression(max_iter=200).fit(X, y)
model

In [77]:
# Predicted probabilities for every sample; keep all columns this time
y_scores = model.predict_proba(X)
# Preview the first five rows
y_scores[:5]

array([[0.73573509, 0.22255604, 0.04170888],
       [0.65437023, 0.31681445, 0.02881532],
       [0.70340533, 0.26142264, 0.03517202],
       [0.6757169 , 0.28664155, 0.03764155],
       [0.75140137, 0.20161193, 0.04698671]])

In [78]:
# One-hot encode the true labels so each class can be scored separately.
# `get_dummies` does not use `columns=` when given a Series, so the column
# order is made explicit here: reindex to model.classes_, so that column k
# of y_onehot describes the same class as column k of model.predict_proba.
y_onehot = pd.get_dummies(y).reindex(columns=model.classes_, fill_value=False)
y_onehot.head()

Unnamed: 0,setosa,versicolor,virginica
0,True,False,False
1,True,False,False
2,False,True,False
3,True,False,False
4,True,False,False


In [79]:
# Create an empty figure and add one ROC curve per class to it.
fig = go.Figure()
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
fig.add_shape(
    type="line",
    line=dict(dash='dash'),
    x0=0, x1=1, y0=0, y1=1
)

# Column k of `y_scores` (from model.predict_proba) belongs to
# model.classes_[k], so labels and scores are paired by class label rather
# than by column position. Loop-specific names are used so the notebook-level
# `y_score`, `fpr`, `tpr` and `auc_score` from the binary example are not
# overwritten, and `_` is not rebound.
for class_idx, class_name in enumerate(model.classes_):
    class_true = y_onehot[class_name]
    class_score = y_scores[:, class_idx]

    # Only the first two return values (fpr, tpr) are needed for plotting
    class_fpr, class_tpr = roc_curve(class_true, class_score)[:2]
    class_auc = roc_auc_score(class_true, class_score)

    fig.add_trace(go.Scatter(
        x=class_fpr,
        y=class_tpr,
        name=f"{class_name} (AUC={class_auc:.2f})",
        mode='lines',
    ))

fig.update_layout(
    xaxis_title='False Positive Rate',
    yaxis_title='True Positive Rate',
    yaxis=dict(scaleanchor="x", scaleratio=1),
    xaxis=dict(constrain='domain'),
    width=700, height=500
)

fig.show()

# Precision-Recall Curves

In [85]:
# Regenerate the synthetic data set with the same size and seed as in
# the ROC section
X, y = make_classification(
    n_samples=500,
    random_state=0,
)

In [87]:
# Train a default logistic-regression classifier for the PR-curve example;
# the trailing expression displays the fitted estimator
model = LogisticRegression().fit(X, y)
model

In [88]:
# Use column 1 of the predicted probabilities as the score for each sample
y_score = model.predict_proba(X)[:, 1]
# Preview the first five scores
y_score[:5]

array([0.69570219, 0.93001569, 0.99212056, 0.93671581, 0.93177138])

In [89]:
# Precision and recall values along with the thresholds returned for them
pr_result = precision_recall_curve(y, y_score)
precision, recall, thresholds = pr_result
(precision[:5], recall[:5], thresholds[:5])

(array([0.498     , 0.498998  , 0.5       , 0.50100604, 0.50201613]),
 array([1., 1., 1., 1., 1.]),
 array([0.00457785, 0.00514869, 0.00661475, 0.00827054, 0.00901408]))

In [92]:
# Area under the precision-recall curve (recall on x, precision on y)
auc_score = auc(x=recall, y=precision)
auc_score

0.9268200322697185

In [93]:
# Filled precision-recall curve with the area under it in the title
fig = px.area(
    x=recall, y=precision,
    labels={"x": 'Recall', "y": 'Precision'},
    title=f'Precision-Recall Curve (AUC={auc_score:.4f})',
    width=700, height=500,
)
# Dashed anti-diagonal from (0, 1) to (1, 0) as a visual reference line
fig.add_shape(
    type='line',
    x0=0, y0=1,
    x1=1, y1=0,
    line={"dash": 'dash'},
)
# Same scale on both axes; x axis constrained to its domain
fig.update_xaxes(constrain='domain')
fig.update_yaxes(scaleanchor="x", scaleratio=1)

fig.show()

## In this example, we use the average precision metric, which is an alternative scoring method to the area under the PR curve

In [96]:
# Reload a fresh copy of the iris data set and preview it
# (the label noise that makes the task harder is added in the next cell)
iris = px.data.iris()
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id
0,5.1,3.5,1.4,0.2,setosa,1
1,4.9,3.0,1.4,0.2,setosa,1
2,4.7,3.2,1.3,0.2,setosa,1
3,4.6,3.1,1.5,0.2,setosa,1
4,5.0,3.6,1.4,0.2,setosa,1


In [98]:
# Add label noise: pick 30 rows (fixed seed) and shuffle their species
# labels among themselves.
samples = iris.species.sample(n=30, random_state=0)
# A seeded Generator keeps the shuffle reproducible; `permutation` returns a
# new array, so nothing depends on `samples.values` being a writable view.
rng = np.random.default_rng(0)
iris.loc[samples.index, 'species'] = rng.permutation(samples.to_numpy())
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id
0,5.1,3.5,1.4,0.2,setosa,1
1,4.9,3.0,1.4,0.2,setosa,1
2,4.7,3.2,1.3,0.2,setosa,1
3,4.6,3.1,1.5,0.2,setosa,1
4,5.0,3.6,1.4,0.2,setosa,1


In [100]:
# Define the inputs and outputs
X = iris.drop(columns=['species', 'species_id'])
y = iris['species']
# One-hot encode the labels, one column per species.
# No `columns=` argument is passed: `get_dummies` does not use it for a
# Series, and at this point `model` is still the classifier fitted on the
# synthetic binary data, so its `classes_` do not describe the iris labels.
y_onehot = pd.get_dummies(y)
y_onehot.head()

Unnamed: 0,setosa,versicolor,virginica
0,True,False,False
1,True,False,False
2,True,False,False
3,True,False,False
4,True,False,False


In [101]:
# Train the classifier on the noisy iris data (max_iter set to 200);
# the trailing expression displays the fitted estimator
model = LogisticRegression(max_iter=200).fit(X, y)
model

In [102]:
# Predicted probabilities for every sample, all columns kept
y_scores = model.predict_proba(X)
# Preview the first five rows
y_scores[:5]

array([[8.30403279e-01, 1.69478454e-01, 1.18266642e-04],
       [7.86221597e-01, 2.13658046e-01, 1.20357409e-04],
       [8.15262155e-01, 1.84629025e-01, 1.08820181e-04],
       [7.75902253e-01, 2.23937466e-01, 1.60280658e-04],
       [8.36781645e-01, 1.63095297e-01, 1.23058412e-04]])

In [106]:
# Create an empty figure, and iteratively add new lines
# every time we compute a new class
fig = go.Figure()
fig.add_shape(
    type='line',
    line=dict(dash='dash'),
    x0=0, x1=1, y0=1, y1=0
)

for i in range(y_scores.shape[1]):
    y_true = y_onehot.iloc[:, i]
    y_score = y_scores[:, i]

    precision, recall, _ = precision_recall_curve(y_true, y_score)
    auc_score = average_precision_score(y_true, y_score)
    name = f"{y_onehot.columns[i]} (AP={auc_score:.2f})"
    fig.add_trace(go.Scatter(x=recall, y=precision, name=name, mode='lines'))

fig.update_layout(
    xaxis_title='Recall',
    yaxis_title='Precision',
    yaxis=dict(scaleanchor="x", scaleratio=1),
    xaxis=dict(constrain='domain'),
    width=700, height=500
)

fig.show()