In [1]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.svm import LinearSVC
from sklearn.datasets import load_iris
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import PrecisionRecallDisplay, precision_recall_curve, average_precision_score

# Binary Problem

In [2]:
X, y = load_iris(return_X_y=True)

# One shared RandomState instance is handed to the splitter and to the
# estimators in later cells; each consumer advances the same stream, so the
# results depend on the order in which cells are executed.
random_state = np.random.RandomState(0)
n_samples, n_features = X.shape

# Optional noisy-feature augmentation (currently disabled). Enabling it also
# draws from `random_state`, which would change the split produced below.
# X = np.concatenate([X, random_state.randn(n_samples, 200 * n_features)], axis=1)

# Keep only the samples labelled 0 or 1, then hold out half of them for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X[y < 2],
    y[y < 2],
    test_size=0.5,
    random_state=random_state,
)

In [3]:
# Standardise the features, then fit a linear SVM on the two-class training split.
classifier = make_pipeline(
    StandardScaler(),
    LinearSVC(random_state=random_state),
)
classifier.fit(X=X_train, y=y_train)

In [4]:
def plot_binary_pr_curve(clf, X_test, y_test, name="LinearSVC"):
    """Build a Plotly precision-recall curve for a fitted binary classifier.

    Parameters
    ----------
    clf : fitted estimator
        Must expose ``decision_function`` or, failing that, ``predict_proba``.
    X_test : array-like
        Held-out samples that are scored by ``clf``.
    y_test : array-like
        True binary labels for ``X_test``. The axis titles state that class 1
        is the positive label.
    name : str, default="LinearSVC"
        Estimator label shown in the legend next to the average precision.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure holding a single step-shaped precision-recall trace.

    Raises
    ------
    AttributeError
        If ``clf`` offers neither ``decision_function`` nor ``predict_proba``.
    """
    # Continuous scores are required to sweep a decision threshold. Prefer the
    # signed margin; otherwise fall back to the probability of the second class.
    if hasattr(clf, "decision_function"):
        y_score = clf.decision_function(X_test)
    else:
        y_score = clf.predict_proba(X_test)[:, 1]

    # Precision/recall pairs over every score threshold, plus their summary (AP).
    precision, recall, _ = precision_recall_curve(y_test, y_score)
    ap = average_precision_score(y_test, y_score)

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=recall,
            y=precision,
            mode="lines",
            name=f"{name} (AP={ap:.2f})",
            # "hv" draws a step curve: precision is held constant between
            # consecutive recall values instead of being interpolated.
            line=dict(color="blue", shape="hv"),
            showlegend=True,
        )
    )

    # Pad the x-range slightly so the curve end points are not clipped.
    fig.update_xaxes(range=[-0.05, 1.05])
    fig.update_layout(
        title='2-Class Precision-Recall Curve',
        xaxis_title='Recall (Positive label: 1)',
        yaxis_title='Precision (Positive label: 1)',
        # Place the legend inside the plot area, near the lower-left corner.
        legend=dict(
            x=0.009,
            y=0.05,
            font=dict(size=12),
        ),
    )

    return fig

In [5]:
# Render the precision-recall curve of the binary classifier on the held-out split.
fig = plot_binary_pr_curve(clf=classifier, X_test=X_test, y_test=y_test)
fig.show()

# Multi-Label

In [6]:
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier

In [7]:
# Binarise the three iris classes into an indicator matrix (one column per
# class) so the problem can be treated like a multi-label one.
Y = label_binarize(y, classes=[0, 1, 2])
n_classes = Y.shape[1]

# Hold out half of the samples for testing.
X_train_multi, X_test_multi, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.5,
    random_state=random_state,
)

In [8]:
# One scaler + linear SVM pipeline is fitted per indicator column.
# NOTE(review): this rebinds `classifier`, replacing the binary model fitted
# earlier; cells that use `classifier` therefore depend on execution order.
classifier = OneVsRestClassifier(
    estimator=make_pipeline(
        StandardScaler(),
        LinearSVC(random_state=random_state),
    )
)
classifier.fit(X=X_train_multi, y=Y_train)

In [48]:
def plot_multi_label_pr_curve(clf, X_test, Y_test):
    """Build per-class and micro-averaged precision-recall curves with Plotly.

    Parameters
    ----------
    clf : fitted estimator
        Must expose ``decision_function`` returning one score column per class.
    X_test : array-like
        Held-out samples that are scored by ``clf``.
    Y_test : array-like of shape (n_samples, n_classes)
        Binary indicator matrix of the true labels, one column per class.

    Returns
    -------
    plotly.graph_objects.Figure
        One step curve per class, one micro-average curve, and four gray
        iso-F1 reference curves (F1 = 0.2, 0.4, 0.6, 0.8).
    """
    # Coerce to ndarrays so column slicing and ravel() behave uniformly.
    Y_test = np.asarray(Y_test)
    y_score = np.asarray(clf.decision_function(X_test))
    n_classes = Y_test.shape[1]

    precision, recall, average_precision = {}, {}, {}

    # One curve per class, each column treated as its own binary problem.
    for i in range(n_classes):
        precision[i], recall[i], _ = precision_recall_curve(Y_test[:, i], y_score[:, i])
        average_precision[i] = average_precision_score(Y_test[:, i], y_score[:, i])

    # Micro-average: pool every (sample, class) decision into one binary problem.
    precision["micro"], recall["micro"], _ = precision_recall_curve(
        Y_test.ravel(), y_score.ravel()
    )
    average_precision["micro"] = average_precision_score(Y_test, y_score, average="micro")

    fig = go.Figure()

    # Plot every curve. The palette is indexed modulo its length so that no
    # curve is dropped when there are more curves than colours.
    palette = ["navy", "turquoise", "darkorange", "gold"]
    for position, key in enumerate(precision):
        label = "Micro-average" if key == "micro" else f"Class {key}"
        fig.add_trace(
            go.Scatter(
                x=recall[key],
                y=precision[key],
                mode="lines",
                name=f"{label} (AP={average_precision[key]:.2f})",
                # "hv" draws a step curve rather than interpolating precision.
                line=dict(color=palette[position % len(palette)], shape="hv"),
                showlegend=True,
            )
        )

    # Iso-F1 curves: solving F1 = 2PR / (P + R) for precision gives
    # P = F1 * R / (2R - F1), which is only non-negative for R > F1 / 2.
    iso_recall = np.linspace(0.01, 1, 1001)
    label_recall = 0.9  # recall at which each curve is annotated
    for idx, f_score in enumerate(np.linspace(0.2, 0.8, num=4)):
        iso_precision = f_score * iso_recall / (2 * iso_recall - f_score)
        valid = iso_precision >= 0
        is_first = idx == 0  # a single legend entry stands for all iso curves
        fig.add_trace(
            go.Scatter(
                x=iso_recall[valid],
                y=iso_precision[valid],
                mode='lines',
                line_color='gray',
                name="Iso-F1 Curves" if is_first else "",
                showlegend=is_first,
            )
        )
        # Evaluate the curve at the label position directly instead of relying
        # on a hard-coded index into the sampling grid.
        label_precision = f_score * label_recall / (2 * label_recall - f_score)
        fig.add_annotation(
            x=label_recall,
            y=label_precision + 0.02,
            text=f"<b>f1={f_score:0.1f}</b>",
            showarrow=False,
            font=dict(size=15),
        )

    # Fix the y-range: the iso curves grow very large near R = F1 / 2.
    fig.update_yaxes(range=[0, 1.05])

    fig.update_layout(
        title='Extension of Precision-Recall Curve to Multi-Class',
        xaxis_title='Recall',
        yaxis_title='Precision',
        # Horizontal legend placed just above the plotting area.
        legend=dict(
            orientation="h",
            entrywidth=70,
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=0.65,
        ),
    )

    return fig
    

In [49]:
# Render the per-class and micro-averaged curves on the held-out multi-label split.
fig = plot_multi_label_pr_curve(clf=classifier, X_test=X_test_multi, Y_test=Y_test)
fig.show()