# ROC curve

In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd



In [2]:
# Load the Iris dataset
cancer = load_breast_cancer()
X = cancer.data
y = cancer.target

In [3]:
# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
# Train a logistic regression model
clf = LogisticRegression(random_state=42)
clf.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [5]:
# Predict the probabilities of the positive class
y_score = clf.predict_proba(X_test)[:, 1]

In [6]:
# Calculate true positive and false positive rates

specified_thresholds = np.linspace(0, 0.99, 15)

fpr_list = []
tpr_list = []
for threshold in specified_thresholds:
    y_pred = (y_score >= threshold).astype('int')
    fpr, tpr, _ = roc_curve(y_test, y_pred)
    fpr_list.append(fpr[1])
    tpr_list.append(tpr[1])

In [7]:
# Calculate the area under the ROC curve
roc_auc = auc(fpr_list, tpr_list)
roc_auc

0.9983622666229939

In [8]:
y_prob = [round(p, 3) for p in y_score]
df = pd.DataFrame([y_test, y_prob, y_pred], index=['real', 'prob', 'pred']).T
print(df.to_string(index=False))

 real  prob  pred
  1.0 0.829   0.0
  0.0 0.000   0.0
  0.0 0.001   0.0
  1.0 0.995   1.0
  1.0 0.998   1.0
  0.0 0.000   0.0
  0.0 0.000   0.0
  0.0 0.009   0.0
  1.0 0.998   1.0
  1.0 0.986   0.0
  1.0 0.952   0.0
  0.0 0.000   0.0
  1.0 0.991   1.0
  0.0 0.191   0.0
  1.0 0.997   1.0
  0.0 0.001   0.0
  1.0 0.997   1.0
  1.0 0.999   1.0
  1.0 0.997   1.0
  0.0 0.000   0.0
  0.0 0.832   0.0
  1.0 0.979   0.0
  0.0 0.000   0.0
  1.0 0.994   1.0
  1.0 0.987   0.0
  1.0 0.999   1.0
  1.0 0.996   1.0
  1.0 0.988   0.0
  1.0 0.992   1.0
  0.0 0.000   0.0
  1.0 0.992   1.0
  1.0 0.998   1.0
  1.0 0.974   0.0
  1.0 0.977   0.0
  1.0 0.997   1.0
  1.0 0.994   1.0
  0.0 0.003   0.0
  1.0 0.993   1.0
  0.0 0.000   0.0
  1.0 0.807   0.0
  1.0 0.996   1.0
  0.0 0.001   0.0
  1.0 0.998   1.0
  1.0 0.986   0.0
  1.0 0.997   1.0
  1.0 0.948   0.0
  1.0 0.998   1.0
  1.0 0.986   0.0
  1.0 0.911   0.0
  1.0 0.994   1.0
  0.0 0.000   0.0
  0.0 0.000   0.0
  1.0 0.891   0.0
  1.0 0.998   1.0
  1.0 0.99

In [9]:
pd.DataFrame([specified_thresholds, fpr_list, tpr_list], index=['Cutoff', 'FPR', 'TPR']).T

Unnamed: 0,Cutoff,FPR,TPR
0,0.0,1.0,1.0
1,0.070714,0.139535,1.0
2,0.141429,0.116279,1.0
3,0.212143,0.093023,1.0
4,0.282857,0.093023,0.985915
5,0.353571,0.093023,0.985915
6,0.424286,0.093023,0.985915
7,0.495,0.093023,0.985915
8,0.565714,0.046512,0.985915
9,0.636429,0.046512,0.985915


In [10]:
# Display the ROC curve

# Create the ROC curve trace
roc_trace = go.Scatter(
    x=fpr_list,
    y=tpr_list,
    mode='lines',
    line=dict(color='red', width=2),
    name='ROC curve (area = %0.4f)' % roc_auc
)

# Create the random line trace
random_trace = go.Scatter(
    x=[0, 1],
    y=[0, 1],
    mode='lines',
    line=dict(color='navy', width=2, dash='dash'),
    name='Random assignment'
)

# Create the layout
layout = go.Layout(
    width=800,  # Set the desired width
    height=500,  # Set the desired height
    xaxis=dict(range=[-0.02, 1.0], title='False Positive Rate'),
    yaxis=dict(range=[0.0, 1.04], title='True Positive Rate'),
    title='Receiver Operating Characteristic',
    legend=dict(x=1, y=0, bgcolor='rgba(255, 255, 255, 0.5)', bordercolor='rgba(0, 0, 0, 0.5)')
)

# Create the figure
fig = go.Figure(data=[roc_trace, random_trace], layout=layout)

# Display the figure
fig.show()