In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [2]:
# Load the diabetes dataset from a CSV file in the current working directory.
df = pd.read_csv(filepath_or_buffer="diabetes.csv")

In [3]:
# Preview the first five rows to sanity-check the columns that were loaded.
df.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
# Split the frame positionally: every column except the last one is a
# feature, and the last column (Outcome in the preview) is the target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Report the shape of the full data and of each split, one per line, in the
# order: X, X_train, X_test, y, y_train, y_test.
print(
    *(part.shape for part in (X, X_train, X_test, y, y_train, y_test)),
    sep="\n",
)

(768, 8)
(614, 8)
(154, 8)
(768,)
(614,)
(154,)


In [7]:
from sklearn.metrics import accuracy_score
# Fit a logistic-regression classifier with default settings on the training
# split (fit() returns the estimator itself, so it can be chained).
# NOTE(review): warnings are globally silenced in the first cell, so a
# convergence warning from the default solver settings would not be visible
# here - confirm that the fit actually converged.
lr = LogisticRegression().fit(X_train, y_train)

# Hard class predictions for the held-out rows.
y_pred = lr.predict(X_test)

# Fraction of held-out rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.7467532467532467

In [8]:
# predict_proba returns one column per class; keep only column index 1,
# i.e. the predicted probability of the second class for each test-set row.
y_scores = lr.predict_proba(X_test)[:, 1]

# Display the predicted probability of being diabetic for every test-set patient.
y_scores

array([0.27136387, 0.19305603, 0.11711638, 0.16331373, 0.4649504 ,
       0.42952516, 0.01586582, 0.65068481, 0.54194385, 0.76598415,
       0.25701229, 0.89390041, 0.34024545, 0.29552099, 0.08329845,
       0.38111633, 0.14024914, 0.07633383, 0.86767171, 0.56357457,
       0.20382988, 0.07771863, 0.53836587, 0.09654447, 0.53894104,
       0.88566255, 0.12759142, 0.03080148, 0.25402597, 0.11399796,
       0.91126064, 0.86138325, 0.76811579, 0.83719513, 0.59972109,
       0.68418735, 0.96699349, 0.23978994, 0.50099891, 0.73615478,
       0.07074974, 0.57780797, 0.56862713, 0.32636151, 0.02824046,
       0.50504464, 0.62316226, 0.22742467, 0.34857297, 0.95286801,
       0.05069545, 0.65321194, 0.80430525, 0.2493136 , 0.09636819,
       0.04240272, 0.7686719 , 0.00599499, 0.4104802 , 0.75583697,
       0.73940657, 0.35474564, 0.18442565, 0.20029552, 0.07770669,
       0.63199179, 0.05182499, 0.72901956, 0.03741832, 0.71570159,
       0.67143903, 0.07012597, 0.17484455, 0.11551808, 0.09261

In [9]:
from sklearn.metrics import roc_curve
# Compute the ROC curve from the true test labels and the predicted scores:
# false-positive rates, true-positive rates, and the score thresholds at
# which they were evaluated.
fpr, tpr, thresholds = roc_curve(y_true=y_test, y_score=y_scores)

# Display the thresholds that were evaluated.
thresholds

array([1.97029152, 0.97029152, 0.96699349, 0.95286801, 0.9022175 ,
       0.89390041, 0.77386212, 0.7686719 , 0.76811579, 0.76598415,
       0.73462045, 0.73118503, 0.72901956, 0.7191228 , 0.68418735,
       0.67143903, 0.65321194, 0.65068481, 0.64214055, 0.63199179,
       0.60851907, 0.56862713, 0.54976749, 0.53836587, 0.52642354,
       0.5052242 , 0.50504464, 0.42814745, 0.42100918, 0.40819471,
       0.38111633, 0.3442766 , 0.34024545, 0.32674768, 0.301864  ,
       0.25971239, 0.23978994, 0.23446497, 0.22806733, 0.20382988,
       0.20029552, 0.16331373, 0.15879746, 0.15761045, 0.14923706,
       0.14024914, 0.13913424, 0.12759142, 0.12601526, 0.11711638,
       0.11551808, 0.04240272, 0.04230028, 0.00438207])

In [None]:
import plotly.graph_objects as go

# Build an interactive ROC plot from fpr / tpr / thresholds:
#   trace0 - the ROC curve itself,
#   trace1 - text labels showing the threshold at every nth curve point,
#   trace2 - the diagonal reference line of a random classifier.

# Trace for the ROC curve (true-positive rate against false-positive rate).
trace0 = go.Scatter(
    x=fpr,
    y=tpr,
    mode="lines",
    name="ROC curve",
)

# Only label every nth point to avoid cluttering the plot.
n = 10
# Boolean mask that is True where the point's index is a multiple of n.
indices = np.arange(len(thresholds)) % n == 0

# Text-only trace that annotates the selected points with their threshold.
# NOTE(review): index 0 is always selected, and the first threshold in the
# printed output (1.97...) is larger than any probability, so the label drawn
# at that point is not a usable cut-off - consider skipping it.
trace1 = go.Scatter(
    x=fpr[indices],
    y=tpr[indices],
    mode="text",
    name="Threshold points",
    # The property is `text`; the previous spelling `test` is not a valid
    # Scatter property and made the constructor fail.
    text=[f"Thre={thr:.2f}" for thr in thresholds[indices]],
    textposition="top center",
)

# Diagonal reference line from (0, 0) to (1, 1).
trace2 = go.Scatter(
    x=[0, 1],
    y=[0, 1],
    mode="lines",
    name="Random (Area = 0.5)",
    line=dict(dash="dash"),
)

data = [trace0, trace1, trace2]

# Define the layout with a square (800 x 800) canvas so both axes, which
# share the 0-1 range, are drawn at the same scale.
layout = go.Layout(
    title="Receiver Operating Characteristic",
    xaxis=dict(title="False Positive Rate"),
    yaxis=dict(title="True Positive Rate"),
    autosize=False,
    width=800,
    height=800,
    showlegend=False,
)

# Define the figure and add the traces.
fig = go.Figure(data=data, layout=layout)

# Show the figure.
fig.show()