# ROC Curve and AUC

In [0]:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from matplotlib import pyplot


In [0]:
# build a synthetic binary classification problem: 1000 samples, 20 features
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    random_state=1,  # fixed seed so the generated data is reproducible
)


In [0]:
# sanity check: show the dimensions of the feature matrix X
print(X.shape)

In [0]:
# sanity check: show the dimensions of the label array y
print(y.shape)

In [0]:
# hold out half of the samples for evaluation (seeded so the split is repeatable)
trainX, testX, trainy, testy = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=2,
)


In [0]:
# no-skill baseline: assign the same constant score (0) to every test sample,
# so the baseline carries no information about the true labels
ns_probs = [0] * len(testy)

In [0]:
#ns_probs

In [0]:
# train a logistic regression classifier (default settings) on the training split
model = LogisticRegression()
model.fit(X=trainX, y=trainy)


In [0]:
# per-class probability estimates for every held-out sample
lr_probs = model.predict_proba(X=testX)

In [0]:
#print(lr_probs)

In [0]:
# inspect the dimensions of the predicted-probability array
lr_probs.shape

In [0]:
# keep probabilities for the positive outcome only (column index 1).
# This cell overwrites lr_probs with a slice of itself, so the ndim guard
# makes it safe to re-run: once lr_probs is already 1-D, applying [:, 1]
# again would raise IndexError.
if lr_probs.ndim == 2:
    lr_probs = lr_probs[:, 1]


In [0]:
# area under the ROC curve for the constant baseline and for the fitted model
ns_auc = roc_auc_score(y_true=testy, y_score=ns_probs)
lr_auc = roc_auc_score(y_true=testy, y_score=lr_probs)


In [0]:
# report both AUC values rounded to three decimal places
print(f'No Skill: ROC AUC={ns_auc:.3f}')
print(f'Logistic: ROC AUC={lr_auc:.3f}')


In [0]:
# ROC points (false positive rate, true positive rate) for both predictors;
# the third return value (the thresholds) is not used here
ns_fpr, ns_tpr, _ = roc_curve(y_true=testy, y_score=ns_probs)
lr_fpr, lr_tpr, _ = roc_curve(y_true=testy, y_score=lr_probs)


In [0]:
# draw both ROC curves on the current axes
axes = pyplot.gca()
axes.plot(ns_fpr, ns_tpr, linestyle='--', label='No Skill')
axes.plot(lr_fpr, lr_tpr, marker='.', label='Logistic')

# label the axes with the two rates being compared
axes.set_xlabel('False Positive Rate')
axes.set_ylabel('True Positive Rate')

# legend built from the label= values given above
axes.legend()

# render the figure
pyplot.show()