In [None]:
# Logistic regression converts the output of a linear model into a probability
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import os
import warnings
warnings.filterwarnings('ignore') 

## Create a logistic regression model

In [None]:
# Load the raw churn data (copied from etl_workflow for convenience)
churn_db = pd.read_csv("Resources/BankChurners.csv", low_memory=False)

# Features: the columns at positions 2 through 20, passed through
# pd.get_dummies so non-numeric columns become indicator columns
clean_churn_db = pd.get_dummies(churn_db.iloc[:, 2:21])
x = clean_churn_db

# Target: the first indicator column produced by encoding Attrition_Flag
attrition_dummies = pd.get_dummies(churn_db["Attrition_Flag"])
y = attrition_dummies.iloc[:, 0]

In [None]:
# Hold out a test set. No test_size is given, so train_test_split uses its
# default proportions; random_state=1 makes the shuffle reproducible.
from sklearn.model_selection import train_test_split

split_parts = train_test_split(x, y, random_state=1)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Instantiate the (not yet fitted) logistic regression classifier
from sklearn.linear_model import LogisticRegression

# max_iter is set to 10,000 to give the solver a generous iteration budget
lr_model = LogisticRegression(max_iter=10_000)


In [None]:
# Train the classifier on the training split (features are passed as-is, unscaled)
lr_model.fit(X=X_train, y=y_train)

In [None]:
# Report the model's score on the training split and on the held-out test split
train_score = lr_model.score(X_train, y_train)
print(f"Training Data Score: {train_score}")
test_score = lr_model.score(X_test, y_test)
print(f"Testing Data Score: {test_score}")

## Logistic ROC curve

In [None]:
# Build a StandardScaler and learn its scaling statistics from the training features only
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
# fit() returns the fitted scaler itself, so X_scaler and scaler are the same object
X_scaler = scaler.fit(X_train)

In [None]:
# Apply the already-fitted X_scaler to both splits (it is not re-fit on the test data)
X_train_scaled, X_test_scaled = [
    X_scaler.transform(split) for split in (X_train, X_test)
]

In [None]:
# Predicted probability of the positive class: column 1 of predict_proba,
# i.e. the class stored at lr_model.classes_[1].
# lr_model was fit on the unscaled X_train, so it must be evaluated on the
# unscaled X_test. Passing X_test_scaled would feed it features on a different
# scale from the one its coefficients were learned on, which distorts the
# ROC curve and AUC computed from these probabilities.
y_pred_proba_lr = lr_model.predict_proba(X_test)[:, 1]

In [None]:
# Compute the ROC curve points for the logistic regression model.
# roc_curve is a function in sklearn.metrics; it returns the false positive
# rates, the true positive rates and the thresholds they correspond to.
from sklearn.metrics import roc_curve

roc_points_lr = roc_curve(y_test, y_pred_proba_lr)
fpr_lr, tpr_lr, thresholds_lr = roc_points_lr

In [None]:
# Plot the ROC curve (orange) against the dashed diagonal reference line (navy)
fig, ax = plt.subplots()
ax.plot(fpr_lr, tpr_lr, color="darkorange")
ax.plot([0, 1], [0, 1], color="navy", linestyle="--")
ax.set_xlim([0.0, 1.0])
# Upper y-limit is slightly above 1 so the top of the curve is not clipped
ax.set_ylim([0.0, 1.05])
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC Curve - Logistic Regression")
plt.show()

## Compute AUC

In [None]:
# Area under the ROC curve, computed from the (FPR, TPR) points of the logistic regression model
from sklearn.metrics import auc

roc_auc_lr = auc(x=fpr_lr, y=tpr_lr)
print(f"AUC for the Logistic Regression Model: {roc_auc_lr}")

In [None]:
# Show the confusion matrix for the logistic regression model
from sklearn.metrics import confusion_matrix

# Predict on the unscaled X_test: lr_model was fit on the unscaled X_train, so
# feeding it X_test_scaled would present features on a scale the model was
# never trained on and produce misleading predictions.
y_pred_lr = lr_model.predict(X_test)

# Rows are the true classes, columns are the predicted classes
cm_lr = confusion_matrix(y_test, y_pred_lr)
print(cm_lr)

In [None]:
# Text summary of per-class precision, recall, F1-score and support
# for the logistic regression predictions
from sklearn.metrics import classification_report

report_lr = classification_report(y_test, y_pred_lr)
print(report_lr)