In [2]:
# Connect the colab notebook with drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.linear_model import LogisticRegressionCV
from sklearn import metrics

In [4]:
dataset =  pd.read_csv("/content/drive/MyDrive/Customer_Churn/Cleaned_data_24_10_22.csv")

In [5]:
df = dataset[['Geography', 'Gender','Age','Tenure','Balance','NumOfProducts','HasCrCard','IsActiveMember','Exited']].copy()


In [6]:
df.head(5)

Unnamed: 0,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,Exited
0,0,0,42,2,0.0,1,1,1,1
1,2,0,41,1,83807.86,1,0,1,0
2,0,0,42,8,159660.8,3,1,0,1
3,0,0,39,1,0.0,2,0,0,0
4,2,0,43,2,125510.82,1,1,1,0


In [7]:
X = dataset.drop(['Exited'],axis=1)
y = dataset['Exited']

In [8]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

In [9]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1)

In [10]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()

In [11]:
from scipy.stats import loguniform
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import RandomizedSearchCV

# define evaluation
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
# define search space
space = dict()
space['solver'] = ['newton-cg', 'lbfgs', 'liblinear']
space['penalty'] = ['none', 'l1', 'l2', 'elasticnet']
space['C'] = loguniform(1e-5, 100)
# define search
lr_model = RandomizedSearchCV(model, space, n_iter=500, scoring='accuracy', n_jobs=-1, cv=cv, random_state=1)
# execute search
result = lr_model.fit(X_train,y_train)

7080 fits failed out of a total of 15000.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1590 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.8/dist-packages/sklearn/linear_model/_logistic.py", line 1461, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/usr/local/lib/python3.8/dist-packages/sklearn/linear_model/_logistic.py", line 447, in _check_solver
    raise ValueError(
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got elasticnet penalty.

-------------------------

In [12]:
# summarize result
print('Best Score: %s' % result.best_score_)
print('Best Hyperparameters: %s' % result.best_params_)

Best Score: 0.7779729990193401
Best Hyperparameters: {'C': 0.002589146167630413, 'penalty': 'l2', 'solver': 'newton-cg'}


In [13]:
# Predict values using the training data
lr_cv_predict_train = lr_model.predict(X_train)

print("Accuracy against training data: {0:.4f}".format(metrics.accuracy_score(y_train, lr_cv_predict_train)))
print()

Accuracy against training data: 0.7785



In [14]:
# Predict values using the test data
lr_cv_predict_test = lr_model.predict(X_test)

print("Accuracy against test data: {0:.4f}".format(metrics.accuracy_score(y_test, lr_cv_predict_test)))
print()

Accuracy against test data: 0.7730



In [15]:
print("Confusion Matrix")
print(metrics.confusion_matrix(y_test, lr_cv_predict_test))
print()

Confusion Matrix
[[1203  358]
 [ 354 1222]]



In [16]:
print("Classification Report")
print(metrics.classification_report(y_test, lr_cv_predict_test))
print()

Classification Report
              precision    recall  f1-score   support

           0       0.77      0.77      0.77      1561
           1       0.77      0.78      0.77      1576

    accuracy                           0.77      3137
   macro avg       0.77      0.77      0.77      3137
weighted avg       0.77      0.77      0.77      3137


