# Predicting Churning Customers

In [212]:
import pandas as pd
import numpy as np
from collections import Counter


### Exploratory Data Analysis

In [213]:
data = pd.read_csv('Resources/Client_info.csv')
df.head()

Unnamed: 0,CLIENTNUM,Attrition_Flag,Customer_Age,Gender,Dependent_count,Education_Level,Marital_Status,Income_Category,Card_Category,Months_on_book
0,768805383,Existing Customer,45,M,3,High School,Married,$60K - $80K,Blue,39
1,818770008,Existing Customer,49,F,5,Graduate,Single,Less than $40K,Blue,44
2,713982108,Existing Customer,51,M,3,Graduate,Married,$80K - $120K,Blue,36
3,769911858,Existing Customer,40,F,4,High School,Unknown,Less than $40K,Blue,34
4,709106358,Existing Customer,40,M,3,Uneducated,Married,$60K - $80K,Blue,21


In [214]:
data.loc[data["Attrition_Flag"] == "Existing Customer", ["Attrition_Flag"]] =0
data.loc[data["Attrition_Flag"] == "Attrited Customer", ["Attrition_Flag"]] =1
data["Attrition_Flag"] = data["Attrition_Flag"].astype(int)


 ## Separate the Features (X) from the Target (y)

In [215]:
y = df["Attrition_Flag"]
X = df.drop(columns="Attrition_Flag")

In [216]:
X = pd.get_dummies(X)
print (X)

       CLIENTNUM  Customer_Age  Dependent_count  Months_on_book  Gender_F  \
0      768805383            45                3              39         0   
1      818770008            49                5              44         1   
2      713982108            51                3              36         0   
3      769911858            40                4              34         1   
4      709106358            40                3              21         0   
...          ...           ...              ...             ...       ...   
10122  772366833            50                2              40         0   
10123  710638233            41                2              25         0   
10124  716506083            44                1              36         1   
10125  717406983            30                2              36         0   
10126  714337233            43                2              25         1   

       Gender_M  Education_Level_College  Education_Level_Doctorate  \
0   

 ## Split our data into training and testing

In [236]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,
    y, random_state=1, stratify=y)

In [237]:
# implement random oversampling
from imblearn.over_sampling import RandomOverSampler
ros = RandomOverSampler(random_state=1)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

Counter(y_resampled)

Counter({'Existing Customer': 6375, 'Attrited Customer': 6375})

 ## Create a Logistic Regression Model

In [238]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(solver='lbfgs', random_state=1)
classifier.fit(X_train, y_train)

LogisticRegression(random_state=1)

In [239]:
# Display the confusion matrix
from sklearn.metrics import confusion_matrix

y_pred =  classifier.predict(X_test)
confusion_matrix(y_test, y_pred)

array([[   0,  407],
       [   0, 2125]], dtype=int64)

In [240]:
from sklearn.metrics import balanced_accuracy_score

balanced_accuracy_score(y_test, y_pred)

0.5

 ## Make predictions

In [241]:
# Predict outcomes for test data set
predictions = classifier.predict(X_test)
pd.DataFrame({"Prediction": predictions, "Actual": y_test})

Unnamed: 0,Prediction,Actual
7260,Existing Customer,Existing Customer
9088,Existing Customer,Existing Customer
330,Existing Customer,Existing Customer
8950,Existing Customer,Existing Customer
9203,Existing Customer,Existing Customer
...,...,...
223,Existing Customer,Existing Customer
8159,Existing Customer,Existing Customer
8453,Existing Customer,Existing Customer
5552,Existing Customer,Existing Customer


In [242]:
from imblearn.metrics import classification_report_imbalanced
confusion_matrix(y_test, y_pred)

print(classification_report_imbalanced(y_test, y_pred))

                         pre       rec       spe        f1       geo       iba       sup

Attrited Customer       0.00      0.00      1.00      0.00      0.00      0.00       407
Existing Customer       0.84      1.00      0.00      0.91      0.00      0.00      2125

      avg / total       0.70      0.84      0.16      0.77      0.00      0.00      2532



  _warn_prf(average, modifier, msg_start, len(result))


In [243]:
# Calculating the confusion matrix.
cm = confusion_matrix(y_test, predictions)

# Create a DataFrame from the confusion matrix.
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"])

cm_df

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,0,407
Actual 1,0,2125


In [244]:
# Displaying results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {balanced_accuracy_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,0,407
Actual 1,0,2125


Accuracy Score : <function balanced_accuracy_score at 0x0000026ED6848558>
Classification Report
                   precision    recall  f1-score   support

Attrited Customer       0.00      0.00      0.00       407
Existing Customer       0.84      1.00      0.91      2125

         accuracy                           0.84      2532
        macro avg       0.42      0.50      0.46      2532
     weighted avg       0.70      0.84      0.77      2532



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
