In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## <span style = 'background :lightblue'>STEP 1 : Import Transformed Dataset</span>

In [2]:
# CSV files holding the transformed train / test datasets; both paths are
# relative to the notebook's working directory.
filepath_train, filepath_test = (
    r"S2b_Part2_FE_Pipeline_downsampling_Train_DS.csv",
    r"S2b_Part2_FE_Pipeline_downsampling_Test_DS.csv",
)

In [3]:
# Both files are read with the same options: the first CSV column becomes the
# index and the first row supplies the column names.
_csv_read_kwargs = dict(index_col=0, header=0)
d_train = pd.read_csv(filepath_train, **_csv_read_kwargs)
d_test = pd.read_csv(filepath_test, **_csv_read_kwargs)

In [4]:
# Preview the first three rows of the training frame.
d_train.head(3)

Unnamed: 0,MonthlyCharges,TotalCharges,gender_Female,gender_Male,SeniorCitizen_NO,SeniorCitizen_Yes,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,...,Contract_Month-to-month,Contract_One year,Contract_Two year,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,Churn
0,-0.338701,0.049061,0,1,1,0,0,1,1,0,...,0,0,1,1,0,1,0,0,0,0
1,1.030607,0.139714,1,0,1,0,1,0,1,0,...,1,0,0,0,1,1,0,0,0,0
2,0.147022,-0.956056,1,0,1,0,1,0,1,0,...,1,0,0,1,0,0,0,1,0,1


In [5]:
# Preview the first three rows of the test frame.
d_test.head(3)

Unnamed: 0,MonthlyCharges,TotalCharges,gender_Female,gender_Male,SeniorCitizen_NO,SeniorCitizen_Yes,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,...,Contract_Month-to-month,Contract_One year,Contract_Two year,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,Churn
0,-1.457688,-0.536472,0,1,1,0,0,1,1,0,...,0,0,1,1,0,0,0,0,1,0
1,-1.480896,-0.374264,0,1,1,0,1,0,1,0,...,0,0,1,1,0,1,0,0,0,0
2,0.142049,0.81081,0,1,1,0,1,0,1,0,...,0,1,0,1,0,1,0,0,0,0


## <span style = 'background :lightblue'>STEP 2 : Separate Features and Target (Train / Test)</span>

In [6]:
# For each frame, the feature matrix is every column except the target and
# the target is the 'Churn' column itself.
X_train, y_train = d_train.drop(columns=['Churn']), d_train['Churn']
X_test, y_test = d_test.drop(columns=['Churn']), d_test['Churn']

In [7]:
# Sanity check: training features and target must have the same number of rows.
X_train.shape , y_train.shape

((5634, 51), (5634,))

In [8]:
# Sanity check: test features and target must have the same number of rows.
X_test.shape , y_test.shape

((1409, 51), (1409,))

In [9]:
# Class counts of the training target before any resampling.
y_train.value_counts()

0    4149
1    1485
Name: Churn, dtype: int64

In [10]:
# Class counts of the test target before any resampling.
y_test.value_counts()

0    1025
1     384
Name: Churn, dtype: int64

## <span style = 'background :lightblue'>STEP 3 : Down-sampling of Train and Test DataSet</span>

In [11]:
import warnings
# Suppresses every warning issued through the `warnings` module from here on.
# NOTE(review): this may also hide convergence / deprecation warnings from the
# models trained below — consider a narrower filter.
warnings.filterwarnings('ignore')

In [12]:
from imblearn.under_sampling import EditedNearestNeighbours

In [13]:
# ENN: EditedNearestNeighbours under-sampling helper.
def ENN(X_DF, y_ser, samp_strategy):
    """Under-sample a dataset with imbalanced-learn's EditedNearestNeighbours.

    Parameters
    ----------
    X_DF
        Feature matrix passed to ``fit_resample``.
    y_ser
        Target labels passed to ``fit_resample``.
    samp_strategy
        Forwarded unchanged as the sampler's ``sampling_strategy`` argument.

    Returns
    -------
    tuple
        ``(X_resampled, y_resampled)`` exactly as produced by ``fit_resample``.
    """
    sampler = EditedNearestNeighbours(sampling_strategy=samp_strategy)
    resampled_X, resampled_y = sampler.fit_resample(X_DF, y_ser)
    return resampled_X, resampled_y

In [14]:
# Apply ENN to the training split, with "majority" as the sampling strategy.
X_train_res, y_train_res = ENN(X_train, y_train, "majority")

In [15]:
# Apply the same ENN resampling to the test split.
# NOTE(review): because the test set is resampled too, the test metrics are
# computed on an ENN-cleaned set rather than on the original test
# distribution — confirm this is intended.
X_test_res, y_test_res = ENN(X_test, y_test, "majority")

In [16]:
# Shapes of the training features / target after ENN resampling.
X_train_res.shape , y_train_res.shape

((3983, 51), (3983,))

In [17]:
# Shapes of the test features / target after ENN resampling.
X_test_res.shape , y_test_res.shape

((957, 51), (957,))

In [18]:
# Class counts of the training target after ENN resampling.
y_train_res.value_counts()

0    2498
1    1485
Name: Churn, dtype: int64

In [19]:
# Class counts of the test target after ENN resampling.
y_test_res.value_counts()

0    573
1    384
Name: Churn, dtype: int64

## <span style = 'background :lightblue'>STEP 4 : Training models</span>

In [20]:
from sklearn.metrics import classification_report , accuracy_score

In [21]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [22]:
# Candidate classifiers to compare. The tree-based estimators use randomness
# while fitting, so their seed is pinned to keep the classification reports
# reproducible across kernel restarts.
models = [
    LogisticRegression(max_iter=1000),
    SVC(),
    GaussianNB(),
    DecisionTreeClassifier(random_state=42),
    RandomForestClassifier(random_state=42),
]

In [23]:
def ModelTraining(ML_models, X_train, y_train, X_test, y_test):
    """Fit every model and print train / test classification reports.

    The function works only on the data passed in as arguments; it does not
    read the notebook-level ``*_res`` variables.

    Parameters
    ----------
    ML_models
        Iterable of estimator objects exposing ``fit`` and ``predict``.
        Each one is fitted in place.
    X_train, X_test
        Feature frames; their ``.values`` attribute is what the models
        are fitted on and asked to predict for.
    y_train, y_test
        True labels matching ``X_train`` / ``X_test``.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    # The underlying arrays do not change between models, so extract them once.
    train_features = X_train.values
    test_features = X_test.values

    for model in ML_models:
        model.fit(train_features, y_train)
        y_hat_train = model.predict(train_features)
        y_hat_test = model.predict(test_features)

        print(f"******************* \33[1m{str(model)}\33[0m *******************\n")

        print("\33[1mTraining classification Report :\33[0m \n")
        print(classification_report(y_train, y_hat_train, zero_division=1))

        print()

        print("\33[1mTesting classification Report :\33[0m \n")
        print(classification_report(y_test, y_hat_test, zero_division=1))

        print()

In [24]:
# Fit and report every candidate model on the ENN-resampled train / test sets.
ModelTraining(models, X_train_res, y_train_res, X_test_res, y_test_res)

******************* [1mLogisticRegression(max_iter=1000)[0m *******************

[1mTraining classification Report :[0m 

              precision    recall  f1-score   support

           0       0.89      0.91      0.90      2498
           1       0.85      0.82      0.83      1485

    accuracy                           0.88      3983
   macro avg       0.87      0.87      0.87      3983
weighted avg       0.88      0.88      0.88      3983


[1mTesting classification Report :[0m 

              precision    recall  f1-score   support

           0       0.87      0.90      0.89       573
           1       0.84      0.80      0.82       384

    accuracy                           0.86       957
   macro avg       0.86      0.85      0.85       957
weighted avg       0.86      0.86      0.86       957


******************* [1mSVC()[0m *******************

[1mTraining classification Report :[0m 

              precision    recall  f1-score   support

           0       0.90