In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## <span style = 'background :lightblue'>STEP 1 : Import Transformed Dataset</span>

In [2]:
# Relative paths of the transformed train / test CSV files loaded in the next cell.
filepath_train = "S2a_Part2_FE_Pipeline_Train_DS.csv"
filepath_test = "S2a_Part2_FE_Pipeline_Test_DS.csv"

In [3]:
# Read both splits: the first row supplies the column names and the first
# column is used as the DataFrame index.
d_train = pd.read_csv(filepath_train, header=0, index_col=0)
d_test = pd.read_csv(filepath_test, header=0, index_col=0)

In [4]:
d_train.head(3)

Unnamed: 0,MonthlyCharges,TotalCharges,gender_Female,gender_Male,SeniorCitizen_NO,SeniorCitizen_Yes,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,...,Contract_Month-to-month,Contract_One year,Contract_Two year,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,Churn
0,-0.338701,0.049061,0,1,1,0,0,1,1,0,...,0,0,1,1,0,1,0,0,0,0
1,1.030607,0.139714,1,0,1,0,1,0,1,0,...,1,0,0,0,1,1,0,0,0,0
2,0.147022,-0.956056,1,0,1,0,1,0,1,0,...,1,0,0,1,0,0,0,1,0,1


In [5]:
d_test.head(3)

Unnamed: 0,MonthlyCharges,TotalCharges,gender_Female,gender_Male,SeniorCitizen_NO,SeniorCitizen_Yes,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,...,Contract_Month-to-month,Contract_One year,Contract_Two year,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,Churn
0,-1.457688,-0.536472,0,1,1,0,0,1,1,0,...,0,0,1,1,0,0,0,0,1,0
1,-1.480896,-0.374264,0,1,1,0,1,0,1,0,...,0,0,1,1,0,1,0,0,0,0
2,0.142049,0.81081,0,1,1,0,1,0,1,0,...,0,1,0,1,0,1,0,0,0,0


## <span style = 'background :lightblue'>STEP 2 : Separate Features and Target (Train / Test)</span>

In [6]:
# For each split, 'Churn' is the target; every other column is a feature.
y_train = d_train['Churn']
X_train = d_train.drop(columns=['Churn'])

y_test = d_test['Churn']
X_test = d_test.drop(columns=['Churn'])

In [7]:
X_train.shape , y_train.shape

((5634, 51), (5634,))

In [8]:
X_test.shape , y_test.shape

((1409, 51), (1409,))

In [9]:
y_train.value_counts()

0    4149
1    1485
Name: Churn, dtype: int64

In [10]:
y_test.value_counts()

0    1025
1     384
Name: Churn, dtype: int64

## <span style = 'background :lightblue'>STEP 3 : Upsampling of Train and Test DataSet</span>

In [11]:
import warnings
warnings.filterwarnings('ignore')

In [12]:
from imblearn.over_sampling import KMeansSMOTE 

In [13]:
def KMeans_SMOTE(X_DF , y_ser ,samp_strategy, random_state = 100):
    """Oversample a dataset with imblearn's ``KMeansSMOTE``.

    Parameters
    ----------
    X_DF : feature table handed to ``fit_resample``.
    y_ser : target labels handed to ``fit_resample`` alongside ``X_DF``.
    samp_strategy : forwarded unchanged as ``sampling_strategy``.
    random_state : seed forwarded to ``KMeansSMOTE``. Defaults to 100, the
        value that was previously hard-coded, so existing calls behave the same.

    Returns
    -------
    tuple
        ``(X_resampled, y_resampled)`` exactly as returned by ``fit_resample``.
    """
    # Configure the sampler with the requested ratio and a fixed seed.
    sampler = KMeansSMOTE(random_state = random_state, sampling_strategy = samp_strategy)

    # Fit on the given data and generate the resampled dataset in one step.
    X_resampled, y_resampled = sampler.fit_resample(X_DF, y_ser)

    return (X_resampled, y_resampled)

In [14]:
# Values passed as `sampling_strategy` for each split.
TRAIN_SAMPLING_RATIO = 0.928
TEST_SAMPLING_RATIO = 0.854

# NOTE(review): the test split is oversampled as well, so the test reports
# produced later are computed partly on synthetic samples. Confirm this is intended.
X_train_res, y_train_res = KMeans_SMOTE(X_train, y_train, TRAIN_SAMPLING_RATIO)
X_test_res, y_test_res = KMeans_SMOTE(X_test, y_test, TEST_SAMPLING_RATIO)

In [15]:
X_train_res.shape

(7999, 51)

In [16]:
y_train_res.shape

(7999,)

In [17]:
y_train_res.value_counts()

0    4149
1    3850
Name: Churn, dtype: int64

In [18]:
X_test_res.shape

(1900, 51)

In [19]:
y_test_res.shape

(1900,)

In [20]:
y_test_res.value_counts()

0    1025
1     875
Name: Churn, dtype: int64

## <span style = 'background :lightblue'>STEP 4 : Training models</span>

In [21]:
from sklearn.metrics import classification_report

In [22]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [23]:
# Candidate classifiers to compare. The tree-based estimators are seeded so
# that re-running the notebook from a fresh kernel reproduces the same reports.
models = [
    LogisticRegression(max_iter=1000),
    SVC(),
    GaussianNB(),
    DecisionTreeClassifier(random_state=100),
    RandomForestClassifier(random_state=100),
]

In [24]:
def ModelTraining(ML_models , X_train ,y_train ,X_test,y_test):
    
    for model in ML_models :

        model.fit(X_train_res.values, y_train_res)
        y_hat_train = model.predict(X_train_res.values)
        y_hat_test = model.predict(X_test_res.values)

        print(f"******************* \33[1m{str(model)}\33[0m *******************\n")

        print("\33[1mTraining classification Report :\33[0m \n")
        print(classification_report(y_train_res,y_hat_train ,zero_division = 1))

        print() 

        print("\33[1mTesting classification Report :\33[0m \n")
        print(classification_report(y_test_res,y_hat_test , zero_division = 1))

        print()
        

In [25]:
ModelTraining(ML_models = models ,  X_train = X_train ,y_train = y_train ,
              X_test = X_test, y_test = y_test)

******************* [1mLogisticRegression(max_iter=1000)[0m *******************

[1mTraining classification Report :[0m 

              precision    recall  f1-score   support

           0       0.86      0.88      0.87      4149
           1       0.87      0.84      0.86      3850

    accuracy                           0.86      7999
   macro avg       0.86      0.86      0.86      7999
weighted avg       0.86      0.86      0.86      7999


[1mTesting classification Report :[0m 

              precision    recall  f1-score   support

           0       0.84      0.86      0.85      1025
           1       0.83      0.81      0.82       875

    accuracy                           0.84      1900
   macro avg       0.83      0.83      0.83      1900
weighted avg       0.84      0.84      0.84      1900


******************* [1mSVC()[0m *******************

[1mTraining classification Report :[0m 

              precision    recall  f1-score   support

           0       0.85