In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## <span style = 'background :lightblue'>STEP 1 : Import Transformed Dataset</span>

In [2]:
# Relative paths of the transformed train / test CSV files.
filepath_train, filepath_test = (
    "Part2_test_imbalance_Train_DS.csv",
    "Part2_test_imbalance_Test_DS.csv",
)

In [3]:
# Both files are read the same way: row 0 is the header and column 0
# becomes the DataFrame index.
d_train, d_test = (
    pd.read_csv(csv_path, header=0, index_col=0)
    for csv_path in (filepath_train, filepath_test)
)

In [4]:
# Preview the first three rows of the training frame.
d_train.head(n=3)

Unnamed: 0,MonthlyCharges,TotalCharges,gender_Female,gender_Male,SeniorCitizen_NO,SeniorCitizen_Yes,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,...,Contract_Month-to-month,Contract_One year,Contract_Two year,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,Churn
0,-0.338701,0.049061,0,1,1,0,0,1,1,0,...,0,0,1,1,0,1,0,0,0,0
1,1.030607,0.139714,1,0,1,0,1,0,1,0,...,1,0,0,0,1,1,0,0,0,0
2,0.147022,-0.956056,1,0,1,0,1,0,1,0,...,1,0,0,1,0,0,0,1,0,1


In [5]:
# Preview the first three rows of the test frame.
d_test.head(n=3)

Unnamed: 0,MonthlyCharges,TotalCharges,gender_Female,gender_Male,SeniorCitizen_NO,SeniorCitizen_Yes,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,...,Contract_Month-to-month,Contract_One year,Contract_Two year,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,Churn
0,-1.457688,-0.536472,0,1,1,0,0,1,1,0,...,0,0,1,1,0,0,0,0,1,0
1,-1.480896,-0.374264,0,1,1,0,1,0,1,0,...,0,0,1,1,0,1,0,0,0,0
2,0.142049,0.81081,0,1,1,0,1,0,1,0,...,0,1,0,1,0,1,0,0,0,0


## <span style = 'background :lightblue'>STEP 2 : Train - Test Split (separate features X and target y)</span>

In [6]:
# Features are every column except "Churn"; the "Churn" column is the target.
X_train, y_train = d_train.drop(columns=['Churn']), d_train['Churn']
X_test, y_test = d_test.drop(columns=['Churn']), d_test['Churn']

In [7]:
# Shapes of the training features and training target, as a pair.
(X_train.shape, y_train.shape)

((5634, 51), (5634,))

In [8]:
# Shapes of the test features and test target, as a pair.
(X_test.shape, y_test.shape)

((1409, 51), (1409,))

In [9]:
# Number of test rows per target value, most frequent first.
y_test.value_counts(sort=True, ascending=False)

0    1025
1     384
Name: Churn, dtype: int64

## <span style = 'background :lightblue'>STEP 3 : Training models</span>

In [10]:
import warnings
warnings.filterwarnings('ignore')

In [11]:
from sklearn.metrics import classification_report

In [12]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [13]:
# Seed the tree-based estimators so that a fresh "Restart & Run All"
# reproduces the same reports; the remaining estimators keep their defaults.
RANDOM_STATE = 42

models = [
    LogisticRegression(max_iter=1000),
    SVC(),
    GaussianNB(),
    DecisionTreeClassifier(random_state=RANDOM_STATE),
    RandomForestClassifier(random_state=RANDOM_STATE),
]

In [14]:
def ModelTraining(ML_models , X_train ,y_train ,X_test,y_test, zero_division = 1):
    """Fit each model and print its training and testing classification reports.

    Parameters
    ----------
    ML_models : iterable
        Estimator objects exposing ``fit(X, y)`` and ``predict(X)``.
        Each one is fitted in place on the training data.
    X_train, X_test : DataFrame or array-like
        Feature matrices. They are converted to NumPy arrays once, before
        the loop, so plain arrays are accepted as well as DataFrames.
    y_train, y_test : array-like
        True labels matching ``X_train`` and ``X_test``.
    zero_division : optional
        Forwarded unchanged to ``classification_report`` for both reports.
        Defaults to 1, the value that was previously hard-coded.

    Returns
    -------
    None
        The reports are printed, nothing is returned.
    """
    # The array conversion does not depend on the model, so do it once
    # instead of on every fit / predict call.
    X_train_arr = np.asarray(X_train)
    X_test_arr = np.asarray(X_test)

    for model in ML_models:

        model.fit(X_train_arr, y_train)
        y_hat_train = model.predict(X_train_arr)
        y_hat_test = model.predict(X_test_arr)

        # "\33[1m" / "\33[0m" are ANSI escapes that switch bold on and off.
        print(f"******************* \33[1m{str(model)}\33[0m *******************\n")

        print("\33[1mTraining classification Report :\33[0m \n")
        print(classification_report(y_train, y_hat_train, zero_division=zero_division))

        print()

        print("\33[1mTesting classification Report :\33[0m \n")
        print(classification_report(y_test, y_hat_test, zero_division=zero_division))

        print()

In [15]:
# Fit every candidate model and print its train / test reports.
ModelTraining(
    ML_models=models,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

******************* [1mLogisticRegression(max_iter=1000)[0m *******************

[1mTraining classification Report :[0m 

              precision    recall  f1-score   support

           0       0.84      0.91      0.88      4149
           1       0.68      0.52      0.59      1485

    accuracy                           0.81      5634
   macro avg       0.76      0.72      0.73      5634
weighted avg       0.80      0.81      0.80      5634


[1mTesting classification Report :[0m 

              precision    recall  f1-score   support

           0       0.83      0.90      0.86      1025
           1       0.64      0.50      0.56       384

    accuracy                           0.79      1409
   macro avg       0.74      0.70      0.71      1409
weighted avg       0.78      0.79      0.78      1409


******************* [1mSVC()[0m *******************

[1mTraining classification Report :[0m 

              precision    recall  f1-score   support

           0       0.85