Importing necessary libraries

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

Importing the data into a dataframe

In [None]:
# Read the Telco churn CSV, discard the customerID column, and preview the
# first rows of the resulting frame.
df = pd.read_csv('Telco-Customer-Churn.csv').drop(columns = ['customerID'])
df.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


Understanding the data in the dataframe

In [None]:
# Print a structural summary of the frame: index range, column names,
# non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   gender            7043 non-null   object 
 1   SeniorCitizen     7043 non-null   int64  
 2   Partner           7043 non-null   object 
 3   Dependents        7043 non-null   object 
 4   tenure            7043 non-null   int64  
 5   PhoneService      7043 non-null   object 
 6   MultipleLines     7043 non-null   object 
 7   InternetService   7043 non-null   object 
 8   OnlineSecurity    7043 non-null   object 
 9   OnlineBackup      7043 non-null   object 
 10  DeviceProtection  7043 non-null   object 
 11  TechSupport       7043 non-null   object 
 12  StreamingTV       7043 non-null   object 
 13  StreamingMovies   7043 non-null   object 
 14  Contract          7043 non-null   object 
 15  PaperlessBilling  7043 non-null   object 
 16  PaymentMethod     7043 non-null   object 


Necessary edge-case conversion: drop rows with a blank TotalCharges and convert the column to numeric

In [None]:
# Rows whose TotalCharges is a single-space string cannot be parsed as numbers,
# so they are dropped before the column is converted.
# .copy() makes the filtered frame independent of the one it was sliced from,
# so the column assignment below does not raise pandas' SettingWithCopyWarning.
df = df[df["TotalCharges"] != " "].copy()
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'])

Pre-Processing Functions

In [None]:
def target_transformation(df,target,targetidx):
    """Encode a "No"/other target column as integer 0/1.

    Every value equal to "No" becomes 0; every other value becomes 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the target column. It is not modified.
    target : str
        Label of the target column.
    targetidx : int
        Positional index of the target column; expected to refer to the same
        column as ``target``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` whose target column holds int64 0/1 values.
    """
    # Work on a copy so the caller's frame keeps its raw labels; encoding an
    # already-encoded column would otherwise turn every label into 1.
    result = df.copy()
    # One vectorised comparison replaces the per-row .iloc loop. Anything that
    # is not exactly "No" is treated as the positive class.
    is_positive = result.iloc[:, targetidx] != "No"
    result[result.columns[targetidx]] = is_positive.astype("int64")
    # Guarantees the column named by `target` is numeric on return.
    result[target] = pd.to_numeric(result[target])
    return result

def MinMaxScaling(df,target):
    """Rescale every numeric feature column to the [0, 1] range.

    Each numeric column other than ``target`` is transformed with
    ``(value - min) / (max - min)``. Non-numeric columns and the target
    column are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified.
    target : str
        Label of the column to exclude from scaling.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` in which the scaled columns are float64.
    """
    scaled = df.copy()
    for col in scaled.columns:
        if col == target:
            continue
        series = scaled[col]
        # Decide by "is numeric" rather than "is not object", so text columns
        # stored with a non-object dtype are not fed to the arithmetic below.
        if not pd.api.types.is_numeric_dtype(series):
            continue
        # Cast up front: the result is fractional, so an integer column
        # cannot hold it.
        values = series.astype("float64")
        low = values.min()
        span = values.max() - low
        shifted = values - low
        # A constant column has span 0; map it to 0.0 rather than dividing
        # by zero (missing values stay missing).
        scaled[col] = (shifted / span) if span != 0 else shifted
    return scaled

def OneHotEncoding_objects(df):
    """One-hot encode every text (object/string dtype) column of ``df``.

    Each encoded column is replaced by one indicator column per distinct
    value, named ``<column><value>`` with no separator. The remaining columns
    keep their order and come first, followed by the indicator columns in the
    order of the columns they were derived from.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified.

    Returns
    -------
    pandas.DataFrame
        The encoded frame, or ``df`` itself when it has no text columns.
    """
    text_cols = [
        col for col in df.columns
        if pd.api.types.is_object_dtype(df[col]) or pd.api.types.is_string_dtype(df[col])
    ]
    if not text_cols:
        return df
    # get_dummies builds the indicators positionally, so a non-unique index
    # cannot multiply rows the way an index-based join would. prefix_sep=""
    # reproduces the "<column><value>" naming and also works when the values
    # are not strings.
    return pd.get_dummies(df, columns=text_cols, prefix=text_cols, prefix_sep="")

Processing the Data

In [None]:
# Run the three preprocessing stages one at a time so each intermediate frame
# can be inspected. A copy of `df` is passed in so that `df` keeps the raw
# values and re-running this cell gives the same result, regardless of whether
# the helpers modify their argument. The target's position is looked up by name
# instead of assuming it is the last column.
target_col = 'Churn'
data_labelled = target_transformation(df.copy(), target_col, df.columns.get_loc(target_col))
data_scaled = MinMaxScaling(data_labelled, target_col)
data_processed = OneHotEncoding_objects(data_scaled)
data_processed.head()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,TotalCharges,Churn,genderFemale,genderMale,PartnerNo,PartnerYes,DependentsNo,...,StreamingMoviesYes,ContractMonth-to-month,ContractOne year,ContractTwo year,PaperlessBillingNo,PaperlessBillingYes,PaymentMethodBank transfer (automatic),PaymentMethodCredit card (automatic),PaymentMethodElectronic check,PaymentMethodMailed check
0,0,0.0,0.115423,0.001275,0,1,0,0,1,1,...,0,1,0,0,0,1,0,0,1,0
1,0,0.464789,0.385075,0.215867,0,0,1,1,0,1,...,0,0,1,0,1,0,0,0,0,1
2,0,0.014085,0.354229,0.01031,1,0,1,1,0,1,...,0,1,0,0,0,1,0,0,0,1
3,0,0.619718,0.239303,0.210241,0,0,1,1,0,1,...,0,0,1,0,1,0,1,0,0,0
4,0,0.014085,0.521891,0.01533,1,1,0,1,0,1,...,0,1,0,0,0,1,0,0,1,0


Report Generation for a Model

In [None]:
def Generate_Report(y_pred,y_test):
    """Print accuracy, confusion matrix and classification report.

    Note the argument order: predicted labels first, true labels second.
    All output goes to stdout; nothing is returned.
    """
    accuracy = accuracy_score(y_test,y_pred)
    print("Accuracy Score is: " + str(accuracy))

    matrix = confusion_matrix(y_test,y_pred)
    print("Confusion Matrix: ", matrix, sep = "\n")

    report = classification_report(y_test,y_pred)
    print("Classification Report: ", report, sep = "\n")

Splitting the data into train and test sets for training the models

In [None]:
# Separate the 'Churn' label from the feature columns, then hold out 30% of the
# rows as a test set using a fixed seed.
y = data_processed['Churn']
x_columns = [name for name in data_processed.columns.to_list() if name != 'Churn']
x = data_processed[x_columns]
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size = 0.3,
    shuffle = True,
    random_state = 1324,
)

Training with KNN Model

In [None]:
# k-nearest-neighbours classifier constructed with no arguments (library
# defaults) and fitted on the training split only.
knnModel = KNeighborsClassifier()
knnModel.fit(x_train,y_train)

Checking metrics of KNN Model Trained

In [None]:
# Score the KNN model on the held-out test split
# (Generate_Report takes predictions first, true labels second).
Generate_Report(knnModel.predict(x_test),y_test)

Accuracy Score is: 0.7516587677725118
Confusion Matrix: 
[[1293  256]
 [ 268  293]]
Classification Report: 
              precision    recall  f1-score   support

           0       0.83      0.83      0.83      1549
           1       0.53      0.52      0.53       561

    accuracy                           0.75      2110
   macro avg       0.68      0.68      0.68      2110
weighted avg       0.75      0.75      0.75      2110



Training with Logistic Regression Model

In [None]:
# Logistic regression constructed with no arguments (library defaults) and
# fitted on the training split only.
logregModel = LogisticRegression()
logregModel.fit(x_train,y_train)

Checking metrics of Logistic Regression Model Trained

In [None]:
# Score the logistic regression model on the held-out test split
# (Generate_Report takes predictions first, true labels second).
Generate_Report(logregModel.predict(x_test),y_test)

Accuracy Score is: 0.8014218009478673
Confusion Matrix: 
[[1391  158]
 [ 261  300]]
Classification Report: 
              precision    recall  f1-score   support

           0       0.84      0.90      0.87      1549
           1       0.66      0.53      0.59       561

    accuracy                           0.80      2110
   macro avg       0.75      0.72      0.73      2110
weighted avg       0.79      0.80      0.79      2110



Training with Naive Bayes Model

In [None]:
# Gaussian naive Bayes constructed with no arguments (library defaults) and
# fitted on the training split only.
gnbModel = GaussianNB()
gnbModel.fit(x_train,y_train)

Checking metrics of Naive Bayes Model Trained

In [None]:
# Score the naive Bayes model on the held-out test split
# (Generate_Report takes predictions first, true labels second).
Generate_Report(gnbModel.predict(x_test),y_test)

Accuracy Score is: 0.6971563981042654
Confusion Matrix: 
[[1007  542]
 [  97  464]]
Classification Report: 
              precision    recall  f1-score   support

           0       0.91      0.65      0.76      1549
           1       0.46      0.83      0.59       561

    accuracy                           0.70      2110
   macro avg       0.69      0.74      0.68      2110
weighted avg       0.79      0.70      0.71      2110



Training with Decision Tree Model

In [None]:
# Decision tree with otherwise default hyperparameters, fitted on the training
# split only. random_state is pinned because scikit-learn's tree builder
# permutes the features randomly at each split, so an unseeded tree (and its
# reported metrics) can differ from run to run. The seed value is the same one
# used for the train/test split.
dtModel = DecisionTreeClassifier(random_state = 1324)
dtModel.fit(x_train,y_train)

Checking metrics of Decision Tree Model Trained

In [None]:
# Score the decision tree model on the held-out test split
# (Generate_Report takes predictions first, true labels second).
Generate_Report(dtModel.predict(x_test),y_test)

Accuracy Score is: 0.7355450236966825
Confusion Matrix: 
[[1272  277]
 [ 281  280]]
Classification Report: 
              precision    recall  f1-score   support

           0       0.82      0.82      0.82      1549
           1       0.50      0.50      0.50       561

    accuracy                           0.74      2110
   macro avg       0.66      0.66      0.66      2110
weighted avg       0.73      0.74      0.74      2110



Training with SVM Model

In [None]:
# Support vector classifier constructed with no arguments (library defaults)
# and fitted on the training split only.
svcModel = SVC()
svcModel.fit(x_train,y_train)

Checking metrics of SVM Model Trained

In [None]:
# Score the SVM model on the held-out test split
# (Generate_Report takes predictions first, true labels second).
Generate_Report(svcModel.predict(x_test),y_test)

Accuracy Score is: 0.7919431279620853
Confusion Matrix: 
[[1399  150]
 [ 289  272]]
Classification Report: 
              precision    recall  f1-score   support

           0       0.83      0.90      0.86      1549
           1       0.64      0.48      0.55       561

    accuracy                           0.79      2110
   macro avg       0.74      0.69      0.71      2110
weighted avg       0.78      0.79      0.78      2110



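Conclusion:
The Logistic Regression model gave the highest test-set accuracy of the five models, with a score of 0.8014218009478673. Note that the test set is imbalanced (1549 non-churn vs. 561 churn customers), so accuracy alone is a limited measure: the Naive Bayes model had the lowest accuracy but the highest recall on the churn class (0.83).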