dataset link: https://www.kaggle.com/datasets/blastchar/telco-customer-churn?resource=download

# Imports

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import SGDClassifier
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

# Reading the dataset

In [None]:
# Load the Telco customer-churn table from a CSV file located relative to the
# notebook's working directory.
telcoDataset = pd.read_csv('Telco-Customer-Churn.csv')

In [None]:
# Preview the first five rows (five is the default row count for head()).
telcoDataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [None]:
# Summarize the raw frame: column names, non-null counts and dtypes.
# Columns reported as `object` hold strings and need encoding before modelling.
telcoDataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   gender            7043 non-null   object 
 1   SeniorCitizen     7043 non-null   int64  
 2   Partner           7043 non-null   object 
 3   Dependents        7043 non-null   object 
 4   tenure            7043 non-null   int64  
 5   PhoneService      7043 non-null   object 
 6   MultipleLines     7043 non-null   object 
 7   InternetService   7043 non-null   object 
 8   OnlineSecurity    7043 non-null   object 
 9   OnlineBackup      7043 non-null   object 
 10  DeviceProtection  7043 non-null   object 
 11  TechSupport       7043 non-null   object 
 12  StreamingTV       7043 non-null   object 
 13  StreamingMovies   7043 non-null   object 
 14  Contract          7043 non-null   object 
 15  PaperlessBilling  7043 non-null   object 
 16  PaymentMethod     7043 non-null   object 


# Encoding String Values To Integers

In [None]:
# Categorical string columns that are mapped to integer codes.
# 'Churn' (the target) is kept last so that `encoder` ends up fitted on it,
# exactly as in the original cell.
categorical_columns = [
    'gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
    'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
    'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract',
    'PaperlessBilling', 'PaymentMethod', 'Churn',
]

encoder = LabelEncoder()
for column in categorical_columns:
    # fit_transform re-fits the encoder each time, so every column gets its
    # own independent label -> integer mapping.
    telcoDataset[column] = encoder.fit_transform(telcoDataset[column])

# TotalCharges is a numeric amount, not a category. Label-encoding it would
# replace each amount with the rank of its (sorted) label and throw away the
# real magnitude, so parse it as a number instead.
# NOTE(review): presumably the column is read as text because some entries are
# blank; those become NaN under errors='coerce' and are filled with 0 here.
# Confirm that 0 is the right value for those rows (e.g. brand-new customers).
telcoDataset['TotalCharges'] = pd.to_numeric(
    telcoDataset['TotalCharges'], errors='coerce'
).fillna(0)

In [None]:
# Re-check the dtypes after encoding: every column should now be numeric.
telcoDataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   gender            7043 non-null   int64  
 1   SeniorCitizen     7043 non-null   int64  
 2   Partner           7043 non-null   int64  
 3   Dependents        7043 non-null   int64  
 4   tenure            7043 non-null   int64  
 5   PhoneService      7043 non-null   int64  
 6   MultipleLines     7043 non-null   int64  
 7   InternetService   7043 non-null   int64  
 8   OnlineSecurity    7043 non-null   int64  
 9   OnlineBackup      7043 non-null   int64  
 10  DeviceProtection  7043 non-null   int64  
 11  TechSupport       7043 non-null   int64  
 12  StreamingTV       7043 non-null   int64  
 13  StreamingMovies   7043 non-null   int64  
 14  Contract          7043 non-null   int64  
 15  PaperlessBilling  7043 non-null   int64  
 16  PaymentMethod     7043 non-null   int64  


# Splitting the dataset

In [None]:
# Features are every column except the target; the target is 'Churn'.
# Selecting by name instead of by position keeps the split correct even if the
# column order of the CSV changes.
X = telcoDataset.drop(columns='Churn')
Y = telcoDataset['Churn']

# Hold out 25% of the rows for testing.
# - random_state pins the shuffle so the reported metrics can be reproduced.
# - stratify=Y keeps the churn / no-churn ratio the same in both splits, which
#   matters because the two classes are not balanced.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.25, random_state=42, stratify=Y
)

# Standardization of data

In [None]:
# Standardize the features using statistics learned from the training split only.
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
# Apply the *training* mean/scale to the test split. Calling fit_transform here
# would re-fit the scaler on the test data, so train and test would be scaled
# differently and test-set statistics would leak into preprocessing.
X_test = sc_X.transform(X_test)

# K-Nearest-Neighbors

Experiment 1 (n_neighbors=3)

In [None]:
# k-NN with 3 neighbours: fit on the training split, predict the test split,
# then print accuracy, confusion matrix and the per-class report (one per line).
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
y_pred = knn.predict(X_test)
print(
    accuracy_score(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep="\n",
)

0.7512776831345827
[[1098  202]
 [ 236  225]]
              precision    recall  f1-score   support

           0       0.82      0.84      0.83      1300
           1       0.53      0.49      0.51       461

    accuracy                           0.75      1761
   macro avg       0.68      0.67      0.67      1761
weighted avg       0.75      0.75      0.75      1761



Experiment 2 (n_neighbors=7)

In [None]:
# k-NN with 7 neighbours: fit on the training split, predict the test split,
# then print accuracy, confusion matrix and the per-class report (one per line).
knn = KNeighborsClassifier(n_neighbors=7).fit(X_train, y_train)
y_pred = knn.predict(X_test)
print(
    accuracy_score(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep="\n",
)

0.7671777399204998
[[1123  177]
 [ 233  228]]
              precision    recall  f1-score   support

           0       0.83      0.86      0.85      1300
           1       0.56      0.49      0.53       461

    accuracy                           0.77      1761
   macro avg       0.70      0.68      0.69      1761
weighted avg       0.76      0.77      0.76      1761



Experiment 3 (n_neighbors=11)

In [None]:
# k-NN with 11 neighbours: fit on the training split, predict the test split,
# then print accuracy, confusion matrix and the per-class report (one per line).
knn = KNeighborsClassifier(n_neighbors=11).fit(X_train, y_train)
y_pred = knn.predict(X_test)
print(
    accuracy_score(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep="\n",
)

0.7671777399204998
[[1123  177]
 [ 233  228]]
              precision    recall  f1-score   support

           0       0.83      0.86      0.85      1300
           1       0.56      0.49      0.53       461

    accuracy                           0.77      1761
   macro avg       0.70      0.68      0.69      1761
weighted avg       0.76      0.77      0.76      1761



# Decision-Tree-Classifier

Experiment 1 (max_depth=3)

In [None]:
# Experiment 1: decision tree limited to depth 3.
# random_state is fixed so that re-running the cell builds the same tree and
# reports the same metrics.
dtc = DecisionTreeClassifier(max_depth=3, random_state=42)
dtc.fit(X_train, y_train)
y_pred = dtc.predict(X_test)
# Evaluate on the held-out split: accuracy, confusion matrix, per-class report.
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.78137421919364
[[1140  160]
 [ 225  236]]
              precision    recall  f1-score   support

           0       0.84      0.88      0.86      1300
           1       0.60      0.51      0.55       461

    accuracy                           0.78      1761
   macro avg       0.72      0.69      0.70      1761
weighted avg       0.77      0.78      0.78      1761



Experiment 2 (max_depth=7)

In [None]:
# Experiment 2: decision tree limited to depth 7.
# random_state is fixed so that re-running the cell builds the same tree and
# reports the same metrics.
dtc = DecisionTreeClassifier(max_depth=7, random_state=42)
dtc.fit(X_train, y_train)
y_pred = dtc.predict(X_test)
# Evaluate on the held-out split: accuracy, confusion matrix, per-class report.
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.7853492333901193
[[1155  145]
 [ 233  228]]
              precision    recall  f1-score   support

           0       0.83      0.89      0.86      1300
           1       0.61      0.49      0.55       461

    accuracy                           0.79      1761
   macro avg       0.72      0.69      0.70      1761
weighted avg       0.77      0.79      0.78      1761



Experiment 3 (max_depth=11)

In [None]:
# Experiment 3: decision tree limited to depth 11.
# random_state is fixed so that re-running the cell builds the same tree and
# reports the same metrics.
dtc = DecisionTreeClassifier(max_depth=11, random_state=42)
dtc.fit(X_train, y_train)
y_pred = dtc.predict(X_test)
# Evaluate on the held-out split: accuracy, confusion matrix, per-class report.
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.7473026689381034
[[1064  236]
 [ 209  252]]
              precision    recall  f1-score   support

           0       0.84      0.82      0.83      1300
           1       0.52      0.55      0.53       461

    accuracy                           0.75      1761
   macro avg       0.68      0.68      0.68      1761
weighted avg       0.75      0.75      0.75      1761



# Random-Forest-Classifier

Experiment 1 (max_depth=3)

In [None]:
# Experiment 1: random forest whose trees are limited to depth 3.
# random_state is fixed so that the forest (and therefore the reported
# metrics) is the same on every run.
rfc = RandomForestClassifier(max_depth=3, random_state=42)
rfc.fit(X_train, y_train)
y_pred = rfc.predict(X_test)
# Evaluate on the held-out split: accuracy, confusion matrix, per-class report.
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.78137421919364
[[1233   67]
 [ 318  143]]
              precision    recall  f1-score   support

           0       0.79      0.95      0.86      1300
           1       0.68      0.31      0.43       461

    accuracy                           0.78      1761
   macro avg       0.74      0.63      0.65      1761
weighted avg       0.77      0.78      0.75      1761



Experiment 2 (max_depth=7)

In [None]:
# Experiment 2: random forest whose trees are limited to depth 7.
# random_state is fixed so that the forest (and therefore the reported
# metrics) is the same on every run.
rfc = RandomForestClassifier(max_depth=7, random_state=42)
rfc.fit(X_train, y_train)
y_pred = rfc.predict(X_test)
# Evaluate on the held-out split: accuracy, confusion matrix, per-class report.
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.8063600227143668
[[1199  101]
 [ 240  221]]
              precision    recall  f1-score   support

           0       0.83      0.92      0.88      1300
           1       0.69      0.48      0.56       461

    accuracy                           0.81      1761
   macro avg       0.76      0.70      0.72      1761
weighted avg       0.79      0.81      0.79      1761



Experiment 3 (max_depth=11)

In [None]:
# Experiment 3: random forest whose trees are limited to depth 11.
# random_state is fixed so that the forest (and therefore the reported
# metrics) is the same on every run.
rfc = RandomForestClassifier(max_depth=11, random_state=42)
rfc.fit(X_train, y_train)
y_pred = rfc.predict(X_test)
# Evaluate on the held-out split: accuracy, confusion matrix, per-class report.
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.8103350369108461
[[1188  112]
 [ 222  239]]
              precision    recall  f1-score   support

           0       0.84      0.91      0.88      1300
           1       0.68      0.52      0.59       461

    accuracy                           0.81      1761
   macro avg       0.76      0.72      0.73      1761
weighted avg       0.80      0.81      0.80      1761



# Support-Vector-Machines

Experiment 1 (kernel='linear')

In [None]:
# Support-vector classifier with a linear kernel: fit on the training split,
# predict the test split, then print accuracy, confusion matrix and the
# per-class report (one per line).
svc = svm.SVC(kernel='linear').fit(X_train, y_train)
y_pred = svc.predict(X_test)
print(
    accuracy_score(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep="\n",
)

0.794434980124929
[[1162  138]
 [ 224  237]]
              precision    recall  f1-score   support

           0       0.84      0.89      0.87      1300
           1       0.63      0.51      0.57       461

    accuracy                           0.79      1761
   macro avg       0.74      0.70      0.72      1761
weighted avg       0.78      0.79      0.79      1761



Experiment 2 (kernel='rbf')

In [None]:
# Support-vector classifier with an RBF kernel: fit on the training split,
# predict the test split, then print accuracy, confusion matrix and the
# per-class report (one per line).
svc = svm.SVC(kernel='rbf').fit(X_train, y_train)
y_pred = svc.predict(X_test)
print(
    accuracy_score(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep="\n",
)

0.7995457126632595
[[1204   96]
 [ 257  204]]
              precision    recall  f1-score   support

           0       0.82      0.93      0.87      1300
           1       0.68      0.44      0.54       461

    accuracy                           0.80      1761
   macro avg       0.75      0.68      0.70      1761
weighted avg       0.79      0.80      0.78      1761



Experiment 3 (kernel='poly')

In [None]:
# Support-vector classifier with a polynomial kernel: fit on the training
# split, predict the test split, then print accuracy, confusion matrix and the
# per-class report (one per line).
svc = svm.SVC(kernel='poly').fit(X_train, y_train)
y_pred = svc.predict(X_test)
print(
    accuracy_score(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep="\n",
)

0.7921635434412265
[[1194  106]
 [ 260  201]]
              precision    recall  f1-score   support

           0       0.82      0.92      0.87      1300
           1       0.65      0.44      0.52       461

    accuracy                           0.79      1761
   macro avg       0.74      0.68      0.70      1761
weighted avg       0.78      0.79      0.78      1761



# Stochastic Gradient Descent

Experiment 1 (penalty="l2")

In [None]:
# Experiment 1: stochastic-gradient-descent classifier with an L2 penalty.
# random_state is fixed so that repeated runs of this cell train the same
# model and report the same metrics.
sgd = SGDClassifier(penalty="l2", random_state=42)
sgd.fit(X_train, y_train)
y_pred = sgd.predict(X_test)
# Evaluate on the held-out split: accuracy, confusion matrix, per-class report.
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.7859170925610448
[[1174  126]
 [ 251  210]]
              precision    recall  f1-score   support

           0       0.82      0.90      0.86      1300
           1       0.62      0.46      0.53       461

    accuracy                           0.79      1761
   macro avg       0.72      0.68      0.69      1761
weighted avg       0.77      0.79      0.77      1761



Experiment 2 (penalty="l1")

In [None]:
# Experiment 2: stochastic-gradient-descent classifier with an L1 penalty.
# random_state is fixed so that repeated runs of this cell train the same
# model and report the same metrics.
sgd = SGDClassifier(penalty="l1", random_state=42)
sgd.fit(X_train, y_train)
y_pred = sgd.predict(X_test)
# Evaluate on the held-out split: accuracy, confusion matrix, per-class report.
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.7796706416808632
[[1115  185]
 [ 203  258]]
              precision    recall  f1-score   support

           0       0.85      0.86      0.85      1300
           1       0.58      0.56      0.57       461

    accuracy                           0.78      1761
   macro avg       0.71      0.71      0.71      1761
weighted avg       0.78      0.78      0.78      1761



Experiment 3 (penalty="elasticnet")

In [None]:
# Experiment 3: stochastic-gradient-descent classifier with an elastic-net penalty.
# random_state is fixed so that repeated runs of this cell train the same
# model and report the same metrics.
sgd = SGDClassifier(penalty="elasticnet", random_state=42)
sgd.fit(X_train, y_train)
y_pred = sgd.predict(X_test)
# Evaluate on the held-out split: accuracy, confusion matrix, per-class report.
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.7762634866553095
[[1060  240]
 [ 154  307]]
              precision    recall  f1-score   support

           0       0.87      0.82      0.84      1300
           1       0.56      0.67      0.61       461

    accuracy                           0.78      1761
   macro avg       0.72      0.74      0.73      1761
weighted avg       0.79      0.78      0.78      1761

