In [5]:
#Loading and exploring the data

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv("telco.csv")
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [6]:
df.isnull().sum()

customerID          0
gender              0
SeniorCitizen       0
Partner             0
Dependents          0
tenure              0
PhoneService        0
MultipleLines       0
InternetService     0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
Contract            0
PaperlessBilling    0
PaymentMethod       0
MonthlyCharges      0
TotalCharges        0
Churn               0
dtype: int64

In [7]:
df.dtypes

customerID           object
gender               object
SeniorCitizen         int64
Partner              object
Dependents           object
tenure                int64
PhoneService         object
MultipleLines        object
InternetService      object
OnlineSecurity       object
OnlineBackup         object
DeviceProtection     object
TechSupport          object
StreamingTV          object
StreamingMovies      object
Contract             object
PaperlessBilling     object
PaymentMethod        object
MonthlyCharges      float64
TotalCharges         object
Churn                object
dtype: object

In [8]:
# Encode categorical variables
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [9]:
df.dtypes

customerID            int32
gender                int32
SeniorCitizen         int64
Partner               int32
Dependents            int32
tenure                int64
PhoneService          int32
MultipleLines         int32
InternetService       int32
OnlineSecurity        int32
OnlineBackup          int32
DeviceProtection      int32
TechSupport           int32
StreamingTV           int32
StreamingMovies       int32
Contract              int32
PaperlessBilling      int32
PaymentMethod         int32
MonthlyCharges      float64
TotalCharges          int32
Churn                 int32
dtype: object

In [10]:
scaler = StandardScaler()
standardized_data = scaler.fit_transform(df)
standardized_df = pd.DataFrame(standardized_data, columns=df.columns)

In [13]:
from sklearn.model_selection import train_test_split
X = df.drop(columns=['Churn'])
y = df['Churn']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [21]:
from sklearn.ensemble import BaggingClassifier,AdaBoostClassifier,GradientBoostingClassifier,RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

#Bagging
bagging = BaggingClassifier(RandomForestClassifier(),n_estimators=50,random_state=42)
bagging.fit(X_train_scaled,y_train)
bagging_acc = accuracy_score(y_test,bagging.predict(X_test_scaled))

#AdaBoost
ada = AdaBoostClassifier(n_estimators=50,random_state=42)
ada.fit(X_train_scaled,y_train)
ada_acc = accuracy_score(y_test,ada.predict(X_test_scaled))

#Gradient Boosting
gb = GradientBoostingClassifier(n_estimators=100,learning_rate=0.1,random_state=42)
gb.fit(X_train_scaled,y_train)
gb_acc = accuracy_score(y_test,gb.predict(X_test_scaled))

#XGBoost
xgb = XGBClassifier(use_label_encoder=True,eval_metric='logloss',random_state=42)
xgb.fit(X_train_scaled,y_train)
xgb_acc = accuracy_score(y_test,xgb.predict(X_test_scaled))

#Show accuracy results
import pandas as pd
results_df = pd.DataFrame({
    "Model": ["Bagging", "AdaBoost", "Gradient Boosting", "XGBoosting"],
    "Accuracy": [bagging_acc, ada_acc, gb_acc, xgb_acc]
})
results_df


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Unnamed: 0,Model,Accuracy
0,Bagging,0.804826
1,AdaBoost,0.807665
2,Gradient Boosting,0.809084
3,XGBoosting,0.786373


In [22]:
# Model Evaluation
models = {'Bagging': bagging, 'AdaBoost': ada, 'Gradient Boosting': gb, 'XGBoost': xgb}

for name, model in models.items():
    y_pred = model.predict(X_test_scaled)
    print(f"\n{name} Model Evaluation:")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))


Bagging Model Evaluation:
Accuracy: 0.8048
Confusion Matrix:
 [[945  91]
 [184 189]]
Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.91      0.87      1036
           1       0.68      0.51      0.58       373

    accuracy                           0.80      1409
   macro avg       0.76      0.71      0.73      1409
weighted avg       0.79      0.80      0.80      1409


AdaBoost Model Evaluation:
Accuracy: 0.8077
Confusion Matrix:
 [[930 106]
 [165 208]]
Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.90      0.87      1036
           1       0.66      0.56      0.61       373

    accuracy                           0.81      1409
   macro avg       0.76      0.73      0.74      1409
weighted avg       0.80      0.81      0.80      1409


Gradient Boosting Model Evaluation:
Accuracy: 0.8091
Confusion Matrix:
 [[944  92]
 [177 196]]
Classification Report:
      