In [23]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.preprocessing import LabelEncoder, StandardScaler
from mlxtend.plotting import plot_decision_regions


In [6]:
# Read the raw telecom customer table from the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="telecom.csv")
# Bare trailing expression: the notebook renders the frame as the cell output.
df

Unnamed: 0,ID,Subscription length,Charge amount,Seconds of use,Frequency of use,Frequency of SMS,Distinct called numbers,Call failures,Tariff plan,Status,Age group,Complaints,Churn
0,1,35,0,1420,41,36,20,1,A,Active,30-40,N,0
1,2,28,0,920,32,20,12,7,A,Active,Under 30,N,0
2,3,40,0,88,6,8,6,0,A,Inactive,30-40,N,1
3,4,38,0,13963,170,9,47,9,A,Active,30-40,N,0
4,5,38,0,13773,169,0,44,7,A,Active,30-40,N,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3145,3146,33,0,0,0,0,0,0,A,Inactive,Under 30,N,0
3146,3147,37,0,1370,40,24,7,5,A,Inactive,30-40,N,0
3147,3148,34,7,10320,88,3,43,18,A,Active,Over 40,N,0
3148,3149,23,0,1110,27,21,14,7,A,Active,Under 30,N,0


In [7]:
# Columns removed before modelling.
# NOTE: `df` is rebound here, so re-running this cell on the already-reduced
# frame raises KeyError (the columns are no longer present).
unused_columns = ['ID', 'Tariff plan', 'Status', 'Age group']
df = df.drop(columns=unused_columns)
df

Unnamed: 0,Subscription length,Charge amount,Seconds of use,Frequency of use,Frequency of SMS,Distinct called numbers,Call failures,Complaints,Churn
0,35,0,1420,41,36,20,1,N,0
1,28,0,920,32,20,12,7,N,0
2,40,0,88,6,8,6,0,N,1
3,38,0,13963,170,9,47,9,N,0
4,38,0,13773,169,0,44,7,N,0
...,...,...,...,...,...,...,...,...,...
3145,33,0,0,0,0,0,0,N,0
3146,37,0,1370,40,24,7,5,N,0
3147,34,7,10320,88,3,43,18,N,0
3148,23,0,1110,27,21,14,7,N,0


In [8]:
# Invert the target column: for the 0/1 values shown above, 0 becomes 1 and 1 becomes 0.
# NOTE: this overwrites 'Churn' in place, so every additional run of the cell
# flips the labels back again; run it exactly once per fresh kernel.
flipped_target = 1 - df['Churn']
df['Churn'] = flipped_target
df

Unnamed: 0,Subscription length,Charge amount,Seconds of use,Frequency of use,Frequency of SMS,Distinct called numbers,Call failures,Complaints,Churn
0,35,0,1420,41,36,20,1,N,1
1,28,0,920,32,20,12,7,N,1
2,40,0,88,6,8,6,0,N,0
3,38,0,13963,170,9,47,9,N,1
4,38,0,13773,169,0,44,7,N,1
...,...,...,...,...,...,...,...,...,...
3145,33,0,0,0,0,0,0,N,1
3146,37,0,1370,40,24,7,5,N,1
3147,34,7,10320,88,3,43,18,N,1
3148,23,0,1110,27,21,14,7,N,1


In [9]:
# Encode the complaint flag numerically: 'N' -> 0, 'Y' -> 1.
# Series.map yields NaN for any value that is not a key of the mapping, which
# includes the already-encoded 0/1 values if this cell is run a second time.
complaint_codes = {'N': 0, 'Y': 1}
df['Complaints'] = df['Complaints'].map(complaint_codes)
df

Unnamed: 0,Subscription length,Charge amount,Seconds of use,Frequency of use,Frequency of SMS,Distinct called numbers,Call failures,Complaints,Churn
0,35,0,1420,41,36,20,1,0,1
1,28,0,920,32,20,12,7,0,1
2,40,0,88,6,8,6,0,0,0
3,38,0,13963,170,9,47,9,0,1
4,38,0,13773,169,0,44,7,0,1
...,...,...,...,...,...,...,...,...,...
3145,33,0,0,0,0,0,0,0,1
3146,37,0,1370,40,24,7,5,0,1
3147,34,7,10320,88,3,43,18,0,1
3148,23,0,1110,27,21,14,7,0,1


In [15]:
# Two-column feature matrix and the target vector used by the first model.
feature_columns = ["Seconds of use", "Complaints"]
y = df["Churn"]
X = df[feature_columns]
X

Unnamed: 0,Seconds of use,Complaints
0,1420,0
1,920,0
2,88,0
3,13963,0
4,13773,0
...,...,...
3145,0,0
3146,1370,0
3147,10320,0
3148,1110,0


In [16]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# AdaBoost Classifier: 50 boosted depth-1 trees (decision stumps).
# random_state is set on the ensemble itself: AdaBoost re-seeds every cloned
# base estimator from its own random_state, so seeding only the stump does not
# make the fit reproducible.
ada_clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1, random_state=42),
                             n_estimators=50,
                             algorithm="SAMME",
                             learning_rate=0.5,
                             random_state=42)
ada_clf.fit(X_train, y_train)
ada_pred = ada_clf.predict(X_test)

# Per-class precision/recall/F1 on the held-out rows.
print("AdaBoost Classification Report:")
print(classification_report(y_test, ada_pred), "\n")

AdaBoost Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.41      0.57       103
           1       0.90      1.00      0.94       527

    accuracy                           0.90       630
   macro avg       0.93      0.70      0.76       630
weighted avg       0.91      0.90      0.88       630
 





In [21]:
# full feature AdaBoost
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

ada_clf_full = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1, random_state=42),
                                  n_estimators=50,
                                  learning_rate=0.5)
ada_clf_full.fit(X_train, y_train)
ada_pred_full = ada_clf_full.predict(X_test)

print(f"AdaBoost (Full Features) Classification Report:")
print(classification_report(y_test, ada_pred_full), "\n")

AdaBoost (Full Features) Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.41      0.57       103
           1       0.90      1.00      0.94       527

    accuracy                           0.90       630
   macro avg       0.93      0.70      0.76       630
weighted avg       0.91      0.90      0.88       630
 

