In [1]:
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [2]:
dataset = pd.read_csv("TelcoCustomerChurnClean.csv")

In [3]:
scaler = MinMaxScaler()
dataset['tenure'] = scaler.fit_transform(dataset[['tenure']])
dataset['MonthlyCharges'] = scaler.fit_transform(dataset[['MonthlyCharges']])
dataset['TotalCharges'] = scaler.fit_transform(dataset[['TotalCharges']])


In [4]:
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,subscription,Multiple subscription,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,0,0,1,0,0.0,0,1,0,0,2,0,0,0,0,0,1,2,0.115423,0.001275,0
1,1,0,0,0,0.464789,1,0,0,2,0,2,0,0,0,1,0,3,0.385075,0.215867,0
2,1,0,0,0,0.014085,1,0,0,2,2,0,0,0,0,0,1,3,0.354229,0.01031,1
3,1,0,0,0,0.619718,0,1,0,2,0,2,2,0,0,1,0,0,0.239303,0.210241,0
4,0,0,0,0,0.014085,1,0,1,0,0,0,0,0,0,0,1,2,0.521891,0.01533,1


In [5]:
X = dataset.drop("Churn", axis = 1)
y = dataset["Churn"]

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)

### XG Boost Classifier

In [7]:
from xgboost import XGBClassifier
xgb = XGBClassifier(learning_rate= 0.01,max_depth = 3,n_estimators = 1000)

In [8]:
xgb.fit(X_train,y_train)
y_pred = xgb.predict(X_test)

In [9]:
print(classification_report(y_test,xgb.predict(X_test)))

              precision    recall  f1-score   support

           0       0.83      0.91      0.87      1300
           1       0.65      0.48      0.55       458

    accuracy                           0.80      1758
   macro avg       0.74      0.70      0.71      1758
weighted avg       0.79      0.80      0.79      1758



### RandomForest Classifier

In [10]:
rf = RandomForestClassifier(max_depth = 4,random_state = 0)

In [11]:
rf.fit(X_train,y_train)
y_pred = rf.predict(X_test)

In [12]:
print(classification_report(y_test,rf.predict(X_test)))

              precision    recall  f1-score   support

           0       0.81      0.93      0.87      1300
           1       0.67      0.39      0.50       458

    accuracy                           0.79      1758
   macro avg       0.74      0.66      0.68      1758
weighted avg       0.78      0.79      0.77      1758



### Decision Tree Classifier

In [13]:
dt = DecisionTreeClassifier(random_state = 1000,max_depth = 4,min_samples_leaf = 1)
dt.fit(X_train,y_train)
y_pred = dt.predict(X_test)

In [14]:
print(classification_report(y_test,dt.predict(X_test)))

              precision    recall  f1-score   support

           0       0.82      0.89      0.86      1300
           1       0.60      0.46      0.52       458

    accuracy                           0.78      1758
   macro avg       0.71      0.68      0.69      1758
weighted avg       0.77      0.78      0.77      1758



### KNN 

In [15]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
y_pred = dt.predict(X_test)

In [16]:
print(classification_report(y_test,dt.predict(X_test)))

              precision    recall  f1-score   support

           0       0.82      0.89      0.86      1300
           1       0.60      0.46      0.52       458

    accuracy                           0.78      1758
   macro avg       0.71      0.68      0.69      1758
weighted avg       0.77      0.78      0.77      1758

