In [1]:
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from imblearn.combine import SMOTEENN
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

In [2]:
df_ = pd.read_csv("tel_churn.csv")

In [3]:
df_.drop(columns="Unnamed: 0", inplace=True)

In [4]:
df_.head(3)

Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1 - 12,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
0,0,29.85,29.85,0,1,0,0,1,1,0,...,0,0,1,0,1,0,0,0,0,0
1,0,56.95,1889.5,0,0,1,1,0,1,0,...,0,0,0,1,0,0,1,0,0,0
2,0,53.85,108.15,1,0,1,1,0,1,0,...,0,0,0,1,1,0,0,0,0,0


In [5]:
pip show scikit-learn 

Name: scikit-learn
Version: 1.3.2
Summary: A set of python modules for machine learning and data mining
Home-page: http://scikit-learn.org
Author: 
Author-email: 
License: new BSD
Location: c:\anacondadown\lib\site-packages
Requires: joblib, numpy, scipy, threadpoolctl
Required-by: imbalanced-learn, scikit-learn-intelex
Note: you may need to restart the kernel to use updated packages.




In [6]:
y = df_["Churn"]
X = df_.drop(columns="Churn")

In [7]:
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.2)

In [8]:
sm = SMOTEENN()
X_resampled, y_resampled = sm.fit_resample(X,y)
xr_train,xr_test,yr_train,yr_test=train_test_split(X_resampled, y_resampled,test_size=0.2)

## Modelling

# 1. Decesion Tree

In [9]:
model_dt=DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)
model_dt.fit(x_train,y_train)
y_pred=model_dt.predict(x_test)

In [10]:
print(classification_report(y_test, y_pred, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.83      0.90      0.86      1047
           1       0.62      0.46      0.53       360

    accuracy                           0.79      1407
   macro avg       0.72      0.68      0.70      1407
weighted avg       0.78      0.79      0.78      1407



In [11]:
model_dt_smote=DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)
model_dt_smote.fit(xr_train,yr_train)
yr_predict = model_dt_smote.predict(xr_test)

In [12]:
print(metrics.classification_report(yr_test, yr_predict))

              precision    recall  f1-score   support

           0       0.93      0.94      0.94       516
           1       0.95      0.95      0.95       648

    accuracy                           0.95      1164
   macro avg       0.94      0.95      0.95      1164
weighted avg       0.95      0.95      0.95      1164



## 2. Random Forest Classifier

In [13]:
model_rf=RandomForestClassifier(n_estimators=100, criterion='gini', random_state = 100,max_depth=6, min_samples_leaf=8)
model_rf.fit(x_train,y_train)
RandomForestClassifier(max_depth=6, min_samples_leaf=8, random_state=100)
y_pred=model_rf.predict(x_test)

In [14]:
print(classification_report(y_test, y_pred, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.84      0.91      0.88      1047
           1       0.67      0.51      0.58       360

    accuracy                           0.81      1407
   macro avg       0.76      0.71      0.73      1407
weighted avg       0.80      0.81      0.80      1407



In [15]:
model_rf_smote=RandomForestClassifier(n_estimators=100, criterion='gini', random_state = 100,max_depth=6, min_samples_leaf=8)
model_rf_smote.fit(xr_train,yr_train)
yr_predict1 = model_rf_smote.predict(xr_test)

In [16]:
print(metrics.classification_report(yr_test, yr_predict1))

              precision    recall  f1-score   support

           0       0.96      0.91      0.93       516
           1       0.93      0.97      0.95       648

    accuracy                           0.94      1164
   macro avg       0.95      0.94      0.94      1164
weighted avg       0.94      0.94      0.94      1164



## 3. KNN Classifier

In [17]:
knn = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
knn.fit(x_train, y_train)
y_pred = knn.predict(x_test)

In [18]:
print(classification_report(y_test, y_pred, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.83      0.87      0.85      1047
           1       0.57      0.49      0.52       360

    accuracy                           0.77      1407
   macro avg       0.70      0.68      0.69      1407
weighted avg       0.76      0.77      0.77      1407



In [19]:
knn_smote = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
knn_smote.fit(xr_train,yr_train)
yr_predict2 = model_rf_smote.predict(xr_test)

In [20]:
print(metrics.classification_report(yr_test, yr_predict2))

              precision    recall  f1-score   support

           0       0.96      0.91      0.93       516
           1       0.93      0.97      0.95       648

    accuracy                           0.94      1164
   macro avg       0.95      0.94      0.94      1164
weighted avg       0.94      0.94      0.94      1164



The results of Random Forest Classifier are best, hence we are going for it!

In [21]:
import pickle

In [22]:
with open('model.pkl', 'wb') as file:
    pickle.dump(model_rf_smote, file)

In [23]:
with open('model.pkl', 'rb') as file:
    loaded_model = pickle.load(file)

In [25]:
loaded_model.score(xr_test, yr_test)

0.9432989690721649

In [24]:
import pickle
filename = 'model.sav'
pickle.dump(model_rf_smote, open(filename, 'wb'))

In [27]:
load_model = pickle.load(open(filename, 'rb'))
load_model.score(xr_test, yr_test)

0.9432989690721649