Churn Prediction Model

In [31]:
import pandas as pd
from imblearn.combine import SMOTEENN
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import recall_score
from sklearn.model_selection import train_test_split

In [32]:
df = pd.read_csv('Telecom_churn.csv')

In [33]:
df.head()

Unnamed: 0.1,Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_grp_1 - 12,tenure_grp_13 - 24,tenure_grp_25 - 36,tenure_grp_37 - 48,tenure_grp_49 - 60,tenure_grp_61 - 72
0,0,0,29.85,29.85,0,1,0,0,1,1,...,0,0,1,0,1,0,0,0,0,0
1,1,0,56.95,1889.5,0,0,1,1,0,1,...,0,0,0,1,0,0,1,0,0,0
2,2,0,53.85,108.15,1,0,1,1,0,1,...,0,0,0,1,1,0,0,0,0,0
3,3,0,42.3,1840.75,0,0,1,1,0,1,...,1,0,0,0,0,0,0,1,0,0
4,4,0,70.7,151.65,1,1,0,1,0,1,...,0,0,1,0,1,0,0,0,0,0


In [34]:
df = df.drop('Unnamed: 0', axis=1)

In [35]:
df.head()

Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_grp_1 - 12,tenure_grp_13 - 24,tenure_grp_25 - 36,tenure_grp_37 - 48,tenure_grp_49 - 60,tenure_grp_61 - 72
0,0,29.85,29.85,0,1,0,0,1,1,0,...,0,0,1,0,1,0,0,0,0,0
1,0,56.95,1889.5,0,0,1,1,0,1,0,...,0,0,0,1,0,0,1,0,0,0
2,0,53.85,108.15,1,0,1,1,0,1,0,...,0,0,0,1,1,0,0,0,0,0
3,0,42.3,1840.75,0,0,1,1,0,1,0,...,1,0,0,0,0,0,0,1,0,0
4,0,70.7,151.65,1,1,0,1,0,1,0,...,0,0,1,0,1,0,0,0,0,0


In [36]:
# Creating X and Y variables

x = df.drop('Churn', axis=1)
print(x)

      SeniorCitizen  MonthlyCharges  TotalCharges  gender_Female  gender_Male  \
0                 0           29.85         29.85              1            0   
1                 0           56.95       1889.50              0            1   
2                 0           53.85        108.15              0            1   
3                 0           42.30       1840.75              0            1   
4                 0           70.70        151.65              1            0   
...             ...             ...           ...            ...          ...   
7027              0           84.80       1990.50              0            1   
7028              0          103.20       7362.90              1            0   
7029              0           29.60        346.45              1            0   
7030              1           74.40        306.60              0            1   
7031              0          105.65       6844.50              0            1   

      Partner_No  Partner_Y

In [37]:
y = df['Churn']
print(y)

0       0
1       0
2       1
3       0
4       1
       ..
7027    0
7028    0
7029    0
7030    1
7031    0
Name: Churn, Length: 7032, dtype: int64


In [38]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2)

Decision Tree Classifier

In [39]:
model_dt = DecisionTreeClassifier(criterion='gini', random_state=100, max_depth=6, min_samples_leaf=8)


In [40]:
model_dt.fit(x_train, y_train)

In [41]:
y_pred = model_dt.predict(x_test)

In [42]:
y_pred

array([0, 0, 1, ..., 0, 1, 0], dtype=int64)

In [43]:
print(classification_report(y_test, y_pred, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.84      0.90      0.87      1016
           1       0.69      0.55      0.61       391

    accuracy                           0.81      1407
   macro avg       0.77      0.73      0.74      1407
weighted avg       0.80      0.81      0.80      1407



In [44]:
print(confusion_matrix(y_test, y_pred))

[[919  97]
 [175 216]]


In [45]:
sm = SMOTEENN()
X_resampled, y_resampled = sm.fit_resample(x,y)

In [46]:
xr_train, xr_test, yr_train, yr_test = train_test_split(X_resampled, y_resampled, test_size=0.2)


In [47]:
model_dt_smote = DecisionTreeClassifier(criterion='gini', random_state=100, max_depth=6, min_samples_leaf=8)


In [50]:
model_dt_smote.fit(xr_train, yr_train)

In [51]:
y_pred_smote = model_dt_smote.predict(xr_test)

In [52]:
print(classification_report(yr_test, y_pred_smote, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.93      0.94      0.94       532
           1       0.95      0.94      0.95       647

    accuracy                           0.94      1179
   macro avg       0.94      0.94      0.94      1179
weighted avg       0.94      0.94      0.94      1179



In [53]:
print(confusion_matrix(yr_test, y_pred_smote))

[[502  30]
 [ 36 611]]


Random Forest Classifier


In [56]:
from sklearn.ensemble import RandomForestClassifier

In [57]:
model_rf = RandomForestClassifier(n_estimators = 100, criterion='gini', random_state=100, max_depth=6, min_samples_leaf=8)
model_rf.fit(x_train, y_train)  
y_pred_rf = model_rf.predict(x_test)

In [58]:
print(classification_report(y_test, y_pred_rf, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.83      0.93      0.88      1016
           1       0.74      0.49      0.59       391

    accuracy                           0.81      1407
   macro avg       0.78      0.71      0.73      1407
weighted avg       0.80      0.81      0.80      1407



In [61]:
sm = SMOTEENN()
X_resampled_rf, y_resampled_rf = sm.fit_resample(x,y)

In [62]:
xr_train, xr_test, yr_train, yr_test = train_test_split(X_resampled_rf, y_resampled_rf, test_size=0.2)


In [63]:
model_smote_rf = RandomForestClassifier(n_estimators = 100, criterion='gini', random_state=100, max_depth=6, min_samples_leaf=8)

In [65]:
model_smote_rf.fit(xr_train, yr_train)

In [66]:
y_pred_smote_rf = model_smote_rf.predict(xr_test)

In [67]:
print(classification_report(yr_test, y_pred_smote_rf, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.95      0.91      0.93       491
           1       0.94      0.97      0.95       688

    accuracy                           0.94      1179
   macro avg       0.94      0.94      0.94      1179
weighted avg       0.94      0.94      0.94      1179



In [68]:
print(confusion_matrix(yr_test, y_pred_smote_rf))

[[448  43]
 [ 24 664]]


In [69]:
# Saving the model
import pickle


In [70]:
file_name = 'model.sav'

In [71]:
pickle.dump(model_smote_rf, open(file_name, 'wb'))

In [73]:
load_model = pickle.load(open(file_name, 'rb'))

In [74]:
load_model.score(xr_test,yr_test)

0.9431721798134012