## Customer Churn Analysis MODEL Training

In [31]:
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from imblearn.combine import SMOTEENN


##### Recall Score

##### Classification report

##### SMOTEENN

In [2]:
df_tele=pd.read_csv('data/telecom_churn_data.csv')

In [3]:
df_tele.head()

Unnamed: 0.1,Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1 - 12,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
0,0,0,29.85,29.85,0,1,0,0,1,1,...,0,0,1,0,1,0,0,0,0,0
1,1,0,56.95,1889.5,0,0,1,1,0,1,...,0,0,0,1,0,0,1,0,0,0
2,2,0,53.85,108.15,1,0,1,1,0,1,...,0,0,0,1,1,0,0,0,0,0
3,3,0,42.3,1840.75,0,0,1,1,0,1,...,1,0,0,0,0,0,0,1,0,0
4,4,0,70.7,151.65,1,1,0,1,0,1,...,0,0,1,0,1,0,0,0,0,0


In [4]:
df_tele=df_tele.drop('Unnamed: 0', axis=1)

##### Creating the Independent and Dependent Variables

In [5]:
x=df_tele.drop('Churn', axis=1)
x

Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,PhoneService_No,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1 - 12,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
0,0,29.85,29.85,1,0,0,1,1,0,1,...,0,0,1,0,1,0,0,0,0,0
1,0,56.95,1889.50,0,1,1,0,1,0,0,...,0,0,0,1,0,0,1,0,0,0
2,0,53.85,108.15,0,1,1,0,1,0,0,...,0,0,0,1,1,0,0,0,0,0
3,0,42.30,1840.75,0,1,1,0,1,0,1,...,1,0,0,0,0,0,0,1,0,0
4,0,70.70,151.65,1,0,1,0,1,0,0,...,0,0,1,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7027,0,84.80,1990.50,0,1,0,1,0,1,0,...,0,0,0,1,0,1,0,0,0,0
7028,0,103.20,7362.90,1,0,0,1,0,1,0,...,0,1,0,0,0,0,0,0,0,1
7029,0,29.60,346.45,1,0,0,1,0,1,1,...,0,0,1,0,1,0,0,0,0,0
7030,1,74.40,306.60,0,1,0,1,1,0,0,...,0,0,0,1,1,0,0,0,0,0


In [6]:
y=df_tele['Churn']
y

0       0
1       0
2       1
3       0
4       1
       ..
7027    0
7028    0
7029    0
7030    1
7031    0
Name: Churn, Length: 7032, dtype: int64

##### Train data and Test data split

In [10]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)

### Decision Tree

In [11]:
model_dt=DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)
model_dt.fit(x_train,y_train)

In [14]:
y_pred=model_dt.predict(x_test)
y_pred

array([0, 0, 0, ..., 1, 0, 0], dtype=int64)

##### Classification Report

In [21]:
print(classification_report(y_test, y_pred, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.82      0.92      0.87      1028
           1       0.67      0.45      0.53       379

    accuracy                           0.79      1407
   macro avg       0.74      0.68      0.70      1407
weighted avg       0.78      0.79      0.78      1407



In [22]:
print(confusion_matrix(y_test,y_pred))

[[944  84]
 [210 169]]


### SMOTEENN

In [24]:
sm = SMOTEENN()
x_resampled, y_resampled = sm.fit_resample(x,y)

In [25]:
xr_train,xr_test,yr_train,yr_test=train_test_split(x_resampled, y_resampled,test_size=0.2)
model_dt_smote=DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)

In [26]:
model_dt_smote.fit(xr_train, yr_train)

In [28]:
yr_pred=model_dt_smote.predict(xr_test)
yr_pred

array([1, 1, 0, ..., 1, 1, 1], dtype=int64)

In [29]:
print(classification_report(yr_test, yr_pred, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.91      0.92      0.91       523
           1       0.94      0.92      0.93       655

    accuracy                           0.92      1178
   macro avg       0.92      0.92      0.92      1178
weighted avg       0.92      0.92      0.92      1178



In [30]:
print(confusion_matrix(yr_test,yr_pred))

[[483  40]
 [ 50 605]]


### Random Forest Classifier

In [32]:
model_rf=RandomForestClassifier(n_estimators=100, criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)
model_rf.fit(x_train,y_train)

In [34]:
y_pred_rf=model_rf.predict(x_test)
y_pred_rf

array([0, 0, 0, ..., 1, 0, 0], dtype=int64)

In [36]:
print(classification_report(y_test, y_pred_rf, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.82      0.94      0.88      1028
           1       0.73      0.46      0.56       379

    accuracy                           0.81      1407
   macro avg       0.78      0.70      0.72      1407
weighted avg       0.80      0.81      0.79      1407



In [37]:
sm = SMOTEENN()
x_resampled, y_resampled = sm.fit_resample(x,y)

In [38]:
xr_train,xr_test,yr_train,yr_test=train_test_split(x_resampled, y_resampled,test_size=0.2)
model_rf_smote=RandomForestClassifier(n_estimators=100, criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)

In [39]:
model_rf_smote.fit(xr_train, yr_train)

In [40]:
yr_pred_rf=model_rf_smote.predict(xr_test)
yr_pred_rf

array([0, 1, 1, ..., 1, 0, 0], dtype=int64)

In [43]:
print(classification_report(yr_test, yr_pred_rf, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.96      0.92      0.94       537
           1       0.94      0.97      0.95       640

    accuracy                           0.95      1177
   macro avg       0.95      0.95      0.95      1177
weighted avg       0.95      0.95      0.95      1177



In [44]:
print(confusion_matrix(yr_test,yr_pred_rf))

[[496  41]
 [ 21 619]]
