In [1]:
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from imblearn.combine import SMOTEENN

In [2]:
df=pd.read_csv('customer_churn.csv')

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1-12,tenure_group_13-24,tenure_group_25-36,tenure_group_37-48,tenure_group_49-60,tenure_group_61-72
0,0,0,29.85,29.85,0,1,0,0,1,1,...,0,0,1,0,1,0,0,0,0,0
1,1,0,56.95,1889.5,0,0,1,1,0,1,...,0,0,0,1,0,0,1,0,0,0
2,2,0,53.85,108.15,1,0,1,1,0,1,...,0,0,0,1,1,0,0,0,0,0
3,3,0,42.3,1840.75,0,0,1,1,0,1,...,1,0,0,0,0,0,0,1,0,0
4,4,0,70.7,151.65,1,1,0,1,0,1,...,0,0,1,0,1,0,0,0,0,0


In [4]:
df=df.drop('Unnamed: 0', axis='columns')

In [5]:
df.head()

Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1-12,tenure_group_13-24,tenure_group_25-36,tenure_group_37-48,tenure_group_49-60,tenure_group_61-72
0,0,29.85,29.85,0,1,0,0,1,1,0,...,0,0,1,0,1,0,0,0,0,0
1,0,56.95,1889.5,0,0,1,1,0,1,0,...,0,0,0,1,0,0,1,0,0,0
2,0,53.85,108.15,1,0,1,1,0,1,0,...,0,0,0,1,1,0,0,0,0,0
3,0,42.3,1840.75,0,0,1,1,0,1,0,...,1,0,0,0,0,0,0,1,0,0
4,0,70.7,151.65,1,1,0,1,0,1,0,...,0,0,1,0,1,0,0,0,0,0


In [6]:
X= df.drop('Churn', axis='columns')
X.shape

(7032, 50)

In [7]:
y=df['Churn']
y.shape

(7032,)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2)

# DECISION TREE CLASSIFIER

In [9]:
model_dt = DecisionTreeClassifier(criterion='gini',max_depth=6, random_state=100, min_samples_leaf=8)

In [10]:
model_dt.fit(X_train, y_train)

In [17]:
model_dt.score(X_test,y_test)

0.7853589196872779

In [18]:
y_pred=model_dt.predict(X_test)
y_pred

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [57]:
print(classification_report(y_test,y_pred, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.82      0.90      0.86      1030
           1       0.64      0.47      0.54       377

    accuracy                           0.79      1407
   macro avg       0.73      0.68      0.70      1407
weighted avg       0.77      0.79      0.77      1407



In [20]:
confusion_matrix(y_test,y_pred)

array([[929, 101],
       [201, 176]], dtype=int64)

In [24]:
sm=SMOTEENN()
X_resampled, y_resampled = sm.fit_resample(X_train,y_train)

In [25]:
Xr_train, Xr_test, yr_train, yr_test = train_test_split(X_resampled, y_resampled, test_size=0.2)

In [26]:
model_dt_smote = DecisionTreeClassifier(criterion='gini',max_depth=6, random_state=100, min_samples_leaf=8)

In [27]:
model_dt_smote.fit(Xr_train, yr_train)

In [29]:
model_dt_smote.score(Xr_test,yr_test)

0.9492063492063492

In [30]:
ypred_smote=model_dt_smote.predict(Xr_test)

In [31]:
print(classification_report(yr_test,ypred_smote, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.93      0.95      0.94       408
           1       0.96      0.95      0.95       537

    accuracy                           0.95       945
   macro avg       0.95      0.95      0.95       945
weighted avg       0.95      0.95      0.95       945



In [32]:
confusion_matrix(yr_test,ypred_smote)

array([[388,  20],
       [ 28, 509]], dtype=int64)

# RANDOM FOREST CLASSIFIER

In [33]:
from sklearn.ensemble import RandomForestClassifier

In [34]:
model_rf = RandomForestClassifier(n_estimators=100,criterion='gini',max_depth=6, random_state=100, min_samples_leaf=8)

In [35]:
model_rf.fit(X_train,y_train)

In [36]:
model_rf.score(X_test,y_test)

0.7981520966595593

In [37]:
ypred_rf=model_rf.predict(X_test)

In [38]:
print(classification_report(y_test,ypred_rf))

              precision    recall  f1-score   support

           0       0.82      0.93      0.87      1030
           1       0.69      0.44      0.54       377

    accuracy                           0.80      1407
   macro avg       0.76      0.68      0.70      1407
weighted avg       0.79      0.80      0.78      1407



In [39]:
sm=SMOTEENN()

In [40]:
X_resampled_rf, y_resampled_rf = sm.fit_resample(X_train, y_train)

In [41]:
Xr_train_rf, Xr_test_rf, yr_train_rf, yr_test_rf = train_test_split(X_resampled_rf, y_resampled_rf, test_size=0.2)

In [42]:
model_rf_smote = RandomForestClassifier(n_estimators=100,criterion='gini',max_depth=6, random_state=100, min_samples_leaf=8)

In [45]:
model_rf_smote.fit(Xr_train_rf,yr_train_rf)

In [46]:
model_rf_smote.score(Xr_test_rf,yr_test_rf)

0.926595744680851

In [47]:
ypred_rf_smote =model_rf_smote.predict(Xr_test_rf)

In [48]:
print(classification_report(yr_test_rf, ypred_rf_smote))

              precision    recall  f1-score   support

           0       0.94      0.89      0.92       425
           1       0.91      0.96      0.93       515

    accuracy                           0.93       940
   macro avg       0.93      0.92      0.93       940
weighted avg       0.93      0.93      0.93       940



In [49]:
confusion_matrix(yr_test_rf, ypred_rf_smote)

array([[378,  47],
       [ 22, 493]], dtype=int64)

In [52]:
import pickle

In [53]:
filename = 'model.sav'

In [54]:
pickle.dump(model_rf_smote,open(filename,'wb'))

In [55]:
load_model=pickle.load(open(filename,'rb'))

In [56]:
load_model.score(Xr_test_rf,yr_test_rf)

0.926595744680851