In [1]:
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score, classification_report, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from imblearn.combine import SMOTEENN

In [2]:
data = pd.read_csv('tel_churn.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1 - 12,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
0,0,0,29.85,29.85,0,True,False,False,True,True,...,False,False,True,False,True,False,False,False,False,False
1,1,0,56.95,1889.5,0,False,True,True,False,True,...,False,False,False,True,False,False,True,False,False,False
2,2,0,53.85,108.15,1,False,True,True,False,True,...,False,False,False,True,True,False,False,False,False,False
3,3,0,42.3,1840.75,0,False,True,True,False,True,...,True,False,False,False,False,False,False,True,False,False
4,4,0,70.7,151.65,1,True,False,True,False,True,...,False,False,True,False,True,False,False,False,False,False


In [3]:
data = data.drop('Unnamed: 0', axis=1)
data.head()

Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1 - 12,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
0,0,29.85,29.85,0,True,False,False,True,True,False,...,False,False,True,False,True,False,False,False,False,False
1,0,56.95,1889.5,0,False,True,True,False,True,False,...,False,False,False,True,False,False,True,False,False,False
2,0,53.85,108.15,1,False,True,True,False,True,False,...,False,False,False,True,True,False,False,False,False,False
3,0,42.3,1840.75,0,False,True,True,False,True,False,...,True,False,False,False,False,False,False,True,False,False
4,0,70.7,151.65,1,True,False,True,False,True,False,...,False,False,True,False,True,False,False,False,False,False


In [4]:
X = data.drop('Churn', axis=1)
Y = data['Churn']

In [5]:
X.head()

Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,PhoneService_No,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1 - 12,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
0,0,29.85,29.85,True,False,False,True,True,False,True,...,False,False,True,False,True,False,False,False,False,False
1,0,56.95,1889.5,False,True,True,False,True,False,False,...,False,False,False,True,False,False,True,False,False,False
2,0,53.85,108.15,False,True,True,False,True,False,False,...,False,False,False,True,True,False,False,False,False,False
3,0,42.3,1840.75,False,True,True,False,True,False,True,...,True,False,False,False,False,False,False,True,False,False
4,0,70.7,151.65,True,False,True,False,True,False,False,...,False,False,True,False,True,False,False,False,False,False


In [6]:
Y.head()

0    0
1    0
2    1
3    0
4    1
Name: Churn, dtype: int64

### Create the Model with Decision Tree 

In [7]:
x_train,x_test,y_train,y_test = train_test_split(X, Y, test_size = 0.2) 

In [8]:
model = DecisionTreeClassifier(criterion = "gini", random_state = 100, max_depth=6, min_samples_leaf=8)

In [9]:
model.fit(x_train,y_train)

In [10]:
y_pred = model.predict(x_test)
y_pred

array([0, 0, 0, ..., 0, 0, 1], dtype=int64)

In [11]:
print(classification_report(y_test, y_pred, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.85      0.89      0.87      1049
           1       0.64      0.54      0.59       358

    accuracy                           0.81      1407
   macro avg       0.74      0.72      0.73      1407
weighted avg       0.80      0.81      0.80      1407



In [12]:
print(confusion_matrix(y_test, y_pred))

[[938 111]
 [163 195]]


In [13]:
model.score(x_test, y_test)

0.8052594171997157

### Balance the Data using SMOTEENN

In [14]:
sm = SMOTEENN()
New_X, New_Y = sm.fit_resample(X, Y)

In [15]:
x_train1, x_test1, y_train1, y_test1 = train_test_split(New_X, New_Y, test_size = 0.2)

In [16]:
model_smote = DecisionTreeClassifier(criterion ='gini' , random_state = 100, max_depth = 6, min_samples_leaf = 8)
model_smote.fit(x_train1, y_train1)

In [17]:
y_pred1 = model_smote.predict(x_test1)
y_pred1

array([0, 1, 1, ..., 0, 0, 1], dtype=int64)

In [18]:
print(classification_report(y_test1, y_pred1))

              precision    recall  f1-score   support

           0       0.91      0.91      0.91       544
           1       0.93      0.93      0.93       642

    accuracy                           0.92      1186
   macro avg       0.92      0.92      0.92      1186
weighted avg       0.92      0.92      0.92      1186



In [19]:
print(confusion_matrix(y_test1, y_pred1))

[[497  47]
 [ 47 595]]


In [20]:
model_smote.score(x_test1, y_test1)

0.9207419898819561

### Create the Model with Random Forest

In [21]:
x_train1r, x_test1r, y_train1r, y_test1r = train_test_split(New_X, New_Y, test_size = 0.2)

In [22]:
model_smote_rf = RandomForestClassifier(n_estimators=100, criterion ='gini' , random_state = 100, max_depth = 6, min_samples_leaf = 8)
model_smote_rf.fit(x_train1r, y_train1r)

In [23]:
y_pred1r = model_smote_rf.predict(x_test1r)
y_pred1r

array([0, 1, 0, ..., 0, 0, 1], dtype=int64)

In [24]:
print(confusion_matrix(y_test1, y_pred1))

[[497  47]
 [ 47 595]]


In [25]:
model_smote_rf.score(x_test1r, y_test1r)

0.9418212478920742

### Save Model

In [26]:
import pickle

In [27]:
filename = 'model.sac'

In [28]:
pickle.dump(model_smote, open(filename, 'wb'))

In [29]:
load_model = pickle.load(open(filename, 'rb'))

In [30]:
load_model.score(x_test1, y_test1)

0.9207419898819561