### Model Building

In [93]:
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from imblearn.combine import SMOTEENN

In [44]:
data = pd.read_csv('C:/Users/jyoth/Documents/Telco_Customer_Churn/Telco_data_cleaned.csv')

In [45]:
data.head()

Unnamed: 0.1,Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1 - 12,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
0,0,0,0,1,0,0,0,0,1,0,...,0,0,1,0,1,0,0,0,0,0
1,1,1,0,0,0,1,0,1,0,1,...,0,0,0,1,0,0,1,0,0,0
2,2,1,0,0,0,1,0,1,1,0,...,0,0,0,1,1,0,0,0,0,0
3,3,1,0,0,0,0,0,1,0,1,...,1,0,0,0,0,0,0,1,0,0
4,4,0,0,0,0,1,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0


In [46]:
data = data.drop('Unnamed: 0',axis=1)

In [47]:
# Creating X and Y Variables

x = data.drop('Churn',axis=1)
x.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1 - 12,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
0,0,0,1,0,0,0,0,1,0,0,...,0,0,1,0,1,0,0,0,0,0
1,1,0,0,0,1,0,1,0,1,0,...,0,0,0,1,0,0,1,0,0,0
2,1,0,0,0,1,0,1,1,0,0,...,0,0,0,1,1,0,0,0,0,0
3,1,0,0,0,0,0,1,0,1,1,...,1,0,0,0,0,0,0,1,0,0
4,0,0,0,0,1,0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0


In [48]:
y = data['Churn']
y

0       0
1       0
2       1
3       0
4       1
       ..
7027    0
7028    0
7029    0
7030    1
7031    0
Name: Churn, Length: 7032, dtype: int64

In [49]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size = 0.2)

### Decision Tree Classifier

In [50]:
model_dt = DecisionTreeClassifier(criterion='gini', random_state=100, max_depth=6, min_samples_leaf=8)

In [51]:
model_dt.fit(x_train,y_train)

In [52]:
y_pred = model_dt.predict(x_test)

In [53]:
y_pred

array([0, 1, 1, ..., 0, 0, 0], dtype=int64)

In [54]:
print(classification_report(y_test, y_pred, labels = [0,1]))

              precision    recall  f1-score   support

           0       0.87      0.87      0.87      1073
           1       0.59      0.60      0.60       334

    accuracy                           0.81      1407
   macro avg       0.73      0.74      0.73      1407
weighted avg       0.81      0.81      0.81      1407



In [55]:
print(confusion_matrix(y_test, y_pred))

[[935 138]
 [134 200]]


In [90]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.806680881307747


### Sampling(over/under) Using SMOTE ENN 

In [56]:
sm = SMOTEENN()
x_resampled, y_resampled = sm.fit_resample(x,y)

In [57]:
xr_train, xr_test, yr_train, yr_test = train_test_split(x_resampled,y_resampled, test_size = 0.2)

In [58]:
model_dt_smote = DecisionTreeClassifier(criterion='gini', random_state=100, max_depth=6, min_samples_leaf=8)

In [59]:
model_dt_smote.fit(xr_train,yr_train)

In [60]:
y_pred_smote = model_dt_smote.predict(xr_test)

In [61]:
print(classification_report(yr_test, y_pred_smote, labels = [0,1]))

              precision    recall  f1-score   support

           0       0.91      0.91      0.91       550
           1       0.92      0.92      0.92       608

    accuracy                           0.92      1158
   macro avg       0.92      0.92      0.92      1158
weighted avg       0.92      0.92      0.92      1158



In [62]:
print(confusion_matrix(yr_test, y_pred_smote))

[[502  48]
 [ 47 561]]


In [89]:
accuracy = accuracy_score(yr_test, y_pred_smote)
print("Accuracy:", accuracy)

Accuracy: 0.9179620034542314


In [95]:
# Calculate precision
precision = precision_score(yr_test, y_pred_smote)

# Calculate recall
recall = recall_score(yr_test, y_pred_smote)

# Calculate F1-score
f1 = f1_score(yr_test, y_pred_smote)

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Precision: 0.9211822660098522
Recall: 0.9226973684210527
F1-score: 0.9219391947411668


### Random Forest Classifier

In [63]:
from sklearn.ensemble import RandomForestClassifier

In [64]:
model_rf = RandomForestClassifier(n_estimators=100, criterion='gini', random_state=100, max_depth=6, min_samples_leaf=8)
model_rf.fit(x_train,y_train)
y_pred_rf = model_rf.predict(x_test)

In [65]:
print(classification_report(y_test, y_pred_rf, labels = [0,1]))

              precision    recall  f1-score   support

           0       0.86      0.91      0.88      1073
           1       0.65      0.51      0.57       334

    accuracy                           0.82      1407
   macro avg       0.75      0.71      0.73      1407
weighted avg       0.81      0.82      0.81      1407



In [66]:
print(confusion_matrix(y_test, y_pred_rf))

[[980  93]
 [163 171]]


In [85]:
accuracy = accuracy_score(y_test, y_pred_rf)
print("Accuracy:", accuracy)

Accuracy: 0.8180525941719972


### Sampled

In [67]:
sm = SMOTEENN()
x_resampled_rf, y_resampled_rf = sm.fit_resample(x,y)
rf_xr_train, rf_xr_test, rf_yr_train, rf_yr_test = train_test_split(x_resampled_rf,y_resampled_rf, test_size = 0.2)

model_rf_smote = RandomForestClassifier(criterion='gini', random_state=100, max_depth=6, min_samples_leaf=8)
model_rf_smote.fit(rf_xr_train,rf_yr_train)
rf_y_pred_smote = model_rf_smote.predict(rf_xr_test)

In [68]:
print(classification_report(rf_yr_test, rf_y_pred_smote, labels = [0,1]))

              precision    recall  f1-score   support

           0       0.94      0.91      0.93       511
           1       0.93      0.96      0.95       657

    accuracy                           0.94      1168
   macro avg       0.94      0.93      0.94      1168
weighted avg       0.94      0.94      0.94      1168



In [87]:
print(confusion_matrix(rf_yr_test, rf_y_pred_smote))

[[466  45]
 [ 28 629]]


In [88]:
accuracy = accuracy_score(rf_yr_test, rf_y_pred_smote)
print("Accuracy:", accuracy)

Accuracy: 0.9375


In [94]:
# Calculate precision
precision = precision_score(rf_yr_test, rf_y_pred_smote)

# Calculate recall
recall = recall_score(rf_yr_test, rf_y_pred_smote)

# Calculate F1-score
f1 = f1_score(rf_yr_test, rf_y_pred_smote)

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Precision: 0.9332344213649851
Recall: 0.9573820395738204
F1-score: 0.9451540195341849


#### Saving the Model

In [70]:
import pickle

In [71]:
filename = 'model.sav'

In [72]:
pickle.dump(model_rf_smote, open(filename,'wb'))

#### Loading the saved model to test

In [73]:
load_model = pickle.load(open(filename, 'rb'))

In [74]:
load_model.score(xr_test, yr_test)

0.9326424870466321