In [1]:
#!/usr/bin/env python
# coding: utf-8

# Importing Libraries
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from imblearn.combine import SMOTEENN
import pickle

# Reading CSV
df = pd.read_csv("tel_churn.csv")
df = df.drop('Unnamed: 0', axis=1)
x = df.drop('Churn', axis=1)
y = df['Churn']

# Train Test Split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

### Decision Tree Classifier ###

# Model initialization and training
model_dt = DecisionTreeClassifier(criterion="gini", random_state=100, max_depth=6, min_samples_leaf=8)
model_dt.fit(x_train, y_train)

# Predictions and evaluation
y_pred_dt = model_dt.predict(x_test)
print("Decision Tree Model Score:", model_dt.score(x_test, y_test))
print(classification_report(y_test, y_pred_dt, labels=[0, 1]))

# Applying SMOTEENN for imbalanced data handling
sm = SMOTEENN()
X_resampled, y_resampled = sm.fit_resample(x, y)
xr_train, xr_test, yr_train, yr_test = train_test_split(X_resampled, y_resampled, test_size=0.2)

# Training Decision Tree Classifier on SMOTEENN data
model_dt_smote = DecisionTreeClassifier(criterion="gini", random_state=100, max_depth=6, min_samples_leaf=8)
model_dt_smote.fit(xr_train, yr_train)
yr_pred_dt_smote = model_dt_smote.predict(xr_test)
print("Decision Tree with SMOTEENN Model Score:", model_dt_smote.score(xr_test, yr_test))
print(classification_report(yr_test, yr_pred_dt_smote))
print("Confusion Matrix:\n", confusion_matrix(yr_test, yr_pred_dt_smote))

# Pickling the Decision Tree model
filename_dt = 'model_dt.sav'
pickle.dump(model_dt_smote, open(filename_dt, 'wb'))
print("Decision Tree model saved as model_dt.sav")


### Random Forest Classifier ###

# Model initialization and training
model_rf = RandomForestClassifier(n_estimators=100, criterion='gini', random_state=100, max_depth=6, min_samples_leaf=8)
model_rf.fit(x_train, y_train)

# Predictions and evaluation
y_pred_rf = model_rf.predict(x_test)
print("Random Forest Model Score:", model_rf.score(x_test, y_test))
print(classification_report(y_test, y_pred_rf, labels=[0, 1]))

# Training Random Forest Classifier on SMOTEENN data
X_resampled_rf, y_resampled_rf = sm.fit_resample(x, y)
xr_train_rf, xr_test_rf, yr_train_rf, yr_test_rf = train_test_split(X_resampled_rf, y_resampled_rf, test_size=0.2)
model_rf_smote = RandomForestClassifier(n_estimators=100, criterion='gini', random_state=100, max_depth=6, min_samples_leaf=8)
model_rf_smote.fit(xr_train_rf, yr_train_rf)

# Predictions and evaluation
yr_pred_rf_smote = model_rf_smote.predict(xr_test_rf)
print("Random Forest with SMOTEENN Model Score:", model_rf_smote.score(xr_test_rf, yr_test_rf))
print(classification_report(yr_test_rf, yr_pred_rf_smote))
print("Confusion Matrix:\n", confusion_matrix(yr_test_rf, yr_pred_rf_smote))

# Pickling the Random Forest model
filename_rf = 'model_rf.sav'
pickle.dump(model_rf_smote, open(filename_rf, 'wb'))
print("Random Forest model saved as model_rf.sav")


Decision Tree Model Score: 0.7995735607675906
              precision    recall  f1-score   support

           0       0.86      0.88      0.87      1056
           1       0.60      0.57      0.59       351

    accuracy                           0.80      1407
   macro avg       0.73      0.72      0.73      1407
weighted avg       0.80      0.80      0.80      1407

Decision Tree with SMOTEENN Model Score: 0.9190110826939472
              precision    recall  f1-score   support

           0       0.91      0.91      0.91       521
           1       0.93      0.93      0.93       652

    accuracy                           0.92      1173
   macro avg       0.92      0.92      0.92      1173
weighted avg       0.92      0.92      0.92      1173

Confusion Matrix:
 [[472  49]
 [ 46 606]]
Decision Tree model saved as model_dt.sav
Random Forest Model Score: 0.806680881307747
              precision    recall  f1-score   support

           0       0.83      0.93      0.88      1056
  