### Importing the required Libraries

In [18]:
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from imblearn.combine import SMOTEENN

In [7]:
df = pd.read_csv('tel_churn.csv')
df.drop('Unnamed: 0',axis=1,inplace=True)

In [9]:
x=df.drop('Churn',axis=1)
y=df['Churn']

In [10]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)

### Decision Tree Classifier

In [11]:
model_dt=DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)

In [12]:
model_dt.fit(x_train,y_train)

In [13]:
y_pred = model_dt.predict(x_test)

In [19]:
accuracy_score(y_test,y_pred)

0.7789623312011372

In [17]:
print(classification_report(y_test,y_pred,labels=[0,1]))

              precision    recall  f1-score   support

           0       0.83      0.88      0.85      1020
           1       0.62      0.52      0.56       387

    accuracy                           0.78      1407
   macro avg       0.72      0.70      0.71      1407
weighted avg       0.77      0.78      0.77      1407



As you can see that the accuracy is quite low, and as it's an imbalanced dataset, we shouldn't consider Accuracy as our metrics to measure the model, as Accuracy is cursed in imbalanced datasets.

Hence, we need to check recall, precision & f1 score for the minority class, and it's quite evident that the precision, recall & f1 score is too low for Class 1, i.e. churned customers.

Hence, moving ahead to call SMOTEENN (UpSampling + ENN)

In [24]:
smote = SMOTEENN(random_state=42)
X_resampled, y_resampled = smote.fit_resample(x, y)

In [25]:
xr_train,xr_test,yr_train,yr_test=train_test_split(X_resampled, y_resampled,test_size=0.2)

In [26]:
model_dt_smote=DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)

In [28]:
model_dt_smote.fit(xr_train,yr_train)

In [30]:
yr_pred = model_dt_smote.predict(xr_test)

In [31]:
accuracy_score(yr_test,yr_pred)

0.928087986463621

In [33]:
print(classification_report(yr_test,yr_pred))

              precision    recall  f1-score   support

           0       0.95      0.88      0.91       513
           1       0.92      0.96      0.94       669

    accuracy                           0.93      1182
   macro avg       0.93      0.92      0.93      1182
weighted avg       0.93      0.93      0.93      1182



In [34]:
print(confusion_matrix(yr_test,yr_pred))

[[454  59]
 [ 26 643]]


### Random Forest Classifier

In [35]:
from sklearn.ensemble import RandomForestClassifier


In [36]:
model_rf = RandomForestClassifier(n_estimators=100,criterion='gini',random_state=100,max_depth=6,min_samples_leaf=8)