### Importing Libraries

In [None]:
import pandas as pd
import sklearn
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from imblearn.combine import SMOTEENN

In [None]:
print(sklearn.__version__)


1.2.2


#### Reading csv

In [None]:
df=pd.read_csv("tel_churn.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1 - 12,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
0,0,0,29.85,29.85,0,True,False,False,True,True,...,False,False,True,False,True,False,False,False,False,False
1,1,0,56.95,1889.5,0,False,True,True,False,True,...,False,False,False,True,False,False,True,False,False,False
2,2,0,53.85,108.15,1,False,True,True,False,True,...,False,False,False,True,True,False,False,False,False,False
3,3,0,42.3,1840.75,0,False,True,True,False,True,...,True,False,False,False,False,False,False,True,False,False
4,4,0,70.7,151.65,1,True,False,True,False,True,...,False,False,True,False,True,False,False,False,False,False


In [None]:
df=df.drop('Unnamed: 0',axis=1)

In [None]:
x=df.drop('Churn',axis=1)
x

Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,PhoneService_No,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1 - 12,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
0,0,29.85,29.85,True,False,False,True,True,False,True,...,False,False,True,False,True,False,False,False,False,False
1,0,56.95,1889.50,False,True,True,False,True,False,False,...,False,False,False,True,False,False,True,False,False,False
2,0,53.85,108.15,False,True,True,False,True,False,False,...,False,False,False,True,True,False,False,False,False,False
3,0,42.30,1840.75,False,True,True,False,True,False,True,...,True,False,False,False,False,False,False,True,False,False
4,0,70.70,151.65,True,False,True,False,True,False,False,...,False,False,True,False,True,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7027,0,84.80,1990.50,False,True,False,True,False,True,False,...,False,False,False,True,False,True,False,False,False,False
7028,0,103.20,7362.90,True,False,False,True,False,True,False,...,False,True,False,False,False,False,False,False,False,True
7029,0,29.60,346.45,True,False,False,True,False,True,True,...,False,False,True,False,True,False,False,False,False,False
7030,1,74.40,306.60,False,True,False,True,True,False,False,...,False,False,False,True,True,False,False,False,False,False


In [None]:
y=df['Churn']
y

0       0
1       0
2       1
3       0
4       1
       ..
7027    0
7028    0
7029    0
7030    1
7031    0
Name: Churn, Length: 7032, dtype: int64

##### Train Test Split

In [None]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)

#### Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
model_rf=RandomForestClassifier(n_estimators=100, random_state = 100,max_depth=6, min_samples_leaf=8)

In [None]:
model_rf.fit(x_train,y_train)

In [None]:
y_pred=model_rf.predict(x_test)

In [None]:
model_rf.score(x_test,y_test)

0.7981520966595593

In [None]:
print(classification_report(y_test, y_pred, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.83      0.91      0.87      1043
           1       0.65      0.48      0.55       364

    accuracy                           0.80      1407
   macro avg       0.74      0.69      0.71      1407
weighted avg       0.79      0.80      0.79      1407



In [None]:
sm = SMOTEENN()
X_resampled1, y_resampled1 = sm.fit_resample(x,y)

In [None]:
xr_train1,xr_test1,yr_train1,yr_test1=train_test_split(X_resampled1, y_resampled1,test_size=0.2)

In [None]:
model_rf_smote=RandomForestClassifier(n_estimators=100, criterion='gini', random_state = 42,max_depth=6, min_samples_leaf=8)

In [None]:
model_rf_smote.fit(xr_train1,yr_train1)

In [None]:
yr_predict1 = model_rf_smote.predict(xr_test1)

In [None]:
model_score_r1 = model_rf_smote.score(xr_test1, yr_test1)

In [None]:
print(model_score_r1)
print(metrics.classification_report(yr_test1, yr_predict1))

0.9369676320272572
              precision    recall  f1-score   support

           0       0.95      0.91      0.93       534
           1       0.93      0.96      0.94       640

    accuracy                           0.94      1174
   macro avg       0.94      0.93      0.94      1174
weighted avg       0.94      0.94      0.94      1174



In [None]:
print(metrics.confusion_matrix(yr_test1, yr_predict1))

[[486  48]
 [ 26 614]]


#### Pickling the model

In [None]:
import pickle

In [None]:
filename = 'new.pkl'

In [None]:
pickle.dump(model_rf_smote, open(filename, 'wb'))

In [None]:
load_model = pickle.load(open(filename, 'rb'))

In [None]:
model_score_r1 = load_model.score(xr_test1, yr_test1)

In [None]:
model_score_r1

0.9369676320272572

##### Our final model i.e. RF Classifier with SMOTEENN, is now ready and dumped in model.sav, which we will use and prepare API's so that we can access our model from UI.

In [None]:
x_test


Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,PhoneService_No,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1 - 12,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
1947,0,95.50,1916.20,True,False,False,True,False,True,False,...,True,False,False,False,False,True,False,False,False,False
5974,0,50.35,1098.85,True,False,True,False,True,False,True,...,False,False,True,False,False,True,False,False,False,False
818,0,106.35,6751.35,True,False,False,True,True,False,False,...,False,False,True,False,False,False,False,False,False,True
5706,0,108.15,3432.90,False,True,True,False,True,False,False,...,False,True,False,False,False,False,True,False,False,False
5970,0,19.65,978.00,False,True,True,False,True,False,False,...,True,False,False,False,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3854,0,105.45,2715.30,True,False,True,False,False,True,False,...,False,False,True,False,False,False,True,False,False,False
6683,0,48.25,1293.80,True,False,True,False,True,False,True,...,False,False,True,False,False,False,True,False,False,False
4446,0,19.40,93.40,False,True,True,False,True,False,False,...,False,False,False,True,True,False,False,False,False,False
2101,0,59.45,3157.00,False,True,False,True,False,True,False,...,False,True,False,False,False,False,False,False,True,False


In [None]:
yr_predict1

array([1, 1, 0, ..., 1, 1, 1])

**Decision Tree**

In [None]:
from sklearn.tree import DecisionTreeClassifier

clf = DecisionTreeClassifier()

# Train Decision Tree Classifer
clf = clf.fit(xr_train1,yr_train1)

#Predict the response for test dataset
yr_pred = clf.predict(xr_test1)


In [None]:
New_model_score_r1 = clf.score(xr_test1, yr_test1)

In [None]:
print(New_model_score_r1)
print(metrics.classification_report(yr_test1, yr_pred))

0.9378194207836457
              precision    recall  f1-score   support

           0       0.92      0.94      0.93       534
           1       0.95      0.93      0.94       640

    accuracy                           0.94      1174
   macro avg       0.94      0.94      0.94      1174
weighted avg       0.94      0.94      0.94      1174



In [None]:
y = clf.prdict(xr_test1[1])

AttributeError: ignored