## Importing the Libraries

In [1]:
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from imblearn.combine import SMOTEENN

In [2]:
#Reading csv
df=pd.read_csv("tel_churn.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1 - 12,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
0,0,0,29.85,29.85,0,1,0,0,1,1,...,0,0,1,0,1,0,0,0,0,0
1,1,0,56.95,1889.5,0,0,1,1,0,1,...,0,0,0,1,0,0,1,0,0,0
2,2,0,53.85,108.15,1,0,1,1,0,1,...,0,0,0,1,1,0,0,0,0,0
3,3,0,42.3,1840.75,0,0,1,1,0,1,...,1,0,0,0,0,0,0,1,0,0
4,4,0,70.7,151.65,1,1,0,1,0,1,...,0,0,1,0,1,0,0,0,0,0


In [3]:
df=df.drop('Unnamed: 0',axis=1)

In [4]:
x=df.drop('Churn',axis=1)
x

Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,PhoneService_No,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1 - 12,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
0,0,29.85,29.85,1,0,0,1,1,0,1,...,0,0,1,0,1,0,0,0,0,0
1,0,56.95,1889.50,0,1,1,0,1,0,0,...,0,0,0,1,0,0,1,0,0,0
2,0,53.85,108.15,0,1,1,0,1,0,0,...,0,0,0,1,1,0,0,0,0,0
3,0,42.30,1840.75,0,1,1,0,1,0,1,...,1,0,0,0,0,0,0,1,0,0
4,0,70.70,151.65,1,0,1,0,1,0,0,...,0,0,1,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7027,0,84.80,1990.50,0,1,0,1,0,1,0,...,0,0,0,1,0,1,0,0,0,0
7028,0,103.20,7362.90,1,0,0,1,0,1,0,...,0,1,0,0,0,0,0,0,0,1
7029,0,29.60,346.45,1,0,0,1,0,1,1,...,0,0,1,0,1,0,0,0,0,0
7030,1,74.40,306.60,0,1,0,1,1,0,0,...,0,0,0,1,1,0,0,0,0,0


In [5]:
y=df['Churn']
y

0       0
1       0
2       1
3       0
4       1
       ..
7027    0
7028    0
7029    0
7030    1
7031    0
Name: Churn, Length: 7032, dtype: int64

In [6]:
#Train Test Split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)

**Decision Tree Classifier**

In [7]:
model_dt=DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)

In [8]:
model_dt.fit(x_train,y_train)

In [9]:
y_pred=model_dt.predict(x_test)
y_pred

array([0, 1, 1, ..., 0, 0, 0])

In [10]:
model_dt.score(x_test,y_test)

0.7782515991471215

In [11]:
print(classification_report(y_test, y_pred, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.82      0.88      0.85      1025
           1       0.61      0.50      0.55       382

    accuracy                           0.78      1407
   macro avg       0.72      0.69      0.70      1407
weighted avg       0.77      0.78      0.77      1407



In [13]:
from imblearn.combine import SMOTEENN

# Assuming x (features) and y (labels) are already defined
sm = SMOTEENN()
X_resampled, y_resampled = sm.fit_resample(x, y)


In [14]:
xr_train,xr_test,yr_train,yr_test=train_test_split(X_resampled, y_resampled,test_size=0.2)

In [15]:
model_dt_smote=DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)

In [16]:
model_dt_smote.fit(xr_train,yr_train)
yr_predict = model_dt_smote.predict(xr_test)
model_score_r = model_dt_smote.score(xr_test, yr_test)
print(model_score_r)
print(metrics.classification_report(yr_test, yr_predict))

0.9294117647058824
              precision    recall  f1-score   support

           0       0.94      0.90      0.91       505
           1       0.93      0.95      0.94       685

    accuracy                           0.93      1190
   macro avg       0.93      0.92      0.93      1190
weighted avg       0.93      0.93      0.93      1190



In [17]:
print(metrics.confusion_matrix(yr_test, yr_predict))

[[452  53]
 [ 31 654]]


Now we see the accuracy of 92 percent and a very good recall,f1 score and precision for minority class.So we can see better results

Trying out some other classifiers

### Random Forest Classifier

In [18]:
from sklearn.ensemble import RandomForestClassifier

In [19]:
model_rf=RandomForestClassifier(n_estimators=100, criterion='gini', random_state = 100,max_depth=6, min_samples_leaf=8)

In [20]:
model_rf.fit(x_train,y_train)

In [21]:
y_pred=model_rf.predict(x_test)

In [22]:
model_rf.score(x_test,y_test)

0.7839374555792467

In [23]:
print(classification_report(y_test, y_pred, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.82      0.90      0.86      1025
           1       0.64      0.47      0.54       382

    accuracy                           0.78      1407
   macro avg       0.73      0.69      0.70      1407
weighted avg       0.77      0.78      0.77      1407



In [25]:
from imblearn.combine import SMOTEENN

# Assuming x (features) and y (labels) are already defined
sm = SMOTEENN()
X_resampled1, y_resampled1 = sm.fit_resample(x, y)


In [26]:
xr_train1,xr_test1,yr_train1,yr_test1=train_test_split(X_resampled1, y_resampled1,test_size=0.2)

In [27]:
model_rf_smote=RandomForestClassifier(n_estimators=100, criterion='gini', random_state = 100,max_depth=6, min_samples_leaf=8)

In [28]:
model_rf_smote.fit(xr_train1,yr_train1)

In [29]:
yr_predict1 = model_rf_smote.predict(xr_test1)

In [30]:
model_score_r1 = model_rf_smote.score(xr_test1, yr_test1)

In [31]:
print(model_score_r1)
print(metrics.classification_report(yr_test1, yr_predict1))

0.9314481576692374
              precision    recall  f1-score   support

           0       0.95      0.89      0.92       508
           1       0.92      0.96      0.94       659

    accuracy                           0.93      1167
   macro avg       0.93      0.93      0.93      1167
weighted avg       0.93      0.93      0.93      1167



In [32]:
print(metrics.confusion_matrix(yr_test1, yr_predict1))

[[454  54]
 [ 26 633]]


Here also we are able to get results with Random Forest,infact better than Decision Tree

# **Performing PCA**

In [33]:
#Here we apply PCA
from sklearn.decomposition import PCA
pca = PCA(0.9)
xr_train_pca = pca.fit_transform(xr_train1)
xr_test_pca = pca.transform(xr_test1)
explained_variance = pca.explained_variance_ratio_

In [34]:
model=RandomForestClassifier(n_estimators=100, criterion='gini', random_state = 100,max_depth=6, min_samples_leaf=8)

In [35]:
model.fit(xr_train_pca,yr_train1)

In [36]:
yr_predict_pca = model.predict(xr_test_pca)

In [37]:
model_score_r_pca = model.score(xr_test_pca, yr_test1)

In [38]:
print(model_score_r_pca)
print(metrics.classification_report(yr_test1, yr_predict_pca))

0.7446443873179092
              precision    recall  f1-score   support

           0       0.73      0.65      0.69       508
           1       0.75      0.82      0.78       659

    accuracy                           0.74      1167
   macro avg       0.74      0.73      0.74      1167
weighted avg       0.74      0.74      0.74      1167



With The help of PCA we are not able toget better results,so here we finalise the model which was created by RF model.
We will use the saved model so that we can use it further

### Pickling the model

In [39]:
import pickle

In [40]:
filename = 'model.sav'


In [41]:
pickle.dump(model_rf_smote, open(filename, 'wb'))

In [42]:
load_model = pickle.load(open(filename, 'rb'))

In [43]:
model_score_r1 = load_model.score(xr_test1, yr_test1)

In [44]:
model_score_r1

0.9314481576692374

The final model is Random Forest Classifier with SMOTEENN and is now ready and dumped in model.sav, which we will be using and prepare API's so that we can access our model from UI.