In [486]:
# Importing essential libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, classification_report

In [487]:
# Loading the dataset
dataset=pd.read_csv('churn.csv')

In [488]:
dataset.head()

Unnamed: 0,State,Account Length,Area Code,Phone,Int Plan,VMail Plan,VMail Message,Day Mins,Day Calls,Day Charge,...,Eve Calls,Eve Charge,Night Mins,Night Calls,Night Charge,Intl Mins,Intl Calls,Intl Charge,CustServ Calls,Churn?
0,KS,128,415,382-4657,no,yes,25,265.1,110,45.07,...,99,16.78,244.7,91,11.01,10.0,3,2.7,1,False.
1,OH,107,415,371-7191,no,yes,26,161.6,123,27.47,...,103,16.62,254.4,103,11.45,13.7,3,3.7,1,False.
2,NJ,137,415,358-1921,no,no,0,243.4,114,41.38,...,110,10.3,162.6,104,7.32,12.2,5,3.29,0,False.
3,OH,84,408,375-9999,yes,no,0,299.4,71,50.9,...,88,5.26,196.9,89,8.86,6.6,7,1.78,2,False.
4,OK,75,415,330-6626,yes,no,0,166.7,113,28.34,...,122,12.61,186.9,121,8.41,10.1,3,2.73,3,False.


In [489]:
dataset.tail()

Unnamed: 0,State,Account Length,Area Code,Phone,Int Plan,VMail Plan,VMail Message,Day Mins,Day Calls,Day Charge,...,Eve Calls,Eve Charge,Night Mins,Night Calls,Night Charge,Intl Mins,Intl Calls,Intl Charge,CustServ Calls,Churn?
1993,IN,86,510,357-7893,no,no,0,216.3,96,36.77,...,77,22.64,214.0,110,9.63,4.5,3,1.22,0,False.
1994,MD,84,510,369-2899,no,no,0,169.5,96,28.82,...,94,13.4,98.2,70,4.42,10.6,7,2.86,0,False.
1995,NV,118,510,381-1026,no,yes,35,256.3,119,43.57,...,91,21.94,215.5,130,9.7,11.7,1,3.16,1,False.
1996,CO,89,415,388-8722,no,no,0,179.7,128,30.55,...,92,25.48,185.3,120,8.34,7.6,3,2.05,1,False.
1997,KS,93,415,418-3135,no,no,0,266.0,120,45.22,...,84,11.06,165.8,63,7.46,13.1,6,3.54,3,False.


In [490]:
# Checking the presence of empty or null Values
null_values = dataset.isnull().sum()
print(null_values)

State             0
Account Length    0
Area Code         0
Phone             0
Int Plan          0
VMail Plan        0
VMail Message     0
Day Mins          0
Day Calls         0
Day Charge        0
Eve Mins          0
Eve Calls         0
Eve Charge        0
Night Mins        0
Night Calls       0
Night Charge      0
Intl Mins         0
Intl Calls        0
Intl Charge       0
CustServ Calls    0
Churn?            0
dtype: int64


In [491]:
# Dropping the useless features
X=dataset.drop(['State','Phone','Int Plan','VMail Plan','Churn?'],axis=1)
y=dataset['Churn?']

In [492]:
X

Unnamed: 0,Account Length,Area Code,VMail Message,Day Mins,Day Calls,Day Charge,Eve Mins,Eve Calls,Eve Charge,Night Mins,Night Calls,Night Charge,Intl Mins,Intl Calls,Intl Charge,CustServ Calls
0,128,415,25,265.1,110,45.07,197.4,99,16.78,244.7,91,11.01,10.0,3,2.70,1
1,107,415,26,161.6,123,27.47,195.5,103,16.62,254.4,103,11.45,13.7,3,3.70,1
2,137,415,0,243.4,114,41.38,121.2,110,10.30,162.6,104,7.32,12.2,5,3.29,0
3,84,408,0,299.4,71,50.90,61.9,88,5.26,196.9,89,8.86,6.6,7,1.78,2
4,75,415,0,166.7,113,28.34,148.3,122,12.61,186.9,121,8.41,10.1,3,2.73,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1993,86,510,0,216.3,96,36.77,266.3,77,22.64,214.0,110,9.63,4.5,3,1.22,0
1994,84,510,0,169.5,96,28.82,157.6,94,13.40,98.2,70,4.42,10.6,7,2.86,0
1995,118,510,35,256.3,119,43.57,258.1,91,21.94,215.5,130,9.70,11.7,1,3.16,1
1996,89,415,0,179.7,128,30.55,299.8,92,25.48,185.3,120,8.34,7.6,3,2.05,1


In [493]:
X.info

<bound method DataFrame.info of       Account Length  Area Code  VMail Message  Day Mins  Day Calls  \
0                128        415             25     265.1        110   
1                107        415             26     161.6        123   
2                137        415              0     243.4        114   
3                 84        408              0     299.4         71   
4                 75        415              0     166.7        113   
...              ...        ...            ...       ...        ...   
1993              86        510              0     216.3         96   
1994              84        510              0     169.5         96   
1995             118        510             35     256.3        119   
1996              89        415              0     179.7        128   
1997              93        415              0     266.0        120   

      Day Charge  Eve Mins  Eve Calls  Eve Charge  Night Mins  Night Calls  \
0          45.07     197.4         99

In [494]:
y.info

<bound method Series.info of 0       False.
1       False.
2       False.
3       False.
4       False.
         ...  
1993    False.
1994    False.
1995    False.
1996    False.
1997    False.
Name: Churn?, Length: 1998, dtype: object>

In [495]:
# Splitting the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2,random_state=42)

In [496]:
x_train.shape

(1598, 16)

In [497]:
y_train.shape

(1598,)

In [498]:
x_test.shape

(400, 16)

In [499]:
y_test.shape

(400,)

In [500]:
# Traning the Model with RandomForestClassifier
cls=RandomForestClassifier()
cls.fit(x_train,y_train)

In [501]:
y_pred=cls.predict(x_test)

In [502]:
print(y_test.shape)
print(y_pred.shape)

(400,)
(400,)


In [503]:
len(y_pred)==len(y_test)

True

In [504]:
print('Accuracy_score :',accuracy_score(y_test,y_pred))

Accuracy_score : 0.9075


In [505]:
conf_matrix = confusion_matrix(y_test, y_pred)
print('Confusion Matrix :\n', conf_matrix)

Confusion Matrix :
 [[345   2]
 [ 35  18]]


In [506]:
print("Precision : ",precision_score(y_test,y_pred,average=None))

Precision :  [0.90789474 0.9       ]


In [507]:
print("Recall : ",recall_score(y_test,y_pred,average=None))

Recall :  [0.99423631 0.33962264]


In [508]:
print("F1 score : ",f1_score(y_test,y_pred,average=None))

F1 score :  [0.94910591 0.49315068]


In [509]:
print("The Classification report is : ",classification_report(y_test,y_pred))

The Classification report is :                precision    recall  f1-score   support

      False.       0.91      0.99      0.95       347
       True.       0.90      0.34      0.49        53

    accuracy                           0.91       400
   macro avg       0.90      0.67      0.72       400
weighted avg       0.91      0.91      0.89       400



In [510]:
# Prediction with new data sample
x_new=np.array([[120,19,13,356,1,42,0,10,34,1,10,15,0,5,15,3]])

In [511]:
print(x_new.shape)

(1, 16)


In [513]:
prediction = cls.predict(x_new)



In [514]:
print("Churn status =",prediction)

Churn status = ['False.']
