In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder
Label = LabelEncoder()
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score


import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


# Basic data exploration

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   gender            7043 non-null   object 
 2   SeniorCitizen     7043 non-null   int64  
 3   Partner           7043 non-null   object 
 4   Dependents        7043 non-null   object 
 5   tenure            7043 non-null   int64  
 6   PhoneService      7043 non-null   object 
 7   MultipleLines     7043 non-null   object 
 8   InternetService   7043 non-null   object 
 9   OnlineSecurity    7043 non-null   object 
 10  OnlineBackup      7043 non-null   object 
 11  DeviceProtection  7043 non-null   object 
 12  TechSupport       7043 non-null   object 
 13  StreamingTV       7043 non-null   object 
 14  StreamingMovies   7043 non-null   object 
 15  Contract          7043 non-null   object 
 16  PaperlessBilling  7043 non-null   object 


In [4]:
df.describe(include = 'all')

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
count,7043,7043,7043.0,7043,7043,7043.0,7043,7043,7043,7043,...,7043,7043,7043,7043,7043,7043,7043,7043.0,7043.0,7043
unique,7043,2,,2,2,,2,3,3,3,...,3,3,3,3,3,2,4,,6531.0,2
top,5364-EVNIB,Male,,No,No,,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,,20.2,No
freq,1,3555,,3641,4933,,6361,3390,3096,3498,...,3095,3473,2810,2785,3875,4171,2365,,11.0,5174
mean,,,0.162147,,,32.371149,,,,,...,,,,,,,,64.761692,,
std,,,0.368612,,,24.559481,,,,,...,,,,,,,,30.090047,,
min,,,0.0,,,0.0,,,,,...,,,,,,,,18.25,,
25%,,,0.0,,,9.0,,,,,...,,,,,,,,35.5,,
50%,,,0.0,,,29.0,,,,,...,,,,,,,,70.35,,
75%,,,0.0,,,55.0,,,,,...,,,,,,,,89.85,,


In [5]:
df.isna().sum()

customerID          0
gender              0
SeniorCitizen       0
Partner             0
Dependents          0
tenure              0
PhoneService        0
MultipleLines       0
InternetService     0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
Contract            0
PaperlessBilling    0
PaymentMethod       0
MonthlyCharges      0
TotalCharges        0
Churn               0
dtype: int64

In [6]:
df.isnull().sum()

customerID          0
gender              0
SeniorCitizen       0
Partner             0
Dependents          0
tenure              0
PhoneService        0
MultipleLines       0
InternetService     0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
Contract            0
PaperlessBilling    0
PaymentMethod       0
MonthlyCharges      0
TotalCharges        0
Churn               0
dtype: int64

In [7]:
df.shape

(7043, 21)

In [8]:
df1 = df.drop_duplicates()

In [9]:
df1.shape

(7043, 21)

In [10]:
ND = df1.select_dtypes(np.number)
CD = df1.select_dtypes(object)

In [11]:
ND.head()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges
0,0,1,29.85
1,0,34,56.95
2,0,2,53.85
3,0,45,42.3
4,0,2,70.7


In [12]:
CD.head()

Unnamed: 0,customerID,gender,Partner,Dependents,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,TotalCharges,Churn
0,7590-VHVEG,Female,Yes,No,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,No
1,5575-GNVDE,Male,No,No,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,1889.5,No
2,3668-QPYBK,Male,No,No,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,108.15,Yes
3,7795-CFOCW,Male,No,No,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),1840.75,No
4,9237-HQITU,Female,No,No,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,151.65,Yes


In [13]:
CD.nunique()  #BIKE.nunique()

customerID          7043
gender                 2
Partner                2
Dependents             2
PhoneService           2
MultipleLines          3
InternetService        3
OnlineSecurity         3
OnlineBackup           3
DeviceProtection       3
TechSupport            3
StreamingTV            3
StreamingMovies        3
Contract               3
PaperlessBilling       2
PaymentMethod          4
TotalCharges        6531
Churn                  2
dtype: int64

In [14]:
CD1 = CD.drop(['customerID'], axis = 1)

In [15]:
CD1.nunique() 

gender                 2
Partner                2
Dependents             2
PhoneService           2
MultipleLines          3
InternetService        3
OnlineSecurity         3
OnlineBackup           3
DeviceProtection       3
TechSupport            3
StreamingTV            3
StreamingMovies        3
Contract               3
PaperlessBilling       2
PaymentMethod          4
TotalCharges        6531
Churn                  2
dtype: int64

In [16]:
# lets drop Total charges for now.
# we need to round up the TotalCharges
CD2 = CD1.drop(['TotalCharges'], axis = 1)
TC = CD1['TotalCharges']

In [17]:
#Tc = pd.to_numeric(TC)
#TC.astype(int) #.astype(float)

In [18]:
CD2.nunique() 

gender              2
Partner             2
Dependents          2
PhoneService        2
MultipleLines       3
InternetService     3
OnlineSecurity      3
OnlineBackup        3
DeviceProtection    3
TechSupport         3
StreamingTV         3
StreamingMovies     3
Contract            3
PaperlessBilling    2
PaymentMethod       4
Churn               2
dtype: int64

In [19]:
CD3 = CD2.apply(Label.fit_transform)
CD3.head()

Unnamed: 0,gender,Partner,Dependents,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,Churn
0,0,1,0,0,1,0,0,2,0,0,0,0,0,1,2,0
1,1,0,0,1,0,0,2,0,2,0,0,0,1,0,3,0
2,1,0,0,1,0,0,2,2,0,0,0,0,0,1,3,1
3,1,0,0,0,1,0,2,0,2,2,0,0,1,0,0,0
4,0,0,0,1,0,1,0,0,0,0,0,0,0,1,2,1


In [20]:
data = pd.concat([ND, CD3, TC], axis = 1 )
data.head()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,gender,Partner,Dependents,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,Churn,TotalCharges
0,0,1,29.85,0,1,0,0,1,0,0,2,0,0,0,0,0,1,2,0,29.85
1,0,34,56.95,1,0,0,1,0,0,2,0,2,0,0,0,1,0,3,0,1889.5
2,0,2,53.85,1,0,0,1,0,0,2,2,0,0,0,0,0,1,3,1,108.15
3,0,45,42.3,1,0,0,0,1,0,2,0,2,2,0,0,1,0,0,0,1840.75
4,0,2,70.7,0,0,0,1,0,1,0,0,0,0,0,0,0,1,2,1,151.65


In [21]:
Data = data.round()

In [22]:
Data1 = Data.drop(['TotalCharges'], axis =1)
Data1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7043 entries, 0 to 7042
Data columns (total 19 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   SeniorCitizen     7043 non-null   int64  
 1   tenure            7043 non-null   int64  
 2   MonthlyCharges    7043 non-null   float64
 3   gender            7043 non-null   int32  
 4   Partner           7043 non-null   int32  
 5   Dependents        7043 non-null   int32  
 6   PhoneService      7043 non-null   int32  
 7   MultipleLines     7043 non-null   int32  
 8   InternetService   7043 non-null   int32  
 9   OnlineSecurity    7043 non-null   int32  
 10  OnlineBackup      7043 non-null   int32  
 11  DeviceProtection  7043 non-null   int32  
 12  TechSupport       7043 non-null   int32  
 13  StreamingTV       7043 non-null   int32  
 14  StreamingMovies   7043 non-null   int32  
 15  Contract          7043 non-null   int32  
 16  PaperlessBilling  7043 non-null   int32  


# Train Test Split

In [23]:
X = Data1.drop(['Churn'], axis = 1)
y = Data1[['Churn']]

In [24]:
X_train,X_test,y_train,y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [25]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(5634, 18)
(1409, 18)
(5634, 1)
(1409, 1)


In [26]:
from sklearn.linear_model import LogisticRegression
Lr =  LogisticRegression()
Lr.fit(X_train, y_train)
y_pred = Lr.predict(X_test)
print("Logistic Regression train score:",Lr.score(X_train,y_train))
print("Logistic Regression test score:",Lr.score(X_test,y_test))

Logistic Regression train score: 0.8061767838125665
Logistic Regression test score: 0.7963094393186657


In [27]:
print("precision_score is: ", precision_score(y_test,y_pred, average = 'weighted'))
print("recall_score is :", recall_score(y_test,y_pred, average = 'weighted'))
print("accuracy_score is:     ", accuracy_score(y_test,y_pred))
ac1 = accuracy_score(y_test,y_pred)

precision_score is:  0.7867506737274975
recall_score is : 0.7963094393186657
accuracy_score is:      0.7963094393186657


In [28]:
from sklearn.neighbors import KNeighborsClassifier

KNN = KNeighborsClassifier()
KNN.fit(X_train,y_train)
y_pred2 = KNN.predict(X_test)
print("KNN train score:",KNN.score(X_train,y_train))
print("KNN test score:",KNN.score(X_test,y_test))

KNN train score: 0.8432729854455094
KNN test score: 0.7622427253371186


In [29]:
print("precision_score is: ", precision_score(y_test,y_pred2, average = 'weighted'))
print("recall_score is :", recall_score(y_test,y_pred2, average = 'weighted'))
print("f1_score is:     ", f1_score(y_test,y_pred2,average='weighted'))
print("accuracy_score is:     ", accuracy_score(y_test,y_pred2))
ac2 = accuracy_score(y_test,y_pred2)

precision_score is:  0.7538540166042594
recall_score is : 0.7622427253371186
f1_score is:      0.7572719615837388
accuracy_score is:      0.7622427253371186


In [30]:
from sklearn.svm import SVC

SVC = SVC()
SVC.fit(X_train,y_train)
y_pred3 = SVC.predict(X_test)
print("SVC train score:",SVC.score(X_train,y_train))
print("SVC test score:",SVC.score(X_test,y_test))

SVC train score: 0.7912673056443025
SVC test score: 0.7828246983676366


In [31]:
print("precision_score is: ", precision_score(y_test,y_pred3, average = 'weighted'))
print("recall_score is :", recall_score(y_test,y_pred3, average = 'weighted'))
print("f1_score is:     ", f1_score(y_test,y_pred3,average='weighted'))
print("accuracy_score is:     ", accuracy_score(y_test,y_pred3))
ac3 =  accuracy_score(y_test,y_pred3)

precision_score is:  0.7650224158357783
recall_score is : 0.7828246983676366
f1_score is:      0.7611874770891676
accuracy_score is:      0.7828246983676366


In [32]:
from sklearn.naive_bayes import GaussianNB

NB = GaussianNB()
NB.fit(X_train,y_train)
y_pred4 = NB.predict(X_test)
print("NB train score:",NB.score(X_train,y_train))
print("NB test score:",NB.score(X_test,y_test))

NB train score: 0.7589634362797302
NB test score: 0.7437899219304471


In [33]:
print("precision_score is: ", precision_score(y_test,y_pred4, average = 'weighted'))
print("recall_score is :", recall_score(y_test,y_pred4, average = 'weighted'))
print("f1_score is:     ", f1_score(y_test,y_pred4,average='weighted'))
print("accuracy_score is:     ", accuracy_score(y_test,y_pred4))
ac4 = accuracy_score(y_test,y_pred4)

precision_score is:  0.7827587020172057
recall_score is : 0.7437899219304471
f1_score is:      0.7553845711656383
accuracy_score is:      0.7437899219304471


In [34]:
from sklearn.tree import DecisionTreeClassifier

DTC = DecisionTreeClassifier(criterion='gini', splitter='best', 
      max_depth=5000, min_samples_split=2, min_samples_leaf=1, 
      min_weight_fraction_leaf=0.0, max_features=None, 
      random_state=None, max_leaf_nodes=None, 
      min_impurity_decrease=0, min_impurity_split=None, 
      class_weight=None, ccp_alpha=0)

DTC.fit(X_train,y_train)
y_pred5 = DTC.predict(X_test)
print("DTC train score:",DTC.score(X_train,y_train))
print("DTC test score:",DTC.score(X_test,y_test))

DTC train score: 0.9889953851615193
DTC test score: 0.7196593328601846


In [35]:
print("precision_score is: ", precision_score(y_test,y_pred5, average = 'weighted'))
print("recall_score is :", recall_score(y_test,y_pred5, average = 'weighted'))
print("f1_score is:     ", f1_score(y_test,y_pred5,average='weighted'))
print("accuracy_score is:     ", accuracy_score(y_test,y_pred5))
ac5 = accuracy_score(y_test,y_pred5)

precision_score is:  0.7254022427843918
recall_score is : 0.7196593328601846
f1_score is:      0.722341150065372
accuracy_score is:      0.7196593328601846


In [36]:
from sklearn.ensemble import RandomForestClassifier

RFC = RandomForestClassifier(n_estimators=500, criterion='gini', max_depth=None, 
                             min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, 
                             max_features='log2', max_leaf_nodes=None, min_impurity_decrease=0.0, 
                             min_impurity_split=None, bootstrap=True, oob_score=False, n_jobs=-1, 
                             random_state=None, verbose=0, warm_start=False, class_weight=None, 
                             ccp_alpha=0.0, max_samples=None)

RFC.fit(X_train,y_train)
y_pred6 = RFC.predict(X_test)
print("RFC train score:",RFC.score(X_train,y_train))
print("RFC test score:",RFC.score(X_test,y_test))

RFC train score: 0.9889953851615193
RFC test score: 0.7750177430801988


In [37]:
print("precision_score is: ", precision_score(y_test,y_pred6, average = 'weighted'))
print("recall_score is :", recall_score(y_test,y_pred6, average = 'weighted'))
print("f1_score is:     ", f1_score(y_test,y_pred6,average='weighted'))
print("accuracy_score is:     ", accuracy_score(y_test,y_pred6))
ac6 =  accuracy_score(y_test,y_pred6)

precision_score is:  0.7621117112941853
recall_score is : 0.7750177430801988
f1_score is:      0.7658858209241406
accuracy_score is:      0.7750177430801988


In [38]:
from sklearn.ensemble import BaggingClassifier

BC = BaggingClassifier(base_estimator = DTC)
BC.fit(X_train,y_train)
y_pred7 = BC.predict(X_test)
print("BC train score:",BC.score(X_train,y_train))
print("BC test score:",BC.score(X_test,y_test))

BC train score: 0.971068512602059
BC test score: 0.7714691270404542


In [39]:
print("precision_score is: ", precision_score(y_test,y_pred7, average = 'macro'))
print("recall_score is :", recall_score(y_test,y_pred7, average = 'macro'))
print("f1_score is:     ", f1_score(y_test,y_pred7,average='macro'))
print("accuracy_score is:     ", accuracy_score(y_test,y_pred7))
ac7 = accuracy_score(y_test,y_pred7)

precision_score is:  0.700326408590342
recall_score is : 0.6714201436745604
f1_score is:      0.6821222584062463
accuracy_score is:      0.7714691270404542


In [40]:
from sklearn.ensemble import AdaBoostClassifier

ADB = AdaBoostClassifier()
ADB.fit(X_train, y_train)
y_pred8 = ADB.predict(X_test)
print("ADB train score:",ADB.score(X_train,y_train))
print("ADB test score:",ADB.score(X_test,y_test))

ADB train score: 0.81008164714235
ADB test score: 0.78708303761533


In [41]:
print("precision_score is: ", precision_score(y_test,y_pred8, average = 'weighted'))
print("recall_score is :", recall_score(y_test,y_pred8, average = 'weighted'))
print("f1_score is:     ", f1_score(y_test,y_pred8,average='weighted'))
print("accuracy_score is:     ", accuracy_score(y_test,y_pred8))
ac8 = accuracy_score(y_test,y_pred8)

precision_score is:  0.779402562111041
recall_score is : 0.78708303761533
f1_score is:      0.7822870696969536
accuracy_score is:      0.78708303761533


In [42]:
from sklearn.linear_model import Perceptron
PR = Perceptron()
PR.fit(X_train,y_train)
y_pred9 = PR.predict(X_test)
print("PR train score:",PR.score(X_train,y_train))
print("PR test score:",PR.score(X_test,y_test))

PR train score: 0.7335818246361377
PR test score: 0.7388218594748048


In [43]:
print("accuracy_score is:     ", accuracy_score(y_test,y_pred9))
ac9 = accuracy_score(y_test,y_pred9)

accuracy_score is:      0.7388218594748048


In [44]:
from sklearn.ensemble import GradientBoostingClassifier

GBC = GradientBoostingClassifier()
GBC.fit(X_train, y_train)
y_pred10 = GBC.predict(X_test)
print("GBC train score:",GBC.score(X_train,y_train))
print("GBC test score:",GBC.score(X_test,y_test))

GBC train score: 0.8237486687965921
GBC test score: 0.7892122072391767


In [45]:
print("precision_score is: ", precision_score(y_test,y_pred10, average = 'weighted'))
print("recall_score is :", recall_score(y_test,y_pred10, average = 'weighted'))
print("f1_score is:     ", f1_score(y_test,y_pred10 ,average='weighted'))
print("accuracy_score is:     ", accuracy_score(y_test,y_pred10))
ac10 = accuracy_score(y_test,y_pred10)

precision_score is:  0.7768494244277035
recall_score is : 0.7892122072391767
f1_score is:      0.779585147036755
accuracy_score is:      0.7892122072391767


In [46]:
from sklearn.linear_model import SGDClassifier

SGD = SGDClassifier()
SGD.fit(X_train,y_train)
y_pred11 = SGD.predict(X_test)
print("SGD train score:",SGD.score(X_train,y_train))
print("SGD test score:",SGD.score(X_test,y_test))

SGD train score: 0.8059992900248492
SGD test score: 0.7856635911994322


In [47]:
print("precision_score is: ", precision_score(y_test,y_pred11, average = 'weighted'))
print("recall_score is :", recall_score(y_test,y_pred11, average = 'weighted'))
print("f1_score is:     ", f1_score(y_test,y_pred11 ,average='weighted'))
print("accuracy_score is:     ", accuracy_score(y_test,y_pred11))
ac11 = accuracy_score(y_test,y_pred11)

precision_score is:  0.7725839034632751
recall_score is : 0.7856635911994322
f1_score is:      0.7754558761282052
accuracy_score is:      0.7856635911994322


In [48]:
Models = pd.DataFrame({
    'Model' : ['Logistic Regression','KNN or k-Nearest Neighbors','Support Vector Machines',
              'Naive Bayes classifier', 'Decision Tree', 'Random Forrest', 'BaggingClassifier',
              'AdaBoostClassifier','Perceptron','Gradient Boosting Classifier',
              'Stochastic Gradient Descent'],
    'Score' : [ac1,ac2,ac3,ac4,ac5,ac6,ac7,ac8,ac9,ac10,ac11]
})

Models.sort_values(by = 'Score', ascending = False)

Unnamed: 0,Model,Score
0,Logistic Regression,0.796309
9,Gradient Boosting Classifier,0.789212
7,AdaBoostClassifier,0.787083
10,Stochastic Gradient Descent,0.785664
2,Support Vector Machines,0.782825
5,Random Forrest,0.775018
6,BaggingClassifier,0.771469
1,KNN or k-Nearest Neighbors,0.762243
3,Naive Bayes classifier,0.74379
8,Perceptron,0.738822


In [49]:
from sklearn.ensemble import VotingClassifier
VotingClassifierModel = VotingClassifier(estimators=[('Logistic model',Lr),('KNN',KNN),('Support Vector Machines',SVC),
                                                     ('Naive Bayes classifier',NB),('Decision Tree',DTC),('Random Forrest',RFC),
                                                     ('BaggingClassifier',BC),('AdaBoostClassifier',ADB),('Perceptron',PR),
                                                    ('Gradient Boosting Classifier',GBC), ('Stochastic Gradient Descent',SGD)],
voting='hard')
VotingClassifierModel.fit(X_test, y_test)
print('VotingClassifierModel Train Score is : ' , VotingClassifierModel.score(X_train, y_train))
print('VotingClassifierModel Test Score is : ' , VotingClassifierModel.score(X_test, y_test))

VotingClassifier(estimators=[('Logistic model', LogisticRegression()),
                             ('KNN', KNeighborsClassifier()),
                             ('Support Vector Machines', SVC()),
                             ('Naive Bayes classifier', GaussianNB()),
                             ('Decision Tree',
                              DecisionTreeClassifier(ccp_alpha=0,
                                                     max_depth=5000,
                                                     min_impurity_decrease=0)),
                             ('Random Forrest',
                              RandomForestClassifier(max_features='log2',
                                                     n_estimators=500,
                                                     n_jobs=-1)),
                             ('BaggingClassifier',
                              BaggingClassifier(base_estimator=DecisionTreeClassifier(ccp_alpha=0,
                                                            

In [51]:
y_pred12 = VotingClassifierModel.predict(X_test)
print('Predicted Value for VotingClassifierModel is : ' , y_pred12)

Predicted Value for VotingClassifierModel is :  [0 0 0 ... 0 0 0]


In [52]:
print("precision_score is: ", precision_score(y_test,y_pred12, average = 'weighted'))
print("recall_score is :", recall_score(y_test,y_pred12, average = 'weighted'))
print("f1_score is:     ", f1_score(y_test,y_pred12 ,average='weighted'))
print("accuracy_score is:     ", accuracy_score(y_test,y_pred12))
ac12 = accuracy_score(y_test,y_pred12)

precision_score is:  0.8736323601348445
recall_score is : 0.872959545777147
f1_score is:      0.8641427866868001
accuracy_score is:      0.872959545777147


In [53]:
Models = pd.DataFrame({
    'Model' : ['Logistic Regression','KNN or k-Nearest Neighbors','Support Vector Machines',
              'Naive Bayes classifier', 'Decision Tree', 'Random Forrest', 'BaggingClassifier',
              'AdaBoostClassifier','Perceptron','Gradient Boosting Classifier',
              'Stochastic Gradient Descent','VotingClassifier'],
    'Score' : [ac1,ac2,ac3,ac4,ac5,ac6,ac7,ac8,ac9,ac10,ac11,ac12]
})

Models.sort_values(by = 'Score', ascending = False)

Unnamed: 0,Model,Score
11,VotingClassifier,0.87296
0,Logistic Regression,0.796309
9,Gradient Boosting Classifier,0.789212
7,AdaBoostClassifier,0.787083
10,Stochastic Gradient Descent,0.785664
2,Support Vector Machines,0.782825
5,Random Forrest,0.775018
6,BaggingClassifier,0.771469
1,KNN or k-Nearest Neighbors,0.762243
3,Naive Bayes classifier,0.74379
