# Import packages

In [54]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

#  Load Dataset

In [2]:
 # URL of the dataset
url = 'https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/master/data/Telco-Customer-Churn.csv'

# Load the dataset
data = pd.read_csv(url)

In [4]:
data.drop(columns=['customerID'],inplace=True)

In [5]:
data.head(2)

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No


In [20]:
data["TotalCharges"]=data["TotalCharges"].replace(" ",np.nan)
data["TotalCharges"]=data["TotalCharges"].astype("float64")

In [21]:
data.dropna(inplace=True)

# Saperate Feature and Label

In [22]:
feature=data.drop(columns=['Churn'])
label=data['Churn']

# Encode label

In [23]:
label=label.map({
    "Yes":1,
    "No":0
})

# Train Test split

In [24]:
from sklearn.model_selection import train_test_split

In [25]:
x_train,x_test,y_train,y_test=train_test_split(feature,label,test_size=0.2,random_state=43)

In [26]:
x_train.shape

(5625, 19)

In [27]:
# Build a Numerical pipeline

In [29]:
num_col=feature.select_dtypes("number").columns
cat_col=feature.select_dtypes("object").columns

In [30]:
cat_col

Index(['gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
       'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
       'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract',
       'PaperlessBilling', 'PaymentMethod'],
      dtype='object')

In [32]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler,OneHotEncoder

In [33]:
num_pipe=Pipeline(steps=[
    ('Scale',StandardScaler())
])
num_pipe

In [34]:
cat_pipe=Pipeline(steps=[
    ('encode',OneHotEncoder(sparse_output=False,drop='first',handle_unknown='ignore'))
])
cat_pipe

# Build transformer

In [35]:
from sklearn.compose import ColumnTransformer

In [37]:
proces=ColumnTransformer(transformers=[
    ("num_sacle",num_pipe,num_col),
    ("encode",cat_pipe,cat_col)
],remainder="passthrough")
proces

In [38]:
x_train_trans=proces.fit_transform(x_train)
x_test_trans=proces.transform(x_test)

# Fit the models

In [67]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

In [68]:
estimatores=[
    ("LR",LogisticRegression()),
    ("tree",DecisionTreeClassifier()),
    ("SVC",SVC())
]
estimatores

[('LR', LogisticRegression()),
 ('tree', DecisionTreeClassifier()),
 ('SVC', SVC())]

In [69]:
from sklearn.model_selection import cross_val_score

In [70]:
for est in estimatores:
    x=cross_val_score(est[1],x_train_trans,y_train,cv=5,scoring="accuracy").mean()
    print(est[0],x)

LR 0.8053333333333335
tree 0.7235555555555556
SVC 0.7998222222222223


# Now apply voting classifier

In [71]:
from sklearn.ensemble import VotingClassifier

# Hard Voting

In [72]:
clf=VotingClassifier(estimatores,voting="hard",n_jobs=-1)
clf

In [73]:
cross_val_score(clf,x_train_trans,y_train,cv=5,scoring='accuracy').mean()

0.8007111111111112

- **We can get 80% accracy and we can also see that LogisticRegression also give me the `accuracy` of `80%`**
- **Now Try Soft voting and see the result again**

# Soft Voting

In [74]:
clf2=VotingClassifier(estimatores,voting="soft",n_jobs=-1)
clf2

In [75]:
cross_val_score(clf2,x_train_trans,y_train,cv=5,scoring='accuracy').mean()

nan

**This error occur b/c SCV doesnot support probality prediction but soft voting work on probolity**

# Weights

In [78]:
for i in range(1,4):
    for j in range(1,4):
        for k in range(1,4):
            clf3=VotingClassifier(estimatores,voting="hard",n_jobs=-1,weights=[i,j,k])
            x=cross_val_score(clf3,x_train_trans,y_train,scoring='accuracy',cv=5).mean()
            print(f"for i={i} j={j} k={k}  result =>  {x}")

for i=1 j=1 k=1  result =>  0.7998222222222223
for i=1 j=1 k=2  result =>  0.8007111111111112
for i=1 j=1 k=3  result =>  0.7998222222222223
for i=1 j=2 k=1  result =>  0.7845333333333333
for i=1 j=2 k=2  result =>  0.8010666666666666
for i=1 j=2 k=3  result =>  0.8001777777777777
for i=1 j=3 k=1  result =>  0.7217777777777779
for i=1 j=3 k=2  result =>  0.7847111111111111
for i=1 j=3 k=3  result =>  0.8008888888888889
for i=2 j=1 k=1  result =>  0.8021333333333333
for i=2 j=1 k=2  result =>  0.8001777777777779
for i=2 j=1 k=3  result =>  0.8003555555555556
for i=2 j=2 k=1  result =>  0.8007111111111112
for i=2 j=2 k=2  result =>  0.8014222222222221
for i=2 j=2 k=3  result =>  0.7998222222222223
for i=2 j=3 k=1  result =>  0.7861333333333334
for i=2 j=3 k=2  result =>  0.8
for i=2 j=3 k=3  result =>  0.8001777777777779
for i=3 j=1 k=1  result =>  0.8053333333333335
for i=3 j=1 k=2  result =>  0.8019555555555555
for i=3 j=1 k=3  result =>  0.8010666666666667
for i=3 j=2 k=1  result =>  

**we can see that we can get `80.5%` accuracy at this point `for i=3 j=1 k=1  result =>  0.8053333333333335`**