# Importing Data

In [1]:
import pandas as pd 
import numpy as np

In [2]:
data=pd.read_csv(r'classification.csv')

In [3]:
data.head()

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0


# Scaling and splitting into training and testing data

In [43]:
x=data.iloc[:,:-1]
y=data.iloc[:,-1]
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x=scaler.fit_transform(x)
from sklearn.model_selection import train_test_split

In [44]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42,stratify=y)

# Creating models and hyperparameter tuning for all models using GridSearchCV

## KNN Classifier

In [45]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

In [46]:
knn = KNeighborsClassifier()

In [47]:
params_knn={'n_neighbors':np.arange(1,25)}
knn_gs=GridSearchCV(knn,params_knn,cv=5)

In [48]:
knn_gs.fit(X_train,y_train)

In [49]:
knn_best=knn_gs.best_estimator_

## Random Forest Classifier

In [50]:
from sklearn.ensemble import RandomForestClassifier
rf= RandomForestClassifier()

In [51]:
params_rf={'n_estimators':[50,100,200],'criterion' :["gini", "entropy", "log_loss"] }

In [52]:
rf_gs=GridSearchCV(rf,params_rf,cv=5)
rf_gs.fit(X_train,y_train)

In [53]:
rf_best=rf_gs.best_estimator_

## Logistic Regression

In [54]:
from sklearn.linear_model import LogisticRegression
log_reg = LogisticRegression()

In [55]:
log_reg.fit(X_train,y_train)

In [56]:
from sklearn.naive_bayes import GaussianNB
nb = GaussianNB()

In [57]:
nb.fit(X_train,y_train)

## SVC

In [58]:
from sklearn.svm import SVC

In [59]:
svc=SVC()
params_svc={'kernel':['linear', 'poly', 'rbf', 'sigmoid']}
svc_gs= GridSearchCV(svc,params_svc,cv=5)                  

In [60]:
svc_gs.fit(X_train,y_train)

In [61]:
svc_best=svc_gs.best_estimator_

## Majority Voting using VotingClassifier

In [62]:
from sklearn.ensemble import VotingClassifier

estimators=[('knn',knn_best),('rf',rf_best),('log_reg',log_reg),('nb',nb),('svc',svc_best)]

ensemble = VotingClassifier(estimators, voting='hard')

In [63]:
ensemble.fit(X_train,y_train)

In [64]:
ensemble.score(X_test,y_test)

0.9242424242424242

In [65]:
knn_best.score(X_test,y_test)

0.9318181818181818

In [66]:
rf_best.score(X_test,y_test)

0.9090909090909091

In [67]:
log_reg.score(X_test,y_test)

0.8484848484848485

In [68]:
nb.score(X_test,y_test)

0.8787878787878788

In [69]:
svc_best.score(X_test,y_test)

0.9166666666666666

### The ensemble model performs with an accuracy if 92%, which is higher than all the models except for the knn model