In [7]:
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

In [2]:
# Synthetic binary classification problem: 1000 samples with 20 features
# (15 informative + 5 redundant), seeded so the data set is reproducible.
X, y = make_classification(
    n_samples=1000,
    n_classes=2,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    random_state=1,
)

# Hold out 30% of the samples as a test set; the split is seeded as well.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1
)

In [4]:
# Baseline classifiers, each evaluated on its own before being combined.
# The stochastic models are seeded (matching the random_state=1 used for the
# data and the split) so the reported scores are reproducible on re-run.
gnb = GaussianNB()
svm = SVC(probability=True, random_state=1)  # probability=True enables predict_proba
rf = RandomForestClassifier(random_state=1)
dt = DecisionTreeClassifier(random_state=1)

# Display label -> estimator; insertion order fixes the order of the printout.
baseline_models = {
    'GaussianNB': gnb,
    'SVM': svm,
    'RandomForest': rf,
    'DecisionTree': dt,
}

baseline_scores = {}
for label, model in baseline_models.items():
    # Fit on the full training split so the estimator object itself is usable
    # afterwards; the score below is the mean accuracy over 5 CV folds of the
    # training split.
    model.fit(X_train, y_train)
    baseline_scores[label] = cross_val_score(model, X_train, y_train, cv=5).mean()
    print(f'{label} Score: {baseline_scores[label]}')

# Keep the individual score variables available by name.
gnb_score = baseline_scores['GaussianNB']
svm_score = baseline_scores['SVM']
rf_score = baseline_scores['RandomForest']
dt_score = baseline_scores['DecisionTree']

GaussianNB Score: 0.8271428571428571
SVM Score: 0.9514285714285714
RandomForest Score: 0.9071428571428571
DecisionTree Score: 0.7971428571428572


In [5]:
# Combine the four baseline classifiers into one soft-voting ensemble.
base_estimators = [('gnb', gnb), ('svm', svm), ('rf', rf), ('dt', dt)]
voting = VotingClassifier(estimators=base_estimators, voting='soft')
voting.fit(X_train, y_train)

# Mean 5-fold cross-validation score on the training split.
fold_scores = cross_val_score(voting, X_train, y_train, cv=5)
score = fold_scores.mean()
print(f'Voting Score: {score}')

# Score of the fitted ensemble on the held-out test split.
accuracy = voting.score(X_test, y_test)
print(f'Voting Accuracy: {accuracy}')

Voting Score: 0.9028571428571428
Voting Accuracy: 0.9033333333333333


In [8]:
# Hyper-parameter grid for the voting ensemble: 2 voting modes x 4 weight
# vectors = 8 candidates. Each weight vector is positional and lines up with
# the estimator order given to `voting`: (gnb, svm, rf, dt).
param_grid = {
    'voting': ['soft', 'hard'],
    'weights': [
        [1, 1, 1, 1],  # equal weights
        [1, 2, 3, 4],
        [4, 3, 2, 1],
        [2, 2, 1, 1],
    ],
}

# Exhaustive search with 5-fold CV on the training split, scored by accuracy
# and parallelised over all available cores.
vc2 = GridSearchCV(
    estimator=voting,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    verbose=1,
    scoring='accuracy',
)
vc2.fit(X_train, y_train)

print(f'Best Score: {vc2.best_score_}')
print(f'Best Parameters: {vc2.best_params_}')

# Accuracy of the tuned search object on the held-out test split.
accuracy = vc2.score(X_test, y_test)
print(f'Voting Accuracy: {accuracy}')

Fitting 5 folds for each of 8 candidates, totalling 40 fits
Best Score: 0.9228571428571429
Best Parameters: {'voting': 'hard', 'weights': [1, 2, 3, 4]}
Voting Accuracy: 0.9133333333333333
