In [9]:
import numpy as np
import pandas as pd
import pickle
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import datasets
from sklearn.ensemble import VotingClassifier, BaggingClassifier, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier

In [7]:
# Load the breast-cancer dataset; as_frame=True asks scikit-learn for pandas
# objects, which lets the features be selected by column name below.
data_breast_cancer = datasets.load_breast_cancer(as_frame=True)

# Only these two features are used as model inputs in this notebook.
feature_columns = ['mean texture', 'mean symmetry']
X = data_breast_cancer.data[feature_columns]
y = data_breast_cancer.target

In [8]:
# Hold out 20% of the samples for testing. The seed is fixed so that the
# partition -- and therefore every accuracy computed from it -- is the same
# after "Restart Kernel -> Run All". Re-run the notebook to refresh any
# previously recorded outputs that came from an unseeded split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [44]:
# Voting Classifier
#
# Fit three base classifiers plus a hard-voting and a soft-voting ensemble
# built from them, then record (train accuracy, test accuracy) for each model.

tree_clf = DecisionTreeClassifier()
reg_log = LogisticRegression()
knn_clf = KNeighborsClassifier()

# The same (label, estimator) pairs feed both ensembles; each ensemble gets
# its own list object, exactly as if the list had been written out twice.
named_estimators = [('tc', tree_clf), ('rl', reg_log), ('kc', knn_clf)]

voting_clf_hard = VotingClassifier(estimators=list(named_estimators), voting='hard')
voting_clf_soft = VotingClassifier(estimators=list(named_estimators), voting='soft')

# Keyed by estimator object (value = display name). Insertion order defines
# both the evaluation order and the order of entries in acc_results.
models = {
    tree_clf: "DecisionTreeClassifier",
    reg_log: "LogisticRegression",
    knn_clf: "KNeighborsClassifier",
    voting_clf_hard: "VotingClassifierHard",
    voting_clf_soft: "VotingClassifierSoft",
}
acc_results = []

for clf, name in models.items():
    clf.fit(X_train, y_train)
    # Score on the data the model was fitted on and on the held-out data.
    acc_train = accuracy_score(y_train, clf.predict(X_train))
    acc_test = accuracy_score(y_test, clf.predict(X_test))
    acc_results.append((acc_train, acc_test))
    print(f'==== {name} ==== \ntrain data: {acc_train}, \ntest data: {acc_test}\n')

==== DecisionTreeClassifier ==== 
train data: 1.0, 
test data: 0.6491228070175439

==== LogisticRegression ==== 
train data: 0.7296703296703296, 
test data: 0.6754385964912281

==== KNeighborsClassifier ==== 
train data: 0.7736263736263737, 
test data: 0.6842105263157895

==== VotingClassifierHard ==== 
train data: 0.8395604395604396, 
test data: 0.7105263157894737

==== VotingClassifierSoft ==== 
train data: 0.9648351648351648, 
test data: 0.6754385964912281



In [51]:
# Persist the accuracy tuples and the list of estimator objects, then read
# both files back and print them as a round-trip sanity check.
artifacts = {
    "acc_vote.pkl": acc_results,
    "vote.pkl": list(models),  # iterating the dict yields its keys (the estimators)
}

# Write every file first ...
for path, payload in artifacts.items():
    with open(path, "wb") as sink:
        pickle.dump(payload, sink)

# ... then reload in the same order. These files were just written by this
# cell, so unpickling them here is safe.
for path in artifacts:
    with open(path, "rb") as source:
        print(pickle.load(source))

[(1.0, 0.6491228070175439), (0.7296703296703296, 0.6754385964912281), (0.7736263736263737, 0.6842105263157895), (0.8395604395604396, 0.7105263157894737), (0.9648351648351648, 0.6754385964912281)]
[DecisionTreeClassifier(), LogisticRegression(), KNeighborsClassifier(), VotingClassifier(estimators=[('tc', DecisionTreeClassifier()),
                             ('rl', LogisticRegression()),
                             ('kc', KNeighborsClassifier())]), VotingClassifier(estimators=[('tc', DecisionTreeClassifier()),
                             ('rl', LogisticRegression()),
                             ('kc', KNeighborsClassifier())],
                 voting='soft')]
