In [39]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.utils import resample
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import roc_auc_score, precision_score, recall_score, log_loss, accuracy_score
from sklearn.metrics import RocCurveDisplay
from sklearn.tree import DecisionTreeClassifier

Dataset

In [57]:
iris = load_iris()
X, y = iris.data, iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

Models

In [58]:
svm_clf = SVC(kernel='poly', probability=True, degree=3)
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
lr_clf = LogisticRegression(max_iter=1000)

Train models in sampled data

In [68]:
X_train_subsets = []
y_train_subsets = []
for _ in range(3):  # тренуємо 3 моделі
    X_subset, y_subset = resample(X_train, y_train, n_samples=len(X_train), random_state=42, stratify=y_train)
    X_train_subsets.append(X_subset)
    y_train_subsets.append(y_subset)

In [60]:
for i in range(3):
    if i == 0:
        svm_clf.fit(X_train_subsets[i], y_train_subsets[i])
    elif i == 1:
        rf_clf.fit(X_train_subsets[i], y_train_subsets[i])
    elif i == 2:
        lr_clf.fit(X_train_subsets[i], y_train_subsets[i])

In [61]:
voting_clf = VotingClassifier(estimators=[('svm', svm_clf), ('rf', rf_clf), ('lr', lr_clf)], voting='hard')

In [62]:
voting_clf.fit(X_train, y_train)
y_pred = voting_clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_report = classification_report(y_test, y_pred)
score = voting_clf.score(X_test, y_test)

In [63]:
print("VotingClassifier accuracy:", accuracy)
print("VotingClassifier Score:", score) 
print("VotingClassifier Confusion Matrix:")
print(conf_matrix)
print("VotingClassifier Classification Report:")
print(classification_report)

VotingClassifier accuracy: 1.0
VotingClassifier Score: 1.0
VotingClassifier Confusion Matrix:
[[23  0  0]
 [ 0 19  0]
 [ 0  0 18]]
VotingClassifier Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        19
           2       1.00      1.00      1.00        18

    accuracy                           1.00        60
   macro avg       1.00      1.00      1.00        60
weighted avg       1.00      1.00      1.00        60



In [64]:
y_pred1 = svm_clf.predict(X_test)

accuracy_score(y_test, y_pred1)

0.9833333333333333

In [65]:
y_pred2 = rf_clf.predict(X_test)

accuracy_score(y_test, y_pred2)

0.9833333333333333

In [66]:
y_pred3 = lr_clf.predict(X_test)

accuracy_score(y_test, y_pred3)

0.9833333333333333

Make them binary

In [74]:
dfi = load_iris()

In [76]:
X = dfi.data
y = dfi.target

In [79]:
from collections import Counter


def ensemble_predict(models, X):
    predictions = []

    for model in models:
        y_pred_proba = model.predict(X)
        predictions.append(y_pred_proba)
        
    transformed = []
    
    for subarray in list(zip(*predictions)):
        
        element_counts = Counter(subarray)
        most_common_element = max(element_counts, key=element_counts.get)
        transformed.append(most_common_element)
        
    return transformed

In [85]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

class_names = [0, 1, 2]
class_pairs = [(class_names[i], class_names[j]) for i in range(len(class_names)) for j in range(i+1, len(class_names))]

models = []

for i, class_pair in enumerate(class_pairs):
    class_1, class_2 = class_pair
    X_train_pair = X_train[(y_train == class_1) | (y_train == class_2)]
    y_train_pair = y_train[(y_train == class_1) | (y_train == class_2)]

    if i == 0:
        svm_clf = SVC(kernel='linear', probability=True)
        svm_clf.fit(X_train_pair, y_train_pair)
        models.append(svm_clf)
    elif i == 1:
        rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
        rf_clf.fit(X_train_pair, y_train_pair)
        models.append(rf_clf)
    elif i == 2:
        lr_clf = LogisticRegression(max_iter=1000)
        lr_clf.fit(X_train_pair, y_train_pair)
        models.append(lr_clf)


In [86]:
accuracy_score(ensemble_predict(models, X_test), y_test)

0.9333333333333333

Meta-model

In [104]:
from collections import Counter


def ensemble_predictions(models, X):
    predictions = []

    for model in models:
        y_pred_proba = model.predict(X)
        predictions.append(y_pred_proba)
        
    return predictions

In [91]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

class_names = [0, 1, 2]
class_pairs = [(class_names[i], class_names[j]) for i in range(len(class_names)) for j in range(i+1, len(class_names))]

models = []

for i, class_pair in enumerate(class_pairs):
    class_1, class_2 = class_pair
    X_train_pair = X_train[(y_train == class_1) | (y_train == class_2)]
    y_train_pair = y_train[(y_train == class_1) | (y_train == class_2)]

    if i == 0:
        svm_clf = SVC(kernel='linear', probability=True)
        svm_clf.fit(X_train_pair, y_train_pair)
        models.append(svm_clf)
    elif i == 1:
        rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
        rf_clf.fit(X_train_pair, y_train_pair)
        models.append(rf_clf)
    elif i == 2:
        lr_clf = LogisticRegression(max_iter=1000)
        lr_clf.fit(X_train_pair, y_train_pair)
        models.append(lr_clf)


In [113]:
from sklearn.ensemble import AdaBoostClassifier
import numpy as np


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

ens_x = np.array( ensemble_predictions(models, X_train) ).T
ens_y = np.array(y_train)



In [114]:
ens_x.shape

(90, 3)

In [115]:
ens_y.shape

(90,)

In [123]:
ada_model = AdaBoostClassifier()

ada_model.fit(ens_x, ens_y)


ens_x_pred = ensemble_predictions(models, X_test)

In [125]:
y_pred_ada = ada_model.predict(np.array(ens_x_pred).T)

In [129]:
y_pred_ada.shape

(60,)

In [131]:
y_test.shape

(60,)

In [133]:
ada_accuracy = accuracy_score(y_test, y_pred_ada)
ada_conf_matrix = confusion_matrix(y_test, y_pred_ada)

In [134]:
print("Ada-boost accuracy:", ada_accuracy)
print("Ada-boost Confusion Matrix:")
print(ada_conf_matrix)

Ada-boost accuracy: 0.9666666666666667
Ada-boost Confusion Matrix:
[[18  0  0]
 [ 0 20  2]
 [ 0  0 20]]
