In [21]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
import numpy as np


In [22]:
# Load the Iris dataset
iris = load_iris()

In [23]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)


In [24]:
# Define the 4 machine learning models
model1 = DecisionTreeClassifier()
model2 = RandomForestClassifier(n_estimators=10)
model3 = KNeighborsClassifier(n_neighbors=3)
model4 = SVC(kernel='linear', C=1, gamma='auto')

In [25]:
# Train the models on the training set
model1.fit(X_train, y_train)
model2.fit(X_train, y_train)
model3.fit(X_train, y_train)
model4.fit(X_train, y_train)

In [48]:
for model in [model1, model2, model3, model4]:
    scores = cross_val_score(model, X_test, y_test, cv=5)
    print("Accuracy of %s: %0.2f%%" % (type(model).__name__, scores.mean() * 100))

Accuracy of DecisionTreeClassifier: 96.67%
Accuracy of RandomForestClassifier: 100.00%
Accuracy of KNeighborsClassifier: 93.33%
Accuracy of SVC: 96.67%


In [28]:
# Apply K-Folds cross-validation
kf = KFold(n_splits=5, shuffle=True)
X = X_train
y = y_train
meta_train = np.zeros((X.shape[0], 4))

In [29]:
for train_index, test_index in kf.split(X):
    X_train_kf, X_test_kf = X[train_index], X[test_index]
    y_train_kf, y_test_kf = y[train_index], y[test_index]
    model1.fit(X_train_kf, y_train_kf)
    model2.fit(X_train_kf, y_train_kf)
    model3.fit(X_train_kf, y_train_kf)
    model4.fit(X_train_kf, y_train_kf)
    meta_train[test_index, 0] = model1.predict(X_test_kf)
    meta_train[test_index, 1] = model2.predict(X_test_kf)
    meta_train[test_index, 2] = model3.predict(X_test_kf)
    meta_train[test_index, 3] = model4.predict(X_test_kf)

In [30]:
# Define the meta-model for stacking
meta_model = DecisionTreeClassifier()

In [31]:
# Train the meta-model on the meta features
meta_model.fit(meta_train, y)

In [32]:
# Make predictions on the testing set using the stacked model
meta_test = np.column_stack((model1.predict(X_test), model2.predict(X_test), model3.predict(X_test), model4.predict(X_test)))
y_pred = meta_model.predict(meta_test)

In [33]:
# Compute the accuracy of the stacked model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy of stacked model: %.2f%%" % (accuracy * 100))

Accuracy of stacked model: 100.00%


In [34]:
# Print GT and prediction
import pandas as pd
df = pd.DataFrame({"Ground Truth": y_test, "Prediction": y_pred})
print(df)


    Ground Truth  Prediction
0              1           1
1              0           0
2              2           2
3              1           1
4              1           1
5              0           0
6              1           1
7              2           2
8              1           1
9              1           1
10             2           2
11             0           0
12             0           0
13             0           0
14             0           0
15             1           1
16             2           2
17             1           1
18             1           1
19             2           2
20             0           0
21             2           2
22             0           0
23             2           2
24             2           2
25             2           2
26             2           2
27             2           2
28             0           0
29             0           0
