<a href="https://colab.research.google.com/github/Matteriegray/Ensemble/blob/main/Stacking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
from sklearn.datasets import load_iris

# Fetch the iris feature matrix and class labels as NumPy arrays.
X, y = load_iris(return_X_y=True, as_frame=False)


In [14]:
from sklearn.feature_selection import VarianceThreshold

# Filter out low-variance features (variance threshold 0.1), then report
# the shape of the remaining feature matrix.
selection = VarianceThreshold(threshold=0.1)
selection.fit(X)
X = selection.transform(X)
print(X.shape)

(150, 4)


In [15]:
from sklearn.model_selection import train_test_split

# Hold out 25 samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=25,
    random_state=23,
)
(X_train.shape, X_test.shape)

((125, 4), (25, 4))

In [16]:
#importing the metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import f1_score

In [17]:
# Let's build the models from here

In [18]:
# KNN
from sklearn.neighbors import KNeighborsClassifier

# Fit a 3-nearest-neighbours classifier on the training split.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Predict labels for the training and the test split
y_train_pred, y_test_pred = knn.predict(X_train), knn.predict(X_test)


In [19]:
# KNN metrics on the training split: accuracy, Matthews correlation
# coefficient and class-weighted F1.
knn_train_acc = accuracy_score(y_true=y_train, y_pred=y_train_pred)
knn_train_mcc = matthews_corrcoef(y_true=y_train, y_pred=y_train_pred)
knn_train_f1 = f1_score(y_true=y_train, y_pred=y_train_pred, average='weighted')

# One print call, one report line per argument.
print(
    'Model performance for Training set',
    '- Accuracy: %s' % knn_train_acc,
    '- MCC: %s' % knn_train_mcc,
    '- F1 score: %s' % knn_train_f1,
    sep='\n',
)

Model performance for Training set
- Accuracy: 0.96
- MCC: 0.9400576542369509
- F1 score: 0.9600110787979504


In [20]:
# KNN metrics on the held-out test split: accuracy, Matthews correlation
# coefficient and class-weighted F1.
knn_test_acc = accuracy_score(y_true=y_test, y_pred=y_test_pred)
knn_test_mcc = matthews_corrcoef(y_true=y_test, y_pred=y_test_pred)
knn_test_f1 = f1_score(y_true=y_test, y_pred=y_test_pred, average='weighted')

# One print call, one report line per argument.
print(
    'Model performance for Test set',
    '- Accuracy: %s' % knn_test_acc,
    '- MCC: %s' % knn_test_mcc,
    '- F1 score: %s' % knn_test_f1,
    sep='\n',
)

Model performance for Test set
- Accuracy: 0.96
- MCC: 0.9414746160644475
- F1 score: 0.9603619909502262


In [21]:
# Decision Tree

from sklearn.tree import DecisionTreeClassifier

# Seed the tree: scikit-learn randomly permutes the candidate features at
# every split, so ties between equally good splits are otherwise resolved
# differently on each run and the reported scores are not reproducible.
# 23 is the same seed used for the train/test split.
# NOTE: outputs recorded from an earlier unseeded run may differ slightly
# after re-execution.
dt = DecisionTreeClassifier(max_depth=5, random_state=23)
dt.fit(X_train, y_train)

# Make predictions for the training and the test split
y_train_pred = dt.predict(X_train)
y_test_pred = dt.predict(X_test)

In [22]:
# Decision-tree metrics on the training split: accuracy, Matthews
# correlation coefficient and class-weighted F1.
dt_train_acc = accuracy_score(y_true=y_train, y_pred=y_train_pred)
dt_train_mcc = matthews_corrcoef(y_true=y_train, y_pred=y_train_pred)
dt_train_f1 = f1_score(y_true=y_train, y_pred=y_train_pred, average='weighted')

# One print call, one report line per argument.
print(
    'Model performance for Training set',
    '- Accuracy: %s' % dt_train_acc,
    '- MCC: %s' % dt_train_mcc,
    '- F1 score: %s' % dt_train_f1,
    sep='\n',
)

Model performance for Training set
- Accuracy: 0.992
- MCC: 0.9880815801394235
- F1 score: 0.9919955611041753


In [23]:
# Decision-tree metrics on the held-out test split: accuracy, Matthews
# correlation coefficient and class-weighted F1.
dt_test_acc = accuracy_score(y_true=y_test, y_pred=y_test_pred)
dt_test_mcc = matthews_corrcoef(y_true=y_test, y_pred=y_test_pred)
dt_test_f1 = f1_score(y_true=y_test, y_pred=y_test_pred, average='weighted')

# One print call, one report line per argument.
print(
    'Model performance for Test set',
    '- Accuracy: %s' % dt_test_acc,
    '- MCC: %s' % dt_test_mcc,
    '- F1 score: %s' % dt_test_f1,
    sep='\n',
)

Model performance for Test set
- Accuracy: 0.96
- MCC: 0.9414746160644475
- F1 score: 0.9603619909502262


In [24]:
# RandomForest
from sklearn.ensemble import RandomForestClassifier

# Seed the forest: bootstrap sampling of the rows and the per-split feature
# sampling are both random, so without a fixed random_state every run grows
# a different forest and reports different scores.
# 23 is the same seed used for the train/test split.
# NOTE: outputs recorded from an earlier unseeded run may differ slightly
# after re-execution.
rf = RandomForestClassifier(n_estimators=10, random_state=23)
rf.fit(X_train, y_train)

# Make predictions for the training and the test split
y_train_pred = rf.predict(X_train)
y_test_pred = rf.predict(X_test)

In [25]:
# Random-forest metrics on the training split: accuracy, Matthews
# correlation coefficient and class-weighted F1.
rf_train_accuracy = accuracy_score(y_true=y_train, y_pred=y_train_pred)
rf_train_mcc = matthews_corrcoef(y_true=y_train, y_pred=y_train_pred)
rf_train_f1 = f1_score(y_true=y_train, y_pred=y_train_pred, average='weighted')

# One print call, one report line per argument.
print(
    'Model performance for Training set',
    '- Accuracy: %s' % rf_train_accuracy,
    '- MCC: %s' % rf_train_mcc,
    '- F1 score: %s' % rf_train_f1,
    sep='\n',
)


Model performance for Training set
- Accuracy: 0.992
- MCC: 0.9880883947967788
- F1 score: 0.9920022157595901


In [26]:
# Random-forest metrics on the held-out test split: accuracy, Matthews
# correlation coefficient and class-weighted F1.
rf_test_accuracy = accuracy_score(y_true=y_test, y_pred=y_test_pred)
rf_test_mcc = matthews_corrcoef(y_true=y_test, y_pred=y_test_pred)
rf_test_f1 = f1_score(y_true=y_test, y_pred=y_test_pred, average='weighted')

# One print call, one report line per argument.
print(
    'Model performance for Test set',
    '- Accuracy: %s' % rf_test_accuracy,
    '- MCC: %s' % rf_test_mcc,
    '- F1 score: %s' % rf_test_f1,
    sep='\n',
)

Model performance for Test set
- Accuracy: 1.0
- MCC: 1.0
- F1 score: 1.0


In [27]:
# Stacking

from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression

# (name, estimator) pairs used as the base learners of the stack.
estimator_list = [("knn", knn), ("dt", dt), ("rf", rf)]

In [28]:
# Stack model: the listed base estimators feed a logistic-regression
# final estimator.
stack_model = StackingClassifier(
    estimators=estimator_list,
    final_estimator=LogisticRegression(),
)


In [29]:
# Fit the stacked ensemble on the training split
stack_model.fit(X_train, y_train)

# Predict the training split first, then the test split, with the fitted stack
y_train_pred, y_test_pred = (
    stack_model.predict(features) for features in (X_train, X_test)
)

In [30]:
# Stacked-model metrics on the training split: accuracy, Matthews
# correlation coefficient and class-weighted F1.
stack_model_train_acc = accuracy_score(y_true=y_train, y_pred=y_train_pred)
stack_model_train_mcc = matthews_corrcoef(y_true=y_train, y_pred=y_train_pred)
stack_model_train_f1 = f1_score(y_true=y_train, y_pred=y_train_pred, average='weighted')

# One print call, one report line per argument.
print(
    'Model performance for Training set',
    '- Accuracy: %s' % stack_model_train_acc,
    '- MCC: %s' % stack_model_train_mcc,
    '- F1 score: %s' % stack_model_train_f1,
    sep='\n',
)

Model performance for Training set
- Accuracy: 0.984
- MCC: 0.9759800153727901
- F1 score: 0.984


In [31]:
# Stacked-model metrics on the held-out test split: accuracy, Matthews
# correlation coefficient and class-weighted F1.
stack_model_test_acc = accuracy_score(y_true=y_test, y_pred=y_test_pred)
stack_model_test_mcc = matthews_corrcoef(y_true=y_test, y_pred=y_test_pred)
stack_model_test_f1 = f1_score(y_true=y_test, y_pred=y_test_pred, average='weighted')

# One print call, one report line per argument.
print(
    'Model performance for Test set',
    '- Accuracy: %s' % stack_model_test_acc,
    '- MCC: %s' % stack_model_test_mcc,
    '- F1 score: %s' % stack_model_test_f1,
    sep='\n',
)

Model performance for Test set
- Accuracy: 0.96
- MCC: 0.9414746160644475
- F1 score: 0.9603619909502262
