# Importing required Libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder
from sklearn.metrics import accuracy_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.base import clone
from sklearn.ensemble import StackingClassifier
from sklearn.model_selection import GridSearchCV

from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings('ignore')


# Feature selection and Modelling

In [None]:
# Load the Iris CSV from the Kaggle input directory into a DataFrame.
iris = pd.read_csv("/kaggle/input/iris/Iris.csv")
# Bare expression so the notebook displays the (rows, columns) shape as a sanity check.
iris.shape

In [None]:
# Feature matrix: everything except the target column.
X = iris.drop(columns=["Species"])

# The Kaggle Iris.csv also carries an "Id" row counter. The rows are ordered by
# species, so keeping it as a feature would leak the label into every model.
# errors="ignore" keeps this working for copies of the data without that column.
X = X.drop(columns=["Id"], errors="ignore")

# Encode the 'Species' column as ordinal codes (0.0, 1.0, 2.0, ...).
oe = OrdinalEncoder()

# OrdinalEncoder expects a 2-D input; flatten the result back to one value per row.
species_codes = oe.fit_transform(iris['Species'].to_numpy().reshape(-1, 1))
iris['Species'] = species_codes.ravel()

# Target vector used by every model below.
y = iris['Species']


In [None]:
# Show five random rows to eyeball the encoded 'Species' column.
iris.sample(n=5)

# Applying Cross Validation

In [None]:
# Stratified 5-fold splitter (shuffled, fixed seed) reused by the CV cells below.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=10)

# Walk through the folds once; after the loop the split variables hold the last fold.
for train_idx, test_idx in kf.split(X, y):
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

In [None]:
# Helper that fits a model and reports several test-set scores at once.

def get_score(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    The estimator is fitted in place, so the object passed in is left in its
    fitted state.

    Returns:
        A ``(accuracy, mcc, f1)`` tuple computed on the test split, where
        ``mcc`` is the Matthews correlation coefficient and ``f1`` is the
        weighted-average F1 score.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    return (
        accuracy_score(y_test, predictions),
        matthews_corrcoef(y_test, predictions),
        f1_score(y_test, predictions, average='weighted'),
    )

In [None]:
# Baseline: stratified k-fold CV of a default SVC, collecting per-fold metrics.

test_ac_sc = []
test_mcc_sc = []
test_f1_sc = []

scores = []

for train_idx, test_idx in kf.split(X, y):
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # Fit and score once per fold, then fan the tuple out to the individual
    # lists, instead of refitting the model for every metric.
    fold_scores = get_score(SVC(), X_train, X_test, y_train, y_test)
    scores.append(fold_scores)

    fold_accuracy, fold_mcc, fold_f1 = fold_scores
    test_ac_sc.append(fold_accuracy)
    test_mcc_sc.append(fold_mcc)
    test_f1_sc.append(fold_f1)
    

In [None]:
#scores
#test_ac_sc
#test_mcc_sc
#test_f1_sc

In [None]:
# Candidate classifiers, each constructed without arguments, to be run through
# get_score one after another.
models = [
    SVC(),
    KNeighborsClassifier(),
    DecisionTreeClassifier(),
    MLPClassifier(),
    RandomForestClassifier(),
    LogisticRegression(),
    GradientBoostingClassifier(),
]

# Scores of all models present in the models list

In [None]:

# Cross-validate every candidate model and print its mean score per metric.
for model in models:
    test_ac_sc = []
    test_mcc_sc = []
    test_f1_sc = []

    for train_idx, test_idx in kf.split(X, y):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        # Fit and score once per fold. Calling get_score separately for each
        # metric would refit the model each time, so for unseeded stochastic
        # models the three metrics would describe three different fits.
        fold_accuracy, fold_mcc, fold_f1 = get_score(model, X_train, X_test, y_train, y_test)
        test_ac_sc.append(fold_accuracy)
        test_mcc_sc.append(fold_mcc)
        test_f1_sc.append(fold_f1)

    print(f'for model: {model}')
    print('accuracy score :',np.mean(test_ac_sc))
    print('mcc score :',np.mean(test_mcc_sc))
    print('f1 score :',np.mean(test_f1_sc))
    print('\n')

In [None]:
# Named base learners for the stacking ensemble, as (name, estimator) pairs.
estimators = [
    ('lr', LogisticRegression()),
    ('knn', KNeighborsClassifier()),
    ('gbdt', GradientBoostingClassifier()),
    ('MLP', MLPClassifier()),
    ('DTC', DecisionTreeClassifier()),
]

In [None]:
# Stacked ensemble: the base learners listed in `estimators` feed a random-forest
# final estimator; cv=10 is the cross-validation setting handed to the stacker.
clf = StackingClassifier(estimators=estimators, final_estimator=RandomForestClassifier(), cv=10)

# Making simple train test split

In [None]:
# Hold out 20% of the rows for testing, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=8,
    stratify=y,
)

In [None]:
# Hyper-parameter values to try in the grid search, keyed by parameter name.
param_grid = dict(
    n_estimators=[25, 50, 100, 150],
    max_depth=[2, 4, 6],
    min_samples_split=[2, 4, 6],
)

In [None]:
# Exhaustive 5-fold grid search over `param_grid` for the stack's final estimator,
# using all available cores.
# NOTE(review): this tunes the estimator directly on the raw columns of X_train;
# inside the stack it is presumably trained on base-estimator predictions
# instead, so the chosen settings may not be optimal there - confirm.
grid_search = GridSearchCV(
    estimator=clf.final_estimator,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

In [None]:
# Keep the hyper-parameter combination selected by the grid search.
best_params = grid_search.best_params_
# Bare expression so the notebook displays the chosen values.
best_params


In [None]:
# Apply the tuned hyper-parameters to the stack's final estimator in place,
# then fit the whole stacking ensemble on the training split.
clf.final_estimator.set_params(**best_params)
clf.fit(X_train, y_train)

In [None]:
# Predict the held-out split with the fitted stack and display its accuracy.
y_pred = clf.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Display the confusion matrix of the stack's predictions on the held-out split.
confusion_matrix(y_true=y_test, y_pred=y_pred)


I made this notebook to practise building a stacking classifier on a small dataset before I use the technique in my 'Titanic competition'
notebook. I'm still a beginner at coding, and this was the best method I could come up with at the moment. An upvote/like is highly appreciated if you like what I did here ... regards, TM Kartikey