# Stack Classifier Approach for optimising accuracy in Machine Learning Models

In [2]:

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import model_selection
from mlxtend.classifier import StackingClassifier

import warnings
warnings.filterwarnings("ignore")

In [4]:
import pandas as pd

# Importing the dataset
dataset = pd.read_csv("phishyFeatures.csv")
dataset = dataset.drop('id', axis=1) # removing unwanted column

x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [5]:
# Splitting the dataset into training set and test set
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)

In [20]:
# Base Learner/Classifier Model defn
svc = LinearSVC()
knc = KNeighborsClassifier()
dtc = DecisionTreeClassifier()
lrc = LogisticRegression()
rfc = RandomForestClassifier()

In [21]:
# Meta Model Defn
metamodel = LogisticRegression()

In [22]:
# Stacking Classifier Defn
stack_classifier = StackingClassifier(
    classifiers=[svc, knc, dtc, lrc, rfc],
    meta_classifier=metamodel
)

In [23]:
# List of classifiers and their labels
classifiers = [svc, knc, dtc, lrc, rfc, stack_classifier]
labels = ['SVC', 'KNN', 'DT', 'LR', 'RF', 'Stacking']

# Evaluate each classifier
for clf, label in zip(classifiers, labels):
    scores = model_selection.cross_val_score(clf, x_train, y_train, cv=5)
    print("Accuracy: %7f (+/- %0.2f) [%s]" % (scores.mean()*100, scores.std(), label))

Accuracy: 92.932159 (+/- 0.01) [SVC]
Accuracy: 94.077841 (+/- 0.01) [KNN]
Accuracy: 96.007688 (+/- 0.01) [DT]
Accuracy: 93.016598 (+/- 0.01) [LR]
Accuracy: 97.177569 (+/- 0.01) [RF]
Accuracy: 96.779594 (+/- 0.00) [Stacking]
