In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.datasets as datasets
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
import plotly.express as px

In [2]:
names = ["Nearest Neighbors", 
         "Linear SVM", 
         "RBF SVM", 
         "Decision Tree", 
         "Random Forest", 
         "Neural Net", 
         "AdaBoost",
         "Naive Bayes", 
         "QDA"]

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(kernel="rbf", gamma=2, C=1),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1, max_iter=1000),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()]

In [3]:
def run_sl_models(X, y):
    results = list()
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4, random_state=42)

    for name, clf in zip(names, classifiers):
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        # display(f"{name}: Confusion Matrix")
        # z_confusion = pd.crosstab(y_test, y_pred)
        # display(z_confusion.T)
        result = {
            "model": name,
            "accuracy": accuracy_score(y_test, y_pred),
            "f1_score": f1_score(y_test, y_pred, average="macro"),
            "recall": recall_score(y_test, y_pred, average="macro")
        }
        results.append(result)
    return pd.DataFrame(results)

In [4]:
data_set = datasets.load_wine()
X = data_set.data
y = data_set.target
res = run_sl_models(X, y)
display(res)
px.bar(res, x="model", y="accuracy", color="accuracy")

Unnamed: 0,model,accuracy,f1_score,recall
0,Nearest Neighbors,0.944444,0.949524,0.950617
1,Linear SVM,0.986111,0.987421,0.987654
2,RBF SVM,0.375,0.181818,0.333333
3,Decision Tree,0.930556,0.934965,0.931174
4,Random Forest,0.944444,0.949668,0.949668
5,Neural Net,0.986111,0.985164,0.987654
6,AdaBoost,0.944444,0.94133,0.935023
7,Naive Bayes,1.0,1.0,1.0
8,QDA,0.972222,0.969577,0.964912


In [5]:
data_set = datasets.load_digits()
X = data_set.data
y = data_set.target
res = run_sl_models(X, y)
display(res)
px.bar(res, x="model", y="accuracy", color="accuracy")


Variables are collinear



Unnamed: 0,model,accuracy,f1_score,recall
0,Nearest Neighbors,0.973574,0.973547,0.974278
1,Linear SVM,0.980529,0.980344,0.98062
2,RBF SVM,0.119611,0.054797,0.121679
3,Decision Tree,0.687065,0.670068,0.687283
4,Random Forest,0.838665,0.83612,0.840525
5,Neural Net,0.977747,0.977962,0.97812
6,AdaBoost,0.301808,0.246202,0.312478
7,Naive Bayes,0.770515,0.765796,0.771093
8,QDA,0.752434,0.735972,0.757876


In [6]:
data_set = datasets.load_iris()
X = data_set.data
y = data_set.target
res = run_sl_models(X, y)
display(res)
px.bar(res, x="model", y="accuracy", color="accuracy")

Unnamed: 0,model,accuracy,f1_score,recall
0,Nearest Neighbors,0.983333,0.981929,0.981481
1,Linear SVM,0.883333,0.872382,0.876218
2,RBF SVM,0.983333,0.981929,0.981481
3,Decision Tree,0.983333,0.981929,0.981481
4,Random Forest,1.0,1.0,1.0
5,Neural Net,0.983333,0.981929,0.981481
6,AdaBoost,0.933333,0.926587,0.925926
7,Naive Bayes,0.966667,0.963938,0.963938
8,QDA,0.983333,0.981929,0.981481


In [7]:
data_set = datasets.load_breast_cancer()
X = data_set.data
y = data_set.target
res = run_sl_models(X, y)
display(res)
px.bar(res, x="model", y="accuracy", color="accuracy")

Unnamed: 0,model,accuracy,f1_score,recall
0,Nearest Neighbors,0.95614,0.951858,0.951858
1,Linear SVM,0.986842,0.985516,0.984122
2,RBF SVM,0.649123,0.393617,0.5
3,Decision Tree,0.925439,0.918391,0.919595
4,Random Forest,0.951754,0.94689,0.945608
5,Neural Net,0.982456,0.980743,0.980743
6,AdaBoost,0.95614,0.952389,0.957601
7,Naive Bayes,0.942982,0.937234,0.93598
8,QDA,0.969298,0.966396,0.967736
