In [21]:
import pandas as pd
import joblib
import time
import warnings

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

warnings.filterwarnings('ignore')

In [2]:
thresholded_train_size32 = pd.read_pickle("../data/processed/thresholded_train_size32.pkl")
thresholded_val_size32 = pd.read_pickle("../data/processed/thresholded_val_size32.pkl")

grayscaled_train_size32 = pd.read_pickle("../data/processed/grayscaled_train_size32.pkl")
grayscaled_val_size32 = pd.read_pickle("../data/processed/grayscaled_val_size32.pkl")

In [5]:
X_train = grayscaled_train_size32.drop('label', axis=1)
y_train = grayscaled_train_size32['label']

X_test = grayscaled_val_size32.drop('label', axis=1)
y_test = grayscaled_val_size32['label']

In [17]:
scaler = StandardScaler()

X_train_scaler = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_test_scaler = pd.DataFrame(scaler.fit_transform(X_test), columns=X_test.columns)

pca = PCA(0.95)

X_scaler = pd.concat([X_train_scaler, X_test_scaler], axis=0)

pca.fit(X_scaler)

X_train_scaler_pca = pd.DataFrame(pca.transform(X_train_scaler))
X_test_scaler_pca = pd.DataFrame(pca.transform(X_test_scaler))


In [18]:
logR = LogisticRegression(multi_class='multinomial')
nb = GaussianNB()
knn = KNeighborsClassifier(n_neighbors=7)

In [26]:
def train_model(model, X_train, y_train, X_test, y_test):
    start_time = time.time()
    model.fit(X_train, y_train)
    end_time = time.time()
    y_pred = model.predict(X_test)
    print("Train time:", end_time-start_time)
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Precision:", precision_score(y_test, y_pred, average='weighted'))
    print("Recall:", recall_score(y_test, y_pred, average='weighted'))
    print(f"Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

In [30]:
train_model(logR, X_train_scaler_pca, y_train, X_test_scaler_pca, y_test)

Train time: 1.4367256164550781
Accuracy: 0.768073032321737
Precision: 0.7706060596558123
Recall: 0.768073032321737
Confusion Matrix:
 [[112   0   1   4   3   2   1   0   6   1   0   2   2   0   1   3   0   2
    0   0   1   0   9   4]
 [  1 139   0   7   8   1   1   0   1   2   1   0   0   2   0   0   0   2
    3   0   2   2   2   0]
 [  2   3 144  10   0   1   3   0   0   1   0   0   0   4   0   0   0   2
    3   0   0   0   0   0]
 [  1   4   6 188   5   0   0   7   0   6   1   1   1   4   0   0   0   1
    4   2   0   0   4   2]
 [  6   2   3   5 163   2   0   0   1   8   1   3   1   0   0   5   0   5
    3   2   0   2   2   0]
 [  2   2   2   4   8 142   5   0   0   3   1   0   1   0   1   2   2   0
    0   2   2   1   6   2]
 [  2   1   4   0   0   0 104  16   0   5   0   0   3   0   6   2   6   4
    0   0   0   0   1  11]
 [  1   0   2   0   0   0   7 137   0   0   0   0   0   2   0   0   3   0
    0   1   1   0   0   0]
 [  0   0   0   2   3   0   0   0 135   2   0   1   2   5 

In [31]:
train_model(nb, X_train_scaler_pca, y_train, X_test_scaler_pca, y_test)

Train time: 0.031633615493774414
Accuracy: 0.5938810757463607
Precision: 0.6616229229198187
Recall: 0.5938810757463607
Confusion Matrix:
 [[106   1   0  11   8   0   3   0   0   0   0   1   2   1   3   0   0   0
   12   0   6   0   0   0]
 [ 10 104   6   5  15   0   3   1   0   1   0  12   0   5   2   7   0   1
    2   0   0   0   0   0]
 [ 11  14 106   3   0   0   3   0   0   1   0   0   4   6   5   3   0   0
   14   2   1   0   0   0]
 [  4   3   0 114  27   5   0   0   0   5   1  44   0   0  11  16   2   2
    0   0   0   0   3   0]
 [ 10   2   0   0 146   0   5   0   0   4   1  18   0   2  13   1   0   1
    9   0   0   2   0   0]
 [  4  29   2   5   3 107   4   0   0   7   0  15   0   0   2   0   2   1
    1   0   2   0   0   4]
 [  4   3   2   0   3   1  94  25   0   0   0   6   0   1  10   2   1   4
    0   0   0   0   2   7]
 [  0   3   0   0   0   0   7 126   0   3   0   0   1   0  12   0   0   2
    0   0   0   0   0   0]
 [  0   0   1   4  11   0  10   0  88   0   0  28   0 

In [32]:
train_model(knn, X_train_scaler_pca, y_train, X_test_scaler_pca, y_test)

Train time: 0.014775991439819336
Accuracy: 0.9928448063163089
Precision: 0.992896710680754
Recall: 0.9928448063163089
Confusion Matrix:
 [[154   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0]
 [  0 174   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0]
 [  0   0 173   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0]
 [  0   0   0 236   0   0   0   0   0   0   0   0   0   1   0   0   0   0
    0   0   0   0   0   0]
 [  1   0   0   0 208   0   0   0   0   0   0   2   1   0   0   0   0   0
    0   2   0   0   0   0]
 [  0   0   0   0   0 188   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0]
 [  0   0   0   0   0   0 164   1   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0]
 [  0   0   0   0   0   0   0 154   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0 161   0   0   0   0  