In [4]:
#import library
import os
import numpy as np
import pickle
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import Pipeline
from tqdm import tqdm
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [5]:
# Load the pickled dataset; the file is expected to hold a 4-item sequence:
# train features, train labels, test features, test labels (in that order).
# NOTE(security): pickle.load can run arbitrary code while deserializing,
# so only point this at a file whose origin is trusted.
with open('../data/external/combined_augmented_data.pkl', 'rb') as f:
    (X_aug_train, y_aug_train,
     X_aug_test, y_aug_test) = pickle.load(f)

Applying Gaussian Naive Bayes


In [None]:
# Gaussian Naive Bayes classifier built with scikit-learn's default settings
# (no arguments are overridden); it is fitted in a later cell.
gnb = GaussianNB()

In [None]:
# Flatten every sample into a single row so both arrays become
# (n_samples, n_features) -- 784 features per sample according to the
# author's note (presumably 28x28 images; TODO confirm).
X_aug_train = X_aug_train.reshape(len(X_aug_train), -1)
X_aug_test = X_aug_test.reshape(len(X_aug_test), -1)


# Class names used as `target_names` in the classification reports:
# 24 single letters, A through Y with J left out.
letters = list('ABCDEFGHIKLMNOPQRSTUVWXY')

In [None]:
# Fit Gaussian Naive Bayes on the training split, then predict both splits
gnb.fit(X_aug_train, y_aug_train)
y_pred_sc = gnb.predict(X_aug_test)
y_pred_train = gnb.predict(X_aug_train)

# Accuracy on the held-out split and on the data the model was fitted on
print(f"Accuracy Test: {accuracy_score(y_aug_test, y_pred_sc)}")
print(f"Accuracy Train: {accuracy_score(y_aug_train, y_pred_train)}")

# Per-class metrics on the test split, labelled with the letter names
print("Classification report:")
print(classification_report(y_aug_test, y_pred_sc, target_names=letters))

In [None]:
# Logistic regression classifier built with scikit-learn's default settings
# (no hyperparameters are overridden here).
# NOTE(review): the default iteration limit may be too low for unscaled pixel
# features -- check for convergence warnings when this model is fitted.
lr = LogisticRegression()

In [None]:
# Fit logistic regression on the training split and evaluate it on both splits
lr.fit(X_aug_train, y_aug_train)
y_pred_lr_sc = lr.predict(X_aug_test)
# The training accuracy must be computed from this model's own predictions.
# Reusing `y_pred_train` here would report the Gaussian Naive Bayes training
# accuracy instead, because that variable holds the GaussianNB predictions.
y_pred_lr_train = lr.predict(X_aug_train)

print(f"Accuracy Test: {accuracy_score(y_aug_test, y_pred_lr_sc)}")
print(f"Accuracy Train: {accuracy_score(y_aug_train, y_pred_lr_train)}")

# Per-class metrics on the test split, labelled with the letter names
print("Classification report:")
print(classification_report(y_aug_test, y_pred_lr_sc, target_names=letters))

In [None]:
# Shorter aliases for the augmented splits (same objects, nothing is copied)
X_train, X_test = X_aug_train, X_aug_test
y_train, y_test = y_aug_train, y_aug_test

In [None]:
# Create the scikit-learn Linear Discriminant Analysis estimator with its
# default settings (no arguments are overridden).
lda = LinearDiscriminantAnalysis()
# Fit on the training features and labels. This is the last expression of the
# cell, so whatever fit() returns is shown as the cell output.
lda.fit(X_train,y_train)

In [None]:
# Explained-variance ratio of each component, read from the fitted LDA model
evr = lda.explained_variance_ratio_
components = range(1, len(evr) + 1)

# Scree plot: one bar per component, the running total as a line,
# and a dashed reference line at 0.95
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(x=components, height=evr, label='Explained Variance')
ax.plot(components, np.cumsum(evr), marker='.', color='orange',
        label='Cumulative Explained Variance')
ax.axhline(y=.95, color='r', linestyle='--', label='0.95 Explained Variance')
ax.set_xticks(components)
ax.set_title('LDA: Explained Variance')
ax.set_xlabel('Component')
ax.set_ylabel('Explained Variance')
ax.legend(fontsize=9);

In [None]:
# Re-fit LDA on the training split, then project both splits with that fit
lda.fit(X_train, y_train)
X_train_lda = lda.transform(X_train)
X_test_lda = lda.transform(X_test)

In [None]:
import seaborn as sns

# Scatter the training samples on their first two LDA components,
# coloured by class label
fig, ax = plt.subplots(figsize=(8, 8))
ax = sns.scatterplot(x=X_train_lda[:, 0], y=X_train_lda[:, 1],
                     hue=y_train, palette='pastel', ax=ax)
# Keep seaborn's legend handles but relabel them with the letter names
handler, _ = ax.get_legend_handles_labels()
ax.legend(handler, letters, bbox_to_anchor=(1, 1))
ax.set_title('2D Embedding of Sign Language Images')
ax.set_xlabel('Linear Discriminant 1')
ax.set_ylabel('Linear Discriminant 2');

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC
# SVM on the LDA-projected features

# Search space: RBF kernel, gamma='auto', and 10 evenly spaced C values
# between 2.62 and 2.7
parameters = {'kernel':['rbf'],'gamma':['auto'],'C':np.linspace(2.62,2.7,10)}

# Randomized search over the space above; random_state pins the sampling and
# verbose=3 prints progress while fitting
clf = RandomizedSearchCV(SVC(), param_distributions=parameters, random_state=99, verbose=3)
# Fit on the LDA-transformed training data. The labels are taken from y_train,
# the same split X_train_lda was derived from, so features and labels share
# one lineage (y_train is an alias of y_aug_train, so results are unchanged).
search_lda = clf.fit(X_train_lda, y_train)