AUTHORS: Ludwig Wideskär (luai18@student.bth.se), Akshaya Bathula (akba21@student.bth.se)

---
Import libraries:

In [None]:
## Import and install libraries:

!python --version
!pip install scikit-posthocs

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import time

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC

from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D

from tabulate import tabulate

from scipy.stats import friedmanchisquare
from scikit_posthocs import posthoc_nemenyi_friedman

In [None]:
## Load the Sign Language MNIST train and test CSV files.
# The first row of each file is used as the column header.
# Both files must be readable at the paths below before this cell is executed.

df_train = pd.read_csv("/content/sign_mnist_train.csv", header=[0])
df_test = pd.read_csv("/content/sign_mnist_test.csv", header=[0])

# ---
# Merge both files, drop duplicated rows and re-split them ourselves,
# because of overfitting concerns with the original split:
# https://www.kaggle.com/datasets/datamunge/sign-language-mnist/discussion/379925
df_all = pd.concat([df_train, df_test], ignore_index=True).drop_duplicates()

# ---
# X: every column except the first one (the pixel values).
# y: the first column only (the label, i.e. the sign language letter encoded
#    as a number 0-25, where 9 and 25 are skipped).
df_X, df_y = df_all.iloc[:, 1:], df_all.iloc[:, 0]

# ---
# Stratified 80:20 train/test split; every part gets a fresh 0..n-1 index.
X_train, X_test, y_train, y_test = (
    part.reset_index(drop=True)
    for part in train_test_split(df_X, df_y, test_size=0.2, random_state=42, stratify=df_y)
)


In [None]:
## Public variables
# Letters used as class names in reports and plots (the alphabet without J and Z).
CR_LETTERS = list("ABCDEFGHIKLMNOPQRSTUVWXY")

In [None]:
## Show the first few training images, titled "<numeric label> : <letter>"
number_of_entries_show = 3
for i in range(number_of_entries_show):
  # Each row holds one flattened image; restore its 28x28 shape for plotting.
  img = X_train.iloc[i].to_numpy().reshape((28, 28))
  plt.imshow(img, cmap='gray')
  # Label 0 maps to 'A', label 1 to 'B', and so on.
  plt.title(f"{y_train[i]} : {chr(ord('A') + y_train[i])}")
  plt.axis("off")
  plt.show()

In [None]:
def render_confusion_matrix(y_true, y_pred):
  """Plot the confusion matrix of y_true against y_pred as an annotated heatmap.

  Both axes are labelled with the letters from CR_LETTERS instead of the
  numeric class labels. The figure is shown immediately; nothing is returned.
  """
  cm_counts = confusion_matrix(y_true, y_pred)

  # ---
  # Draw the heatmap on a large square figure
  _, ax = plt.subplots(figsize=(16, 16))
  sns.heatmap(cm_counts, ax=ax, annot=True, fmt= '.1f', cmap="Greens",
              linewidths=0.01, linecolor="gray")

  plt.xlabel("Predicted Label")
  plt.ylabel("True Label")

  # ---
  # One tick per letter at 0.5, 1.5, ... (presumably the heatmap cell centres)
  tick_positions = np.arange(0.5, len(CR_LETTERS))
  ax.xaxis.set(ticks=tick_positions, ticklabels=CR_LETTERS)
  ax.yaxis.set(ticks=tick_positions, ticklabels=CR_LETTERS)

  plt.title("Confusion Matrix for predicting the test dataset")
  plt.show()

---
K nearest neighbors (K-NN) (Supervised)

In [None]:
def KNN_algorithm(X_train, X_test, y_train, y_test):
  """Cross-validate and test a K-nearest-neighbours classifier.

  The features are min-max scaled (scaler fitted on the training set), a
  10-fold stratified cross-validation is run on the training set, and a final
  model fitted on the whole training set is evaluated on the test set
  (classification report and confusion matrix are printed/plotted).

  Returns a list of five lists with one value per fold, in this order:
  training time, accuracy, precision, recall, F1-score. Precision, recall and
  F1-score are weighted averages over the classes.
  """
  # The dataset is expected to be split into train and test parts already.

  # Preprocessing: scale the features, reusing the training-set scaling for the test set
  feature_scaler = MinMaxScaler()
  X_train = feature_scaler.fit_transform(X_train)
  X_test = feature_scaler.transform(X_test)

  # ---
  # Hyperparameters
  N_NEIGHBORS = 5 # Default

  # ---
  # Per-fold performance metrics
  fold_train_times, fold_accuracies = [], []
  fold_precisions, fold_recalls, fold_f1_scores = [], [], []

  # ---
  # Stratified K-fold cross validation on the training set
  splitter = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

  for fold_number, (fit_idx, val_idx) in enumerate(splitter.split(X_train, y_train), start=1):
    print(f"Fold #{fold_number}")
    # NOTE(review): y_train is indexed with positional fold indices; for a pandas
    # Series this presumably relies on a default 0..n-1 index - confirm with callers.
    X_fit, y_fit = X_train[fit_idx], y_train[fit_idx]
    X_val, y_val = X_train[val_idx], y_train[val_idx]

    # A fresh model for every fold
    fold_model = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)

    # Only the fit call is timed
    fit_started = time.time()
    fold_model.fit(X_fit, y_fit)
    train_time = time.time() - fit_started

    val_pred = fold_model.predict(X_val)

    # Validation metrics of this fold
    accuracy = accuracy_score(y_val, val_pred)
    precision, recall, f1, _ = precision_recall_fscore_support(y_val, val_pred, average='weighted')

    fold_train_times.append(train_time)
    fold_accuracies.append(accuracy)
    fold_precisions.append(precision)
    fold_recalls.append(recall)
    fold_f1_scores.append(f1)

    print(f"Training time: {train_time}")
    print(f"Validation accuracy: {accuracy}")
    print(f"Validation precision: {precision}")
    print(f"Validation recall: {recall}")
    print(f"Validation F1-score: {f1}")

  # ---
  # Final model: fit on the whole training set, evaluate on the test set
  final_model = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)
  final_model.fit(X_train, y_train)
  test_pred = final_model.predict(X_test)

  print("\nResults for predicting the test dataset:")
  print("=====================================================")
  print(classification_report(y_test, test_pred, target_names=CR_LETTERS, digits=6))
  print("=====================================================")

  # Confusion matrix of the test set predictions
  render_confusion_matrix(y_test, test_pred)

  return [fold_train_times, fold_accuracies, fold_precisions, fold_recalls, fold_f1_scores]

In [None]:
# Run the K-NN pipeline and keep its per-fold cross-validation metrics
# (used by the statistical comparison cells further down).
knn_cv_results = KNN_algorithm(X_train, X_test, y_train, y_test)

---
Support Vector Machine (SVM) (Supervised)

In [None]:
def SVM_algorithm(X_train, X_test, y_train, y_test):
  """Cross-validate and test a linear-kernel support vector classifier.

  The features are min-max scaled (scaler fitted on the training set), a
  10-fold stratified cross-validation is run on the training set, and a final
  model fitted on the whole training set is evaluated on the test set
  (classification report and confusion matrix are printed/plotted).

  Returns a list of five lists with one value per fold, in this order:
  training time, accuracy, precision, recall, F1-score. Precision, recall and
  F1-score are weighted averages over the classes.
  """
  # The dataset is expected to be split into train and test parts already.

  # Preprocessing: scale the features, reusing the training-set scaling for the test set
  feature_scaler = MinMaxScaler()
  X_train = feature_scaler.fit_transform(X_train)
  X_test = feature_scaler.transform(X_test)

  # ---
  # Hyperparameters
  KERNEL = 'linear' # Default is 'rbf'
  C = 1.0 # Default

  # ---
  # Per-fold performance metrics
  fold_train_times, fold_accuracies = [], []
  fold_precisions, fold_recalls, fold_f1_scores = [], [], []

  # ---
  # Stratified K-fold cross validation on the training set
  splitter = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

  for fold_number, (fit_idx, val_idx) in enumerate(splitter.split(X_train, y_train), start=1):
    print(f"Fold #{fold_number}")
    # NOTE(review): y_train is indexed with positional fold indices; for a pandas
    # Series this presumably relies on a default 0..n-1 index - confirm with callers.
    X_fit, y_fit = X_train[fit_idx], y_train[fit_idx]
    X_val, y_val = X_train[val_idx], y_train[val_idx]

    # A fresh model for every fold
    fold_model = SVC(kernel=KERNEL, C=C)

    # Only the fit call is timed
    fit_started = time.time()
    fold_model.fit(X_fit, y_fit)
    train_time = time.time() - fit_started

    val_pred = fold_model.predict(X_val)

    # Validation metrics of this fold
    accuracy = accuracy_score(y_val, val_pred)
    precision, recall, f1, _ = precision_recall_fscore_support(y_val, val_pred, average='weighted')

    fold_train_times.append(train_time)
    fold_accuracies.append(accuracy)
    fold_precisions.append(precision)
    fold_recalls.append(recall)
    fold_f1_scores.append(f1)

    print(f"Training time: {train_time}")
    print(f"Validation accuracy: {accuracy}")
    print(f"Validation precision: {precision}")
    print(f"Validation recall: {recall}")
    print(f"Validation F1-score: {f1}")

  # ---
  # Final model: fit on the whole training set, evaluate on the test set
  final_model = SVC(kernel=KERNEL, C=C)
  final_model.fit(X_train, y_train)
  test_pred = final_model.predict(X_test)

  print("\nResults for predicting the test dataset:")
  print("=====================================================")
  print(classification_report(y_test, test_pred, target_names=CR_LETTERS, digits=6))
  print("=====================================================")

  # Confusion matrix of the test set predictions
  render_confusion_matrix(y_test, test_pred)

  return [fold_train_times, fold_accuracies, fold_precisions, fold_recalls, fold_f1_scores]

In [None]:
# Run the SVM pipeline and keep its per-fold cross-validation metrics
# (used by the statistical comparison cells further down).
svm_cv_results = SVM_algorithm(X_train, X_test, y_train, y_test)

---
Convolutional Neural Network (CNN) (Deep Learning)

In [None]:
def CNN_algorithm(X_train, X_test, y_train, y_test):
  """Cross-validate and test a small convolutional neural network.

  The pixel values are divided by 255 and reshaped to 28x28x1 images. A
  10-fold stratified cross-validation is run on the training set, and a final
  model fitted on the whole training set is evaluated on the test set
  (classification report and confusion matrix are printed/plotted).

  X_train and X_test must support `/` and `.values` (e.g. pandas DataFrames)
  with 784 values per row.

  Returns a list of five lists with one value per fold, in this order:
  training time, accuracy, precision, recall, F1-score. All scores are
  computed on the validation part of the fold; precision, recall and
  F1-score are weighted averages over the classes.
  """
  # The dataset is expected to be split into train and test parts already.

  # Normalize / scale the data (Preprocessing)
  X_train = X_train / 255
  X_test = X_test / 255

  # ---
  # Reshape (Preprocessing)
  X_train = X_train.values.reshape(-1,28,28,1)
  X_test = X_test.values.reshape(-1,28,28,1)

  # ---
  # Define some hyperparameters
  IMAGE_SIZE = (28,28,1)
  OUTPUT = 26 # Number of potential classes, 0-25, but 9(J) and 25(Z) are not used.
  KERNEL = (3,3)
  POOL_SIZE = (2,2)
  MAX_NEURONS = 120
  MAX_FILTERS = 64
  LOSS_FUNCTION = 'sparse_categorical_crossentropy'
  OPTIMIZER = 'adam'

  EPOCHS = 10
  BATCH_SIZE = 256

  def build_model():
    """Create and compile a new, untrained CNN (same architecture for CV and final fit)."""
    # Integer division keeps the layer sizes as ints; float sizes are not
    # reliably accepted by every Keras version.
    network = Sequential([
      Conv2D(filters=MAX_FILTERS // 2, kernel_size=KERNEL, activation='relu', input_shape=IMAGE_SIZE),
      MaxPooling2D(pool_size=POOL_SIZE),
      Conv2D(filters=MAX_FILTERS, kernel_size=KERNEL, activation='relu'),
      MaxPooling2D(pool_size=POOL_SIZE),
      Flatten(),
      Dense(units=MAX_NEURONS, activation='relu'),
      Dense(units=MAX_NEURONS // 2, activation='relu'),
      Dense(units=OUTPUT, activation='softmax')
    ])
    network.compile(optimizer=OPTIMIZER, loss=LOSS_FUNCTION, metrics=['accuracy'])
    return network

  # ---
  # Lists of performance metrics for training
  list_cv_train_time = []
  list_cv_accuracy = []
  list_cv_precision = []
  list_cv_recall = []
  list_cv_f1_score = []

  # ---
  # Stratified K-fold cross validation on the training set
  skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

  for fold, (train_skf, val_skf) in enumerate(skf.split(X_train, y_train), start=1):
    print(f"Fold #{fold}")
    # NOTE(review): y_train is indexed with positional fold indices; for a pandas
    # Series this presumably relies on a default 0..n-1 index - confirm with callers.
    X_train_fold, y_train_fold = X_train[train_skf], y_train[train_skf]
    X_val_fold, y_val_fold = X_train[val_skf], y_train[val_skf]

    # New model for every fold so no weights leak between folds
    model = build_model()

    # Fit model (only the fit call is timed)
    start_time = time.time()
    model.fit(X_train_fold, y_train_fold, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=1)
    end_time = time.time()
    train_time = end_time - start_time

    # Predicted class = index of the highest softmax output
    y_pred = np.argmax(model.predict(X_val_fold), axis=-1)

    # Calculate performance metrics on the VALIDATION fold, the same way as
    # for K-NN and SVM (previously the accuracy was measured on the training fold).
    accuracy = accuracy_score(y_val_fold, y_pred)
    precision, recall, f1, _ = precision_recall_fscore_support(y_val_fold, y_pred, average='weighted')

    list_cv_train_time.append(train_time)
    list_cv_accuracy.append(accuracy)
    list_cv_precision.append(precision)
    list_cv_recall.append(recall)
    list_cv_f1_score.append(f1)

    print(f"Training time: {train_time}")
    print(f"Validation accuracy: {accuracy}")
    print(f"Validation precision: {precision}")
    print(f"Validation recall: {recall}")
    print(f"Validation F1-score: {f1}")

  cv_eval_metrics = [list_cv_train_time, list_cv_accuracy,  list_cv_precision, list_cv_recall, list_cv_f1_score]

  # ---
  # Define and compile the final CNN model
  model = build_model()

  model.summary()

  # Fit model to training set and evaluate the model on the test set
  model.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=1)
  y_pred = np.argmax(model.predict(X_test), axis=-1)
  print("\nResults for predicting the test dataset:")
  print("=====================================================")
  print(classification_report(y_test, y_pred, target_names=CR_LETTERS, digits=6))
  print("=====================================================")

  # Print Confusion matrix of predicting the test set
  render_confusion_matrix(y_test, y_pred)

  return cv_eval_metrics

In [None]:
# Run the CNN pipeline and keep its per-fold cross-validation metrics
# (used by the statistical comparison cells further down).
cnn_cv_results = CNN_algorithm(X_train, X_test, y_train, y_test)

In [None]:
def cv_tabulate(cv_results):
  """Print a per-fold table of one metric for K-NN, SVM and CNN.

  cv_results holds the per-fold values of the three algorithms in the order
  K-NN, SVM, CNN. Below the fold rows the table lists the minimum, maximum,
  average and standard deviation (as computed by pandas `Series.std`) of
  every algorithm. Nothing is returned.
  """
  HEADER = ["Fold", "K-NN", "SVM", "CNN"]
  algorithm_names = HEADER[1:]
  scores = [cv_results[0], cv_results[1], cv_results[2]]

  # One row per fold, numbered from 1, with the fold number as first column
  fold_table = pd.DataFrame(dict(zip(algorithm_names, scores)))
  fold_numbers = np.arange(1, len(fold_table) + 1)
  fold_table.index = fold_numbers
  fold_table.insert(0, HEADER[0], fold_numbers)

  # Summary rows, separated from the folds (and from each other) by dashes
  separator = ['-'] * len(HEADER)
  summary_table = pd.DataFrame([
      separator,
      ['min'] + [min(values) for values in scores],
      ['max'] + [max(values) for values in scores],
      separator,
      ['avg'] + [sum(values) / len(values) for values in scores],
      ['stdev'] + [fold_table[name].std() for name in algorithm_names]],
      columns=HEADER)

  full_table = pd.concat([fold_table, summary_table])

  print(tabulate(full_table, headers=HEADER, showindex=False, tablefmt="rst"))

In [None]:
def Friedman_and_Nemenyi_tests(cv_list, highest_is_best):
  """Compare K-NN, SVM and CNN on one metric with Friedman and Nemenyi tests.

  cv_list holds the per-fold values of the three algorithms in the order
  K-NN, SVM, CNN. The Friedman test is run first; when its p-value is below
  ALPHA (0.05), the Nemenyi post-hoc test is run and every pair with a
  p-value below ALPHA is reported.

  highest_is_best tells how to read the metric: True when larger values are
  better (e.g. accuracy), False when smaller values are better (e.g. training
  time). The winner of a significant pair is decided from the mean ranks of
  the two algorithms over the folds, so the verdict follows the data.

  All results are printed; nothing is returned.
  """
  ALGORITHMS = ["K-NN", "SVM", "CNN"]
  ALPHA = 0.05

  cv_knn = cv_list[0]
  cv_svm = cv_list[1]
  cv_cnn = cv_list[2]

  # Friedman test
  statistic, pvalue = friedmanchisquare(cv_knn, cv_svm, cv_cnn)
  print(f"\nFriedman statistic (ratio): {statistic}")
  print(f"P-value: {pvalue}")
  print(f"Alpha: {ALPHA}")

  if pvalue < ALPHA:

    # ---
    # Nemenyi post-hoc test
    print("\nThe p-value is less than alpha (significance level).")
    print("The difference between some of the averages is statistically significant.")
    print("The null hypothesis (H0) is rejected!")

    # Rows are folds, columns are algorithms (K-NN, SVM, CNN)
    fold_scores = np.array([cv_knn, cv_svm, cv_cnn]).T
    df_nemenyi = posthoc_nemenyi_friedman(fold_scores)  # computed once, then printed
    print("")
    print(df_nemenyi)
    print("")

    # Mean rank per algorithm over the folds; a larger value gets a larger rank
    mean_ranks = pd.DataFrame(fold_scores).rank(axis=1).mean(axis=0)

    # ---
    # Pair-wise comparisons: K-NN/SVM, K-NN/CNN, SVM/CNN
    for first, second in [(0, 1), (0, 2), (1, 2)]:
      if not df_nemenyi[first].iloc[second] < ALPHA:
        continue

      # The first algorithm wins when its mean rank lies on the "good" side:
      # higher when highest is best, lower otherwise.
      first_ranks_higher = mean_ranks.iloc[first] > mean_ranks.iloc[second]
      if first_ranks_higher == bool(highest_is_best):
        better, worse = ALGORITHMS[first], ALGORITHMS[second]
      else:
        better, worse = ALGORITHMS[second], ALGORITHMS[first]

      print(f"The algorithm {better} performs significantly better than {worse}!")

  else:
    print("\nThe p-value is greater than alpha (significance level).")
    print("The difference between the averages are NOT statistically signifiant.")

---
Cross-validation followed by Friedman and Nemenyi tests for each performance metric:

In [None]:
# Training time: entry 0 of every algorithm's cross-validation results
training_time_knn, training_time_svm, training_time_cnn = (
    results[0] for results in (knn_cv_results, svm_cv_results, cnn_cv_results)
)
cv_training_time = [training_time_knn, training_time_svm, training_time_cnn]

print("Cross-validation results of computational performance in terms of training time:")
cv_tabulate(cv_training_time)

# A lower training time is better, hence highest_is_best is False here.
Friedman_and_Nemenyi_tests(cv_training_time, False)

In [None]:
# Accuracy score: entry 1 of every algorithm's cross-validation results
accuracy_knn, accuracy_svm, accuracy_cnn = (
    results[1] for results in (knn_cv_results, svm_cv_results, cnn_cv_results)
)
cv_accuracy = [accuracy_knn, accuracy_svm, accuracy_cnn]

print("Cross-validation results of predictive performance based on accuracy:")
cv_tabulate(cv_accuracy)

# A higher accuracy is better.
Friedman_and_Nemenyi_tests(cv_accuracy, True)

In [None]:
# Precision score: entry 2 of every algorithm's cross-validation results
precision_knn, precision_svm, precision_cnn = (
    results[2] for results in (knn_cv_results, svm_cv_results, cnn_cv_results)
)
cv_precision = [precision_knn, precision_svm, precision_cnn]

print("Cross-validation results of predictive performance based on precision:")
cv_tabulate(cv_precision)

# A higher precision is better.
Friedman_and_Nemenyi_tests(cv_precision, True)

In [None]:
# Recall score: entry 3 of every algorithm's cross-validation results
recall_knn, recall_svm, recall_cnn = (
    results[3] for results in (knn_cv_results, svm_cv_results, cnn_cv_results)
)
cv_recall = [recall_knn, recall_svm, recall_cnn]

print("Cross-validation results of predictive performance based on recall:")
cv_tabulate(cv_recall)

# A higher recall is better.
Friedman_and_Nemenyi_tests(cv_recall, True)

In [None]:
# F1-score / F-Measure: entry 4 of every algorithm's cross-validation results
f1_score_knn, f1_score_svm, f1_score_cnn = (
    results[4] for results in (knn_cv_results, svm_cv_results, cnn_cv_results)
)
cv_f1_score = [f1_score_knn, f1_score_svm, f1_score_cnn]

print("Cross-validation results of predictive performance based on F1-score / F-measure:")
cv_tabulate(cv_f1_score)

# A higher F1-score is better.
Friedman_and_Nemenyi_tests(cv_f1_score, True)