In [None]:
import random

import numpy as np
import cv2
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, log_loss, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [None]:
# Load the images and the label matrix from disk
X_raw = np.load("dataset/X.npy")

# transform the y into a (m, 1) matrix: argmax over axis 1 picks one class index per row
class_ids = np.argmax(np.load("dataset/Y.npy"), axis=1)
y_raw = class_ids.reshape(class_ids.shape[0], 1)

# number of distinct labels actually present in the data
n_classes = np.unique(y_raw).size

print(f"X shape is {X_raw.shape}")
print(f"y shape is {y_raw.shape}")
print("Number of classes is", n_classes)


In [None]:
def pltImage(img, grid=None, title=None, figsize=(5,5)):
    """Plot one image, or several images laid out on a grid, in grayscale.

    Args:
        img: a single image when `grid` is None, otherwise a sequence of images.
        grid: None for a single image, or a tuple (rows, cols) for a grid.
        title: title string for a single image; in grid mode a sequence with
            one title per image. No titles are drawn when it is falsy.
        figsize: figure size passed to `plt.subplots`; only used in grid mode.

    Raises:
        ValueError: if the grid has fewer cells than there are images.
    """
    if grid is None:
        plt.imshow(img, cmap='gray')
        if title:
            plt.title(title)
        plt.axis('off')
    else:
        rows, cols = grid
        if rows * cols < len(img):
            raise ValueError('Insufficient grid size')
        # squeeze=False keeps `axes` a 2-D array even for a 1x1 (or 1xN) grid,
        # where plt.subplots would otherwise return a bare Axes / 1-D array
        fig, axes = plt.subplots(rows, cols, figsize=figsize, squeeze=False)
        for i, ax in enumerate(axes.ravel()):
            # switch the frame off for every cell, including the unused ones
            ax.axis('off')
            if i >= len(img):
                continue
            ax.imshow(img[i], cmap='gray', vmin=0, vmax=1)
            if title:
                ax.set_title(title[i])
        plt.tight_layout()

    plt.show()


# test func pltImage: one standalone image, then the first ten images on a 2x5 grid
pltImage(X_raw[0], title='X_raw[0]')
first_ten_titles = [f"Raw {i}" for i in range(10)]
pltImage(X_raw[:10], grid=(2, 5), figsize=(10, 5), title=first_ten_titles)

In [None]:
# augment raw data
def augment(X, y, rotation=(0, 0), scaling=(1, 1), mult=2):
    """Create randomly rotated and scaled copies of every image in X.

    The whole batch is traversed mult*mult times; on every visit an image gets
    its own random angle and scale, so each input image yields mult*mult
    variants. The untransformed originals are not included in the output.

    Args:
        X: array of images; the first axis indexes the images and the next two
            are taken as (height, width).
        y: labels aligned with X; y[i] is copied for every variant of X[i].
        rotation: (low, high) bounds handed to random.uniform for the angle
            passed to cv2.getRotationMatrix2D.
        scaling: (low, high) bounds handed to random.uniform for the scale.
        mult: the number of variants per image is mult*mult.

    Returns:
        (new_X, new_y): numpy arrays holding the variants and their labels,
        ordered pass by pass (all images of pass 1, then pass 2, ...).
    """
    if X.shape[0] == 0:
        # nothing to transform; avoids indexing X[0] on an empty batch
        return np.array(X[:0]), np.array(y[:0])

    (h, w) = X[0].shape[:2]
    center = (w // 2, h // 2)
    new_X = []
    new_y = []

    # a non-positive mult produces no variants at all
    n_passes = max(mult, 0) ** 2
    for _ in range(n_passes):
        for i in range(X.shape[0]):
            # the angle is drawn before the scale for every single image
            angle = random.uniform(*rotation)
            scale = random.uniform(*scaling)
            M = cv2.getRotationMatrix2D(center, angle, scale)
            # BORDER_REPLICATE fills uncovered pixels by repeating the edge pixels
            transformed = cv2.warpAffine(X[i], M, (w, h), borderMode=cv2.BORDER_REPLICATE)
            new_X.append(transformed)
            new_y.append(y[i])

    return np.array(new_X), np.array(new_y)

# test augment (run multiple times for different outputs)
# mult=3 gives 3*3 = 9 random variants of the first image, one per cell of the 3x3 grid
demo_image, demo_label = X_raw[:1], y_raw[:1]
X, y = augment(demo_image, demo_label, mult=3, rotation=[-45, 45], scaling=[0.5, 1.5])
pltImage(X, figsize=(5, 5), grid=(3, 3))

In [None]:
# transform raw data into edges
def get_edges(X):
    """Return a Canny edge map for every image in X.

    X is multiplied by 255 and cast to uint8 first (this assumes intensities
    in [0, 1] - TODO confirm against the dataset). Each image is then smoothed
    with a 5x5 Gaussian blur and passed to cv2.Canny with thresholds 50 and 150.

    Returns:
        A new uint8 array with the same shape as X holding the edge maps;
        X itself is not modified.
    """
    edges = (X * 255.0).astype(np.uint8)
    for idx, frame in enumerate(edges):
        # blur first so that pixel noise does not turn into spurious edges
        smoothed = cv2.GaussianBlur(frame, (5, 5), 0)
        edges[idx] = cv2.Canny(smoothed, 50, 150)
    return edges

# test func edges: every grid row shows a raw image next to its edge map
X = get_edges(X_raw[:5])
side_by_side = []
for raw_img, edge_img in zip(X_raw[:5], X):
    side_by_side.extend((raw_img, edge_img))
pltImage(side_by_side, grid=(5, 2), figsize=(5, 10))

In [None]:
# divide X by corresponding y class
def splitXbyClass(X, y):
    new_X = [[] for i in range(np.unique(y).size)] # create array of arrays of size 10
    for i in range(X.shape[0]):
        _y = y[i][0]
        new_X[_y].append(X[i])
    return new_X

# test the function: report the size of each of the ten classes and show one example per class
X = splitXbyClass(X_raw, y_raw)
for i in range(10):
    print(f"Class {i} has {len(X[i])} images")
imgs = [X[i][0] for i in range(10)]
class_titles = [str(i) for i in range(10)]
pltImage(imgs, grid=(2, 5), title=class_titles, figsize=(10, 5))



In [None]:
def create_train_test_split(X_classes, train_ratio=0.8):
    """Split per-class sample lists into train and test sets, class by class.

    For every class the first int(len(class) * train_ratio) samples go to the
    training set and the rest to the test set; nothing is shuffled. The label
    of a sample is the position of its class inside X_classes.

    Args:
        X_classes: sequence whose i-th element holds the samples of class i.
        train_ratio: fraction of each class assigned to the training set.

    Returns:
        (X_train, y_train, X_test, y_test) as numpy arrays; both label arrays
        are column matrices of shape (n, 1).
    """
    train_samples, train_labels = [], []
    test_samples, test_labels = [], []

    for label, members in enumerate(X_classes):
        cut = int(len(members) * train_ratio)
        train_samples.extend(members[:cut])
        train_labels.extend([label] * cut)
        test_samples.extend(members[cut:])
        test_labels.extend([label] * (len(members) - cut))

    # make sure y matrices are (n, 1)
    y_train = np.array(train_labels)[:, np.newaxis]
    y_test = np.array(test_labels)[:, np.newaxis]

    return np.array(train_samples), y_train, np.array(test_samples), y_test

In [None]:
def reshape(a):
    """Flatten every sample of `a` into one row, keeping the first axis as is."""
    n_samples = a.shape[0]
    return np.reshape(a, (n_samples, -1))

In [None]:
# Implementation of Logistic Regression by hand
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), kept strictly inside (0, 1).

    The input is clipped to [-500, 500] before exponentiation and the result
    is clipped to [1e-15, 1 - 1e-15], so callers can take log(h) and
    log(1 - h) without hitting log(0).
    """
    eps = 1.0e-15
    z = np.clip(x, -500, 500)
    activation = 1 / (1 + np.exp(-z))
    return np.clip(activation, eps, 1 - eps)
def logisticRegression(X, y, num_iterations, learning_rate):
    """Fit a binary logistic regression without intercept by batch gradient descent.

    Args:
        X: (m, n) feature matrix.
        y: targets in {0, 1}; expected as an (m, 1) column so that `h - y`
            stays (m, 1).
        num_iterations: number of gradient steps.
        learning_rate: step size applied to the gradient.

    Returns:
        (theta, cost_history): the (n, 1) weight vector and the list of
        cross-entropy costs, one per iteration, each measured before that
        iteration's weight update.
    """
    n_samples, n_features = X.shape
    theta = np.zeros((n_features, 1))
    cost_history = []
    for _ in range(num_iterations):
        h = sigmoid(np.dot(X, theta))  # forward pass
        losses = -y * np.log(h) - (1 - y) * np.log(1 - h)
        cost_history.append(1/n_samples * np.sum(losses))  # cost calculation
        grad = (1/n_samples) * np.dot(X.T, (h - y))  # backwards pass
        theta -= learning_rate * grad  # update weights
    return theta, cost_history
def logisticRegressionReg(X, y, num_iterations, learning_rate, reg_strength):
    """Fit an L2-regularised binary logistic regression by batch gradient descent.

    No intercept is fitted. The function returns only `theta`, and
    predictOneVsAllLogisticRegression scores samples as X . theta, so an
    intercept trained here could never reach prediction time; fitting one
    would make the returned weights (and the reported costs) describe a
    different model than the one callers actually use. This also keeps the
    function consistent with logisticRegression, which has no intercept.

    Args:
        X: (m, n) feature matrix.
        y: targets in {0, 1}; expected as an (m, 1) column so that `h - y`
            stays (m, 1).
        num_iterations: number of gradient steps.
        learning_rate: step size applied to the gradient.
        reg_strength: L2 penalty factor; the cost gains
            reg_strength / (2m) * sum(theta^2).

    Returns:
        (theta, cost_history): the (n, 1) weight vector and the list of
        regularised costs, one per iteration, each measured before that
        iteration's weight update.
    """
    m, n = X.shape
    theta = np.zeros((n, 1))
    cost_history = []
    for _ in range(num_iterations):
        # forward pass
        h = sigmoid(np.dot(X, theta))
        # cost calculation with regularization
        reg_term = (reg_strength / (2*m)) * np.sum(np.square(theta))
        cost = 1/m * np.sum(-y * np.log(h) - (1 - y) * np.log(1 - h)) + reg_term
        # backward pass: data gradient plus the derivative of the L2 penalty
        grad = (1/m) * np.dot(X.T, (h - y)) + (reg_strength/m)*theta
        # update weights
        theta -= learning_rate * grad
        # track cost history
        cost_history.append(cost)
    return theta, cost_history
def oneVsAllLogisticRegression(X, y, num_iterations, learning_rate, regularization_rate=0):
    """Train one binary classifier per integer label between min(y) and max(y).

    For every label the targets are 1 where y equals the label and 0
    elsewhere. The plain trainer is used when regularization_rate is 0, the
    regularised one otherwise.

    Returns:
        (all_theta, all_cost): the list of per-label weight vectors in label
        order, and one flat list with the cost histories of all classifiers
        concatenated in that same order.
    """
    lowest, highest = np.min(y), np.max(y)
    all_theta, all_cost = [], []
    for label in range(lowest, highest + 1):
        target = np.where(y == label, 1, 0)
        if regularization_rate == 0:
            fitted = logisticRegression(X, target, num_iterations, learning_rate)
        else:
            fitted = logisticRegressionReg(X, target, num_iterations, learning_rate, regularization_rate)
        theta, cost_history = fitted
        all_theta.append(theta)
        all_cost += cost_history
    return all_theta, all_cost
def predictOneVsAllLogisticRegression(all_theta, X):
    """Pick, for every row of X, the classifier with the highest linear score.

    With all_theta being a list of k weight vectors of shape (n, 1), the
    stacked weights have shape (k, n, 1), the scores (m, k, 1), and the result
    is an (m, 1) array holding the position of the winning classifier inside
    all_theta.
    """
    stacked = np.asarray(all_theta)
    scores = np.dot(X, stacked)
    return scores.argmax(axis=1)

In [None]:
# Test 1: Custom Logistic Regression implementation (With raw data)
# 1000 gradient steps, learning rate 0.01, no regularization
X_train, y_train, X_test, y_test = create_train_test_split(splitXbyClass(X_raw, y_raw), train_ratio=0.8)
lr_all_theta1, lr_all_cost1 = oneVsAllLogisticRegression(reshape(X_train), y_train, 1000, 0.01, 0)

# predictions and labels are both (m, 1), so the comparison is element-wise
predict = predictOneVsAllLogisticRegression(lr_all_theta1, reshape(X_train))
print("Test 1 Pred Training: ", np.sum(predict == y_train) / len(y_train) * 100, "%")
predict = predictOneVsAllLogisticRegression(lr_all_theta1, reshape(X_test))
print("Test 1 Pred Test: ", np.sum(predict == y_test) / len(y_test) * 100, "%")
# concatenated cost curves of all one-vs-all classifiers
plt.plot(lr_all_cost1)
plt.show()

# Test 2: Custom Logistic Regression implementation (With edges data)
# same schedule as Test 1, with regularization strength 0.1
X_train, y_train, X_test, y_test = create_train_test_split(splitXbyClass(get_edges(X_raw), y_raw), train_ratio=0.8)
lr_all_theta2, lr_all_cost2 = oneVsAllLogisticRegression(reshape(X_train), y_train, 1000, 0.01, 0.1)

predict = predictOneVsAllLogisticRegression(lr_all_theta2, reshape(X_train))
print("Test 2 Pred Training: ", np.sum(predict == y_train) / len(y_train) * 100, "%")
predict = predictOneVsAllLogisticRegression(lr_all_theta2, reshape(X_test))
print("Test 2 Pred Test: ", np.sum(predict == y_test) / len(y_test) * 100, "%")
plt.plot(lr_all_cost2)
plt.show()

In [None]:
# Basic Logistic Regression (With raw data)
# 90% of every class for training, the remaining 10% for testing
X_train, y_train, X_test, y_test = create_train_test_split(
    splitXbyClass(X_raw, y_raw), train_ratio=0.9
)
model = LogisticRegression(max_iter=1000)
model.fit(reshape(X_train), y_train.ravel())

# accuracy analysis: same report for both subsets, training first, then test
for subset, X_part, y_part in (("training", X_train, y_train), ("test", X_test, y_test)):
    y_pred = model.predict(reshape(X_part))
    a = accuracy_score(y_part.ravel(), y_pred)
    print(f"Accuracy on {subset}: {a * 100:.2f}%")

In [None]:
# Basic Logistic Regression (With edge detection)
# every image is replaced by its edge map before the 90/10 per-class split
X = get_edges(X_raw)
X_train, y_train, X_test, y_test = create_train_test_split(
    splitXbyClass(X, y_raw), train_ratio=0.9
)
model = LogisticRegression(max_iter=1000)
model.fit(reshape(X_train), y_train.ravel())

# accuracy analysis: same report for both subsets, training first, then test
for subset, X_part, y_part in (("training", X_train, y_train), ("test", X_test, y_test)):
    y_pred = model.predict(reshape(X_part))
    a = accuracy_score(y_part.ravel(), y_pred)
    print(f"Accuracy on {subset}: {a * 100:.2f}%")

In [None]:
# Basic Logistic Regression (With augmentation)
# Split BEFORE augmenting: augment() emits several variants of every source
# image, so augmenting first would put variants of the same image on both
# sides of the split and inflate the test accuracy. Only the training portion
# is augmented; the test set stays made of untouched held-out images.
X_train, y_train, X_test, y_test = create_train_test_split(splitXbyClass(X_raw, y_raw), train_ratio=0.9)
X_train, y_train = augment(X_train, y_train, [-10, 10], [0.8, 1.2], 2)
model = LogisticRegression(max_iter=1000)
model.fit(reshape(X_train), y_train.ravel())

# accuracy analysis
y_pred = model.predict(reshape(X_train))
a = accuracy_score(y_train.ravel(), y_pred)
print(f"Accuracy on training: {a * 100:.2f}%")
y_pred = model.predict(reshape(X_test))
a = accuracy_score(y_test.ravel(), y_pred)
print(f"Accuracy on test: {a * 100:.2f}%")

In [None]:
# Basic Logistic Regression (With edge detection and augmentation)
# Split BEFORE augmenting so that no variant of a test image is seen during
# training; augment only the training portion, then apply the same edge
# transform to both sets.
X_train, y_train, X_test, y_test = create_train_test_split(splitXbyClass(X_raw, y_raw), train_ratio=0.9)
X_train, y_train = augment(X_train, y_train, [-10, 10], [0.8, 1.2], 3)
X_train = get_edges(X_train)
X_test = get_edges(X_test)
model = LogisticRegression(max_iter=1000, C=0.1, solver='lbfgs')
model.fit(reshape(X_train), y_train.ravel())

# accuracy analysis
y_pred = model.predict(reshape(X_train))
a = accuracy_score(y_train.ravel(), y_pred)
print(f"Accuracy on training: {a * 100:.2f}%")
y_pred = model.predict(reshape(X_test))
a = accuracy_score(y_test.ravel(), y_pred)
print(f"Accuracy on test: {a * 100:.2f}%")

In [None]:
# Grid search over C (inverse regularization strength) with 5-fold cross-validation
param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10],
    'solver': ['lbfgs']
}
# NOTE(review): max_iter is 100 here while the other cells use 1000 - confirm this is intended
model = LogisticRegression(max_iter=100)
grid_model = GridSearchCV(estimator=model, cv=5, scoring='accuracy', param_grid=param_grid)

X_train, y_train, X_test, y_test = create_train_test_split(splitXbyClass(X_raw, y_raw), train_ratio=0.9)
grid_model.fit(reshape(X_train), y_train.ravel())

# Best parameters and best cross-validated score
print("Best parameters:", grid_model.best_params_)
print("Best cross-validated score:", grid_model.best_score_)

# Test set evaluation
best_model = grid_model.best_estimator_
y_test_pred = best_model.predict(reshape(X_test))
test_accuracy = accuracy_score(y_test.ravel(), y_test_pred)
print("Test set accuracy:", test_accuracy)

# One mean CV score per candidate; only one solver is listed, so there is
# exactly one candidate per value of C.
results = grid_model.cv_results_
plt.plot(param_grid['C'], results['mean_test_score'])
plt.xscale('log')
plt.xlabel('C (Inverse of Regularization Strength)')
plt.ylabel('Mean Cross-Validated Accuracy')
plt.title('Grid Search Results for Logistic Regression')
plt.show()

# Evaluate: score() returns a fraction in [0, 1]; convert it before labelling it "%"
train_accuracy = grid_model.score(reshape(X_train), y_train.ravel())
test_accuracy = grid_model.score(reshape(X_test), y_test.ravel())
print("\nTrain Accuracy:", round(train_accuracy * 100, 2), "%")
print("Test Accuracy:", round(test_accuracy * 100, 2), "%")

In [None]:
# Confusion Matrix of the best Logistic Regression found
# normalize='true' divides every row by the number of samples of that true
# class, so the cells read as per-class percentages
y_test_pred = best_model.predict(reshape(X_test))
cm = confusion_matrix(y_test.ravel(), y_test_pred, normalize='true')
disp = ConfusionMatrixDisplay(cm, display_labels=range(10))
disp.plot(values_format=".0%", cmap=plt.cm.Blues)
plt.show()


# Using TensorFlow/Keras

In [None]:
# No hidden layers, similar to logistic regression
# Per-class 80/10/10 split: 80% train, then the remaining 20% halved into validation and test
X_train, y_train, X_temp, y_temp = create_train_test_split(splitXbyClass(X_raw, y_raw), train_ratio=0.8)
X_val, y_val, X_test, y_test = create_train_test_split(splitXbyClass(X_temp, y_temp), train_ratio=0.5)

# a single softmax layer applied directly to the flattened pixels
model = Sequential()
model.add(Dense(10, activation='softmax'))  # Output layer for multi-class classification
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

history = model.fit(
    reshape(X_train), y_train,
    epochs=500, batch_size=32,
    validation_data=(reshape(X_val), y_val),
)

# final accuracy on the training and the held-out test split
for split_name, X_part, y_part in (("Train", X_train, y_train), ("Test", X_test, y_test)):
    loss, accuracy = model.evaluate(reshape(X_part), y_part)
    print(f"{split_name} Accuracy: {accuracy * 100:.2f}%")

# Plot the loss
loss = history.history['loss']
val_loss = history.history.get('val_loss')  # Will exist if validation data is used
plt.figure(figsize=(8, 6))
plt.plot(loss, label='Training Loss')
if val_loss:
    plt.plot(val_loss, label='Validation Loss')
plt.title('Loss Over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
# Simple nn with two hidden layers
# Per-class 80/10/10 split: 80% train, then the remaining 20% halved into validation and test
X_train, y_train, X_temp, y_temp = create_train_test_split(splitXbyClass(X_raw, y_raw), train_ratio=0.8)
X_val, y_val, X_test, y_test = create_train_test_split(splitXbyClass(X_temp, y_temp), train_ratio=0.5)

model = Sequential()
model.add(Dense(128, activation='relu'))    # first hidden layer
model.add(Dense(64, activation='relu'))     # second hidden layer
model.add(Dense(10, activation='softmax'))  # Output layer for multi-class classification
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

history = model.fit(
    reshape(X_train), y_train,
    epochs=100, batch_size=32,
    validation_data=(reshape(X_val), y_val),
)

# final accuracy on the training and the held-out test split
for split_name, X_part, y_part in (("Train", X_train, y_train), ("Test", X_test, y_test)):
    loss, accuracy = model.evaluate(reshape(X_part), y_part)
    print(f"{split_name} Accuracy: {accuracy * 100:.2f}%")

# Plot the loss
loss = history.history['loss']
val_loss = history.history.get('val_loss')  # Will exist if validation data is used
plt.figure(figsize=(8, 6))
plt.plot(loss, label='Training Loss')
if val_loss:
    plt.plot(val_loss, label='Validation Loss')
plt.title('Loss Over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
# One hidden layer with real-time data augmentation
# (80%, 10%, 10%)
tf.keras.backend.clear_session()

X_train, y_train, X_temp, y_temp = create_train_test_split(splitXbyClass(X_raw, y_raw), train_ratio=0.8)
X_val, y_val, X_test, y_test = create_train_test_split(splitXbyClass(X_temp, y_temp), train_ratio=0.5)
# ImageDataGenerator.flow is fed 4-D batches here, so add an explicit channel axis
X_train = X_train.reshape(X_train.shape[0], 64, 64, 1)
X_val = X_val.reshape(X_val.shape[0], 64, 64, 1)
X_test = X_test.reshape(X_test.shape[0], 64, 64, 1)

model = Sequential([
    Input(shape=(64, 64, 1)),
    Flatten(),
    Dense(128, activation='relu'),  # Hidden layer with 128 neurons
    Dense(n_classes, activation='softmax')  # Output layer for multi-class classification
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# less randomness because of model simplicity
datagen = ImageDataGenerator(
    rotation_range=15,         # Randomly rotate images
    # width_shift_range=0.1,     # Randomly shift images horizontally
    # height_shift_range=0.1,    # Randomly shift images vertically
    # shear_range=0.2,           # Shear transformations
    zoom_range=0.2,            # Random zoom
    fill_mode='nearest'        # Fill in pixels after transformations
)
# only the training stream is augmented; validation uses the untouched images
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=32),
    epochs=100,
    validation_data=(X_val, y_val)
)

loss, accuracy = model.evaluate(X_train, y_train)
print(f"Train Accuracy: {accuracy * 100:.2f}%")
# the test figure must come from the held-out test split, not from the training data
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

loss = history.history['loss']
val_loss = history.history.get('val_loss')  # Will exist if validation data is used
# Plot the loss
plt.figure(figsize=(8, 6))
plt.plot(loss, label='Training Loss')

if val_loss:
    plt.plot(val_loss, label='Validation Loss')
plt.title('Loss Over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Complex model (CNN architecture) with real-time data augmentation
# (80%, 10%, 10%)
X_train, y_train, X_temp, y_temp = create_train_test_split(splitXbyClass(X_raw, y_raw), train_ratio=0.8)
X_val, y_val, X_test, y_test = create_train_test_split(splitXbyClass(X_temp, y_temp), train_ratio=0.5)
# Conv2D expects a channel axis: (samples, 64, 64) -> (samples, 64, 64, 1)
X_train = X_train.reshape(X_train.shape[0], 64, 64, 1)
X_val = X_val.reshape(X_val.shape[0], 64, 64, 1)
X_test = X_test.reshape(X_test.shape[0], 64, 64, 1)

# Random transformations applied to the training batches only
datagen = ImageDataGenerator(
    rotation_range=10,         # Random rotation
    width_shift_range=0.1,     # Random horizontal shift
    height_shift_range=0.1,    # Random vertical shift
    shear_range=0.1,           # Random shear
    zoom_range=0.25,           # Random zoom
    fill_mode='nearest'        # Fill in pixels after transformations
)


model = Sequential([
    # Explicit input shape so the model is built (and shape-checked) before training
    Input(shape=(64, 64, 1)),

    # First Convolutional Layer
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.2),

    # Second Convolutional Layer
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.2),

    # Flatten the 2D outputs to 1D for the Dense layers
    Flatten(),

    # Fully Connected Layer
    Dense(128, activation='relu'),
    Dropout(0.5),

    # Output Layer: one unit per class found in the dataset
    Dense(n_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=32),
    validation_data=(X_val, y_val),
    epochs=100)



In [None]:

# Evaluate the model on each of the three splits
for split_name, X_part, y_part in (("Train", X_train, y_train), ("Val", X_val, y_val), ("Test", X_test, y_test)):
    loss, accuracy = model.evaluate(X_part, y_part)
    print(f"{split_name} Accuracy: {accuracy * 100:.2f}%")

# Plot the loss
loss = history.history['loss']
val_loss = history.history.get('val_loss')  # Will exist if validation data is used
plt.figure(figsize=(8, 6))
plt.plot(loss, label='Training Loss')
if val_loss:
    plt.plot(val_loss, label='Validation Loss')
plt.title('Loss Over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.show()

# Confusion matrix of the model evaluated above on the test split
# (rows are normalised per true class); the predicted class is the argmax of the softmax outputs
y_test_pred = np.argmax(model.predict(X_test), axis=1)
cm = confusion_matrix(y_test.ravel(), y_test_pred, normalize='true')
disp = ConfusionMatrixDisplay(cm, display_labels=range(10))
disp.plot(values_format=".0%", cmap=plt.cm.Blues)
plt.show()

# show every mislabelled test image, titled "<predicted> != <true>"
misclassified_indices = np.where(y_test.ravel() != y_test_pred)
for i in misclassified_indices[0]:
    pltImage(X_test[i], title=f"{y_test_pred[i]} != {y_test.ravel()[i]}")

In [None]:
# Show the first image of every class for the full dataset, the training
# split and the test split (one 1x10 row each)
X_train, y_train, X_test, y_test = create_train_test_split(splitXbyClass(X_raw, y_raw), train_ratio=0.8)

for X_part, y_part in ((X_raw, y_raw), (X_train, y_train), (X_test, y_test)):
    a = splitXbyClass(X_part, y_part)
    pltImage([i[0] for i in a], grid=(1,10), figsize=(10,10))