# Comparison of Various Machine Learning Models for Handwritten Character Recognition
This is our Jupyter Notebook, which runs the code we used to produce our results step by step.

Import necessary helper functions.

In [1]:
import pandas as pd
from torch import load as torch_load
from testing_models import evaluate_model, test_model_nn
from preprocess import get_data
from classifiers import *
from nn import *
from joblib import dump, load
from skopt import BayesSearchCV
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from skopt.space import Integer
from sklearn import svm
from torch.optim.lr_scheduler import ReduceLROnPlateau

# 1 Data
We used data from the Alpha_Num dataset on Kaggle. It contains over 108,000 images of handwritten characters. Each image is approximately 28x28 pixels and is in gray-scale. Before training the models, we preprocessed the images to ensure that each one has the same features:
* 28x28 pixels: any image less than 28x28 was padded
* Gray-Scale

For more information on getting data please refer to **preprocess.py**

In [2]:
# Load features and labels for the "train" and "test" splits via the project
# helper in preprocess.py; both calls are given the same CSV file name.
# NOTE(review): presumably X_* are flattened 28x28 images and y_* class labels
# -- confirm against preprocess.get_data.
X_train, y_train = get_data("train", "ascii_file_counts.csv")
X_test, y_test = get_data("test", "ascii_file_counts.csv")

# 2 Training Traditional Machine Learning Models
In this section we will train (or load) and show a brief testing of the following models:
* XGBoost
* Random Forest
* K-Nearest Neighbors
* Support Vector Machine (SVM)

The actual results and comparison of the models are presented after this section, once the models have been trained (or loaded).

## 2.1 Training Random Forest

In [3]:
# Base estimator handed to the Bayesian hyper-parameter search below.
RF_model = RandomForestClassifier()

# Search space for BayesSearchCV: a (low, high) tuple is an integer range,
# a list is a set of categorical choices.
param_space = {
    'n_estimators': (10, 500),  # Number of trees
    'max_depth': (1, 100),  # Maximum depth
    'min_samples_split': (2, 20),  # Minimum number of samples to split
    'min_samples_leaf': (1, 20),  # Minimum number of samples to be leaf
    'max_features': ['sqrt', 'log2', None],  # Features to consider
    'criterion': ['gini', 'entropy', 'log_loss'],  # Measure for split quality
    'class_weight': ['balanced', 'balanced_subsample', None],  # Class weights for handling imbalances
}

bayes_opt = BayesSearchCV(
    estimator=RF_model,
    search_spaces=param_space,
    n_iter=30,  # Number of hyper-parameter settings to try
    cv=5,       # 5-fold cross-validation
    # This is multi-class classification: the labels are categories, so the
    # squared distance between class ids (MSE) carries no meaning. Select
    # hyper-parameters with the weighted F1 score, which is also the metric
    # reported for the models later in this notebook.
    scoring='f1_weighted',
    n_jobs=-1   # Use all available CPU cores
)

We offer two ways to obtain the Random Forest model. The first code block below runs the full Bayesian hyperparameter search, which is how we originally trained the model, but it takes a long time. Alternatively, the second code block trains the model directly with the hyperparameters we used and loads the output of the Bayesian optimizer from the CSV file we provide, so that it can be used later. We would have liked to provide the already-trained model, as we do for the other models, but unfortunately the file is too large for GitHub. Please run only one of the two code blocks that follow, not both.

In [None]:
# Option 1 (slow): run the Bayesian hyper-parameter search and keep the best
# estimator it found, together with the per-iteration search results.
bayes_opt.fit(X_train, y_train)
RF_model = bayes_opt.best_estimator_
# The evaluation cell below reads `RF_Model` (capital M), which is the name
# the direct-training cell defines. Bind it here as well so that the
# evaluation works whichever of the two options was run.
RF_Model = RF_model
RF_bayes_df = pd.DataFrame(bayes_opt.cv_results_)

In [5]:
# Option 2: build the forest directly from fixed hyper-parameters instead of
# re-running the search, and read the saved search results from CSV.
RF_Model = RandomForestClassifier(
    n_estimators=484,
    criterion='log_loss',
    max_depth=79,
    max_features='sqrt',
    min_samples_split=19,
    min_samples_leaf=1,
    class_weight='balanced',
)
RF_bayes_df = pd.read_csv('RF_bayes_df.csv')

# Fitting is kept as a separate statement: it is the slow step.
RF_Model.fit(X_train, y_train)

KeyboardInterrupt: 

In [None]:
# Predict on the held-out test split and report the summary metrics.
y_pred = RF_Model.predict(X_test)
f1, acc, cm, prec, recall = evaluate_model(y_test, y_pred)

# A single print call; sep="\n" yields the same line-by-line output as
# printing each entry separately.
print(
    "Random Forest: \n",
    f"F1 Score: {f1}",
    f"Accuracy: {acc}",
    f"Precision: {prec}",
    f"Recall: {recall}",
    f"Confusion Matrix: \n{cm}",
    sep="\n",
)

## 2.2 Training XGBoost

In [None]:
# Base multi-class XGBoost classifier (93 classes) handed to the search below.
XG_model = XGBClassifier(
    objective='multi:softprob',
    num_class=93,
    booster='gbtree',
    eval_metric='mlogloss',
)

# Integer search ranges explored by the Bayesian optimizer.
param_space = {
    'n_estimators': Integer(50, 300),        # Number of trees
    'max_depth': Integer(3, 30),             # Depth of each tree
}

bayes_opt = BayesSearchCV(
    estimator=XG_model,
    search_spaces=param_space,
    n_iter=30,  # Number of hyper-parameter settings to try
    cv=5,       # 5-fold cross-validation
    # Class ids are categorical, so a regression metric such as MSE is not a
    # meaningful objective here; use the weighted F1 score, which is also the
    # metric reported for the models later in this notebook.
    scoring='f1_weighted',
    n_jobs=-1,  # Use all available CPU cores
)

We again provide 2 ways to get the model. You can choose to train using the first block or load the model we have already trained using the second block.

In [None]:
# Option 1 (slow): run the Bayesian search on the training split, keep the
# best estimator it found, and store the per-iteration results as a DataFrame.
bayes_opt.fit(X_train, y_train)
XG_model = bayes_opt.best_estimator_
XG_bayes_df = pd.DataFrame(bayes_opt.cv_results_)

Loading trained model code

In [None]:
# Option 2: load the already-trained XGBoost model with joblib.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
XG_model = load('xgboost.joblib')

In [None]:
# Predict on the held-out test split and report the summary metrics.
y_pred = XG_model.predict(X_test)
f1, acc, cm, prec, recall = evaluate_model(y_test, y_pred)

# A single print call; sep="\n" yields the same line-by-line output as
# printing each entry separately.
print(
    "XGBoost: \n",
    f"F1 Score: {f1}",
    f"Accuracy: {acc}",
    f"Precision: {prec}",
    f"Recall: {recall}",
    f"Confusion Matrix: \n{cm}",
    sep="\n",
)

## 2.3 Training KNN

In [None]:
# Distance-weighted KNN: closer neighbours get a larger vote.
KNN_model = KNeighborsClassifier(weights="distance")

param_space = {
    'n_neighbors': Integer(1, 200)      # Number of neighbours that vote
}

bayes_opt = BayesSearchCV(
    estimator=KNN_model,
    search_spaces=param_space,
    n_iter=20,  # Number of hyper-parameter settings to try
    cv=5,       # 5-fold cross-validation
    # Class ids are categorical, so a regression metric such as MSE is not a
    # meaningful objective here; use the weighted F1 score, which is also the
    # metric reported for the models later in this notebook.
    scoring='f1_weighted',
    n_jobs=-1   # Use all available CPU cores
)

Training code

In [None]:
# Option 1 (slow): run the Bayesian search on the training split, keep the
# best estimator it found, and store the per-iteration results as a DataFrame.
bayes_opt.fit(X_train, y_train)
KNN_model = bayes_opt.best_estimator_
KNN_bayes_df = pd.DataFrame(bayes_opt.cv_results_)

Code for loading the trained model. KNN does not learn weights, so fitting a single model takes little time; we still provide this option because the Bayesian optimizer takes a long time to run.

In [None]:
# Option 2: load the already-fitted KNN model with joblib.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
KNN_model = load('knn_model.pkl')

In [None]:
# Predict on the held-out test split and report the summary metrics.
y_pred = KNN_model.predict(X_test)
# Compare the true labels with the predictions. The previous call passed the
# feature matrix X_test in place of y_pred, leaving y_pred unused.
f1, acc, cm, prec, recall = evaluate_model(y_test, y_pred)

print("KNN: \n")
print(f"F1 Score: {f1}")
print(f"Accuracy: {acc}")
print(f"Precision: {prec}")
print(f"Recall: {recall}")
print(f"Confusion Matrix: \n{cm}")

## 2.4 Training SVM

In [None]:
# Base support-vector classifier handed to the search below.
SVM_model = svm.SVC()

# Search space: gamma is sampled on a log scale; kernel is categorical;
# degree and coef0 are integer ranges.
param_space = {
    'gamma': (1e-4, 1e+1, 'log-uniform'),
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'degree': (2, 5),
    'coef0': (-10, 10)
}

bayes_opt = BayesSearchCV(
    estimator=SVM_model,
    search_spaces=param_space,
    n_iter=20,  # Number of hyper-parameter settings to try
    cv=5,       # 5-fold cross-validation
    # Class ids are categorical, so a regression metric such as MSE is not a
    # meaningful objective here; use the weighted F1 score, which is also the
    # metric reported for the models later in this notebook.
    scoring='f1_weighted',
    n_jobs=-1,  # Use all available CPU cores
    verbose=3   # Print progress while the search runs
)

Training code

In [None]:
# Option 1 (slow): run the Bayesian search on the training split.
bayes_opt.fit(X_train, y_train)
# Keep the best estimator and the search results, as the other training cells
# do. Without this, SVM_model would still be the unfitted SVC created above
# and the evaluation cell would fail.
SVM_model = bayes_opt.best_estimator_
SVM_bayes_df = pd.DataFrame(bayes_opt.cv_results_)

Loading trained model code

In [None]:
# Option 2: load the already-trained SVM with joblib and read the saved
# Bayesian-search results from CSV.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
SVM_model = load('SVM.joblib')
SVM_bayes_df = pd.read_csv('SVM_bayes.csv')

In [None]:
# Predict on the held-out test split and report the summary metrics, using the
# same predict-then-evaluate call pattern as the Random Forest and XGBoost
# cells (evaluate_model(y_true, y_pred)).
y_pred = SVM_model.predict(X_test)
f1, acc, cm, prec, recall = evaluate_model(y_test, y_pred)

print("SVM: \n")
print(f"F1 Score: {f1}")
print(f"Accuracy: {acc}")
print(f"Precision: {prec}")
print(f"Recall: {recall}")
print(f"Confusion Matrix: \n{cm}")

# 3 Training Neural Network Models
In this section we will train (or load) and show a brief testing of the following models:
* Feed Forward Neural Network
* CNN (Convolutional Neural Network )
* CNN + LSTM (CNN with an LSTM (Long Short-Term Memory) layer)
* Transformer (With CNN features)

The details of each model and the PyTorch implementation as well as the training loop details can be found in the **nn.py** file.

## Initializing Data

In [None]:
# Hyper-parameters shared by the neural-network cells below.
input_size = 28 * 28   # pixels per image once flattened
num_classes = 93
learning_rate = 0.001
num_epochs = 20
batch_size = 64

# One dataset + loader per split; all three read the same CSV file and differ
# only in the data directory.
# NOTE(review): the test and validation loaders are shuffled as well. That is
# usually unnecessary for evaluation -- confirm the helpers in nn.py /
# testing_models.py do not depend on it before changing.
dataset = AlphaNumDataset(csv_dir="ascii_file_counts.csv", data_dir="train")
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
dataset_test = AlphaNumDataset(csv_dir="ascii_file_counts.csv", data_dir="test")
data_loader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=True)
val_dataset = AlphaNumDataset(csv_dir="ascii_file_counts.csv", data_dir="validation")
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

## 3.1 Training Feed Forward Neural Network

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Feed-forward network with a hidden size of 288, moved to the chosen device.
FF_model = FeedForwardNN(input_size=input_size, num_classes=num_classes, hidden_size=288).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(FF_model.parameters(), lr=learning_rate)
# Multiply the learning rate by 0.2 when the monitored value has not decreased
# for 10 scheduler steps.
# NOTE(review): `patience` below is 3 while the scheduler patience is 10; if
# train_model uses `patience` for early stopping, training would stop before
# the scheduler can ever fire -- confirm in nn.py.
scheduler = ReduceLROnPlateau(optimizer, mode = 'min', factor = .2, patience =10)
# NOTE(review): best_val_loss and counter are not passed to train_model in the
# training cell; they look like leftovers from an inline training loop.
best_val_loss = float('inf')
patience = 3
counter = 0

Training

In [None]:
# Train the feed-forward network with the project training loop from nn.py,
# using the training and validation loaders created above.
train_model(num_epochs, data_loader, val_loader, device, FF_model, criterion, optimizer, scheduler, patience)

Loading trained model

In [None]:
# 'FeedForward.pth' is a PyTorch checkpoint, so it must be read with
# torch.load (imported above as `torch_load`). The bare `load` in this
# notebook is joblib.load, which cannot read a .pth file.
# map_location makes a checkpoint saved on a GPU loadable on a CPU-only machine.
# NOTE(review): recent PyTorch releases default to weights_only=True; if the
# checkpoint stores the whole model object, pass weights_only=False (trusted
# files only, since that unpickles arbitrary objects).
FF_model = torch_load('FeedForward.pth', map_location=device)

In [None]:
# Evaluate the feed-forward network on the test loader and report the metrics.
f1, acc, prec, recall = test_model_nn(FF_model, data_loader_test, device, criterion)

print("Feed Forward Neural Network: \n")
print(f"F1 Score (Weighted Among Classes): {f1}")
print(f"Accuracy: {acc}")
print(f"Precision (Weighted Among Classes): {prec}")
# Label typo fixed: this previously printed "Recalln".
print(f"Recall (Weighted Among Classes): {recall}")

In [None]:
# Create the XGBoost metric plots (versus number of estimators and versus depth).
# NOTE(review): on a fresh kernel this cell fails. `estimators` and the
# *_list_estimators lists are only created by the Random Forest sweep further
# down (so they would hold Random Forest results, not XGBoost results), and
# `depth` and the *_list_depth lists are not defined anywhere in this notebook.
# The XGBoost sweep that should populate these inputs appears to be missing.
XGBoost_Estimators_Plots(estimators, f1_list_estimators, acc_list_estimators, prec_list_estimators, recall_list_estimators)
XGBoost_Depth_Plots(depth, f1_list_depth, acc_list_depth, prec_list_depth, recall_list_depth)
  

In [None]:
# Random Forest sweep: train one forest per tree count and record its
# test-set metrics.
estimators = range(20, 100, 10)

# One history list per metric, aligned index-by-index with `estimators`.
f1_list_estimators, acc_list_estimators = [], []
prec_list_estimators, recall_list_estimators = [], []

for estimator_count in estimators:
    model = train_random_forest(X_train, y_train, n_estimators=estimator_count)
    y_pred = model.predict(X_test)
    # NOTE(review): this passes the model as a first argument, whereas the
    # evaluation cells in section 2 call evaluate_model(y_test, y_pred) --
    # confirm which signature testing_models.evaluate_model expects.
    f1, acc, cm, prec, recall = evaluate_model(model, y_test, y_pred)
    # cm is returned but not tracked in this sweep.
    f1_list_estimators.append(f1)
    acc_list_estimators.append(acc)
    prec_list_estimators.append(prec)
    recall_list_estimators.append(recall)

In [None]:
# Plot the Random Forest metrics against the swept tree counts. The first
# argument was `depth`, which is never defined in this notebook; the metric
# lists were produced by iterating over `estimators`, so that is the matching
# x-axis.
RandomForest_plots(estimators, f1_list_estimators, acc_list_estimators, prec_list_estimators, recall_list_estimators)
   

In [None]:
# KNN sweep: fit one classifier per neighbour count and record its test-set
# metrics.
neighbors = range(5, 100, 5)

# One history list per metric, aligned index-by-index with `neighbors`.
f1_list_neighbors, acc_list_neighbors = [], []
prec_list_neighbors, recall_list_neighbors = [], []

for neighbor in neighbors:
    model = train_knn(X_train, y_train, n_neighbors=neighbor)
    y_pred = model.predict(X_test)
    # NOTE(review): this passes the model as a first argument, whereas the
    # evaluation cells in section 2 call evaluate_model(y_test, y_pred) --
    # confirm which signature testing_models.evaluate_model expects.
    f1, acc, cm, prec, recall = evaluate_model(model, y_test, y_pred)
    # cm is returned but not tracked in this sweep.
    f1_list_neighbors.append(f1)
    acc_list_neighbors.append(acc)
    prec_list_neighbors.append(prec)
    recall_list_neighbors.append(recall)

In [None]:
# Plot the KNN metrics against the swept neighbour counts. The first argument
# was `depth`, which is never defined in this notebook; the metric lists were
# produced by iterating over `neighbors`, so that is the matching x-axis.
KNN_Plots(neighbors, f1_list_neighbors, acc_list_neighbors, prec_list_neighbors, recall_list_neighbors)

In [None]:
# SVM sweep: fit one support-vector classifier per kernel and record its
# test-set metrics.
# 'precomputed' is intentionally not swept: it requires X to be a square
# (n_samples, n_samples) kernel matrix, so fitting it on the raw feature
# matrix raises an error.
kernels = ['poly', 'rbf', 'linear', 'sigmoid']

# One history list per metric, aligned index-by-index with `kernels`.
f1_list_kernel = []
acc_list_kernel = []
prec_list_kernel = []
recall_list_kernel = []

for kernel in kernels:
    # Train an SVM here. The previous code called train_knn(..., kernel=...),
    # a copy-paste slip from the KNN sweep.
    model = svm.SVC(kernel=kernel).fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # NOTE(review): this keeps the call form used by the other sweep loops;
    # the evaluation cells in section 2 call evaluate_model(y_test, y_pred)
    # instead -- confirm which signature testing_models.evaluate_model expects.
    f1, acc, cm, prec, recall = evaluate_model(model, y_test, y_pred)
    f1_list_kernel.append(f1)
    acc_list_kernel.append(acc)
    prec_list_kernel.append(prec)
    recall_list_kernel.append(recall)


In [None]:
# Plot the SVM metrics collected in the kernel sweep, one entry per kernel.
SVM_plot(kernels, f1_list_kernel, acc_list_kernel, prec_list_kernel, recall_list_kernel)

In [3]:
# Per-metric result lists for the neural networks; each evaluation cell below
# appends one entry to every list.
f1_list, acc_list, prec_list, recall_list = [], [], [], []

In [None]:
# Evaluate the saved feed-forward network on the test split.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()

dataset_test = AlphaNumDataset(csv_dir="ascii_file_counts.csv", data_dir="test")
data_loader_test = DataLoader(dataset_test, batch_size=64, shuffle=True)

# map_location lets a checkpoint saved on a GPU load on a CPU-only machine;
# without it torch.load raises when CUDA is unavailable.
test = torch.load("FeedForward.pth", map_location=device)
f1, acc, prec, recall = test_model_nn(test, data_loader_test, device, criterion)

In [5]:
# Record the feed-forward network's scores (first entry of each list).
for metric_history, metric_value in (
    (f1_list, f1),
    (acc_list, acc),
    (prec_list, prec),
    (recall_list, recall),
):
    metric_history.append(metric_value)

In [None]:
# Evaluate the saved CNN on the test split.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()

dataset_test = AlphaNumDataset(csv_dir="ascii_file_counts.csv", data_dir="test")
data_loader_test = DataLoader(dataset_test, batch_size=64, shuffle=True)

# map_location lets a checkpoint saved on a GPU load on a CPU-only machine;
# without it torch.load raises when CUDA is unavailable.
test = torch.load("cnn.pth", map_location=device)
f1, acc, prec, recall = test_model_nn(test, data_loader_test, device, criterion)

In [7]:
# Record the CNN's scores (second entry of each list).
for metric_history, metric_value in (
    (f1_list, f1),
    (acc_list, acc),
    (prec_list, prec),
    (recall_list, recall),
):
    metric_history.append(metric_value)

In [None]:
# Evaluate the saved CNN + LSTM model on the test split.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()

dataset_test = AlphaNumDataset(csv_dir="ascii_file_counts.csv", data_dir="test")
data_loader_test = DataLoader(dataset_test, batch_size=64, shuffle=True)

# map_location lets a checkpoint saved on a GPU load on a CPU-only machine;
# without it torch.load raises when CUDA is unavailable.
test = torch.load("cnn_lstm.pth", map_location=device)
f1, acc, prec, recall = test_model_nn(test, data_loader_test, device, criterion)

In [9]:
# Record the CNN + LSTM model's scores (third entry of each list).
for metric_history, metric_value in (
    (f1_list, f1),
    (acc_list, acc),
    (prec_list, prec),
    (recall_list, recall),
):
    metric_history.append(metric_value)

In [None]:
# Evaluate the saved Transformer model on the test split.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()

dataset_test = AlphaNumDataset(csv_dir="ascii_file_counts.csv", data_dir="test")
data_loader_test = DataLoader(dataset_test, batch_size=64, shuffle=True)

# map_location lets a checkpoint saved on a GPU load on a CPU-only machine;
# without it torch.load raises when CUDA is unavailable.
test = torch.load("transformer.pth", map_location=device)
f1, acc, prec, recall = test_model_nn(test, data_loader_test, device, criterion)

In [11]:
# Record the Transformer's scores (fourth entry of each list).
for metric_history, metric_value in (
    (f1_list, f1),
    (acc_list, acc),
    (prec_list, prec),
    (recall_list, recall),
):
    metric_history.append(metric_value)

In [None]:
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator

# 2x2 grid of bar charts comparing the four neural networks, one panel per
# metric.
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

models = ['Feed Forward', 'CNN', 'CNN-LSTM', 'Transformer']
colors = ['red', 'blue', 'green', 'purple']

# (panel title, scores) in row-major panel order: top-left, top-right,
# bottom-left, bottom-right -- the order in which axes.flat walks the grid.
panels = (
    ("F1 Scores by Model", f1_list),
    ("Accuracy by Model", acc_list),
    ("Precision by Model", prec_list),
    ("Recall by Model", recall_list),
)

for ax, (title, scores) in zip(axes.flat, panels):
    ax.bar(models, scores, color=colors, alpha=0.7)
    ax.set_title(title)
    # Every panel shares the same y-range, starting at 0.6 rather than 0.
    ax.set_ylim(0.6, 1)

# Adjust layout to avoid overlap
plt.tight_layout()

# Show the figure
plt.show()
