# Testing All Models
In this notebook we will test all machine learning and deep learning models.

In [1]:
from testing_models import evaluate_model, test_model_nn
from preprocess import get_data
from classifiers import *
from nn import *
from torch import *

## Data
We used the Alpha_Num dataset from Kaggle, which contains over 108,000 images of handwritten characters. Each image is approximately 28x28 pixels and grayscale. Before training the models, we preprocessed the images so that every one has the same format:
* 28x28 pixels: any image smaller than 28x28 was padded
* Grayscale

In [2]:
# Load the "train" and "test" splits in that order; both calls pass the same
# CSV file name to get_data.
(X_train, y_train), (X_test, y_test) = (
    get_data(split, "ascii_file_counts.csv") for split in ("train", "test")
)

## Machine Learning Models
In this section we will run and test the following models:
* XGBoost
* Random Forest
* Perceptron
* Support Vector Machine
* K-Nearest Neighbors

For more information, please refer to **classifiers.py**.

In [None]:
# XGBoost: two independent one-dimensional hyper-parameter sweeps.
# Every fitted model predicts on X_test and is scored with evaluate_model.
estimators = range(20, 100, 10)
depths = range(2, 20, 2)

# --- Sweep 1: vary n_estimators while max_depth stays at 10 ---
f1_list_estimators, acc_list_estimators = [], []
prec_list_estimators, recall_list_estimators = [], []

for estimator_count in estimators:
    model = train_xgboost(X_train, y_train, max_depth=10, n_estimators=estimator_count)
    y_pred = model.predict(X_test)
    # cm (presumably the confusion matrix) is returned but not tracked per run.
    f1, acc, cm, prec, recall = evaluate_model(model, y_test, y_pred)
    f1_list_estimators.append(f1)
    acc_list_estimators.append(acc)
    prec_list_estimators.append(prec)
    recall_list_estimators.append(recall)

# --- Sweep 2: vary max_depth while n_estimators stays at 35 ---
f1_list_depth, acc_list_depth = [], []
prec_list_depth, recall_list_depth = [], []

for depth in depths:
    model = train_xgboost(X_train, y_train, max_depth=depth, n_estimators=35)
    y_pred = model.predict(X_test)
    f1, acc, cm, prec, recall = evaluate_model(model, y_test, y_pred)
    f1_list_depth.append(f1)
    acc_list_depth.append(acc)
    prec_list_depth.append(prec)
    recall_list_depth.append(recall)



In [None]:
# Plot the XGBoost metrics collected by the two sweeps in the training cell.
XGBoost_Estimators_Plots(estimators, f1_list_estimators, acc_list_estimators, prec_list_estimators, recall_list_estimators)
# Pass the whole `depths` range, matching the estimators call above. `depth` is
# only the loop variable left over from the training cell (a single value),
# whereas each *_list_depth holds one entry per value in `depths`.
XGBoost_Depth_Plots(depths, f1_list_depth, acc_list_depth, prec_list_depth, recall_list_depth)
  

In [None]:
# Random Forest: sweep n_estimators and score every fitted model on the test split.
# Note: this rebinds `estimators` and the *_list_estimators names, replacing any
# values stored under those names by earlier cells.
estimators = range(20, 100, 10)

f1_list_estimators, acc_list_estimators = [], []
prec_list_estimators, recall_list_estimators = [], []

for estimator_count in estimators:
    model = train_random_forest(X_train, y_train, n_estimators=estimator_count)
    y_pred = model.predict(X_test)
    # cm (presumably the confusion matrix) is returned but not tracked per run.
    f1, acc, cm, prec, recall = evaluate_model(model, y_test, y_pred)
    f1_list_estimators.append(f1)
    acc_list_estimators.append(acc)
    prec_list_estimators.append(prec)
    recall_list_estimators.append(recall)

In [None]:
# Plot the Random Forest metrics against the swept `estimators` range. The
# metric lists hold one entry per value in `estimators`; `depth` was a stale
# loop variable from the XGBoost cell and has nothing to do with this sweep.
RandomForest_plots(estimators, f1_list_estimators, acc_list_estimators, prec_list_estimators, recall_list_estimators)
   

In [None]:
# K-Nearest Neighbors: sweep n_neighbors and score every fitted model on the
# test split.
neighbors = range(5, 100, 5)

f1_list_neighbors, acc_list_neighbors = [], []
prec_list_neighbors, recall_list_neighbors = [], []

for neighbor in neighbors:
    model = train_knn(X_train, y_train, n_neighbors=neighbor)
    y_pred = model.predict(X_test)
    # cm (presumably the confusion matrix) is returned but not tracked per run.
    f1, acc, cm, prec, recall = evaluate_model(model, y_test, y_pred)
    f1_list_neighbors.append(f1)
    acc_list_neighbors.append(acc)
    prec_list_neighbors.append(prec)
    recall_list_neighbors.append(recall)

In [None]:
# Plot the KNN metrics against the swept `neighbors` range. The metric lists
# hold one entry per value in `neighbors`; `depth` was a stale loop variable
# from the XGBoost cell and has nothing to do with this sweep.
KNN_Plots(neighbors, f1_list_neighbors, acc_list_neighbors, prec_list_neighbors, recall_list_neighbors)

In [None]:
# SVM: compare kernels, scoring every fitted model on the test split.
# 'precomputed' is deliberately not swept: assuming the trainer wraps
# scikit-learn's SVC, that kernel expects a square (n_samples, n_samples) kernel
# matrix as input rather than the raw feature matrix used here, so the fit
# would fail.
kernels = ['poly', 'rbf', 'linear', 'sigmoid']

f1_list_kernel = []
acc_list_kernel = []
prec_list_kernel = []
recall_list_kernel = []

for kernel in kernels:
    # NOTE(review): this cell previously called train_knn(..., kernel=kernel),
    # a copy-paste slip from the KNN cell. train_svm is assumed to be the SVM
    # trainer exported by classifiers.py -- confirm the exact name.
    model = train_svm(X_train, y_train, kernel=kernel)
    y_pred = model.predict(X_test)
    # cm (presumably the confusion matrix) is returned but not tracked per run.
    f1, acc, cm, prec, recall = evaluate_model(model, y_test, y_pred)
    f1_list_kernel.append(f1)
    acc_list_kernel.append(acc)
    prec_list_kernel.append(prec)
    recall_list_kernel.append(recall)



In [None]:
# Plot the per-kernel SVM metrics; each list has one entry per item in `kernels`.
SVM_plot(
    kernels,
    f1_list_kernel,
    acc_list_kernel,
    prec_list_kernel,
    recall_list_kernel,
)

In [None]:
# Four separate, empty lists for collecting F1, accuracy, precision and recall.
f1_list, acc_list, prec_list, recall_list = [], [], [], []

In [3]:
# Evaluate the saved feed-forward network on the test split.

# Choose the device first so the checkpoint can be mapped onto it while loading;
# without map_location, a checkpoint saved on a GPU fails to load on a CPU-only
# machine.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# FeedForward.pth appears to hold a whole pickled model (the loaded object is
# handed straight to test_model_nn), so weights_only=False is stated explicitly
# instead of relying on a version-dependent default.
# SECURITY: this unpickles arbitrary Python objects -- only load checkpoints you
# created or trust.
test = torch.load("FeedForward.pth", map_location=device, weights_only=False)

dataset_test = AlphaNumDataset(csv_dir="ascii_file_counts.csv", data_dir="test")
# No shuffling for evaluation: sample order is irrelevant to the metrics, and a
# fixed order keeps repeated runs comparable.
data_loader_test = DataLoader(dataset_test, batch_size=64, shuffle=False)
criterion = nn.CrossEntropyLoss()

# The recorded output of this cell is a 5-tuple with an array (presumably the
# confusion matrix) in the middle. The starred target absorbs any such middle
# values, so the unpacking works for both a 4-value and a 5-value return.
f1, acc, *_nn_extra, prec, recall = test_model_nn(test, data_loader_test, device, criterion)

  test = torch.load("FeedForward.pth")


(0.538627155399579, 0.5388830056632442, array([[  29,    0,    0, ...,    0,    1,    0],
       [   0,    6,    0, ...,    0,    0,    0],
       [   0,    0,   14, ...,    0,    0,    0],
       ...,
       [   0,    0,    0, ...,    0,    3,    0],
       [   0,    0,    0, ...,    0,    0, 1004],
       [   0,    0,    0, ...,    0,    0,    0]]), 0.5447318539286866, 0.5388830056632442)


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
