# Symbol Detection

dataset: https://www.kaggle.com/datasets/xainano/handwrittenmathsymbols

sample model: https://github.com/OdyAsh/Handwritten-M2L/blob/main/model.ipynb

In [None]:
import os

import cv2
import pickle
import numpy as np
from functools import cmp_to_key

from keras.src.layers import BatchNormalization
from keras.src.ops import shape
from sklearn.model_selection import train_test_split
import tensorflow as tf
import keras
from keras import layers
from keras import ops   
import matplotlib.pyplot as plt

In [3]:
# Maps each dataset folder name (key) to the LaTeX source emitted for that
# symbol (value). The values are used as class labels further down, so changing
# any of them requires regenerating the label pickle and retraining, because
# the class indices are derived from the sorted set of label strings.
dic = {
    "-": r"-",
    "(": r"(",
    ")": r")",
    "+": r"+",
    "=": r"=",
    "0": r"0",
    "1": r"1",
    "2": r"2",
    "3": r"3",
    "4": r"4",
    "5": r"5",
    "6": r"6",
    "7": r"7",
    "8": r"8",
    "9": r"9",
    "geq": r"\geq",
    "gt": r">",
    "i": r"i",
    "in": r"\in",
    "int": r"\int",
    "j": r"j",
    "leq": r"\le",
    "lt": r"<",
    "neq": r"\neq",
    # Lowercase \pi, consistent with lowercase \theta below.
    # NOTE(review): assumes the dataset's `pi` folder holds lowercase pi - confirm.
    "pi": r"\pi",
    "sum": r"\sum",
    "theta": r"\theta",
    "times": r"\times",
    "w": r"w",
    # A plain letter: `\X` is not a LaTeX command and would fail to compile.
    "X": r"X",
    "y": r"y",
    "z": r"z"}


In [108]:
class_size = 1000


def loadData(DataDir, max_per_class=None):
    """Load up to a fixed number of grayscale images per symbol class.

    For every folder name in ``dic``, reads the image files found in
    ``DataDir/<folder name>`` and pairs each image with the LaTeX string that
    ``dic`` maps the folder to.

    Args:
        DataDir: Directory containing one sub-folder per key of ``dic``.
        max_per_class: Maximum number of images to load from each sub-folder.
            Defaults to the module-level ``class_size``.

    Returns:
        A tuple ``(imgs, labels)`` of equal-length lists: the decoded images
        and the matching LaTeX label for each one.

    Raises:
        OSError: If a class sub-folder cannot be listed (e.g. it is missing).
    """
    if max_per_class is None:
        max_per_class = class_size

    imgs = []
    labels = []

    for folder, latex in dic.items():
        class_dir = os.path.join(DataDir, folder)
        loaded = 0
        # os.listdir returns entries in arbitrary order; sorting makes the
        # selected subset reproducible between runs.
        for img_name in sorted(os.listdir(class_dir)):
            # Check the cap before reading so exactly max_per_class images
            # are kept (the previous `c > class_size` test kept one extra).
            if loaded >= max_per_class:
                break

            img_path = os.path.join(class_dir, img_name)
            # IMREAD_GRAYSCALE is the imread flag for single-channel loading;
            # COLOR_BGR2GRAY is a cvtColor conversion code, not an imread flag.
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            # imread signals failure by returning None instead of raising, so
            # unreadable files must be filtered out explicitly.
            if img is None:
                print(f"Skipping unreadable image: {img_path}")
                continue

            imgs.append(img)
            labels.append(latex)
            loaded += 1

    return imgs, labels


# Load the images and their LaTeX labels from disk.
# NOTE(review): this hard-coded absolute path ends in `.ipynb`, i.e. it names a
# notebook file rather than a directory of per-symbol folders, which is what
# loadData joins sub-folder names onto - confirm and point it at the dataset
# root (ideally via a configurable, relative path).
imgs, labels = loadData(r"C:\Users\Benjamin\Projects\NEAgit\HandMtoLat\handwrittenSymbolDetection.ipynb")



In [20]:

# Cache the loaded images and labels on disk so later runs can skip the
# image-loading step.
for pickle_path, payload in (("x_symbols.pickle", imgs), ("y_latex.pickle", labels)):
    with open(pickle_path, 'wb') as out_file:
        pickle.dump(payload, out_file)

In [4]:

# Restore the cached images and labels written by the dump cell.
# Both files are opened in binary read mode and deserialized with pickle.
with open('x_symbols.pickle', 'rb') as imgs_file:
    imgs = pickle.load(imgs_file)

with open('y_latex.pickle', 'rb') as labels_file:
    labels = pickle.load(labels_file)

In [5]:
# Build both directions of the label <-> class-index mapping from a single
# sorted list of distinct labels.
unique_labels = np.unique(labels)
latexToNums = {latex: index for index, latex in enumerate(unique_labels)}
numsToLatex = dict(enumerate(unique_labels))

In [6]:
# Stratified 67/33 split with a fixed seed so the partition is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    imgs,
    labels,
    test_size=0.33,
    stratify=labels,
    random_state=42,
)

In [8]:

input_shape = (45, 45, 1)
# Derive the class count from the label mapping so the one-hot width cannot
# drift from the labels actually present (32 for the full symbol set).
num_classes = len(latexToNums)

# Stratified split with a fixed seed so this cell is reproducible on its own.
x_train, x_test, y_train, y_test = train_test_split(
    imgs, labels, test_size=0.33, stratify=labels, random_state=42
)

# Convert data into np arrays
x_train = np.array(x_train)
x_test = np.array(x_test)

# Scale images to the [0, 1] range
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255

# Add a trailing channel axis so images have shape (45, 45, 1)
x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)

# Fail early if the images do not match what the model's input layer expects.
assert x_train.shape[1:] == input_shape, f"unexpected image shape {x_train.shape[1:]}"
assert x_test.shape[1:] == input_shape, f"unexpected image shape {x_test.shape[1:]}"

print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

# Map LaTeX labels to integer class indices (computed once)...
y_train_nums = [latexToNums[latex] for latex in y_train]
y_test_nums = [latexToNums[latex] for latex in y_test]

# ...then convert the class vectors to binary (one-hot) class matrices.
y_train_cat = keras.utils.to_categorical(y_train_nums, num_classes)
y_test_cat = keras.utils.to_categorical(y_test_nums, num_classes)

print("y_train_cat shape:", y_train_cat.shape)

x_train shape: (174850, 45, 45, 1)
174850 train samples
86121 test samples
y_train_cat shape: (174850, 32)


In [9]:
# Sanity check: container types of the image arrays and the integer label lists.
tuple(map(type, (x_train, x_test, y_train_nums, y_test_nums)))

(numpy.ndarray, numpy.ndarray, list, list)

In [10]:
# Small CNN classifier: two conv + max-pool stages, then dropout and a softmax
# layer with one output per symbol class.
model = keras.Sequential()
model.add(keras.Input(shape=input_shape))

# Feature extractor
model.add(layers.Conv2D(32, kernel_size=(3, 3), activation="relu"))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Conv2D(64, kernel_size=(3, 3), activation="relu"))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Classifier head
model.add(layers.Flatten())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(num_classes, activation="softmax"))


In [None]:
# Print Keras's summary of the model's layers.
model.summary()

In [None]:
# Show the first held-out image together with its ground-truth LaTeX label.
plt.imshow(x_test[0])
print(y_test[0])

# predict() expects a leading batch axis, so wrap the single image into a batch
# of one (the previous `.expandd` attribute does not exist on an ndarray).
# NOTE: the training cell comes later in the notebook, so on a top-to-bottom
# run this prints the output of the still-untrained model.
print(model.predict(np.expand_dims(x_test[0], axis=0)))

In [None]:
# Training hyperparameters
batch_size = 128
epochs = 15

# Categorical cross-entropy matches the one-hot targets in y_train_cat.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Hold out 10% of the training data for per-epoch validation.
history = model.fit(
    x_train,
    y_train_cat,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.1,
)

print(history.history.keys())

In [None]:

# Plot the training curve against the validation curve for each tracked metric.
# The second curve comes from the `val_*` history entries, i.e. the validation
# split used during fit(), so it is labelled "validation" rather than "test".
for metric in ("accuracy", "loss"):
    plt.plot(history.history[metric])
    plt.plot(history.history[f"val_{metric}"])
    plt.title(f"model {metric}")
    plt.ylabel(metric)
    plt.xlabel("epoch")
    plt.legend(["train", "validation"], loc="upper left")
    plt.show()

In [None]:
from sklearn.metrics import accuracy_score
import random

# score = model.evaluate(x_test, y_test_cat, verbose=0)
# print("Test loss:", score[0])
# print("Test accuracy:", score[1])

# Visual spot-check: draw the first n test images in a 10x10 grid, each titled
# with the label the model predicts for it.
n = 100
# NOTE(review): random_int is never used below - presumably it was meant as a
# random starting offset into x_test; confirm or remove.
random_int = random.randint(1, 50000)

# One batched forward pass instead of n separate single-image predict() calls.
y_pred = model.predict(x_test[:n], verbose=0)
pred_classes = y_pred.argmax(axis=-1)

# Create the figure once (a second plt.figure call left an empty extra figure).
plt.figure(figsize=(10, 10))
for i in range(len(pred_classes)):
    ax = plt.subplot(10, 10, i + 1)
    plt.imshow(x_test[i])
    plt.title(numsToLatex[pred_classes[i]])
    plt.axis('off')
plt.show()
    




In [36]:
# Persist the model in the native Keras format.
# NOTE(review): hard-coded absolute path under a `NEA` folder, whereas the data
# path used earlier is under `NEAgit` - confirm the intended location and
# prefer a configurable, relative path.
model.save(r'C:\Users\Benjamin\Projects\NEA\models.keras')