In [1]:
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models

# Location of the HASY label index (one row per image: path, symbol id, LaTeX label, user id).
# A raw string replaces the placeholder-free f-string with doubled backslashes; the value is unchanged.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
csv_filepath = r'C:\New Folder (2)\Handwritting\hasy-data-labels.csv'
data = pd.read_csv(csv_filepath)

# Plain-text preview of the first rows as a quick sanity check of the parsed columns.
print(data.head())


                     path  symbol_id latex  user_id
0  hasy-data/v2-00000.png         31     A       50
1  hasy-data/v2-00001.png         31     A       10
2  hasy-data/v2-00002.png         31     A       43
3  hasy-data/v2-00003.png         31     A       43
4  hasy-data/v2-00004.png         31     A     4435


In [8]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Input resolution fed to the network and mini-batch size shared by both generators.
IMG_HEIGHT, IMG_WIDTH = 32, 32
BATCH_SIZE = 50

# Folder that holds the image files referenced (by base name) in the label table.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
IMAGE_DIR = r'C:\New Folder (2)\Handwritting\hasy-data'

# Fix the label vocabulary from the full table so that both generators share one
# class -> index mapping, even if a split happens to miss a rare symbol.
CLASS_NAMES = sorted(data['latex'].unique())

# 80/20 hold-out split with a fixed seed so the partition is repeatable.
train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)

# The CSV stores paths such as 'hasy-data/v2-00000.png'; keep only the file name so it
# resolves against IMAGE_DIR. `assign` builds new frames instead of writing into the
# frames returned by the split, which keeps the cell idempotent and warning-free.
train_data = train_data.assign(path=train_data['path'].map(os.path.basename))
val_data = val_data.assign(path=val_data['path'].map(os.path.basename))

# Both pipelines only rescale pixel values from [0, 255] to [0, 1]; no augmentation.
train_datagen = ImageDataGenerator(rescale=1.0/255.0)
val_datagen = ImageDataGenerator(rescale=1.0/255.0)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_data,
    directory=IMAGE_DIR,
    x_col='path',
    y_col='latex',
    classes=CLASS_NAMES,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='categorical',
    batch_size=BATCH_SIZE
)

# shuffle=False keeps the validation batches in dataframe order, so the rows returned by
# model.predict(validation_generator) line up with validation_generator.classes.
validation_generator = val_datagen.flow_from_dataframe(
    dataframe=val_data,
    directory=IMAGE_DIR,
    x_col='path',
    y_col='latex',
    classes=CLASS_NAMES,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    shuffle=False
)


Found 134586 validated image filenames belonging to 369 classes.
Found 33647 validated image filenames belonging to 369 classes.


In [9]:
# Width of the softmax layer. Taken from the generator's own label mapping so that it
# always equals the width of the one-hot targets the generator yields during training.
num_classes = len(train_generator.class_indices)
num_classes

369

In [10]:
# Small CNN classifier: three (3x3 conv + ReLU -> 2x2 max-pool) stages, then a dense head
# that ends in a softmax over `num_classes` labels. Inputs are 3-channel images of size
# IMG_HEIGHT x IMG_WIDTH.
model = models.Sequential()

# First stage also declares the input shape of the network.
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)))
model.add(layers.MaxPooling2D((2, 2)))

# Two further stages with 64 filters each.
for n_filters in (64, 64):
    model.add(layers.Conv2D(n_filters, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))

# Classifier head: flatten the feature maps, one hidden layer, then class probabilities.
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(num_classes, activation='softmax'))


In [11]:
# Configure training: Adam optimizer, cross-entropy over one-hot (categorical) targets,
# and accuracy reported alongside the loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [12]:
# Number of full passes over the training generator.
EPOCHS = 10

# Train on the training generator and score on the validation generator after each epoch.
# steps_per_epoch / validation_steps are intentionally left unset: both generators know
# their own length, so Keras runs one full pass per epoch without explicit step counts.
# `history` keeps the per-epoch loss/metric values returned by fit.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=EPOCHS,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [13]:
# Score the trained model on the validation generator. The returned list holds the loss
# followed by the compiled metrics, and is displayed as the cell's output.
val_metrics = model.evaluate(validation_generator)
val_metrics



[0.7826281785964966, 0.7760275602340698]

In [14]:
# Persist the trained model (architecture + weights) to a single file in the working directory.
# NOTE(review): the truncated `saving_api.save_model(` line in the recorded output is
# presumably Keras' legacy-HDF5 notice — confirm; if switching to the `.keras` format,
# update the loading cell to the same file name.
MODEL_PATH = 'hasyv2_model.h5'
model.save(MODEL_PATH)

  saving_api.save_model(


In [15]:
# Round-trip check: restore the model from the file written by the save cell.
loaded_model = models.load_model('hasyv2_model.h5')

# Class-probability predictions for every image the validation generator yields.
# Row order follows the generator's iteration order; it only lines up with
# validation_generator.classes if that generator was built with shuffle=False.
predictions = loaded_model.predict(validation_generator)




In [16]:
# Display the prediction matrix returned by predict(); NumPy abbreviates the repr for
# large arrays. Presumably one row per validation image and one column per class — use
# predictions.shape to confirm.
predictions

array([[2.5176048e-23, 1.3198396e-23, 2.0305224e-22, ..., 6.6648544e-14,
        7.4094546e-15, 4.0830466e-16],
       [4.1761381e-14, 2.0787205e-19, 2.5551003e-06, ..., 5.1930581e-08,
        3.0672573e-10, 5.4375491e-09],
       [6.2188144e-22, 7.5700849e-29, 7.9941614e-16, ..., 9.4362264e-22,
        9.6505393e-21, 2.5089376e-17],
       ...,
       [9.6079816e-18, 3.6499755e-20, 8.7088255e-22, ..., 3.9878859e-13,
        7.1496249e-22, 6.6561135e-19],
       [1.3976833e-14, 4.4807741e-18, 1.2212664e-10, ..., 7.5727754e-13,
        1.3721339e-16, 3.4224709e-07],
       [1.7871918e-13, 1.2674995e-17, 1.6522660e-11, ..., 2.0428878e-16,
        8.0224577e-11, 1.0949865e-15]], dtype=float32)