In [1]:
#using h5py to unpack the archived file which holds the information for our images and labels
import numpy as np
import h5py

In [2]:
#let's extract our training, test and validation data
#method seen in [1] p21
with h5py.File('../HDF5/hdf5_uTHCD_compressed.h5', 'r') as hdf:
    base_items = list(hdf.items())
    print(f'Items in base dir: {base_items}')
    G1 = hdf.get('Train Data')
    G1_items = list(G1.items())
    print(f'items in group 1: {G1_items}')
    
    G2 = hdf.get('Test Data')
    G2_items = list(G2.items())
    print(f'items in group 2: {G2_items}')
    
    x_train = np.array(G1.get('x_train'))
    y_train = np.array(G1.get('y_train'))
    
    x_test = np.array(G2.get('x_test'))
    y_test = np.array(G2.get('y_test'))
    
    x_val = x_train[-7870:,:,:]
    y_val = y_train[-7870:]
    
    x_train = x_train[:-7870,:,:]
    y_train = y_train[:-7870]
    
x_train.shape

Items in base dir: [('Test Data', <HDF5 group "/Test Data" (2 members)>), ('Train Data', <HDF5 group "/Train Data" (2 members)>)]
items in group 1: [('x_train', <HDF5 dataset "x_train": shape (62870, 64, 64), type "|u1">), ('y_train', <HDF5 dataset "y_train": shape (62870,), type "<i8">)]
items in group 2: [('x_test', <HDF5 dataset "x_test": shape (28080, 64, 64), type "|u1">), ('y_test', <HDF5 dataset "y_test": shape (28080,), type "<i8">)]


(55000, 64, 64)

Now that we have the data, we actually need to use it.

In [21]:
import scipy
import pandas as pd
from sklearn import svm

#let's try an SVC
#It's important to note here we are using very basic parameters
svc = svm.SVC(gamma=0.001,kernel='poly', C = 100)

#reshaping the data to work for SVC while maintaining images. Likely poor optimization
x_train_svc = x_train.reshape(55000,-1)
x_test_svc = x_test.reshape(28080, -1)
x_val_svc = x_val.reshape(7870,-1)
x_val.shape

(7870, 64, 64)

In [37]:
#we're gonna train on only 1000 samples until we figure out complexity issues
SAMPLE_SIZE = 2000
svc.fit(x_train_svc[:SAMPLE_SIZE],y_train[:SAMPLE_SIZE])

In [38]:
predictions = svc.predict(x_test_svc[:SAMPLE_SIZE])
(predictions, y_test[:SAMPLE_SIZE])

correct = 0
incorrect = 0
for pred, expect in zip(predictions,y_test[:SAMPLE_SIZE]):
    if pred == expect:
        correct += 1
    else:
        incorrect += 1


#some analysis of our model's performance with the linear kernel
#accuracy:
correct/SAMPLE_SIZE

0.5395

In [24]:
#now let's see how it does on the validation data
predictions = svc.predict(x_val_svc[:SAMPLE_SIZE])
correct = 0
incorrect = 0
for pred, expect in zip(predictions,y_val[:SAMPLE_SIZE]):
    if pred == expect:
        correct += 1
    else:
        incorrect += 1


correct/SAMPLE_SIZE

0.52

Likely slightly more accurate due to the validation data being closer in size to SAMPLE_SIZE than testing data

In [26]:
import matplotlib as plt
import tensorflow as tf
from tensorflow.keras import layers, models
from keras import backend as K

2023-11-26 13:46:06.346846: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-26 13:46:06.346883: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-26 13:46:06.348844: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-26 13:46:06.519770: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [27]:
#helper functions to compute f1 score later
#taken from [3]
def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))


In [28]:
#let's examine a couple examples of the training data:


#set up and build the initial model of 2D convolutional layers and MaxPooling
model = models.Sequential()
model.add(layers.Conv2D(32, (3,3), activation='relu', input_shape=(64,64,1)))
model.add(layers.MaxPooling2D(2,2))
model.add(layers.Conv2D(32, (3,3), activation='relu'))
model.add(layers.MaxPooling2D(2,2))


#will now flatten conv layers and add 3 dense layers of size 1024 and 512
model.add(layers.Flatten())
model.add(layers.Dense(1024, activation='relu'))
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(156)) #156 distinct char classes per [1] p.3
model.summary() #to output a depiction of the network

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 62, 62, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2  (None, 31, 31, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 29, 29, 32)        9248      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 14, 14, 32)        0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 6272)              0         
                                                                 
 dense (Dense)               (None, 1024)              6

2023-11-26 13:46:16.832995: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 25690112 exceeds 10% of free system memory.
2023-11-26 13:46:16.851592: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 25690112 exceeds 10% of free system memory.
2023-11-26 13:46:16.867894: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 25690112 exceeds 10% of free system memory.


In [29]:
#compile the model using SparseCategoricalCrossentropy (see [2]) 
#here we only use 5 epochs to train
model.compile(optimizer='adam',loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),metrics=['accuracy',f1_m,precision_m,recall_m]) 
res = model.fit(x_train,y_train, epochs=5, validation_data=(x_test,y_test)) #fit the training and test data to the model

2023-11-26 13:46:20.402832: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 225280000 exceeds 10% of free system memory.


Epoch 1/5


2023-11-26 13:46:21.228218: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 25690112 exceeds 10% of free system memory.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [39]:
#compile the model using SparseCategoricalCrossentropy (see [2]) 
#here we only use 5 epochs to train
model.compile(optimizer='adam',loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),metrics=['accuracy',f1_m,precision_m,recall_m]) 
res = model.fit(x_train,y_train, epochs=5, validation_data=(x_test,y_test)) #fit the training and test data to the model

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [40]:
test_loss, test_acc, test_f1, test_precision, test_recall = model.evaluate(x_test, y_test)
print(f'loss: {test_loss}\nacc: {test_acc}\nf1: {test_f1}\nprecision: {test_precision}\nrecall: {test_recall}')

loss: 0.5261752605438232
acc: 0.8787037134170532
f1: 1.173245906829834
precision: 1.0749523639678955
recall: 1.2981841564178467


In [31]:
val_loss, val_acc, val_f1, val_precision, val_recall = model.evaluate(x_val,y_val)
print(f'loss: {val_loss}\nacc: {val_acc}\nf1: {val_f1}\nprecision: {val_precision}\nrecall: {val_recall}')

loss: 0.5510273575782776
acc: 0.8804320096969604
f1: 1.8421167135238647
precision: 1.1184182167053223
recall: 5.270786762237549


In [33]:
#let's try another NN to compare
#
model = models.Sequential()
model.add(layers.Conv2D(32, (3,3), activation='relu', input_shape=(64,64,1)))
model.add(layers.MaxPooling2D(2,2))
model.add(layers.Conv2D(32, (3,3), activation='relu'))
model.add(layers.MaxPooling2D(2,2))
model.add(layers.Conv2D(32, (3,3), activation='relu'))
model.add(layers.MaxPooling2D(2,2))

#will now flatten conv layers and add 3 dense layers of size 1024 and 512 and 256
model.add(layers.Flatten())
model.add(layers.Dense(1024, activation='relu'))
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(156)) #156 distinct char classes per [1] p.3
model.summary() #to output a depiction of the network

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 62, 62, 32)        320       
                                                                 
 max_pooling2d_4 (MaxPoolin  (None, 31, 31, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_5 (Conv2D)           (None, 29, 29, 32)        9248      
                                                                 
 max_pooling2d_5 (MaxPoolin  (None, 14, 14, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_6 (Conv2D)           (None, 12, 12, 32)        9248      
                                                                 
 max_pooling2d_6 (MaxPoolin  (None, 6, 6, 32)         

In [34]:
#compile the model using SparseCategoricalCrossentropy (see [2]) 
#here we only use 4 epochs to train
model.compile(optimizer='adam',loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),metrics=['accuracy',f1_m,precision_m,recall_m]) 
res = model.fit(x_train,y_train, epochs=5, validation_data=(x_test,y_test)) #fit the training and test data to the model

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [35]:
test_loss, test_acc, test_f1, test_precision, test_recall = model.evaluate(x_test, y_test)
print(f'loss: {test_loss}\nacc: {test_acc}\nf1: {test_f1}\nprecision: {test_precision}\nrecall: {test_recall}')

loss: 0.4985743463039398
acc: 0.8694444298744202
f1: 1.7160624265670776
precision: 1.1377657651901245
recall: 3.519388437271118


In [36]:
val_loss, val_acc, val_f1, val_precision, val_recall = model.evaluate(x_val,y_val)
print(f'loss: {val_loss}\nacc: {val_acc}\nf1: {val_f1}\nprecision: {val_precision}\nrecall: {val_recall}')

loss: 0.35860446095466614
acc: 0.911308765411377
f1: 1.6865177154541016
precision: 1.1287736892700195
recall: 3.3728694915771484


REFERENCES:<br>
[1]: N. Shaffi and F. Hajamohideen, "uTHCD: A New Benchmarking for Tamil Handwritten OCR," in IEEE Access, vol. 9, pp. 101469-101493, 2021, doi: 10.1109/ACCESS.2021.3096823.<br>
[2]: https://www.tensorflow.org/api_docs/python/tf/keras/losses/SparseCategoricalCrossentropy <br>
[3]: https://datascience.stackexchange.com/questions/45165/how-to-get-accuracy-f1-precision-and-recall-for-a-keras-model <br>
'