In [None]:
# HCR-Net (fine tuned network)
import numpy as np
import math
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

%matplotlib inline

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout, Flatten, Dense
from tensorflow.keras.layers import BatchNormalization as BatchNorm

# pre-trained models
from tensorflow.keras.applications.vgg16 import VGG16

# custom learning rates
#import learning_rate as lr

# for simple tasks
def decayed_learning_rate(step):
    """Return the learning rate used for the first training stage.

    In this file the function is handed to ``LearningRateScheduler``, so
    ``step`` is presumably the 0-based epoch index -- confirm against the
    Keras version in use.

    Returns 1e-4 while ``step < 5`` and 5e-5 from then on.
    """
    return 1e-4 if step < 5 else 5e-5

def decayed_learning_rate_tuned20(step):
    """Return the three-phase learning rate with boundaries at 5 and 15.

    ``step`` is presumably the 0-based epoch index supplied by a Keras
    ``LearningRateScheduler``; the "20" suffix presumably refers to a
    20-epoch run -- verify against the caller.

    Returns 1e-7 for ``step < 5``, 5e-6 for ``5 <= step < 15`` and 1e-6
    afterwards.
    """
    if step < 5:
        return 1e-7
    if step < 15:
        return 5e-6
    return 1e-6

def decayed_learning_rate_tuned30(step):
    """Return the three-phase learning rate with boundaries at 5 and 25.

    ``step`` is presumably the 0-based epoch index supplied by a Keras
    ``LearningRateScheduler``; the "30" suffix presumably refers to a
    30-epoch run -- verify against the caller.

    Returns 1e-7 for ``step < 5``, 5e-6 for ``5 <= step < 25`` and 1e-6
    afterwards.
    """
    if step < 5:
        return 1e-7
    if step < 25:
        return 5e-6
    return 1e-6

def decayed_learning_rate_tuned50(step):
    """Return the three-phase learning rate with boundaries at 5 and 45.

    ``step`` is presumably the 0-based epoch index supplied by a Keras
    ``LearningRateScheduler``; the "50" suffix presumably refers to a
    50-epoch run -- verify against the caller.

    Returns 1e-7 for ``step < 5``, 5e-6 for ``5 <= step < 45`` and 1e-6
    afterwards.
    """
    if step < 5:
        return 1e-7
    if step < 45:
        return 5e-6
    return 1e-6

def decayed_learning_rate_tuned75(step):
    """Return the three-phase learning rate with boundaries at 5 and 70.

    ``step`` is presumably the 0-based epoch index supplied by a Keras
    ``LearningRateScheduler``; the "75" suffix presumably refers to a
    75-epoch run -- verify against the caller.

    Returns 1e-7 for ``step < 5``, 5e-6 for ``5 <= step < 70`` and 1e-6
    afterwards.
    """
    if step < 5:
        return 1e-7
    if step < 70:
        return 5e-6
    return 1e-6

def decayed_learning_rate_tuned100(step):
    """Return the three-phase learning rate with boundaries at 5 and 95.

    ``step`` is presumably the 0-based epoch index supplied by a Keras
    ``LearningRateScheduler``; the "100" suffix presumably refers to a
    100-epoch run -- verify against the caller.

    Returns 1e-7 for ``step < 5``, 5e-6 for ``5 <= step < 95`` and 1e-6
    afterwards.
    """
    if step < 5:
        return 1e-7
    if step < 95:
        return 5e-6
    return 1e-6

def decayed_learning_rate_tuned150(step):
    """Return the three-phase learning rate with boundaries at 5 and 145.

    ``step`` is presumably the 0-based epoch index supplied by a Keras
    ``LearningRateScheduler``; the "150" suffix presumably refers to a
    150-epoch run -- verify against the caller.

    Returns 1e-7 for ``step < 5``, 5e-6 for ``5 <= step < 145`` and 1e-6
    afterwards.
    """
    if step < 5:
        return 1e-7
    if step < 145:
        return 5e-6
    return 1e-6

def decayed_learning_rate_fixed1(step):
    """Return a three-phase learning rate with boundaries at 4 and 5.

    ``step`` is presumably the 0-based epoch index supplied by a Keras
    ``LearningRateScheduler`` -- verify against the caller.

    Returns 1e-7 for ``step < 4``, 5e-7 for ``4 <= step < 5`` and 5e-4
    afterwards. Note that the final rate is the largest of the three.
    """
    if step < 4:
        return 1e-7
    if step < 5:
        return 5e-7
    return 5e-4

def decayed_learning_rate_fixed2(step):
    """Return a three-phase learning rate with boundaries at 4 and 5.

    ``step`` is presumably the 0-based epoch index supplied by a Keras
    ``LearningRateScheduler`` -- verify against the caller.

    Returns 1e-7 for ``step < 4``, 5e-7 for ``4 <= step < 5`` and 1e-6
    afterwards.
    """
    if step < 4:
        return 1e-7
    if step < 5:
        return 5e-7
    return 1e-6

# Seed NumPy and TensorFlow for reproducibility (which is not guaranteed
# when multiple CPUs or a GPU are involved).
np.random.seed(1)
tf.random.set_seed(1)

#from google.colab import drive


# Mount Google Drive
#drive.mount('/content/drive')

# Change the following for each dataset (dataset details and learning params).
# dataset details
train_data_dir = '/kaggle/input/bangla-lekha-isolated-dataset/BanglaLekha-Isolated/Images/'
#nb_train_samples = 15802
#nb_validation_samples = 3946
num_classes = 84

# learning params
epochs1 = 10  # epochs for the first stage (VGG16 layers frozen)
epochs2 = 20  # epochs for the second stage (fine tuning)
learning_rate1 = decayed_learning_rate
# Use the schedule that matches epochs2: with 20 epochs the "tuned50"
# schedule never reaches its final decay (which starts at step 45), whereas
# "tuned20" decays at step 15. Keep this in sync with epochs2.
learning_rate2 = decayed_learning_rate_tuned20

batch_size = 32
# dimensions of our images.
img_width, img_height = 32, 32
# Keras image tensors are laid out as (height, width, channels).
input_shape = (img_height, img_width, 3)

# Using VGG16: load the model without its classifier layers, with weights
# pre-trained on ImageNet (requested explicitly; this is also the default).
source_model = VGG16(include_top=False, weights='imagenet', input_shape=input_shape)

# create model
model = Sequential()

# Add the lower layers of VGG16 (everything except the last six layers of
# the headless network) to our model, and mark them as not trainable for the
# first training stage.
for layer in source_model.layers[:-6]:
    layer.trainable = False
    model.add(layer)

# Add the new classification head on top of the pre-trained layers:
# two Dense(512) blocks with batch normalisation and dropout, followed by a
# softmax over the dataset's classes.
model.add(BatchNorm())
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(BatchNorm())
model.add(Dropout(0.35))
model.add(Dense(512, activation='relu'))
model.add(BatchNorm())
model.add(Dropout(0.35))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model. RMSprop is created with its default learning rate; the
# rate actually used is set per epoch by the LearningRateScheduler callback
# passed to fit().
model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.RMSprop(), metrics=['accuracy'])
# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that emitted a stray "None" in the cell output).
model.summary()

# Use ImageDataGenerator to load and scale the data. The training generator
# also applies light augmentation (rotation, shifts, shear, zoom); pixels
# exposed by a transform are filled with the constant value 0.
train_datagen = ImageDataGenerator(
                    rescale=1.0/255,
                    rotation_range=10,
                    width_shift_range=0.05,
                    height_shift_range=0.05,
                    shear_range=0.1,
                    zoom_range=0.05,
                    fill_mode='constant',
                    cval = 0,
                    validation_split=0.2
                )
# The validation generator only rescales; it uses the same validation_split
# so that the 'training' and 'validation' subsets partition the directory.
valid_datagen = ImageDataGenerator(rescale=1.0/255, validation_split=0.2)


# target_size is documented as (height, width), so pass the dimensions in
# that order (the original passed (width, height), which only worked
# because both are equal here).
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    color_mode="rgb",
    class_mode='categorical',
    subset='training',
    shuffle=True,
    seed=13)
# shuffle=False keeps the validation samples in a fixed order, so that
# predictions can later be compared with validation_generator.classes.
validation_generator = valid_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    color_mode="rgb",
    class_mode='categorical',
    subset='validation',
    shuffle=False,
    seed=13)

# Sample counts are taken from the generators rather than hard-coded.
nb_train_samples = train_generator.samples
nb_validation_samples = validation_generator.samples

# Stage 1: train with the first learning-rate schedule, applied through a
# LearningRateScheduler callback.
callback = tf.keras.callbacks.LearningRateScheduler(learning_rate1)

# One epoch covers every sample once: ceil(samples / batch_size) batches.
history1 = model.fit(
    x=train_generator,
    epochs=epochs1,
    steps_per_epoch=math.ceil(nb_train_samples / batch_size),
    validation_data=validation_generator,
    validation_steps=math.ceil(nb_validation_samples / batch_size),
    callbacks=[callback],
)


# Fine tune the model by unfreezing the lower layers (taken from VGG16).
# NOTE(review): the hard-coded 12 presumably equals the number of VGG16
# layers copied into `model` (excluding the input layer) -- confirm if the
# slice used when building the model changes.
for layer in model.layers[:12]:
    layer.trainable = True

# reset the image generators
train_generator.reset()
validation_generator.reset()

# Recompile the model so that the changed `trainable` flags take effect.
model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.RMSprop(), metrics=['accuracy'])
# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that emitted a stray "None" in the cell output).
model.summary()

# define callback for the fine-tuning learning-rate schedule
callback = tf.keras.callbacks.LearningRateScheduler(learning_rate2)

history2 = model.fit(
    train_generator,
    steps_per_epoch=math.ceil(nb_train_samples / batch_size),
    epochs=epochs2,
    callbacks=[callback],
    validation_data=validation_generator,
    validation_steps=math.ceil(nb_validation_samples / batch_size))

# save model
# model.save_weights('HCR-Net.h5')

# Plot the convergence of HCR-Net over both training stages: one figure for
# accuracy and one for loss, each showing the training curve followed by the
# validation curve. The per-epoch values of stage 1 and stage 2 are
# concatenated so the x axis spans the whole run.
for metric, axis_label, legend_loc in (
    ('accuracy', 'Accuracy', 'lower right'),
    ('loss', 'Loss', 'upper right'),
):
    val_metric = 'val_' + metric
    plt.plot(history1.history[metric] + history2.history[metric])
    plt.plot(history1.history[val_metric] + history2.history[val_metric])
    plt.ylabel(axis_label)
    plt.xlabel('Epochs')
    plt.legend(['Train', 'Test'], loc=legend_loc)
    plt.show()

# Evaluate the model on the validation subset and plot the confusion matrix.
# filenames = validation_generator.filenames
# Restart the (unshuffled) generator so the prediction order lines up with
# validation_generator.classes.
validation_generator.reset()
predict = model.predict(validation_generator)
# The predicted class is the index of the largest softmax output.
y_pred = np.argmax(predict,axis=-1)
# Rows of the matrix are true classes, columns are predicted classes.
cf_matrix = confusion_matrix(validation_generator.classes,y_pred)

# Size the (square) figure in proportion to the number of classes, and draw
# the heatmap explicitly on the axes created here.
fig, ax = plt.subplots(figsize=(math.ceil(num_classes/3.0), math.ceil(num_classes/3.0)))
labels = list(validation_generator.class_indices.keys())
sns.heatmap(cf_matrix, annot=True, xticklabels=labels, yticklabels=labels, fmt='', cmap='Blues', ax=ax)
# Label the axes so it is clear which one holds the ground truth.
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
plt.show()

2025-06-02 23:57:20.767711: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748908641.300768      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748908641.433095      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
I0000 00:00:1748908661.078481      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1748908661.079171      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability:

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


None
Found 132914 images belonging to 84 classes.
Found 33191 images belonging to 84 classes.


  self._warn_if_super_not_called()


Epoch 1/10


I0000 00:00:1748908761.722308     117 service.cc:148] XLA service 0x78551800f860 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1748908761.723718     117 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1748908761.723739     117 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1748908762.232165     117 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m   1/4154[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m8:53:20[0m 8s/step - accuracy: 0.0312 - loss: 5.5984

I0000 00:00:1748908766.752689     117 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m4154/4154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1012s[0m 242ms/step - accuracy: 0.5277 - loss: 1.9122 - val_accuracy: 0.8965 - val_loss: 0.3694 - learning_rate: 1.0000e-04
Epoch 2/10
[1m4154/4154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m501s[0m 121ms/step - accuracy: 0.7677 - loss: 0.8542 - val_accuracy: 0.9138 - val_loss: 0.3044 - learning_rate: 1.0000e-04
Epoch 3/10
[1m4154/4154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m402s[0m 97ms/step - accuracy: 0.7985 - loss: 0.7405 - val_accuracy: 0.9220 - val_loss: 0.2753 - learning_rate: 1.0000e-04
Epoch 4/10
[1m4154/4154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m429s[0m 103ms/step - accuracy: 0.8181 - loss: 0.6771 - val_accuracy: 0.9262 - val_loss: 0.2682 - learning_rate: 1.0000e-04
Epoch 5/10
[1m4154/4154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m425s[0m 102ms/step - accuracy: 0.8255 - loss: 0.6399 - val_accuracy: 0.9245 - val_loss: 0.2644 - learning_rate: 1.0000e-04
Epoch 6/10
[1m4154/4154[0m [

None
Epoch 1/20
[1m4154/4154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m395s[0m 93ms/step - accuracy: 0.8675 - loss: 0.4869 - val_accuracy: 0.9367 - val_loss: 0.2278 - learning_rate: 1.0000e-07
Epoch 2/20
[1m4154/4154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m417s[0m 100ms/step - accuracy: 0.8688 - loss: 0.4816 - val_accuracy: 0.9375 - val_loss: 0.2251 - learning_rate: 1.0000e-07
Epoch 3/20
[1m4154/4154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m438s[0m 106ms/step - accuracy: 0.8713 - loss: 0.4813 - val_accuracy: 0.9381 - val_loss: 0.2234 - learning_rate: 1.0000e-07
Epoch 4/20
[1m4154/4154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m462s[0m 111ms/step - accuracy: 0.8700 - loss: 0.4793 - val_accuracy: 0.9388 - val_loss: 0.2218 - learning_rate: 1.0000e-07
Epoch 5/20
[1m4154/4154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m515s[0m 124ms/step - accuracy: 0.8717 - loss: 0.4718 - val_accuracy: 0.9390 - val_loss: 0.2216 - learning_rate: 1.0000e-07
Epoch 6/20
[1m4