# Imports

In [2]:
import os
import xml.etree.ElementTree as ET
import datetime
import string

In [3]:
import tensorflow as tf

# Ask TensorFlow which GPU device it sees and stop right away unless it is
# the first GPU, so the rest of the notebook never runs without one.
device_name = tf.test.gpu_device_name()

if not (device_name == "/device:GPU:0"):
    raise SystemError("GPU device not found")

gpu_message = "Found GPU at: {}".format(device_name)
print(gpu_message)

2025-06-18 14:44:43.432673: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750250683.448462   14587 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750250683.453529   14587 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1750250683.466383   14587 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1750250683.466400   14587 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1750250683.466401   14587 computation_placer.cc:177] computation placer alr

Found GPU at: /device:GPU:0


I0000 00:00:1750250686.142106   14587 gpu_device.cc:2019] Created device /device:GPU:0 with 9711 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060, pci bus id: 0000:05:00.0, compute capability: 8.6


# Training Setup

## Environment

### Data source and training parameters

In [12]:
# NOTE: the settings below are documented with `#` comments rather than bare
# string literals. A bare string left as the last statement of a cell is
# treated by Jupyter as the cell's result and echoed back as output.

# Name of the dataset (used to build file paths for data input/output).
SOURCE = "mine_logs_v1"

# Model architecture to use (e.g. 'flor' for a specific network design).
ARCHITECTURE = "flor"

# Maximum number of passes over the training data (handed to `model.fit` as
# `epochs` in the training cell).
EPOCHS = 1000

# Number of samples processed before the model is updated.
#
# A larger batch size typically increases memory usage (more data is loaded at
# once) but can speed up each epoch thanks to more efficient computation on
# modern hardware. Very large batches may need more GPU/CPU memory than is
# available and can sometimes hurt generalization. Smaller batches use less
# memory and may generalize better, but each epoch can be slower because the
# hardware is used less efficiently.
BATCH_SIZE = 16

'Number of samples processed before the model is updated.\n\nA larger batch size typically increases memory usage (since more data is loaded into memory at once) but can speed up training per epoch due to more efficient computation on modern hardware. However, very large batch sizes may require more GPU/CPU memory than available and can sometimes negatively impact model generalization. Smaller batch sizes use less memory and may generalize better, but training can be slower per epoch due to less efficient hardware utilization.\n'

### Data paths and charset

In [None]:
# --- File locations -------------------------------------------------------

# Input HDF5 dataset file, derived from the dataset name.
SOURCE_PATH = f"data/{SOURCE}.hdf5"

# Directory that receives everything this run produces (the model summary,
# checkpoint weights and training report are written here by later cells).
OUTPUT_PATH = f"output/{SOURCE}/{ARCHITECTURE}/"

# Weights file used both for saving and for loading checkpoints.
CHECKPOINT_PATH = f"{OUTPUT_PATH}checkpoint_weights.weights.h5"

# --- Model input / text limits --------------------------------------------

# Shape of the model input image.
# NOTE(review): the original note labelled the axes (height, width, channels);
# confirm the axis order that HTRModel actually expects.
INPUT_SIZE = (1024, 128, 1)

# Maximum number of characters per text line.
MAX_TEXT_LENGTH = 128

# Characters the recognizer may emit: the first 95 entries of
# string.printable (digits, ASCII letters, punctuation and the space),
# followed by the Latin letters with diacritics Č, Ć, Đ, Š, Ž in both cases.
charset_base = string.printable[:95] + "ČčĆćĐđŠšŽž"

# Make sure the output directory exists before anything is written to it.
os.makedirs(OUTPUT_PATH, exist_ok=True)

# Echo the resolved configuration, one labelled value per line.
for label, value in (
    ("source:", SOURCE_PATH),
    ("output:", OUTPUT_PATH),
    ("checkpoint:", CHECKPOINT_PATH),
    ("charset:", charset_base),
):
    print(label, value)

source: data/mine_logs_v1.hdf5
output output/mine_logs_v1/flor/
target output/mine_logs_v1/flor/checkpoint_weights.weights.h5
charset: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ ČčĆćĐđŠšŽž


### 3.2 DataGenerator Class

In [None]:
import os
import sys

# Directory that holds the project's local packages (`data.generator` is
# imported from it below). The location can be overridden per machine with the
# VI_HTR_SRC environment variable; when the variable is unset, the original
# hard-coded location is used, so existing setups keep working unchanged.
SRC_DIR_PATH = os.environ.get("VI_HTR_SRC", '/mnt/c/AB_data_haris/vi-htr/src/')

# Add the source directory to the Python path to import local modules.
# THIS IS REQUIRED! The guard keeps re-running the cell from appending the
# same entry more than once.
if SRC_DIR_PATH not in sys.path:
    sys.path.append(SRC_DIR_PATH)

# Verify the path was added (optional)
print(sys.path)

from data.generator import DataGenerator

# Data generator built from the HDF5 dataset, batch size, charset and
# maximum text length configured in the cells above.
dtgen = DataGenerator(source=SOURCE_PATH,
                      batch_size=BATCH_SIZE,
                      charset=charset_base,
                      max_text_length=MAX_TEXT_LENGTH)

# Report how many images each partition contains.
print(f"Train images: {dtgen.size['train']}")
print(f"Validation images: {dtgen.size['valid']}")
print(f"Test images: {dtgen.size['test']}")

['/home/haris/miniconda3/envs/TF-Py/lib/python312.zip', '/home/haris/miniconda3/envs/TF-Py/lib/python3.12', '/home/haris/miniconda3/envs/TF-Py/lib/python3.12/lib-dynload', '', '/home/haris/miniconda3/envs/TF-Py/lib/python3.12/site-packages', '/mnt/c/AB_data_haris/vi-htr/src/']
Train images: 765
Validation images: 219
Test images: 110


### 3.3 HTRModel Class

In [8]:
# Show the installed TensorFlow version so the run's environment is recorded
# alongside its outputs.
print(tf.__version__)

2.19.0


In [9]:
from network.model import HTRModel

# Constructor settings for the HTR network, gathered in one place.
# NOTE(review): `stop_tolerance` / `reduce_tolerance` / `reduce_factor` look
# like early-stopping and learning-rate-reduction settings, and `beam_width`
# like a decoding parameter — confirm against network.model.
model_options = dict(
    architecture=ARCHITECTURE,
    input_size=INPUT_SIZE,
    vocab_size=dtgen.tokenizer.vocab_size,
    beam_width=10,
    stop_tolerance=20,
    reduce_tolerance=15,
    reduce_factor=0.1,
)

# Create the model, compile it, and emit its summary ("summary.txt",
# with OUTPUT_PATH as the first argument).
model = HTRModel(**model_options)
model.compile(learning_rate=0.001)
model.summary(OUTPUT_PATH, "summary.txt")

# Load checkpoint weights (HDF5) if the file exists, then fetch the default
# callbacks, pointing both logging and checkpointing at this run's paths.
model.load_checkpoint(target=CHECKPOINT_PATH)

callbacks = model.get_callbacks(
    logdir=OUTPUT_PATH,
    checkpoint=CHECKPOINT_PATH,
    verbose=1,
)

I0000 00:00:1750250687.552390   14587 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9711 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060, pci bus id: 0000:05:00.0, compute capability: 8.6


KeyboardInterrupt: 

## 4 Training

In [None]:
# Timestamp taken immediately before fitting; total and per-epoch durations
# are derived from it afterwards.
start_time = datetime.datetime.now()

h = model.fit(
    x=dtgen.next_train_batch(),
    epochs=EPOCHS,
    steps_per_epoch=dtgen.steps['train'],
    validation_data=dtgen.next_valid_batch(),
    validation_steps=dtgen.steps['valid'],
    callbacks=callbacks,
    shuffle=True,
    verbose=1,
)

total_time = datetime.datetime.now() - start_time

# Per-epoch loss values recorded during fitting.
loss, val_loss = h.history['loss'], h.history['val_loss']

# Index (0-based) and value of the lowest validation loss; on ties the
# earliest epoch is kept.
min_val_loss_i, min_val_loss = min(enumerate(val_loss), key=lambda entry: entry[1])

# len(loss) is the number of epochs that actually ran.
time_epoch = total_time / len(loss)
total_item = sum(dtgen.size[part] for part in ('train', 'valid'))

# Plain-text training report; entries ending in "\n" leave a blank line
# between the groups once joined.
report_lines = [
    f"Total train images:      {dtgen.size['train']}",
    f"Total validation images: {dtgen.size['valid']}",
    f"Batch:                   {dtgen.batch_size}\n",
    f"Total time:              {total_time}",
    f"Time per epoch:          {time_epoch}",
    f"Time per item:           {time_epoch / total_item}\n",
    f"Total epochs:            {len(loss)}",
    f"Best epoch               {min_val_loss_i + 1}\n",
    f"Training loss:           {loss[min_val_loss_i]:.8f}",
    f"Validation loss:         {min_val_loss:.8f}",
]
t_corpus = "\n".join(report_lines)

# Save the report next to the other run outputs and show it in the notebook.
report_path = os.path.join(OUTPUT_PATH, "train.txt")
with open(report_path, "w") as report_file:
    report_file.write(t_corpus)
    print(t_corpus)

Epoch 1/100


E0000 00:00:1750250166.107443   13845 meta_optimizer.cc:967] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/functional_1/dropout_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
I0000 00:00:1750250166.956665   14283 cuda_dnn.cc:529] Loaded cuDNN version 90300
