# 0. Import Dependencies and Load Data

In [1]:
import os
from tqdm import tqdm
import cv2
import numpy as np
import pandas as pd
from mltu.configs import BaseModelConfigs
from mltu.inferenceModel import OnnxInferenceModel
from mltu.utils.text_utils import ctc_decoder, get_cer

In [2]:
# Load the training-run configuration and the validation samples.
# Both files live in the same timestamped model directory.
configs_path = "Models/1_image_to_word/202211270035/configs.yaml"
val_csv_path = "Models/1_image_to_word/202211270035/val.csv"

configs = BaseModelConfigs.load(configs_path)

# Rows with missing values are dropped; the remainder is kept as a plain
# list of rows (note: despite its name, `val_df` is a list, not a DataFrame).
val_frame = pd.read_csv(val_csv_path)
val_df = val_frame.dropna().values.tolist()

In [3]:
# Display the first column (the image path) of the first three validation rows
[path for path, *_ in val_df[:3]]

['Datasets/90kDICT32px/val/1/3/17_3.png',
 'Datasets/90kDICT32px/val/1/3/17_3.png',
 'Datasets/90kDICT32px/val/1/3/17_3.png']

# 1. Define the Inference Model

In [4]:
class ImageToWordModel(OnnxInferenceModel):
    """ONNX inference wrapper that turns a word image into decoded text.

    Extends ``OnnxInferenceModel`` with the character list needed by
    ``ctc_decoder`` to translate raw model output into a string.
    """

    def __init__(self, char_list: list, *args, **kwargs):
        """Initialise the base inference model and remember the vocabulary.

        Args:
            char_list: Characters handed to ``ctc_decoder`` when decoding.
            *args: Positional arguments forwarded to ``OnnxInferenceModel``.
            **kwargs: Keyword arguments forwarded to ``OnnxInferenceModel``.
        """
        super().__init__(*args, **kwargs)
        self.char_list = char_list

    def predict(self, image: np.ndarray):
        """Run the model on a single image and return the decoded text.

        Args:
            image: Image array as accepted by ``cv2.resize``.

        Returns:
            The first element of the ``ctc_decoder`` result for this image.
        """
        # cv2.resize expects dsize as (width, height), hence the reversed slice.
        # assumes input_shapes[0] is (batch, height, width, channels) — TODO confirm
        target_size = self.input_shapes[0][1:3][::-1]
        resized = cv2.resize(image, target_size)

        # Add a leading batch axis and cast to float32 before feeding the session.
        batch = np.expand_dims(resized, axis=0).astype(np.float32)
        feed = {self.input_names[0]: batch}
        outputs = self.model.run(self.output_names, feed)

        # Only the first model output is decoded; the batch holds one image.
        decoded = ctc_decoder(outputs[0], self.char_list)
        return decoded[0]

# 2. Load Model

In [5]:
# Instantiate the inference model from the path and vocabulary stored in the
# loaded configs, then echo the key settings for a quick sanity check.
model = ImageToWordModel(
    model_path=configs.model_path,
    char_list=configs.vocab,
)
print("Model loaded successfully!")
print(f"Vocabulary: {configs.vocab}")
print(f"Input shape: {configs.height}x{configs.width}")

Model loaded successfully!
Vocabulary: 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
Input shape: 32x128


# 3. Predict and Evaluate

In [6]:
# Test on sample images: predict each one, compare against its label and
# report the character error rate (CER) per sample and on average.
accum_cer = []
for image_path, label in val_df[:3]:  # Testing first 3 samples
    image = cv2.imread(image_path)

    # cv2.imread does not raise on a missing/unreadable file; it returns None.
    # Report that explicitly instead of letting predict() fail obscurely.
    if image is None:
        print(f"Error processing {image_path}: image could not be read")
        continue

    try:
        prediction_text = model.predict(image)
        cer = get_cer(prediction_text, label)
    except Exception as e:
        # Best-effort evaluation: report the failure and move on to the next sample.
        print(f"Error processing {image_path}: {str(e)}")
        continue

    print(f"Image: {image_path}, Label: {label}, Prediction: {prediction_text}, CER: {cer}")
    accum_cer.append(cer)

# Averaging an empty list would yield nan plus a RuntimeWarning, so guard it.
if accum_cer:
    print(f"Average CER: {np.average(accum_cer)}")
else:
    print("Average CER: n/a (no samples were processed successfully)")

Image: Datasets/90kDICT32px/val/1/3/17_3.png, Label: 3, Prediction: 3, CER: 0.0
Image: Datasets/90kDICT32px/val/1/3/17_3.png, Label: 3, Prediction: 3, CER: 0.0
Image: Datasets/90kDICT32px/val/1/3/17_3.png, Label: 3, Prediction: 3, CER: 0.0
Average CER: 0.0


# 4. Saving and Reloading

In [7]:
# Save model (already saved during training, this is just for demonstration)
# NOTE: this cell does not write anything to disk; the message below is a
# placeholder only.
print("Model saved successfully!")

Model saved successfully!


In [8]:
# Drop the current model binding, then construct a fresh instance from the
# same configs to show that the model can be reloaded.
del model
model = ImageToWordModel(
    model_path=configs.model_path,
    char_list=configs.vocab,
)

In [9]:
# Verify reload: the reloaded model should reproduce the label of the first
# validation sample.
first_sample = val_df[0]
test_image = cv2.imread(first_sample[0])
reloaded_prediction = model.predict(test_image)
print(f"Model reloaded and working: {reloaded_prediction == first_sample[1]}")

Model reloaded and working: True
