In [None]:
!pip install opencv-python

import tensorflow as tf
print(tf.config.list_physical_devices('GPU'))

In [2]:
import os
import glob
from google.colab import drive

# 1. Mount your Google Drive
drive.mount('/content/drive')

# 2. Define the path to your zip file on Drive
DRIVE_ZIP_PATH = "/content/drive/MyDrive/NUS/Y3S1/CS4243/Mini Project/OCRCaptcha/data.zip"
LOCAL_DATA_PATH = "/content/data/"

# 3. Copy the zip file from Drive to Colab's fast local disk
print("Copying data from Drive...")
!cp "{DRIVE_ZIP_PATH}" /content/

# 4. Unzip the data quietly
print("Unzipping data...")
!unzip -q /content/data.zip -d /content/

# 5. Set your global paths to use the NEW local directory
train_data_path = os.path.join(LOCAL_DATA_PATH, "main/train/")
test_data_path = os.path.join(LOCAL_DATA_PATH, "main/test/")
MODEL_PATH = 'models/best_crnn_model.keras'

# 6. Make a local 'models' folder
!mkdir -p models

print(f"Done! Training data is now at: {train_data_path}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Copying data from Drive...
Unzipping data...
replace /content/data/main/0024miih-0.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: Done! Training data is now at: /content/data/main/train/


In [3]:
import random
import cv2
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from tensorflow.keras import backend as K

# === 1. LOAD CONSTANTS ===
# Collect the PNG files of each split in a stable (sorted) order.
train_image_paths = sorted(glob.glob(os.path.join(train_data_path, "*.png")))
test_paths = sorted(glob.glob(os.path.join(test_data_path, "*.png")))

# Fail fast with a readable message instead of a cryptic `max()` error further
# down when the dataset was not extracted where this notebook expects it.
if not train_image_paths:
    raise FileNotFoundError(
        f"No training images (*.png) found in {train_data_path!r}. "
        "Check that the dataset was copied and unzipped."
    )

# The label of an image is the part of its file name before the first '-'.
train_labels = [os.path.basename(path).split('-')[0] for path in train_image_paths]
test_labels = [os.path.basename(path).split('-')[0] for path in test_paths]

# Get vocab and max length (computed over both splits so every character that
# occurs in any label has an index).
all_labels = train_labels + test_labels
all_characters = set(char for label in all_labels for char in label)
vocabulary = sorted(all_characters)
max_label_len = max(len(label) for label in all_labels)
IMG_HEIGHT = 50
IMG_WIDTH = 495

# Create mapping (Index 0 is for CTC 'blank' token, so characters start at 1)
char_to_num = {char: i + 1 for i, char in enumerate(vocabulary)}
num_to_char = {i + 1: char for i, char in enumerate(vocabulary)}

print(f"Vocab size: {len(vocabulary)}, Max length: {max_label_len}")


# === 2. PREPROCESSING FUNCTION (UPDATED with Denoising) ===
def prepare_image_for_crnn(image_path, img_width=IMG_WIDTH, img_height=IMG_HEIGHT):
    """Load a captcha image and turn it into a CRNN input tensor.

    Pipeline: grayscale read -> median blur -> CLAHE -> inverted adaptive
    threshold -> morphological opening -> resize to ``img_height`` keeping the
    aspect ratio (width capped at ``img_width``) -> right-pad with zeros ->
    scale to [0, 1] -> transpose so the width axis comes first.

    Args:
        image_path: Path of the image file to read.
        img_width: Width of the padded output canvas, in pixels.
        img_height: Height every image is resized to, in pixels.

    Returns:
        A ``float32`` array of shape ``(img_width, img_height, 1)``, or
        ``None`` (after printing a warning) when the file cannot be read.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        print(f"Warning: Could not read image {image_path}. Skipping.")
        return None

    # Denoise BEFORE enhancing contrast so CLAHE does not amplify the noise.
    img = cv2.medianBlur(img, 3)

    # 1. CLAHE (local contrast equalisation)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    img = clahe.apply(img)

    # 2. Adaptive thresholding; THRESH_BINARY_INV inverts the binary result so
    #    the zero padding added below matches the background value.
    img = cv2.adaptiveThreshold(
        img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 15, 3
    )

    # 3. Morphological opening with a 2x2 kernel to remove small specks.
    kernel = np.ones((2, 2), np.uint8)
    img = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)

    # 4. Resize to the target height, preserving the aspect ratio.
    h, w = img.shape
    scale = img_height / h
    # Clamp to [1, img_width]: a very tall, narrow image would otherwise give a
    # width of 0, which cv2.resize rejects.
    new_w = max(1, min(img_width, int(w * scale)))
    img = cv2.resize(img, (new_w, img_height))

    # 5. Pad on the right up to the fixed canvas width.
    target = np.zeros((img_height, img_width), dtype=np.uint8)
    target[:, :new_w] = img

    # 6. Normalize to [0, 1].
    img_float = target.astype(np.float32) / 255.0

    # 7. Transpose to (width, height) so the first axis runs along the text.
    img_transposed = np.transpose(img_float, (1, 0))

    # 8. Add the trailing channel dimension.
    img_final = np.expand_dims(img_transposed, axis=-1)

    return img_final


# === 3. CTC LOSS FUNCTION (FIXED) ===
def ctc_loss_function(y_true, y_pred):
    """Mean CTC loss over a batch.

    Args:
        y_true: Integer-encoded labels, one row per sample, right-padded with
            0. Index 0 is also the CTC blank, so real characters are > 0.
        y_pred: Batch-major network outputs passed to ``tf.nn.ctc_loss`` as
            ``logits`` (``logits_time_major=False``).

    Returns:
        A scalar tensor: the per-sample CTC losses averaged over the batch.
    """
    labels = tf.cast(y_true, dtype="int32")

    # Padding positions hold 0, so the count of strictly positive entries in a
    # row is that sample's true label length.
    is_real_token = tf.cast(y_true > 0, dtype="int32")
    label_lengths = tf.reduce_sum(is_real_token, axis=1)

    # Every sample uses all time steps emitted by the network.
    n_samples = tf.shape(y_true)[0]
    n_steps = tf.shape(y_pred)[1]
    logit_lengths = tf.fill([n_samples], n_steps)

    per_sample_loss = tf.nn.ctc_loss(
        labels=labels,
        logits=y_pred,
        label_length=label_lengths,
        logit_length=logit_lengths,
        logits_time_major=False,
        blank_index=0
    )
    return tf.reduce_mean(per_sample_loss)


# === 4. DECODE FUNCTION ===
def decode_batch_predictions(pred, index_to_char=None, blank_index=0):
    """Greedy (best-path) CTC decoding of a batch of network outputs.

    The decoding is done directly with NumPy so that it uses the same blank
    index as ``ctc_loss_function`` (``blank_index=0``) and accepts the raw
    logits the model emits. The legacy Keras ``ctc_decode`` helper reserves
    the *last* class for the blank and takes the log of its input as if it
    were probabilities, which does not match how this model is trained.
    Arg-max is unaffected by softmax, so no normalisation is needed here.

    Args:
        pred: Array-like of shape ``(batch, time_steps, num_classes)`` holding
            logits or probabilities.
        index_to_char: Optional mapping from class index to character.
            Defaults to the notebook-level ``num_to_char``.
        blank_index: Class index of the CTC blank token.

    Returns:
        A list with one decoded string per batch element.

    Raises:
        ValueError: If ``pred`` is not 3-dimensional.
    """
    if index_to_char is None:
        # Fall back to the mapping built from the dataset vocabulary.
        index_to_char = num_to_char

    scores = np.asarray(pred)
    if scores.ndim != 3:
        raise ValueError(
            f"pred must have shape (batch, time_steps, num_classes), got {scores.shape}"
        )
    if scores.shape[1] == 0 or scores.shape[2] == 0:
        # Nothing to decode: no time steps or no classes.
        return [''] * scores.shape[0]

    best_path = scores.argmax(axis=-1)  # (batch, time_steps)

    output_text = []
    for path in best_path:
        chars = []
        previous = None
        for idx in path:
            idx = int(idx)
            # CTC collapse rule: merge consecutive repeats first, then drop
            # blanks. A blank between two equal characters keeps both.
            if idx != previous and idx != blank_index:
                chars.append(index_to_char.get(idx, ''))
            previous = idx
        output_text.append(''.join(chars))
    return output_text


# === 5. DATA GENERATOR (FIXED for Keras 3 - Augmentation Removed) ===
class CaptchaDataGenerator(keras.utils.Sequence):
    """Batch generator that preprocesses captcha images and encodes labels.

    Each batch is a tuple ``(X, y)`` where ``X`` has shape
    ``(n, img_width, img_height, 1)`` (float32) and ``y`` has shape
    ``(n, max_label_length)`` (int32, right-padded with 0).

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of label strings, aligned with ``image_paths``.
        char_to_num: Mapping from character to its (non-zero) class index.
        img_width: Width passed to ``prepare_image_for_crnn``.
        img_height: Height passed to ``prepare_image_for_crnn``.
        batch_size: Number of samples per batch.
        max_label_length: Length every encoded label is padded/truncated to.
        shuffle: Reshuffle the sample order at construction and after every
            epoch.
        drop_remainder: When True (the default, matching the previous
            behaviour) a trailing partial batch is skipped. Set to False to
            also yield the final, smaller batch, e.g. so that every validation
            sample is evaluated.
        **kwargs: Forwarded to ``keras.utils.Sequence`` (for example
            ``workers`` and ``use_multiprocessing``).

    Raises:
        ValueError: If ``image_paths`` and ``labels`` differ in length.
    """

    def __init__(self, image_paths, labels, char_to_num,
                 img_width, img_height, batch_size,
                 max_label_length, shuffle=True, drop_remainder=True, **kwargs):

        super().__init__(**kwargs)  # Keras 3 requires the base initialiser to run

        if len(image_paths) != len(labels):
            raise ValueError(
                f"image_paths ({len(image_paths)}) and labels ({len(labels)}) "
                "must have the same length"
            )

        self.image_paths = image_paths
        self.labels = labels
        self.char_to_num = char_to_num
        self.img_width = img_width
        self.img_height = img_height
        self.batch_size = batch_size
        self.max_label_length = max_label_length
        self.shuffle = shuffle
        self.drop_remainder = drop_remainder
        self.indexes = np.arange(len(self.image_paths))
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch."""
        n_samples = len(self.image_paths)
        if self.drop_remainder:
            return n_samples // self.batch_size
        # Ceiling division so the trailing partial batch is included.
        return -(-n_samples // self.batch_size)

    def on_epoch_end(self):
        """Reshuffle the sample order in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(X, y)``."""
        batch_indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        batch_paths = [self.image_paths[k] for k in batch_indexes]
        batch_labels = [self.labels[k] for k in batch_indexes]
        X, y = self.__data_generation(batch_paths, batch_labels)
        return X, y

    def __data_generation(self, batch_paths, batch_labels):
        """Preprocess the images and encode the labels of one batch."""
        # Size the buffers from the actual batch so a trailing partial batch
        # does not carry all-zero filler rows.
        n_rows = len(batch_paths)
        X = np.zeros((n_rows, self.img_width, self.img_height, 1), dtype=np.float32)
        y = np.zeros((n_rows, self.max_label_length), dtype=np.int32)

        for i, (img_path, label) in enumerate(zip(batch_paths, batch_labels)):
            img = prepare_image_for_crnn(img_path, self.img_width, self.img_height)
            if img is None:
                # Unreadable file: the row keeps its all-zero image and an
                # all-zero (i.e. empty) label.
                continue

            X[i] = img

            encoded_label = [self.char_to_num[char] for char in label]
            padded_label = encoded_label + [0] * (self.max_label_length - len(encoded_label))
            # Slicing also truncates labels longer than max_label_length.
            y[i] = padded_label[:self.max_label_length]

        return X, y

# === 6. MODEL CLASS (FIXED - Augmentation Added to Model) ===

# --- DATA AUGMENTATION (Moved from bottom) ---
# Define a small augmentation pipeline
# Light geometric augmentation pipeline: small random rotation, shift and zoom.
# Pixels uncovered by a transform are filled with the constant 0.0.
data_augmentation = keras.Sequential(
    name="data_augmentation",
    layers=[
        layers.RandomRotation(
            factor=0.02,
            fill_mode='constant',
            fill_value=0.0,
        ),
        layers.RandomTranslation(
            height_factor=0.05,
            width_factor=0.05,
            fill_mode='constant',
            fill_value=0.0,
        ),
        layers.RandomZoom(
            height_factor=0.05,
            width_factor=0.05,
            fill_mode='constant',
            fill_value=0.0,
        ),
    ],
)


class CRNNCaptchaModel(keras.Model):
    """Thin wrapper that builds, compiles and trains the CRNN captcha network.

    The actual network is a functional Keras model stored in ``self.model``
    (see ``get_model``): augmentation -> 5 conv blocks -> reshape to a
    sequence -> 2 bidirectional LSTMs -> per-time-step dense logits over
    ``vocab_size + 1`` classes (the extra class is the CTC blank).

    Args:
        img_width: Input width; first axis of the ``(width, height, 1)`` input.
        img_height: Input height; second axis of the input.
        vocab_size: Number of distinct characters (excluding the blank).
    """

    def __init__(self, img_width, img_height, vocab_size):
        super().__init__()
        self.img_width = img_width
        self.img_height = img_height
        self.vocab_size = vocab_size
        self.build_model()

    def build_model(self):
        """Create the functional network and store it in ``self.model``."""
        regularizer = keras.regularizers.l2(1e-5)

        input_img = layers.Input(
            shape=(self.img_width, self.img_height, 1),
            name='image', dtype='float32'
        )

        # Augmentation is part of the model graph. Keras' random preprocessing
        # layers are only active in training mode (fit); during evaluate /
        # predict they pass the input through unchanged, so no manual
        # training-flag switch is needed.
        x = data_augmentation(input_img)

        # Convolutional feature extractor, L2-regularised.
        x = layers.Conv2D(32, (3, 3), activation='relu', padding='same', name='conv1', kernel_regularizer=regularizer)(x)
        x = layers.MaxPooling2D((2, 2), name='pool1')(x)
        x = layers.Conv2D(64, (3, 3), activation='relu', padding='same', name='conv2', kernel_regularizer=regularizer)(x)
        x = layers.MaxPooling2D((2, 2), name='pool2')(x)
        x = layers.Conv2D(128, (3, 3), activation='relu', padding='same', name='conv3', kernel_regularizer=regularizer)(x)
        x = layers.BatchNormalization(name='bn1')(x)
        # NOTE(review): pool3/pool4 use (2, 1), which halves the FIRST spatial
        # axis. With (width, height, 1) inputs that is the width/time axis, so
        # the sequence length shrinks 16x overall - confirm this is intended
        # (the alternative (1, 2) would shrink the height instead).
        x = layers.MaxPooling2D((2, 1), name='pool3')(x)
        x = layers.Conv2D(128, (3, 3), activation='relu', padding='same', name='conv4', kernel_regularizer=regularizer)(x)
        x = layers.BatchNormalization(name='bn2')(x)
        x = layers.MaxPooling2D((2, 1), name='pool4')(x)
        x = layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='conv5', kernel_regularizer=regularizer)(x)
        x = layers.BatchNormalization(name='bn3')(x)

        # Flatten (height, channels) into one feature vector per time step.
        conv_output_shape = x.shape
        conv_output_width = conv_output_shape[1]
        new_features = conv_output_shape[2] * conv_output_shape[3]
        x = layers.Reshape(target_shape=(conv_output_width, new_features), name='reshape')(x)
        x = layers.Dropout(0.4, name='dropout_after_reshape')(x)

        # Recurrent sequence model.
        x = layers.Bidirectional(
            layers.LSTM(128, return_sequences=True, dropout=0.25),
            name='bidirectional_lstm_1'
        )(x)
        x = layers.Bidirectional(
            layers.LSTM(64, return_sequences=True, dropout=0.25),
            name='bidirectional_lstm_2'
        )(x)

        # Raw logits (no activation); +1 class for the CTC blank.
        output = layers.Dense(
            self.vocab_size + 1,
            activation=None,
            name='output',
            kernel_regularizer=regularizer
        )(x)

        self.model = keras.models.Model(inputs=input_img, outputs=output, name='CRNN_CAPTCHA')

    def get_model(self):
        """Return the underlying functional Keras model."""
        return self.model

    def compile(self, optimizer, loss, **kwargs):
        """Compile the underlying model.

        Args:
            optimizer: Optimizer instance or name.
            loss: Loss function or name.
            **kwargs: Extra options forwarded to ``Model.compile``
                (for example ``metrics``).
        """
        self.model.compile(optimizer=optimizer, loss=loss, **kwargs)

    def train(self, train_generator, val_generator, epochs, callbacks):
        """Fit the underlying model and return the Keras ``History`` object."""
        history = self.model.fit(
            train_generator,
            validation_data=val_generator,
            epochs=epochs,
            callbacks=callbacks,
            verbose=1
        )
        return history

# === 7. DATA AUGMENTATION ===
# The augmentation pipeline (`data_augmentation = keras.Sequential([...])`) is
# defined upstream, before the model class, because the model uses it when it
# is built. Nothing is defined in this section; it only contains comments.

Vocab size: 36, Max length: 8


In [4]:
# === SPLIT DATA ===
# Rebuild the full label list from the full path list before splitting. The
# split result is stored in `train_labels`, so using `train_labels` as the
# input as well would make a second run of this cell fail (paths and labels
# would no longer have the same length).
full_train_labels = [os.path.basename(path).split('-')[0] for path in train_image_paths]
train_paths, val_paths, train_labels, val_labels = train_test_split(
    train_image_paths,
    full_train_labels,
    test_size=0.1,
    random_state=42
)

# === CREATE GENERATORS ===
# Worker settings are passed to the generator (not to fit()), as Keras 3 expects.
train_generator = CaptchaDataGenerator(
    train_paths, train_labels, char_to_num,
    IMG_WIDTH, IMG_HEIGHT, batch_size=32,
    max_label_length=max_label_len, shuffle=True,
    use_multiprocessing=True,
    workers=8
)

val_generator = CaptchaDataGenerator(
    val_paths, val_labels, char_to_num,
    IMG_WIDTH, IMG_HEIGHT, batch_size=32,
    max_label_length=max_label_len, shuffle=False,
    use_multiprocessing=True,
    workers=8
)

# === BUILD & COMPILE MODEL ===
crnn_model = CRNNCaptchaModel(IMG_WIDTH, IMG_HEIGHT, len(vocabulary))
crnn_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=ctc_loss_function
)
crnn_model.get_model().summary()

# === CALLBACKS ===
# All three callbacks watch the validation loss.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=10, restore_best_weights=True, verbose=1
)
model_checkpoint = keras.callbacks.ModelCheckpoint(
    MODEL_PATH, monitor='val_loss', save_best_only=True, verbose=1
)
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6, verbose=1
)

# === START TRAINING ===
print("\nSTARTING TRAINING...\n")
history = crnn_model.train(
    train_generator=train_generator,
    val_generator=val_generator,
    epochs=50,
    callbacks=[early_stopping, model_checkpoint, reduce_lr]
)
print("TRAINING COMPLETE!")


STARTING TRAINING...

Epoch 1/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 145ms/step - loss: 26.5583
Epoch 1: val_loss improved from inf to 27.82182, saving model to models/best_crnn_model.keras
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 166ms/step - loss: 26.5469 - val_loss: 27.8218 - learning_rate: 0.0010
Epoch 2/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step - loss: 23.2798
Epoch 2: val_loss did not improve from 27.82182
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 165ms/step - loss: 23.2796 - val_loss: 58.4498 - learning_rate: 0.0010
Epoch 3/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 146ms/step - loss: 22.6191
Epoch 3: val_loss improved from 27.82182 to 21.01328, saving model to models/best_crnn_model.keras
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 161ms/step - loss: 22.6161 - val_loss: 21.0133 - learning_rate: 0.0010
Epoch 4/50

STARTING TRAINING...
Epoch 1/50
/usr/local/lib/python3.12/dist-packages/keras/src/trainers/data_adapters/py_dataset_adapter.py:121: UserWarning: Your `PyDataset` class should call `super().__init__(**kwargs)` in its constructor. `**kwargs` can include `workers`, `use_multiprocessing`, `max_queue_size`. Do not pass these arguments to `fit()`, as they will be ignored.
  self._warn_if_super_not_called()
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 131ms/step - loss: 26.0810
Epoch 1: val_loss improved from inf to 24.06589, saving model to models/best_crnn_model.keras
225/225 ━━━━━━━━━━━━━━━━━━━━ 46s 154ms/step - loss: 26.0710 - val_loss: 24.0659 - learning_rate: 0.0010
Epoch 2/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 132ms/step - loss: 23.0881
Epoch 2: val_loss improved from 24.06589 to 22.77093, saving model to models/best_crnn_model.keras
225/225 ━━━━━━━━━━━━━━━━━━━━ 33s 145ms/step - loss: 23.0873 - val_loss: 22.7709 - learning_rate: 0.0010
Epoch 3/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 131ms/step - loss: 21.5873
Epoch 3: val_loss improved from 22.77093 to 18.82375, saving model to models/best_crnn_model.keras
225/225 ━━━━━━━━━━━━━━━━━━━━ 33s 145ms/step - loss: 21.5822 - val_loss: 18.8238 - learning_rate: 0.0010
Epoch 4/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 133ms/step - loss: 16.1739
Epoch 4: val_loss improved from 18.82375 to 13.42686, saving model to models/best_crnn_model.keras
225/225 ━━━━━━━━━━━━━━━━━━━━ 32s 143ms/step - loss: 16.1688 - val_loss: 13.4269 - learning_rate: 0.0010
Epoch 5/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 132ms/step - loss: 12.1346
Epoch 5: val_loss improved from 13.42686 to 11.25575, saving model to models/best_crnn_model.keras
225/225 ━━━━━━━━━━━━━━━━━━━━ 41s 143ms/step - loss: 12.1327 - val_loss: 11.2558 - learning_rate: 0.0010
Epoch 6/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 132ms/step - loss: 10.2479
Epoch 6: val_loss improved from 11.25575 to 10.20178, saving model to models/best_crnn_model.keras
225/225 ━━━━━━━━━━━━━━━━━━━━ 32s 143ms/step - loss: 10.2473 - val_loss: 10.2018 - learning_rate: 0.0010
Epoch 7/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 137ms/step - loss: 9.0712
Epoch 7: val_loss did not improve from 10.20178
225/225 ━━━━━━━━━━━━━━━━━━━━ 33s 148ms/step - loss: 9.0713 - val_loss: 10.2769 - learning_rate: 0.0010
Epoch 8/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 132ms/step - loss: 8.4002
Epoch 8: val_loss improved from 10.20178 to 9.60412, saving model to models/best_crnn_model.keras
225/225 ━━━━━━━━━━━━━━━━━━━━ 32s 143ms/step - loss: 8.4002 - val_loss: 9.6041 - learning_rate: 0.0010
Epoch 9/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 133ms/step - loss: 7.6926
Epoch 9: val_loss improved from 9.60412 to 9.32790, saving model to models/best_crnn_model.keras
225/225 ━━━━━━━━━━━━━━━━━━━━ 34s 149ms/step - loss: 7.6928 - val_loss: 9.3279 - learning_rate: 0.0010
Epoch 10/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 131ms/step - loss: 7.3063
Epoch 10: val_loss improved from 9.32790 to 8.65429, saving model to models/best_crnn_model.keras
225/225 ━━━━━━━━━━━━━━━━━━━━ 32s 142ms/step - loss: 7.3061 - val_loss: 8.6543 - learning_rate: 0.0010
Epoch 11/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 131ms/step - loss: 6.6035
Epoch 11: val_loss did not improve from 8.65429
225/225 ━━━━━━━━━━━━━━━━━━━━ 32s 141ms/step - loss: 6.6042 - val_loss: 9.0576 - learning_rate: 0.0010
Epoch 12/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 134ms/step - loss: 6.0726
Epoch 12: val_loss improved from 8.65429 to 8.48881, saving model to models/best_crnn_model.keras
225/225 ━━━━━━━━━━━━━━━━━━━━ 33s 148ms/step - loss: 6.0737 - val_loss: 8.4888 - learning_rate: 0.0010
Epoch 13/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 131ms/step - loss: 5.8430
Epoch 13: val_loss improved from 8.48881 to 8.44051, saving model to models/best_crnn_model.keras
225/225 ━━━━━━━━━━━━━━━━━━━━ 32s 142ms/step - loss: 5.8433 - val_loss: 8.4405 - learning_rate: 0.0010
Epoch 14/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 130ms/step - loss: 5.3896
Epoch 14: val_loss did not improve from 8.44051
225/225 ━━━━━━━━━━━━━━━━━━━━ 32s 140ms/step - loss: 5.3902 - val_loss: 8.5640 - learning_rate: 0.0010
Epoch 15/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 132ms/step - loss: 5.0965
Epoch 15: val_loss improved from 8.44051 to 8.42459, saving model to models/best_crnn_model.keras
225/225 ━━━━━━━━━━━━━━━━━━━━ 43s 150ms/step - loss: 5.0970 - val_loss: 8.4246 - learning_rate: 0.0010
Epoch 16/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 131ms/step - loss: 4.7572
Epoch 16: val_loss did not improve from 8.42459
225/225 ━━━━━━━━━━━━━━━━━━━━ 32s 141ms/step - loss: 4.7579 - val_loss: 8.7937 - learning_rate: 0.0010
Epoch 17/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 131ms/step - loss: 4.4304
Epoch 17: val_loss did not improve from 8.42459
225/225 ━━━━━━━━━━━━━━━━━━━━ 32s 141ms/step - loss: 4.4311 - val_loss: 8.7627 - learning_rate: 0.0010
Epoch 18/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 133ms/step - loss: 4.2212
Epoch 18: val_loss did not improve from 8.42459
225/225 ━━━━━━━━━━━━━━━━━━━━ 33s 147ms/step - loss: 4.2215 - val_loss: 8.9010 - learning_rate: 0.0010
Epoch 19/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 131ms/step - loss: 4.0125
Epoch 19: val_loss did not improve from 8.42459
225/225 ━━━━━━━━━━━━━━━━━━━━ 32s 141ms/step - loss: 4.0127 - val_loss: 9.3011 - learning_rate: 0.0010
Epoch 20/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 131ms/step - loss: 3.7029
Epoch 20: val_loss did not improve from 8.42459

Epoch 20: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
225/225 ━━━━━━━━━━━━━━━━━━━━ 41s 140ms/step - loss: 3.7033 - val_loss: 9.5020 - learning_rate: 0.0010
Epoch 21/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 132ms/step - loss: 3.1940
Epoch 21: val_loss did not improve from 8.42459
225/225 ━━━━━━━━━━━━━━━━━━━━ 35s 154ms/step - loss: 3.1935 - val_loss: 9.2582 - learning_rate: 5.0000e-04
Epoch 22/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 132ms/step - loss: 2.5649
Epoch 22: val_loss did not improve from 8.42459
225/225 ━━━━━━━━━━━━━━━━━━━━ 32s 142ms/step - loss: 2.5653 - val_loss: 9.1584 - learning_rate: 5.0000e-04
Epoch 23/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 132ms/step - loss: 2.3671
Epoch 23: val_loss did not improve from 8.42459
225/225 ━━━━━━━━━━━━━━━━━━━━ 32s 143ms/step - loss: 2.3673 - val_loss: 9.3659 - learning_rate: 5.0000e-04
Epoch 24/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 136ms/step - loss: 2.2004
Epoch 24: val_loss did not improve from 8.42459
225/225 ━━━━━━━━━━━━━━━━━━━━ 33s 146ms/step - loss: 2.2006 - val_loss: 9.5862 - learning_rate: 5.0000e-04
Epoch 25/50
225/225 ━━━━━━━━━━━━━━━━━━━━ 0s 137ms/step - loss: 2.0098
Epoch 25: val_loss did not improve from 8.42459

Epoch 25: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
225/225 ━━━━━━━━━━━━━━━━━━━━ 41s 147ms/step - loss: 2.0101 - val_loss: 9.7527 - learning_rate: 5.0000e-04
Epoch 25: early stopping
Restoring model weights from the end of the best epoch: 15.
TRAINING COMPLETE!

In [1]:
# === LOAD BEST MODEL ===
# Needs the earlier setup cells to have run in this kernel: `keras`,
# `MODEL_PATH` and `ctc_loss_function` are defined there. The custom loss is
# registered by name so the saved model can be restored.
loaded_model = keras.models.load_model(
    filepath=MODEL_PATH,
    custom_objects=dict(ctc_loss_function=ctc_loss_function),
)
print("✓ Best model loaded successfully!")

# === EVALUATION FUNCTION (MODIFIED) ===
def test_multiple_from_loaded_model(num_samples=10):
    """Predict a random sample of test images and plot them next to the input.

    For every sampled image the left panel shows the original file and the
    right panel shows the preprocessed network input, titled with the actual
    and predicted text (green when they match, red otherwise). A summary with
    the number of exact matches is printed afterwards.

    Args:
        num_samples: How many test images to draw; capped at the size of the
            test set.

    Returns:
        A list of dicts with keys ``'actual'``, ``'predicted'`` and
        ``'correct'``, one per image that was successfully processed.
    """
    print(f"\nTESTING {num_samples} RANDOM IMAGES FROM TEST SET\n")

    sample_size = max(0, min(num_samples, len(test_paths)))
    test_paths_sample = random.sample(test_paths, sample_size)
    results = []

    if not test_paths_sample:
        # plt.subplots cannot create a grid with zero rows.
        print("No images were tested.")
        return results

    # One row per sampled image (not per requested sample). squeeze=False
    # keeps `axes` two-dimensional even for a single row.
    n_rows = len(test_paths_sample)
    fig, axes = plt.subplots(nrows=n_rows, ncols=2, figsize=(10, n_rows * 2.5), squeeze=False)

    # Hide every frame up front so rows skipped below stay blank instead of
    # showing empty axes.
    for ax in axes.ravel():
        ax.axis('off')

    for i, img_path in enumerate(test_paths_sample):
        actual = os.path.basename(img_path).split('-')[0]

        # Handle cases where image read or prediction might fail
        try:
            img = prepare_image_for_crnn(img_path, IMG_WIDTH, IMG_HEIGHT)
            if img is None:
                print(f"Skipping {img_path}, could not be read.")
                continue

            img_batch = np.expand_dims(img, axis=0)
            pred = loaded_model.predict(img_batch, verbose=0)
            predicted = decode_batch_predictions(pred)[0]
        except Exception as e:
            print(f"Error processing {img_path}: {e}")
            continue

        is_correct = (actual == predicted)
        results.append({'actual': actual, 'predicted': predicted, 'correct': is_correct})

        # --- PLOT 1: ORIGINAL IMAGE ---
        original_img = cv2.imread(img_path)
        rgb_img = cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB)
        ax_orig = axes[i, 0]
        ax_orig.imshow(rgb_img)
        ax_orig.set_title(f"Original: {os.path.basename(img_path)}")

        # --- PLOT 2: PREPROCESSED IMAGE (with Title) ---
        status = '✓' if is_correct else '✗'
        pred_display = predicted if predicted else '(empty)'
        title = f"Actual: {actual} | Predicted: {pred_display} {status}"
        color = 'green' if is_correct else 'red'

        # Undo transpose and remove channel dim for display
        img_to_show = np.transpose(img[:, :, 0], (1, 0))

        ax_proc = axes[i, 1]
        ax_proc.imshow(img_to_show, cmap='gray')
        ax_proc.set_title(title, color=color, fontsize=12)

    plt.tight_layout()
    plt.show()

    # Summary
    correct = sum(1 for r in results if r['correct'])
    total = len(results)
    if total > 0:
        print(f"\n{'='*70}\nSUMMARY:")
        print(f"  Correct:  {correct}/{total} ({(correct/total)*100:.1f}%)\n{'='*70}")
    else:
        print("No images were tested.")

    return results

# === RUN EVALUATION ===
# Evaluate ten randomly drawn test images and keep the per-image outcomes.
results = test_multiple_from_loaded_model(num_samples=10)

NameError: name 'keras' is not defined