In [4]:
from keras import layers
from keras.models import Model
from mltu.tensorflow.model_utils import residual_block
from mltu.tensorflow.dataProvider import DataProvider
from mltu.preprocessors import ImageReader
from configs import ModelConfigs
from mltu.tensorflow.callbacks import TrainLogger
from mltu.tensorflow.metrics import CWERMetric
from mltu.annotations.images import CVImage
from mltu.transformers import ImageResizer, LabelIndexer, LabelPadding
from mltu.tensorflow.losses import CTCloss
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from tqdm import tqdm
import tensorflow as tf
import os

In [4]:
def create_model(input_dim, output_dim, activation="leaky_relu", dropout=0.2):
    """Build the residual-CNN + bidirectional-LSTM recognition network.

    Args:
        input_dim: Shape of a single input image, passed to ``layers.Input``
            (the notebook calls this with ``(height, width, 3)``).
        output_dim: Number of vocabulary symbols. The softmax head has
            ``output_dim + 1`` units; the extra class is presumably the CTC
            blank / padding index -- confirm against the loss in use.
        activation: Activation name forwarded to every ``residual_block``.
        dropout: Dropout rate forwarded to every ``residual_block``.

    Returns:
        An uncompiled ``Model`` mapping raw pixel images to a sequence of
        per-position class probabilities.
    """
    inputs = layers.Input(shape=input_dim, name="input")

    # Scale pixels to [0, 1] with a built-in layer rather than
    # Lambda(lambda x: x / 255): a Python lambda inside a saved ``.keras``
    # file cannot be deserialized by Keras 3 unless safe_mode=False is passed,
    # whereas Rescaling round-trips cleanly. The local is also no longer named
    # ``input``, which shadowed the builtin.
    scaled = layers.Rescaling(1.0 / 255)(inputs)

    # Convolutional feature extractor; the two strides=2 blocks downsample the
    # spatial resolution.
    x = residual_block(scaled, 16, activation=activation, skip_conv=True, strides=1, dropout=dropout)
    x = residual_block(x, 16, activation=activation, skip_conv=True, strides=2, dropout=dropout)
    x = residual_block(x, 16, activation=activation, skip_conv=False, strides=1, dropout=dropout)
    x = residual_block(x, 32, activation=activation, skip_conv=True, strides=2, dropout=dropout)
    x = residual_block(x, 32, activation=activation, skip_conv=False, strides=1, dropout=dropout)
    x = residual_block(x, 64, activation=activation, skip_conv=True, strides=1, dropout=dropout)
    x = residual_block(x, 64, activation=activation, skip_conv=False, strides=1, dropout=dropout)

    # Flatten the two spatial axes into one sequence axis for the recurrent layer.
    sequence = layers.Reshape((x.shape[-3] * x.shape[-2], x.shape[-1]))(x)
    recurrent = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(sequence)
    output = layers.Dense(output_dim + 1, activation="softmax", name="output")(recurrent)

    return Model(inputs=inputs, outputs=output)

In [5]:
# Settings object from the local `configs` module. Later cells read
# batch_size, width, height, vocab, max_text_length, learning_rate,
# model_path and train_epochs from it.
configs = ModelConfigs()

In [6]:
# Training split: image folder and the folder holding its transcription files.
train_images_path, train_annotations_path = (
    "./dataset/IDCAR2015_Dataset/train/training_images/",
    "./dataset/IDCAR2015_Dataset/train/training_localization_transcription",
)

# Validation split: same layout as the training split.
val_images_path, val_annotations_path = (
    "./dataset/IDCAR2015_Dataset/val/test_images/",
    "./dataset/IDCAR2015_Dataset/val/test_localization_transcription",
)

In [8]:
def read_annotation_file(image_folder, annotations_folder):
    """Collect ``[image_path, label]`` samples from paired image/annotation folders.

    The files of both folders are sorted by name and paired positionally, so
    the i-th image is matched with the i-th annotation file. Every non-blank
    line of an annotation file is split on whitespace and its second token is
    used as the label; a line with only one token gets the placeholder label
    ``"UNKNOWN"``. Blank lines are skipped.

    NOTE(review): the parsing assumes whitespace-separated ``<name> <label>``
    lines -- confirm this matches the actual annotation format of the dataset.

    Args:
        image_folder: Directory containing the image files.
        annotations_folder: Directory containing one annotation file per image.

    Returns:
        A tuple ``(dataset, vocab, max_len)`` where ``dataset`` is a list of
        ``[image_path, label]`` pairs, ``vocab`` is the sorted list of distinct
        characters seen in the labels, and ``max_len`` is the length of the
        longest label (0 when no label was read).
    """
    import warnings

    dataset, vocab, max_len = [], set(), 0

    # Sorted listings are the only thing tying an image to its annotation file.
    image_files = sorted(os.listdir(image_folder))
    annotation_files = sorted(os.listdir(annotations_folder))

    # zip() stops at the shorter listing; a count mismatch means some files are
    # dropped and the remaining pairs may be misaligned, so make that visible.
    if len(image_files) != len(annotation_files):
        warnings.warn(
            f"{image_folder!r} has {len(image_files)} files but "
            f"{annotations_folder!r} has {len(annotation_files)}; "
            "image/annotation pairing may be misaligned."
        )

    # Materialize the pairs so the progress bar knows the total.
    pairs = list(zip(image_files, annotation_files))
    for image_file, annotation_file in tqdm(pairs):
        image_path = os.path.join(image_folder, image_file)
        annotation_path = os.path.join(annotations_folder, annotation_file)

        with open(annotation_path, 'r') as f:
            for raw_line in f:
                fields = raw_line.split()
                if not fields:
                    # Blank line: not a sample, so do not invent an "UNKNOWN" label.
                    continue

                # Second token is the label; fall back when it is missing.
                label = fields[1] if len(fields) > 1 else "UNKNOWN"

                dataset.append([image_path, label])
                vocab.update(label)
                max_len = max(max_len, len(label))

    return dataset, sorted(vocab), max_len


In [9]:
train_dataset, train_vocab, max_train_len = read_annotation_file(train_images_path, train_annotations_path)
val_dataset, val_vocab, max_val_len = read_annotation_file(val_images_path, val_annotations_path)

# The data providers and the model read configs.vocab / configs.max_text_length,
# so derive them from the data when the config does not already define them.
# An existing (non-empty) configuration is left untouched.
# NOTE(review): assumes ModelConfigs leaves these empty / zero by default --
# confirm against the configs module.
if not configs.vocab:
    configs.vocab = "".join(sorted(set(train_vocab) | set(val_vocab)))
if not configs.max_text_length:
    configs.max_text_length = max(max_train_len, max_val_len)

1000it [00:01, 676.08it/s]
500it [00:00, 742.08it/s]


In [10]:
def _build_data_provider(samples):
    """Wrap a list of ``[image_path, label]`` samples in a ``DataProvider``.

    Both splits share the same pipeline: images are read with ``ImageReader``,
    resized to ``configs.width`` x ``configs.height``, labels are indexed with
    ``configs.vocab`` and padded to ``configs.max_text_length`` using
    ``len(configs.vocab)`` as the padding value.
    """
    pad_value = len(configs.vocab)
    label_padding = LabelPadding(max_word_length=configs.max_text_length, padding_value=pad_value)
    return DataProvider(
        dataset=samples,
        skip_validation=True,
        batch_size=configs.batch_size,
        data_preprocessors=[ImageReader(CVImage)],
        transformers=[
            ImageResizer(configs.width, configs.height),
            LabelIndexer(configs.vocab),
            label_padding,
        ],
    )


# One provider per split, built from the same recipe.
train_data_provider = _build_data_provider(train_dataset)
val_data_provider = _build_data_provider(val_dataset)

In [11]:
# len(vocab) is the index of the extra output class of the softmax head; the
# data providers pad labels with the same value.
padding_token = len(configs.vocab)
model = create_model(input_dim=(configs.height, configs.width, 3), output_dim=padding_token)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=configs.learning_rate),
    loss=CTCloss(),
    # The callbacks monitor "val_CER". Without this metric that key never
    # appears in the training logs, so early stopping, best-checkpoint saving
    # and LR reduction would have nothing to act on.
    metrics=[CWERMetric(padding_token=padding_token)],
    run_eagerly=False,
)






In [13]:
# Define callbacks. All metric-driven callbacks watch the validation character
# error rate, which must be minimized; the direction is stated explicitly
# because newer Keras versions may refuse to infer it for a custom metric name.
earlystopper = EarlyStopping(monitor="val_CER", patience=10, verbose=1, mode="min")
checkpoint = ModelCheckpoint(f"{configs.model_path}/model.keras", monitor="val_CER", verbose=1, save_best_only=True, mode="min")
trainLogger = TrainLogger(configs.model_path)
tb_callback = TensorBoard(f"{configs.model_path}/logs", update_freq=1)
reduceLROnPlat = ReduceLROnPlateau(monitor="val_CER", factor=0.9, min_delta=1e-10, patience=5, verbose=1, mode="min")


In [20]:
# Callbacks in the order they are handed to Keras.
training_callbacks = [earlystopper, checkpoint, trainLogger, reduceLROnPlat, tb_callback]

# Train on the training provider and evaluate on the validation provider
# after every epoch, for at most configs.train_epochs epochs.
model.fit(
    x=train_data_provider,
    validation_data=val_data_provider,
    epochs=configs.train_epochs,
    callbacks=training_callbacks,
)

Epoch 1/100



  self._warn_if_super_not_called()
