In [None]:
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, BatchNormalization, Reshape, Bidirectional, LSTM, Dense, Lambda, Rescaling, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.backend import ctc_batch_cost, ctc_decode
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
# Define constants
image_height, image_width = 50, 200
batch_size = 128

# Directory containing captcha images -- modify as per your directory name
image_dir = 'data'

# List the image files once, so paths and labels are built from the same
# sorted listing and are guaranteed to line up element by element.
image_files = sorted(Path(image_dir).glob("*.jpg"))
if not image_files:
    # Fail early with a readable message instead of "max() arg is an empty sequence"
    raise FileNotFoundError(f"No .jpg images found in directory '{image_dir}'")

# File paths and corresponding labels (the label is the file name without extension)
image_paths = [str(image) for image in image_files]
labels = [image.stem for image in image_files]

# Maximum length of any captcha in the dataset
max_length = max(len(label) for label in labels)

In [None]:
# Sorted list of every distinct character that occurs in any label
all_possible_characters = sorted({symbol for label in labels for symbol in label})

In [None]:
# Display the number of distinct characters found in the labels
len(all_possible_characters)

In [None]:
# Lookup tables between characters and their integer class ids (and back)
char_to_int = dict(zip(all_possible_characters, range(len(all_possible_characters))))
int_to_char = {index: symbol for symbol, index in char_to_int.items()}


def preprocess_image(image_path):
    """Read a JPEG file and return it as a grayscale image tensor.

    The file is decoded with a single channel and resized to
    (image_height, image_width).
    """
    raw_bytes = tf.io.read_file(image_path)
    grayscale = tf.image.decode_jpeg(raw_bytes, channels=1)  # Grayscale image
    return tf.image.resize(grayscale, (image_height, image_width))

In [None]:
# Load every image eagerly and turn each label into a list of integer class ids
images = list(map(preprocess_image, image_paths))
encoded_labels = [[char_to_int[symbol] for symbol in text] for text in labels]

In [None]:
# Create TensorFlow Datasets
# NOTE: from_tensor_slices needs rectangular inputs, so this assumes that all
# labels have the same length.
dataset = tf.data.Dataset.from_tensor_slices((images, encoded_labels))

# Shuffle the dataset ONCE, in a fixed order.
# By default `shuffle` reshuffles on every iteration; any take()/skip() split
# derived from such a dataset would then contain different samples each time it
# is iterated, so training samples would leak into the validation and test
# splits. A fixed seed additionally keeps the split identical across kernel
# restarts (e.g. when evaluating previously saved weights).
# To get a fresh sample order per epoch, shuffle the training split itself
# after it has been taken from this dataset.
dataset = dataset.shuffle(
    buffer_size=len(images),
    seed=42,
    reshuffle_each_iteration=False,
)

In [None]:
# Split sizes: 80% training, 10% validation, whatever is left goes to test
total_images = len(image_paths)
train_size, val_size = int(0.8 * total_images), int(0.1 * total_images)
test_size = total_images - (train_size + val_size)


def _batch_and_prefetch(split):
    """Batch a split with the notebook-wide batch size and prefetch it."""
    return split.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)


# The first `train_size` elements are for training; the rest is divided
# between validation (first `val_size` of the remainder) and test.
remaining_dataset = dataset.skip(train_size)
train_dataset = _batch_and_prefetch(dataset.take(train_size))
validation_dataset = _batch_and_prefetch(remaining_dataset.take(val_size))
test_dataset = _batch_and_prefetch(remaining_dataset.skip(val_size))

In [None]:
def visualize_random_samples(dataset, int_to_char, num_samples=5):
    """Display up to ``num_samples`` images of a batched dataset with their labels.

    Args:
        dataset: Batched ``tf.data.Dataset`` yielding ``(images, labels)`` where
            images have a trailing channel axis and labels are integer class ids.
        int_to_char: Mapping from integer class id to character.
        num_samples: Maximum number of samples to show. Fewer are shown when
            the dataset holds fewer samples.
    """
    # Work on individual samples rather than pulling one whole batch per
    # displayed image: a small split may hold fewer than `num_samples` batches,
    # which would exhaust the iterator and raise StopIteration.
    for image, label in dataset.unbatch().take(num_samples):
        # Decode the label (convert integers to characters)
        text = ''.join(int_to_char[int(x)] for x in label.numpy())

        # Display the image and label
        plt.figure(figsize=(4, 2))
        plt.imshow(image[:, :, 0], cmap='gray')
        plt.title("Label: " + text)
        plt.axis('off')
        plt.show()

# Example usage:
visualize_random_samples(validation_dataset, int_to_char, num_samples=5)

In [None]:
# Learning rate decays exponentially from 1e-3 towards 1e-4.
initial_learning_rate, final_learning_rate = 1e-3, 1e-4

# Number of batches in one pass over the training set
steps_per_epoch = tf.data.experimental.cardinality(train_dataset).numpy()

# Per-period factor chosen so that 100 decay periods bring the rate from the
# initial to the final value. With decay_steps=steps_per_epoch and
# staircase=True, one period corresponds to one epoch.
learning_rate_decay_factor = (final_learning_rate / initial_learning_rate) ** (1 / 100)

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    steps_per_epoch,
    learning_rate_decay_factor,
    staircase=True,
)

In [None]:
# Network input: one grayscale image of shape (image_height, image_width, 1)
input_data = Input(shape=(image_height, image_width, 1), name='input_image')

# Bring pixel values into the [0, 1] range
x = Rescaling(1./255)(input_data)

# Swap height and width so the tensor becomes (None, image_width, image_height, 1);
# the width axis is later used as the sequence (time) axis.
x = Lambda(lambda tensor: tf.transpose(tensor, perm=[0, 2, 1, 3]), name="transpose")(x)

# Convolutional feature extractor: (filters, pool size, pooling layer name).
# The last block pools only along the first spatial axis (the time dimension).
conv_blocks = [
    (64, (2, 2), "pool1"),
    (128, (2, 2), "pool2"),
    (256, (2, 1), "pool3"),
]
for filters, pool_size, pool_name in conv_blocks:
    x = Conv2D(filters, (3, 3), activation="relu", kernel_initializer=tf.keras.initializers.he_normal(), padding="same")(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size, name=pool_name)(x)

# Flatten each time step into one feature vector: the width axis was halved
# three times and the height axis twice, with 256 channels from the last conv.
sequence_length = image_width // 8
features_per_step = (image_height // 4) * 256
x = Reshape(target_shape=(sequence_length, features_per_step), name="reshape")(x)
x = Dense(128, activation="relu", kernel_initializer=tf.keras.initializers.he_normal())(x)
x = Dropout(0.2)(x)

# Recurrent part: a single bidirectional LSTM that returns the full sequence
x = Bidirectional(LSTM(128, return_sequences=True, dropout=0.25))(x)

# Per-time-step class probabilities; one extra class beyond the character set (CTC)
output = Dense(len(all_possible_characters) + 1, activation='softmax')(x)

# Assemble the model
model = Model(inputs=input_data, outputs=output, name="Captcha-CRNN-model")

# CTC loss used to compile the model
def ctc_loss(y_true, y_pred):
    """Return the CTC batch cost between integer labels and predicted sequences.

    Every label is treated as spanning the full second axis of ``y_true`` and
    every prediction as spanning all time steps (second axis) of ``y_pred``.
    """
    samples = tf.cast(tf.shape(y_true)[0], dtype='int64')
    per_sample_ones = tf.ones(shape=(samples, 1), dtype='int64')

    # Column vectors of shape (batch, 1) holding the same length for every sample
    label_length = tf.cast(tf.shape(y_true)[1], dtype='int64') * per_sample_ones
    input_length = tf.cast(tf.shape(y_pred)[1], dtype='int64') * per_sample_ones

    return ctc_batch_cost(y_true, y_pred, input_length, label_length)


model.compile(
    optimizer=Adam(learning_rate=lr_schedule),
    loss=ctc_loss,
)
model.summary()

In [None]:
# Optional: run this cell to load the pretrained weights (trained for 100 epochs) for inference; you can then skip the training cells below.

# model.load_weights('model_crnn_100.h5')

In [None]:
# Weights are written to this single path, so each save replaces the previous one
checkpoint_filepath = 'model_crnn.h5'

# save_freq is expressed in batches: ten epochs' worth of training steps
steps_per_epoch = tf.data.experimental.cardinality(train_dataset).numpy()
batches_between_saves = int(10 * steps_per_epoch)

model_checkpoint_callback = ModelCheckpoint(
    checkpoint_filepath,
    save_weights_only=True,
    save_freq=batches_between_saves,       # save model for every 10 epochs
)

In [None]:
# Fit the model, validating after each epoch and checkpointing via the callback
num_epochs = 100

history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=num_epochs,
    callbacks=[model_checkpoint_callback],
)

In [None]:
# Zero-based index of the epoch with the lowest validation loss
validation_losses = history.history['val_loss']
best_epoch = validation_losses.index(min(validation_losses))


def plot_training_history(history, best_epoch):
    """Plot training and validation loss and annotate the lowest validation loss.

    Args:
        history: Keras ``History`` object with 'loss' and 'val_loss' entries.
        best_epoch: Zero-based index of the epoch to annotate.
    """
    train_curve = history.history['loss']
    val_curve = history.history['val_loss']

    fig, ax = plt.subplots(figsize=(9, 6))

    # Loss curves
    ax.plot(train_curve, label='Training Loss')
    ax.plot(val_curve, label='Validation Loss')
    ax.set_title('Model Loss')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.legend()

    # Arrow pointing at the minimum of the validation curve; the text shows
    # the epoch as a one-based number
    min_val_loss = min(val_curve)
    ax.annotate(
        f'Lowest Validation Loss: {min_val_loss:.4f}\nEpoch: {best_epoch + 1}',
        xy=(best_epoch, min_val_loss),
        xytext=(best_epoch - 3, min_val_loss + 0.1),  # Adjust text position
        arrowprops=dict(facecolor='black', arrowstyle='->'),
    )

    fig.tight_layout()
    plt.show()


plot_training_history(history, best_epoch)

In [None]:
def decode_and_visualize_samples(model, dataset, int_to_char, num_samples=5):
    """Predict, CTC-decode and display up to ``num_samples`` images of a dataset.

    Args:
        model: Model whose predictions have shape (batch, time steps, classes).
        dataset: Batched ``tf.data.Dataset`` yielding ``(images, labels)``.
        int_to_char: Mapping from integer class id to character.
        num_samples: Maximum number of samples to show. Fewer are shown when
            the dataset holds fewer samples.
    """
    if num_samples < 1:
        return

    # Regroup the data into a single batch of at most `num_samples` images.
    # Pulling one whole batch per displayed image would raise StopIteration on
    # small splits and would run the model on many images that are never shown.
    first_group = next(iter(dataset.unbatch().batch(num_samples)), None)
    if first_group is None:
        print("Dataset is empty, nothing to visualize.")
        return
    images, _ = first_group

    # Make predictions using the model
    predictions = model.predict(images)

    # Take the batch size and the number of time steps from the predictions
    # themselves, so partial batches and other input widths work as well.
    shown, time_steps = predictions.shape[0], predictions.shape[1]
    decoded, _ = ctc_decode(predictions, input_length=tf.fill((shown,), time_steps), greedy=True)
    decoded_rows = decoded[0].numpy()

    # squeeze=False keeps `axes` two-dimensional even for a single sample
    fig, axes = plt.subplots(shown, 1, figsize=(4, 2 * shown), squeeze=False)

    for ax, image, row in zip(axes[:, 0], images, decoded_rows):
        # ctc_decode pads shorter sequences with -1, which is not a valid
        # character id; keep only ids that are present in the mapping.
        decoded_labels = [int_to_char[int(x)] for x in row if int(x) in int_to_char]

        # Display the image and decoded label
        ax.imshow(image[:, :, 0], cmap='gray')
        ax.set_title("Decoded: " + ''.join(decoded_labels))
        ax.axis('off')

    # Adjust spacing and display the grid
    plt.tight_layout()
    plt.show()

# Example usage:
decode_and_visualize_samples(model, test_dataset, int_to_char, num_samples=5)

In [None]:
def calculate_accuracy(model, dataset, int_to_char,size,verbosity=0):
    """Return the fraction of samples whose decoded prediction equals the label.

    Args:
        model: Model whose predictions have shape (batch, time steps, classes).
        dataset: Batched ``tf.data.Dataset`` yielding ``(images, labels)`` with
            integer class ids as labels.
        int_to_char: Mapping from integer class id to character.
        size: Number of samples used as the denominator of the accuracy.
        verbosity: ``verbose`` argument forwarded to ``model.predict``.

    Returns:
        Number of exactly matching sequences divided by ``size``.
    """
    right = 0
    # Iterate over the dataset directly instead of relying on its cardinality,
    # which can be unknown (negative) and would then skip every batch.
    for image, label in dataset:
        # Make predictions using the model
        predictions = model.predict(image, verbose=verbosity)

        # Batch size and number of time steps come from the predictions, so the
        # function does not depend on a hard-coded sequence length.
        samples, time_steps = predictions.shape[0], predictions.shape[1]
        decoded, _ = ctc_decode(predictions, input_length=tf.fill((samples,), time_steps), greedy=True)

        for true_row, decoded_row in zip(label.numpy(), decoded[0].numpy()):
            true_labels = [int_to_char[int(x)] for x in true_row]
            # Drop the -1 padding added by ctc_decode and compare the complete
            # decoded sequence; truncating it could hide surplus characters and
            # count a too-long prediction as correct.
            decoded_labels = [
                int_to_char[int(x)] if int(x) in int_to_char else '<UNKNOWN>'
                for x in decoded_row
                if int(x) != -1
            ]
            if true_labels == decoded_labels:
                right += 1
    return right / size

In [None]:
# Evaluating all three splits can take a while -- raise `verbosity` to see progress
test_accuracy = calculate_accuracy(model, test_dataset, int_to_char, test_size, verbosity=0)
train_accuracy = calculate_accuracy(model, train_dataset, int_to_char, train_size, verbosity=0)
validation_accuracy = calculate_accuracy(model, validation_dataset, int_to_char, val_size, verbosity=0)

In [None]:
# Report the sequence-level accuracy of each split, one per line
print(
    f'Train accuracy : {train_accuracy}',
    f'Test accuracy : {test_accuracy}',
    f'Validation accuracy : {validation_accuracy}',
    sep='\n',
)