# Fine-tuning recognizer with keras-ocr


In [None]:
import random
import string
import math
import itertools
import os

import numpy as np
import imgaug
import matplotlib.pyplot as plt
import tensorflow as tf
import sklearn.model_selection
import os
import string 

import keras_ocr

# Function to read labels file
def _read_born_digital_labels_file(labels_filepath, image_folder):
    """Read a labels file and return (filepath, box, label) tuples.

    Each non-blank line is expected to look like ``<image name>, "<label>"``.
    The text before the first comma is the image file name; everything after
    it is the label, which may itself contain commas.

    Args:
        labels_filepath: Path to labels file
        image_folder: Path to folder containing images

    Returns:
        A list of ``(image_path, None, label)`` tuples. ``image_path`` is the
        file name joined onto ``image_folder``; the middle element is always
        ``None``; ``label`` is the label text with its first and last
        character removed (these are assumed to be the surrounding quotes).

    Raises:
        FileNotFoundError: If ``labels_filepath`` or ``image_folder`` does
            not exist.
    """
    if not os.path.exists(labels_filepath):
        raise FileNotFoundError(f"Labels file not found: {labels_filepath}")
    if not os.path.exists(image_folder):
        raise FileNotFoundError(f"Image folder not found: {image_folder}")

    labels = []
    # "utf-8-sig" drops a leading byte-order mark so it cannot end up in the
    # first file name.
    with open(labels_filepath, encoding="utf-8-sig") as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank line would otherwise yield an entry whose path is
                # the image folder itself and whose label is empty.
                continue
            filename, _, raw_label = line.partition(",")
            labels.append(
                (
                    os.path.join(image_folder, filename),
                    None,
                    raw_label.strip()[1:-1],
                )
            )
    return labels

# Dataset locations. Raw strings keep the Windows backslashes literal.
train_labels_filepath = r"E:\Bangkit\Capstone\Lintasarta\OCR\train\Lintasarta\train\gt.txt"
train_image_folder = r"E:\Bangkit\Capstone\Lintasarta\OCR\train\Lintasarta\train\image_file"
test_labels_filepath = r"E:\Bangkit\Capstone\Lintasarta\OCR\train\Lintasarta\test\gt.txt"
test_image_folder = r"E:\Bangkit\Capstone\Lintasarta\OCR\train\Lintasarta\test\image_file"

# Load both splits. If either path is missing, the error is printed and BOTH
# splits fall back to empty lists so the rest of the cell still runs.
try:
    train_labels = _read_born_digital_labels_file(
        labels_filepath=train_labels_filepath,
        image_folder=train_image_folder,
    )
    test_labels = _read_born_digital_labels_file(
        labels_filepath=test_labels_filepath,
        image_folder=test_image_folder,
    )
except FileNotFoundError as missing:
    print(missing)
    train_labels = []
    test_labels = []

# Lower-case every label text; report a split that came back empty.
if not train_labels:
    print("Train labels could not be loaded.")
else:
    train_labels = [(path, bbox, text.lower()) for path, bbox, text in train_labels]

if not test_labels:
    print("Test labels could not be loaded.")
else:
    test_labels = [(path, bbox, text.lower()) for path, bbox, text in test_labels]

# Report how many entries each split ended up with.
print(f'Number of training labels: {len(train_labels)}')
print(f'Number of test labels: {len(test_labels)}')


In [None]:
import tensorflow as tf
# Print the GPU devices TensorFlow reports for this machine.
# NOTE(review): presumably an empty list means training will run on CPU — confirm.
print(tf.config.list_physical_devices('GPU'))

In [None]:
# Build a recognizer whose alphabet is every character in string.printable
# (digits, ASCII letters, punctuation and whitespace).
# NOTE(review): the labels are lower-cased when they are loaded, so the
# upper-case letters in this alphabet presumably never occur in the training
# targets — confirm that is intended.
recognizer = keras_ocr.recognition.Recognizer(alphabet=string.printable)
# compile() is called with its defaults.
# NOTE(review): presumably this configures the optimizer/loss of the training
# model — confirm against the keras-ocr documentation.
recognizer.compile()

In [None]:
# Number of samples per batch, shared by training and validation.
batch_size = 16

# Random gamma-contrast augmentation, applied to the training split only.
augmenter = imgaug.augmenters.Sequential([
    imgaug.augmenters.GammaContrast(gamma=(0.25, 3.0)),
])

# Use the provided training and test labels
training_labels = train_labels
validation_labels = test_labels

# Create image generators and calculate steps per epoch.
# Each split is paired explicitly with its augmenter (None for validation)
# rather than relying on an object-identity check against training_labels.
# Steps are clamped to at least 1: plain floor division yields 0 steps for a
# split that holds fewer samples than one batch.
(training_image_gen, training_steps), (validation_image_gen, validation_steps) = [
    (
        keras_ocr.datasets.get_recognizer_image_generator(
            labels=labels,
            height=recognizer.model.input_shape[1],
            width=recognizer.model.input_shape[2],
            alphabet=recognizer.alphabet,
            augmenter=split_augmenter,
        ),
        max(1, len(labels) // batch_size),
    )
    for labels, split_augmenter in [
        (training_labels, augmenter),
        (validation_labels, None),
    ]
]

# Create batch generators for training and validation
training_gen, validation_gen = [
    recognizer.get_batch_generator(
        image_generator=image_generator,
        batch_size=batch_size,
    )
    for image_generator in [training_image_gen, validation_image_gen]
]

# Print the number of training and validation images
print(f"Number of training images: {len(training_labels)}")
print(f"Number of validation images: {len(validation_labels)}")

In [None]:
# Pull one (image, text) pair from the training image generator as a sanity
# check of the data feeding the recognizer.
image, text = next(training_image_gen)
print('text:', text)
# Presumably the `_ =` assignment keeps the notebook from echoing the object
# returned by imshow.
_ = plt.imshow(image)

In [None]:
# Training callbacks:
# - EarlyStopping watches val_loss, but patience (10000) is larger than the
#   number of epochs (1000), so it never stops this run early.
# - ModelCheckpoint saves to 'train_24/05/2024.h5' only when val_loss improves.
#   NOTE(review): the slashes make this a nested path (train_24/05/), not a
#   date inside a file name — confirm that is intended.
# - CSVLogger writes per-epoch metrics to 'recognizer_borndigital1.csv'.
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=0,
        patience=10000,
        restore_best_weights=False,
        start_from_epoch=0,
    ),
    tf.keras.callbacks.ModelCheckpoint('train_24/05/2024.h5', monitor='val_loss', save_best_only=True),
    tf.keras.callbacks.CSVLogger('recognizer_borndigital1.csv'),
]

# Model.fit accepts Python generators directly. Model.fit_generator is
# deprecated and missing from newer Keras releases, so fit() is used instead.
# Both step counts must be given explicitly.
# NOTE(review): presumably both generators yield batches indefinitely, so
# Keras cannot infer the epoch length itself — confirm.
recognizer.training_model.fit(
    x=training_gen,
    steps_per_epoch=training_steps,
    validation_steps=validation_steps,
    validation_data=validation_gen,
    callbacks=callbacks,
    epochs=1000,
)

In [None]:
# Spot-check one test sample: run the recognizer on its image and print the
# prediction next to the ground-truth label.
# The fixed index 19 requires test_labels to hold at least 20 entries.
image_filepath, _, actual = test_labels[19]
predicted = recognizer.recognize(image_filepath)
print(f'Predicted: {predicted}, Actual: {actual}')
# Show the image that was just recognized.
_ = plt.imshow(keras_ocr.tools.read(image_filepath))

In [None]:
# Load weights from an HDF5 file into the recognizer's prediction model.
# NOTE(review): this reads 'train_25/...' while the training cell's checkpoint
# is written to 'train_24/...' — confirm which file is meant.
recognizer.prediction_model.load_weights('train_25/05/2024.h5')

In [None]:
import numpy as np
import os

# Evaluate the recognizer on every test sample and report two metrics:
# - Overall Accuracy: share of samples whose prediction equals the label exactly.
# - Character Accuracy: share of label characters matched at the same position
#   in the prediction (position-wise comparison, not an edit distance).
total_samples = len(test_labels)
correct_predictions = 0
total_characters = 0
correct_characters = 0

model_name = type(recognizer).__name__
print(f"Model used: {model_name}")

for image_filepath, _, actual in test_labels:
    predicted = recognizer.recognize(image_filepath)
    print(f'Predicted: {predicted}, Actual: {actual}')
    _ = plt.imshow(keras_ocr.tools.read(image_filepath))

    total_characters += len(actual)

    if predicted == actual:
        correct_predictions += 1

    # zip stops at the shorter string, so label positions beyond the end of
    # the prediction count as misses. An exact match scores len(actual).
    correct_characters += sum(a == p for a, p in zip(actual, predicted))

# Guard the divisions: test_labels is empty when the label files could not be
# loaded, and total_characters is zero when every label is an empty string.
accuracy = (correct_predictions / total_samples) * 100 if total_samples else 0.0
character_accuracy = (
    (correct_characters / total_characters) * 100 if total_characters else 0.0
)

print(f'Overall Accuracy: {accuracy}%')
print(f'Character Accuracy: {character_accuracy}%')



In [None]:
import matplotlib.pyplot as plt

import keras_ocr

# keras-ocr will automatically download pretrained
# weights for the detector and recognizer.
pipeline = keras_ocr.pipeline.Pipeline()

# Read the single example image. It is wrapped in a list because everything
# below treats `images` as a batch: len(images) sets the number of subplot
# rows, and the plotting loop pairs one image with one prediction group.
images = [
    keras_ocr.tools.read('E:/Bangkit/Capstone/Lintasarta/OCR/train/inference/Army_Reserves_Recruitment_Banner_MOD_45156284.jpg'),
]

# Each list of predictions in prediction_groups is a list of
# (word, box) tuples.
prediction_groups = pipeline.recognize(images)

# Plot the predictions.
# squeeze=False makes plt.subplots return a 2-D array of Axes even for a
# single row, so the loop below also works with exactly one image.
fig, axs = plt.subplots(nrows=len(images), figsize=(20, 20), squeeze=False)
for ax, image, predictions in zip(axs.ravel(), images, prediction_groups):
    keras_ocr.tools.drawAnnotations(image=image, predictions=predictions, ax=ax)