# Text Recognition Data Generator

Generate synthetic text images for an OCR project using the `trdg` library.

Run the following to install the library:
`pip install trdg`

In [13]:
import os
from trdg.generators import (
    GeneratorFromDict,
    GeneratorFromRandom,
    GeneratorFromStrings,
    GeneratorFromWikipedia,
)

In [None]:
def generate_and_save_images(strings, count, output_dir):
    """
    Render text strings as images and write them to disk as PNG files.

    Files are named ``text_image_<n>.png`` with ``n`` starting at 1, and the
    path of each file is printed once it has been written.

    Parameters:
        strings (list of str): Text to render.
        count (int): Number of images requested from the generator.
        output_dir (str): Directory to write into; created if it is missing.
    """
    # Create the target directory up front so saving never hits a missing path
    os.makedirs(output_dir, exist_ok=True)

    # Rendering options passed to the generator; edit these as needed.
    render_options = {
        "size": 64,
        "skewing_angle": 5,
        "random_blur": True,
    }
    text_images = GeneratorFromStrings(
        strings=strings,
        count=count,
        **render_options,
    )

    # Each item is an (image, label) pair; only the image is written out
    for number, (image, _label) in enumerate(text_images, start=1):
        destination = os.path.join(output_dir, f"text_image_{number}.png")
        image.save(destination)
        print(f"Saved: {destination}")


## Driver
Run the image-generation function here.

In [23]:
# Example usage: request five sample images in the test data folder
output_directory = "../data/test/"
generate_and_save_images(
    strings=["Hello, World!", "OCR Training"],
    count=5,
    output_dir=output_directory,
)

Saved: ../data/test/text_image_1.png
Saved: ../data/test/text_image_2.png
Saved: ../data/test/text_image_3.png
Saved: ../data/test/text_image_4.png
Saved: ../data/test/text_image_5.png
