# <center> Creating the Steel Alphabet dataset </center>

In [1]:
from datasets import load_dataset

# Load the "gagan3012/IAM" dataset through `datasets.load_dataset`
iam_data = load_dataset("gagan3012/IAM")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
iam_data

DatasetDict({
    train: Dataset({
        features: ['image', 'label', 'text'],
        num_rows: 11344
    })
})

None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [None]:
from torch.utils.data import Dataset
import torch
from PIL import Image

class SteelAlphabetDataset(Dataset):
    """Map-style dataset pairing rendered text images with token-id labels.

    Each item is a dict with:
      * ``pixel_values`` - the processor output for the image, squeezed.
      * ``labels`` - a tensor of exactly ``max_target_length`` token ids in
        which padding positions are replaced by ``-100``.

    Args:
        root_dir: Directory that contains the image files.
        df: Table exposing ``df['file_name'][idx]`` and ``df['text'][idx]``.
            NOTE(review): presumably a pandas DataFrame with a 0..N-1 index;
            confirm against the caller.
        processor: Callable image processor that also exposes a ``tokenizer``
            attribute (the tokenizer must provide ``pad_token_id``).
        max_target_length: Fixed length every label sequence is padded or
            truncated to.
    """

    def __init__(self, root_dir, df, processor, max_target_length=128):
        self.root_dir = root_dir
        self.df = df
        self.processor = processor
        self.max_target_length = max_target_length

    def __len__(self):
        """Return the number of samples, i.e. ``len(self.df)``."""
        return len(self.df)

    def _encode_labels(self, text):
        """Tokenize ``text`` into a fixed-length list of label ids."""
        tokenizer = self.processor.tokenizer
        # truncation=True is required: padding="max_length" only pads, so
        # without it longer texts would yield longer label sequences and the
        # samples could no longer be stacked into a batch.
        token_ids = tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_target_length,
        ).input_ids
        pad_id = tokenizer.pad_token_id
        # important: make sure that PAD tokens are ignored by the loss function
        # (-100 is the default ignore_index of torch.nn.CrossEntropyLoss)
        return [tok if tok != pad_id else -100 for tok in token_ids]

    def __getitem__(self, idx):
        """Return the encoded image and labels for sample ``idx``."""
        import os  # local import: this cell does not import os itself

        # get file name + text
        file_name = self.df['file_name'][idx]
        text = self.df['text'][idx]

        # prepare image (i.e. resize + normalize); os.path.join makes the path
        # valid whether or not root_dir ends with a separator, and the context
        # manager closes the underlying file as soon as the copy is made
        with Image.open(os.path.join(self.root_dir, file_name)) as raw_image:
            image = raw_image.convert("RGB")
        pixel_values = self.processor(image, return_tensors="pt").pixel_values

        # add labels (input_ids) by encoding the text
        labels = self._encode_labels(text)

        return {
            "pixel_values": pixel_values.squeeze(),
            "labels": torch.tensor(labels),
        }

In [None]:
from PIL import Image, ImageDraw, ImageFont
import os

# Path to the TrueType font file used to render the text
font_path = "ExpandedSteel-Regular.ttf"

# Canvas geometry and font size shared by every generated image
image_width = 800  # canvas width passed to Image.new
image_height = 200  # canvas height passed to Image.new
font_size = 40  # size argument passed to ImageFont.truetype

# Load the font once at module level so text_to_image can reuse it
font = ImageFont.truetype(font_path, font_size)

# Directory that receives the generated images; created if it does not exist
output_dir = "output_images"
os.makedirs(output_dir, exist_ok=True)

# Function to generate an image from text
def text_to_image(text, output_file):
    """Render ``text`` in black, centered on a white canvas, and save it.

    Relies on the module-level ``image_width``, ``image_height`` and ``font``.

    Args:
        text: The string to draw.
        output_file: Destination path handed to ``Image.save``.
    """
    # Create a blank image with a white background
    image = Image.new("RGB", (image_width, image_height), "white")
    draw = ImageDraw.Draw(image)

    # Measure the rendered text. ImageDraw.textsize was removed in Pillow 10,
    # so prefer textbbox and only fall back to textsize on old releases that
    # do not provide textbbox yet.
    if hasattr(draw, "textbbox"):
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    else:
        legacy_width, legacy_height = draw.textsize(text, font=font)
        left, top, right, bottom = 0, 0, legacy_width, legacy_height
    text_width = right - left
    text_height = bottom - top

    # Calculate text position (centered). The bounding box may not start at
    # the drawing origin, so subtract its offset to center the visible ink.
    x = (image_width - text_width) // 2 - left
    y = (image_height - text_height) // 2 - top

    # Add text to image
    draw.text((x, y), text, fill="black", font=font)

    # Save the image
    image.save(output_file)

# Load the source text, one sample per line
text_file = "your_text_dataset.txt"
with open(text_file, "r", encoding="utf-8") as f:
    lines = list(f)

# Render every non-blank line. File names use the 1-based line position, so
# blank lines leave gaps in the numbering.
for line_no, raw_line in enumerate(lines, start=1):
    content = raw_line.strip()  # drop surrounding whitespace and the newline
    if content:
        output_file = os.path.join(output_dir, f"text_image_{line_no}.png")
        text_to_image(content, output_file)
        print(f"Generated: {output_file}")
