In [66]:
import os
import random
from PIL import Image, ImageDraw, ImageFont, ImageFilter
import numpy as np

In [None]:
# Filesystem-safe stand-ins for characters that cannot be used verbatim as a
# directory name or file-name stem ('.' denotes the current directory; '?' and
# '"' are not allowed in Windows file names).
_SAFE_CHAR_NAMES = {'.': 'dot', '?': 'qmark', '"': 'double_quote'}


def _find_ttf_fonts(font_path):
    """Return the sorted .ttf paths found in ``font_path`` and its immediate sub-directories."""
    found = []
    for entry in os.listdir(font_path):
        entry_path = os.path.join(font_path, entry)
        if os.path.isdir(entry_path):
            # Only one level of nesting is searched (e.g. one folder per font family).
            found.extend(
                os.path.join(entry_path, name)
                for name in os.listdir(entry_path)
                if name.lower().endswith('.ttf')
            )
        elif entry.lower().endswith('.ttf'):
            found.append(entry_path)
    # os.listdir order is arbitrary; sorting keeps seeded runs reproducible.
    return sorted(found)


def _resolve_output_dir(char, output_dir):
    """Replace a trailing path component equal to an unsafe ``char`` with its safe name."""
    safe_name = _SAFE_CHAR_NAMES.get(char)
    if safe_name is None:
        return output_dir
    separators = os.sep + (os.altsep or '')
    parent, leaf = os.path.split(output_dir.rstrip(separators))
    if leaf == char:
        return os.path.join(parent, safe_name)
    return output_dir


def generate_synthetic_data(char, font_path="path_to_fonts", output_dir="synthetic_data",
                            num_samples=100, seed=None):
    """Render ``num_samples`` randomised PNG images of a single character.

    Each image uses a randomly chosen font and font size, a light random
    background, a dark random text colour, a random canvas size, a small
    random rotation and a random Gaussian blur.

    Args:
        char: The character to draw.
        font_path: Directory containing ``.ttf`` files, either directly or in
            immediate sub-directories. The extension match is case-insensitive.
        output_dir: Directory the images are written to (created if missing).
            For ``.``, ``?`` and ``"`` a last path component equal to the
            character itself is replaced by ``dot``, ``qmark`` or
            ``double_quote``; any other ``output_dir`` is used as given.
        num_samples: Number of images to generate.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so the run is reproducible; when ``None`` the module-level
            ``random`` state is used.

    Raises:
        ValueError: If no ``.ttf`` font is found under ``font_path``.
        OSError: If ``font_path`` cannot be listed (raised by ``os.listdir``).

    Files are named ``<stem>_<i>.png`` where ``<stem>`` is the character, or
    its safe name for ``.``, ``?`` and ``"``.
    """
    fonts = _find_ttf_fonts(font_path)
    if not fonts:
        raise ValueError(f"No fonts found in the specified path: {font_path}")

    output_dir = _resolve_output_dir(char, output_dir)
    os.makedirs(output_dir, exist_ok=True)
    file_stem = _SAFE_CHAR_NAMES.get(char, char)

    # The `random` module exposes the same choice/randint/uniform API as a
    # Random instance, so either can serve as the generator.
    rng = random if seed is None else random.Random(seed)

    for i in range(num_samples):
        # Choose a font and font size
        chosen_font = rng.choice(fonts)
        font_size = rng.randint(40, 60)
        font = ImageFont.truetype(chosen_font, font_size)

        # Light background, dark text
        bg_color = tuple(rng.randint(200, 255) for _ in range(3))
        text_color = tuple(rng.randint(0, 100) for _ in range(3))

        # Blank canvas of random size filled with the background colour
        img_width = rng.randint(50, 100)
        img_height = rng.randint(50, 100)
        img = Image.new('RGB', (img_width, img_height), bg_color)
        draw = ImageDraw.Draw(img)

        # Centre a font_size x font_size box on the canvas; the offset goes
        # negative when the canvas is smaller than the font size.
        position = ((img_width - font_size) // 2, (img_height - font_size) // 2)
        draw.text(position, char, fill=text_color, font=font)

        # Small random rotation; the exposed corners get the background colour
        angle = rng.uniform(-5, 5)
        img = img.rotate(angle, expand=True, fillcolor=bg_color)

        # Random amount of Gaussian blur
        img = img.filter(ImageFilter.GaussianBlur(radius=rng.uniform(0, 1.5)))

        img.save(os.path.join(output_dir, f'{file_stem}_{i}.png'))

In [None]:
# Character inventory: punctuation first, then lowercase, uppercase and digits.
chars = (
    "!?,'\"."
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789"
)

# TODO: a font is drawn at random for every single image, so the classes may
# end up with different font mixes (unbalanced dataset?). Consider picking one
# font and rendering the whole character set ('!' through '9') with it before
# moving on to the next font.

# Render 3000 samples per character, passing a per-character output directory.
for char in chars:
    generate_synthetic_data(
        char,
        num_samples=3000,
        output_dir=f"../data/raw/character_set4/{char}",
        font_path="../data/fonts/",
    )

# One-off single-character run, kept for reference:
#generate_synthetic_data('.', font_path="../data/fonts/", output_dir="synthetic_data/dot", num_samples=3000)