In [4]:
from PIL import Image, ImageDraw, ImageFont, ImageFilter
import os
import shutil
import random

def calculate_font_size(text, font_path, max_height, initial_size=20):
    """Grow a font size until the rendered text reaches 90% of ``max_height``.

    Starting at ``initial_size`` the size is increased one point at a time and
    the text is measured with ``ImageDraw.multiline_textbbox``; the first size
    whose bounding-box bottom edge (measured from a (0, 0) draw origin) is at
    least ``0.9 * max_height`` is returned.

    Args:
        text: The string to measure.
        font_path: Path to a TrueType font file, passed to ``ImageFont.truetype``.
        max_height: Target height in pixels.
        initial_size: Font size the search starts from.

    Returns:
        The selected font size. ``initial_size`` is returned unchanged when
        ``text`` is empty or whitespace-only.
    """
    # Blank text never gains height, so the growth loop below would spin
    # forever; bail out before the font file is even opened.
    if not text or not text.strip():
        return initial_size

    target_bottom = 0.9 * max_height
    # A single scratch canvas is enough for every measurement.
    scratch = ImageDraw.Draw(Image.new('RGB', (1, 1)))

    current_size = initial_size
    while True:
        font = ImageFont.truetype(font_path, current_size)
        bottom = scratch.multiline_textbbox((0, 0), text, font=font)[3]
        # bottom <= 0 means nothing measurable was laid out (presumably text
        # made only of zero-width characters); a larger size cannot help.
        if bottom >= target_bottom or bottom <= 0:
            return current_size
        current_size += 1

def generate_images_and_labels_from_file(text_file_path, image_height, output_folder, output_labels_file, custom_font=None, random_background=False, random_blur=False, use_all_text=False, labels_base_dir='./dataset'):
    """Render text lines as images, one per font, and write a labels file.

    Each non-blank text is drawn once for every configured font, sized with
    ``calculate_font_size`` so it fills roughly 90% of ``image_height``, saved
    as ``<counter>.jpg`` in ``output_folder`` and recorded in
    ``output_labels_file`` as ``"<relative image path> <text>"``.

    Note: ``output_folder`` is deleted and recreated, and the labels file is
    overwritten, on every call.

    Args:
        text_file_path: UTF-8 text file with one text sample per line.
        image_height: Height in pixels of every generated image.
        output_folder: Directory that receives the generated images.
        output_labels_file: Path of the labels file to write.
        custom_font: Optional extra font entry appended to the built-in list;
            it must be a mapping with a ``'file'`` key holding the font path.
        random_background: When true, pick the background at random from
            black, gray and white.
        random_blur: When true, apply a Gaussian blur with a random integer
            radius between 0 and 5 (inclusive).
        use_all_text: When true, use every line of the file in shuffled order;
            otherwise only the first line is used.
        labels_base_dir: Directory the image paths written to the labels file
            are made relative to.
    """
    font_options = [
        {'name': 'Khmer OS Muol Light Regular', 'file': './font/Khmer OS Muol Light Regular.ttf'},
        {'name': 'Khmer OS Battambang Regular', 'file': './font/Khmer OS Battambang Regular.ttf'}
    ]

    if custom_font is not None:
        font_options.append(custom_font)

    # Read the input before deleting anything, so a missing or unreadable text
    # file does not cost the caller the previous output. The context manager
    # also closes the handle in both branches.
    with open(text_file_path, 'r', encoding='utf-8') as text_file:
        if use_all_text:
            text_list = text_file.readlines()
        else:
            text_list = [text_file.readline()]
    if use_all_text:
        random.shuffle(text_list)

    # Blank lines cannot be rendered or sized, so they are dropped here.
    texts = [stripped for stripped in (line.strip() for line in text_list) if stripped]

    # Start from an empty output folder.
    if os.path.exists(output_folder):
        shutil.rmtree(output_folder)
    os.makedirs(output_folder, exist_ok=True)

    # Mode 'w' truncates any previous labels file.
    with open(output_labels_file, 'w', encoding='utf-8') as labels_file:
        counter = 1

        for text in texts:
            for font_option in font_options:
                font_size = calculate_font_size(text, font_option['file'], image_height)
                font = ImageFont.truetype(font_option['file'], font_size)

                # Measure on a throwaway canvas; the real one needs the width.
                scratch = ImageDraw.Draw(Image.new("RGB", (1, 1)))
                left, top, right, bottom = scratch.textbbox((0, 0), text, font=font)
                ink_width = right - left
                ink_height = bottom - top
                image_width = ink_width + 20  # Add some padding

                # The bbox is relative to the draw origin, so subtract its
                # left/top offsets to centre the visible ink, not the origin.
                text_position = (
                    (image_width - ink_width) // 2 - left,
                    (image_height - ink_height) // 2 - top,
                )

                background_color = "white"
                if random_background:
                    background_color = random.choice(["black", "gray", "white"])
                # Black text on a black background would be an unreadable sample.
                text_color = "white" if background_color == "black" else "black"

                img = Image.new("RGB", (image_width, image_height), background_color)
                ImageDraw.Draw(img).text(text_position, text, fill=text_color, font=font)

                # Random blur
                if random_blur:
                    blur_radius = random.randint(0, 5)
                    img = img.filter(ImageFilter.GaussianBlur(radius=blur_radius))

                # Use a simple image name with a counter
                image_name = f"{counter}.jpg"
                output_path = os.path.join(output_folder, image_name)
                img.save(output_path)

                # Express the image path relative to the labels base directory
                rel_image_path = os.path.relpath(output_path, start=labels_base_dir)
                labels_file.write(f"{rel_image_path} {text}\n")

                print('Generate image:', rel_image_path + ' | ' + text)
                counter += 1

    print(f"Images exported to: {output_folder}")
    print(f"Labels exported to: {output_labels_file}")

def main():
    """Generate the dataset from ``dict.txt`` with random backgrounds and blur enabled."""
    source_words = "dict.txt"  # Change this to your text file path
    height_px = 128
    images_dir = 'output/'
    labels_path = 'labels.txt'

    generate_images_and_labels_from_file(
        source_words,
        height_px,
        images_dir,
        labels_path,
        random_background=True,
        random_blur=True,
        use_all_text=True,
    )

# Entry point: run the generator only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Generate image: ../output/1.jpg | មិនអាច
Generate image: ../output/2.jpg | មិនអាច
Generate image: ../output/3.jpg | ក្បាល
Generate image: ../output/4.jpg | ក្បាល
Generate image: ../output/5.jpg | ប្រអប់
Generate image: ../output/6.jpg | ប្រអប់
Generate image: ../output/7.jpg | កម្រើក
Generate image: ../output/8.jpg | កម្រើក
Generate image: ../output/9.jpg | ជើង
Generate image: ../output/10.jpg | ជើង
Generate image: ../output/11.jpg | ត
Generate image: ../output/12.jpg | ត
Generate image: ../output/13.jpg | មានរបស់គរ
Generate image: ../output/14.jpg | មានរបស់គរ
Generate image: ../output/15.jpg | ណែន
Generate image: ../output/16.jpg | ណែន
Generate image: ../output/17.jpg | ប្រអប់
Generate image: ../output/18.jpg | ប្រអប់
Generate image: ../output/19.jpg | ងាក
Generate image: ../output/20.jpg | ងាក
Generate image: ../output/21.jpg | ឬជើង
Generate image: ../output/22.jpg | ឬជើង
Generate image: ../output/23.jpg | អវយវៈ
Generate image: ../output/24.jpg | អវយវៈ
Generate image: ../output/25.jp

In [10]:
from PIL import Image, ImageDraw, ImageFont, ImageFilter
import os
import random
import shutil

def calculate_font_size(text, font_path, max_height, initial_size=20):
    """Grow a font size until the rendered text reaches 90% of ``max_height``.

    Starting at ``initial_size`` the size is increased one point at a time and
    the text is measured with ``ImageDraw.multiline_textbbox``; the first size
    whose bounding-box bottom edge (measured from a (0, 0) draw origin) is at
    least ``0.9 * max_height`` is returned.

    Args:
        text: The string to measure.
        font_path: Path to a TrueType font file, passed to ``ImageFont.truetype``.
        max_height: Target height in pixels.
        initial_size: Font size the search starts from.

    Returns:
        The selected font size. ``initial_size`` is returned unchanged when
        ``text`` is empty or whitespace-only.
    """
    # Blank text never gains height, so the growth loop below would spin
    # forever; bail out before the font file is even opened.
    if not text or not text.strip():
        return initial_size

    target_bottom = 0.9 * max_height
    # A single scratch canvas is enough for every measurement.
    scratch = ImageDraw.Draw(Image.new('RGB', (1, 1)))

    current_size = initial_size
    while True:
        font = ImageFont.truetype(font_path, current_size)
        bottom = scratch.multiline_textbbox((0, 0), text, font=font)[3]
        # bottom <= 0 means nothing measurable was laid out (presumably text
        # made only of zero-width characters); a larger size cannot help.
        if bottom >= target_bottom or bottom <= 0:
            return current_size
        current_size += 1

def generate_images_and_labels_from_file(file_path, image_height, output_folder, output_labels_file, random_blur=False, font_path=None):
    """Render every line of a text file as images, one per font, plus a labels file.

    Each non-blank line is drawn in black on white once for every built-in
    font, sized with ``calculate_font_size`` so it fills roughly 90% of
    ``image_height``, saved as ``<counter>.jpg`` in ``output_folder`` and
    recorded in ``output_labels_file`` as ``"<image file name> <text>"``.

    Note: ``output_folder`` is deleted and recreated, and the labels file is
    overwritten, on every call.

    Args:
        file_path: UTF-8 text file with one text sample per line.
        image_height: Height in pixels of every generated image.
        output_folder: Directory that receives the generated images.
        output_labels_file: Path of the labels file to write.
        random_blur: When true, apply a Gaussian blur with a random integer
            radius between 0 and 5 (inclusive).
        font_path: Accepted for compatibility; not used by this implementation.
    """
    font_options = [
        {'name': 'Khmer OS Muol Light Regular', 'file': './font/Khmer OS Muol Light Regular.ttf'},
        {'name': 'Khmer OS Battambang Regular', 'file': './font/Khmer OS Battambang Regular.ttf'}
    ]

    # Read the input before deleting anything, so a missing or unreadable text
    # file does not cost the caller the previous output; the context manager
    # also closes the handle.
    with open(file_path, 'r', encoding='utf-8') as text_file:
        text_list = text_file.readlines()

    # Blank lines cannot be rendered or sized, so they are dropped here.
    texts = [stripped for stripped in (line.strip() for line in text_list) if stripped]

    # Start from an empty output folder.
    if os.path.exists(output_folder):
        shutil.rmtree(output_folder)
    os.makedirs(output_folder, exist_ok=True)

    # Mode 'w' truncates any previous labels file.
    with open(output_labels_file, 'w', encoding='utf-8') as labels_file:
        counter = 1

        for text in texts:
            for font_option in font_options:
                font_size = calculate_font_size(text, font_option['file'], image_height)
                font = ImageFont.truetype(font_option['file'], font_size)

                # Measure on a throwaway canvas; the real one needs the width.
                scratch = ImageDraw.Draw(Image.new("RGB", (1, 1)))
                left, top, right, bottom = scratch.textbbox((0, 0), text, font=font)
                ink_width = right - left
                ink_height = bottom - top
                image_width = ink_width + 20  # Add some padding

                # The bbox is relative to the draw origin, so subtract its
                # left/top offsets to centre the visible ink, not the origin.
                text_position = (
                    (image_width - ink_width) // 2 - left,
                    (image_height - ink_height) // 2 - top,
                )

                text_color = "black"

                img = Image.new("RGB", (image_width, image_height), "white")
                ImageDraw.Draw(img).text(text_position, text, fill=text_color, font=font)

                # Random blur
                if random_blur:
                    blur_radius = random.randint(0, 5)
                    img = img.filter(ImageFilter.GaussianBlur(radius=blur_radius))

                # Use a simple image name with a counter
                image_name = f"{counter}.jpg"
                img.save(os.path.join(output_folder, image_name))

                # The label stores the bare file name, whatever the output
                # folder is called and whichever path separator the OS uses.
                labels_file.write(f"{image_name} {text}\n")

                print('Generate image:', image_name + ' | ' + text)
                counter += 1

def main():
    """Generate the dataset from ``dict.txt`` with random blur enabled."""
    source_words = "dict.txt"  # Change this to your text file path
    height_px = 128
    images_dir = 'output'
    labels_path = 'labels.txt'

    generate_images_and_labels_from_file(
        source_words,
        height_px,
        images_dir,
        labels_path,
        random_blur=True,
    )

# Entry point: run the generator only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Generate image: 1.jpg | អវយវៈ
Generate image: 2.jpg | អវយវៈ
Generate image: 3.jpg | ដែល
Generate image: 4.jpg | ដែល
Generate image: 5.jpg | ត
Generate image: 6.jpg | ត
Generate image: 7.jpg | ពី
Generate image: 8.jpg | ពី
Generate image: 9.jpg | ក្បាល
Generate image: 10.jpg | ក្បាល
Generate image: 11.jpg | ទៅស្មាឬទៅខ្លួន
Generate image: 12.jpg | ទៅស្មាឬទៅខ្លួន
Generate image: 13.jpg | នៃមនុស្ស
Generate image: 14.jpg | នៃមនុស្ស
Generate image: 15.jpg | សត្វ
Generate image: 16.jpg | សត្វ
Generate image: 17.jpg | ផ្នែក
Generate image: 18.jpg | ផ្នែក
Generate image: 19.jpg | នៃដៃ
Generate image: 20.jpg | នៃដៃ
Generate image: 21.jpg | ឬជើង
Generate image: 22.jpg | ឬជើង
Generate image: 23.jpg | ដែលត
Generate image: 24.jpg | ដែលត
Generate image: 25.jpg | ភ្ជាប់ពី
Generate image: 26.jpg | ភ្ជាប់ពី
Generate image: 27.jpg | ប្រអប់
Generate image: 28.jpg | ប្រអប់
Generate image: 29.jpg | ដៃទៅ
Generate image: 30.jpg | ដៃទៅ
Generate image: 31.jpg | កំភួន
Generate image: 32.jpg | កំភួន
Generate imag

In [14]:
from PIL import Image, ImageDraw, ImageFont, ImageFilter
import os
import random
import shutil

def calculate_font_size(text, font_path, max_height, initial_size=20):
    """Grow a font size until the rendered text reaches 90% of ``max_height``.

    Starting at ``initial_size`` the size is increased one point at a time and
    the text is measured with ``ImageDraw.multiline_textbbox``; the first size
    whose bounding-box bottom edge (measured from a (0, 0) draw origin) is at
    least ``0.9 * max_height`` is returned.

    Args:
        text: The string to measure.
        font_path: Path to a TrueType font file, passed to ``ImageFont.truetype``.
        max_height: Target height in pixels.
        initial_size: Font size the search starts from.

    Returns:
        The selected font size. ``initial_size`` is returned unchanged when
        ``text`` is empty or whitespace-only.
    """
    # Blank text never gains height, so the growth loop below would spin
    # forever; bail out before the font file is even opened.
    if not text or not text.strip():
        return initial_size

    target_bottom = 0.9 * max_height
    # A single scratch canvas is enough for every measurement.
    scratch = ImageDraw.Draw(Image.new('RGB', (1, 1)))

    current_size = initial_size
    while True:
        font = ImageFont.truetype(font_path, current_size)
        bottom = scratch.multiline_textbbox((0, 0), text, font=font)[3]
        # bottom <= 0 means nothing measurable was laid out (presumably text
        # made only of zero-width characters); a larger size cannot help.
        if bottom >= target_bottom or bottom <= 0:
            return current_size
        current_size += 1

def generate_images_and_labels_from_file(file_path, image_height, output_folder, output_labels_file, font_option=None, random_blur=False):
    """Render every line of a text file as images, one per selected font, plus a labels file.

    Each non-blank line is drawn in black on white once for every selected
    font, sized with ``calculate_font_size`` so it fills roughly 90% of
    ``image_height``, saved as ``<counter>.jpg`` in ``output_folder`` and
    recorded in ``output_labels_file`` as ``"<image file name> <text>"``.

    Note: ``output_folder`` is deleted and recreated, and the labels file is
    overwritten, on every call.

    Args:
        file_path: UTF-8 text file with one text sample per line.
        image_height: Height in pixels of every generated image.
        output_folder: Directory that receives the generated images.
        output_labels_file: Path of the labels file to write.
        font_option: 1-based positions into the built-in font list below
            (1 = AKbalthom KhmerLer, 2 = Khmer MEF1, 3 = Khmer OS Battambang,
            4 = Khmer OS Muol Light, 5 = Khmer OS Siemreap). ``None`` or an
            empty sequence selects every font.
        random_blur: When true, apply a Gaussian blur with a random integer
            radius between 0 and 5 (inclusive).

    Raises:
        IndexError: If ``font_option`` contains a position outside
            ``1..len(font list)``.
    """
    font_options = [
        {'name': 'AKbalthom KhmerLer Regular', 'file': './font/AKbalthom KhmerLer Regular.ttf'},
        {'name': 'Khmer MEF1 Regular', 'file': './font/Khmer MEF1 Regular.ttf'},
        {'name': 'Khmer OS Battambang Regular', 'file': './font/Khmer OS Battambang Regular.ttf'},
        {'name': 'Khmer OS Muol Light Regular', 'file': './font/Khmer OS Muol Light Regular.ttf'},
        {'name': 'Khmer OS Siemreap Regular', 'file': './font/Khmer OS Siemreap Regular.ttf'},
    ]

    if not font_option:
        selected_fonts = font_options
    else:
        selected_fonts = []
        for position in font_option:
            # Reject 0 and negatives explicitly: after the "- 1" shift they
            # would silently wrap around to fonts at the end of the list.
            if not 1 <= position <= len(font_options):
                raise IndexError(
                    f"font_option entry {position!r} is out of range; "
                    f"expected a value from 1 to {len(font_options)}"
                )
            selected_fonts.append(font_options[position - 1])

    # Read the input before deleting anything, so a missing or unreadable text
    # file does not cost the caller the previous output; the context manager
    # also closes the handle.
    with open(file_path, 'r', encoding='utf-8') as text_file:
        text_list = text_file.readlines()

    # Blank lines cannot be rendered or sized, so they are dropped here.
    texts = [stripped for stripped in (line.strip() for line in text_list) if stripped]

    # Start from an empty output folder.
    if os.path.exists(output_folder):
        shutil.rmtree(output_folder)
    os.makedirs(output_folder, exist_ok=True)

    # Mode 'w' truncates any previous labels file.
    with open(output_labels_file, 'w', encoding='utf-8') as labels_file:
        counter = 1

        for text in texts:
            for selected_font in selected_fonts:
                font_size = calculate_font_size(text, selected_font['file'], image_height)
                font = ImageFont.truetype(selected_font['file'], font_size)

                # Measure on a throwaway canvas; the real one needs the width.
                scratch = ImageDraw.Draw(Image.new("RGB", (1, 1)))
                left, top, right, bottom = scratch.textbbox((0, 0), text, font=font)
                ink_width = right - left
                ink_height = bottom - top
                image_width = ink_width + 20  # Add some padding

                # The bbox is relative to the draw origin, so subtract its
                # left/top offsets to centre the visible ink, not the origin.
                text_position = (
                    (image_width - ink_width) // 2 - left,
                    (image_height - ink_height) // 2 - top,
                )

                text_color = "black"

                img = Image.new("RGB", (image_width, image_height), "white")
                ImageDraw.Draw(img).text(text_position, text, fill=text_color, font=font)

                # Random blur
                if random_blur:
                    blur_radius = random.randint(0, 5)
                    img = img.filter(ImageFilter.GaussianBlur(radius=blur_radius))

                # Use a simple image name with a counter
                image_name = f"{counter}.jpg"
                img.save(os.path.join(output_folder, image_name))

                # The label stores the bare file name, whatever the output
                # folder is called and whichever path separator the OS uses.
                labels_file.write(f"{image_name} {text}\n")

                print('Generate image:', image_name + ' | ' + text)
                counter += 1

def main():
    """Generate the dataset from ``dict.txt`` using the selected fonts, with random blur enabled."""
    source_words = "dict.txt"  # Change this to your text file path
    height_px = 128
    images_dir = 'output'
    labels_path = 'labels.txt'
    # 1-based positions into the generator's font list, e.g. [1] for
    # AKbalthom KhmerLer Regular, [3] for Khmer OS Battambang Regular,
    # [4] for Khmer OS Muol Light Regular; [] selects all fonts.
    chosen_fonts = []

    generate_images_and_labels_from_file(
        source_words,
        height_px,
        images_dir,
        labels_path,
        font_option=chosen_fonts,
        random_blur=True,
    )

# Entry point: run the generator only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Generate image: 1.jpg | អវយវៈ
Generate image: 2.jpg | អវយវៈ
Generate image: 3.jpg | អវយវៈ
Generate image: 4.jpg | អវយវៈ
Generate image: 5.jpg | អវយវៈ
Generate image: 6.jpg | ដែល
Generate image: 7.jpg | ដែល
Generate image: 8.jpg | ដែល
Generate image: 9.jpg | ដែល
Generate image: 10.jpg | ដែល
Generate image: 11.jpg | ត
Generate image: 12.jpg | ត
Generate image: 13.jpg | ត
Generate image: 14.jpg | ត
Generate image: 15.jpg | ត
Generate image: 16.jpg | ពី
Generate image: 17.jpg | ពី
Generate image: 18.jpg | ពី
Generate image: 19.jpg | ពី
Generate image: 20.jpg | ពី
Generate image: 21.jpg | ក្បាល
Generate image: 22.jpg | ក្បាល
Generate image: 23.jpg | ក្បាល
Generate image: 24.jpg | ក្បាល
Generate image: 25.jpg | ក្បាល
Generate image: 26.jpg | ទៅស្មាឬទៅខ្លួន
Generate image: 27.jpg | ទៅស្មាឬទៅខ្លួន
Generate image: 28.jpg | ទៅស្មាឬទៅខ្លួន
Generate image: 29.jpg | ទៅស្មាឬទៅខ្លួន
Generate image: 30.jpg | ទៅស្មាឬទៅខ្លួន
Generate image: 31.jpg | នៃមនុស្ស
Generate image: 32.jpg | នៃមនុស្ស
Generate im