# Import libraries

In [None]:
import PIL
from PIL import Image, ImageDraw, ImageFont
from colormath.color_objects import sRGBColor, LabColor
from colormath.color_conversions import convert_color
from colormath.color_diff import delta_e_cie2000
import numpy as np
import random
import os
import string
import cv2
from tqdm import tqdm
from multiprocessing import Pool
import tarfile
import io
from typing import Tuple, NoReturn, Dict, List
import string
import json

# Settings

In [None]:
# Generator settings
# Name of the dataset: used as a sub-folder of 'dataset' and as the yaml file name
dataset_name = 'dataset'
# Folder that is scanned for .ttf font files
fonts_path = 'fonts'

# Total number of captchas; divided evenly between the workers
captchas_amount = 500000
# Number of worker processes (each worker index gets its own tar file)
workers_amount = 50
# Base captcha size as (width, height), before scaling
initial_captcha_shape = (280, 75)
# Divisor applied to the captcha size and to the symbol metrics
captcha_scale_koef = 1
# Number of symbols rendered on each captcha
captchas_text_len = 6
# Number of random lines drawn on each captcha
lines_amount = 5
# Whether the random-noise layer is composited onto the captcha
noise_layer_enabled = False

# Colour ranges are [low RGB, high RGB]. They are passed to np.random.randint
# as low/high, so the high bound is exclusive.
bg_colors_range = [[254, 254, 254], [255, 255, 255]]
symbols_colors_range = [[0, 0, 0], [255, 255, 255]]
lines_colors_range = [[121, 121, 121], [122, 122, 122]]
# Line thickness range; passed to random.randint, so both ends are inclusive
lines_width_range = [2, 3]

# Split thresholds, compared against a random integer drawn per captcha:
# below val_perc -> 'val', below val_perc + train_perc -> 'train',
# the remainder -> 'test'
val_perc = 3
train_perc = 95

# Symbols that may appear on a captcha; a symbol's index in this list is its class id
target_symbols = list(string.ascii_lowercase + string.digits)

In [None]:
# Getting available fonts
# Only the top level of fonts_path is scanned (a missing folder gives an empty list).
# The extension is matched as a case-insensitive suffix, so 'FONT.TTF' is accepted
# and names that merely contain '.ttf' (e.g. 'font.ttf.bak') are skipped.
_, _, top_level_files = next(os.walk(fonts_path), (fonts_path, [], []))
fonts_filenames = [name for name in top_level_files if name.lower().endswith('.ttf')]

# Functions for preparing for generating

In [None]:
def create_dir(dirname: str) -> bool:
    """
    Creates a directory (together with missing parent folders)
    if it does not exist yet

    Parameters
    ----------
    dirname: str
        Path of the directory to create

    Returns
    -------
    created: bool
        True if the directory was created by this call,
        False if the path already existed
    """

    # Attempt the creation and handle "already there" afterwards: checking for
    # existence first leaves a window in which the path can appear before makedirs runs
    try:
        os.makedirs(dirname)
    except FileExistsError:
        return False
    return True

def create_dataset_dir() -> None:
    """
    Creates dataset's folder ('dataset/<dataset_name>').
    Does nothing if the folder already exists.
    """

    # makedirs also creates the intermediate 'dataset' folder, so a single call is enough
    os.makedirs(os.path.join('dataset', dataset_name), exist_ok=True)

In [None]:
def map_target_symbols() -> Dict[str, int]:
    """
    Builds the lookup table from symbol to class id

    The id of a symbol is its position in the global
    target_symbols list.

    Returns
    -------
    mapped_symbs: Dict[str, int]
        Dictionary {symbol: id}
    """

    return {symbol: position for position, symbol in enumerate(target_symbols)}

# Functions for generating captcha's colors

In [None]:
def get_colors_difference(bg_color: np.ndarray, symbol_color: np.ndarray) -> float:
    """
    Measures how different the background color and a symbol
    color are, so that symbols which would blend into the
    background can be rejected.

    Parameters
    ----------
    bg_color: np.ndarray
        Background color sample (R, G, B in 0..255)
    symbol_color: np.ndarray
        Symbol's color sample (R, G, B in 0..255)

    Returns
    -------
    color_dif: float
        CIEDE2000 difference between the two colors
    """

    def to_lab(rgb):
        # Channels are scaled to 0..1 before the sRGB -> Lab conversion
        red, green, blue = rgb[0]/255, rgb[1]/255, rgb[2]/255
        return convert_color(sRGBColor(red, green, blue), LabColor)

    return delta_e_cie2000(to_lab(bg_color), to_lab(symbol_color))

In [None]:
def generate_random_color(colors_range: List[List[int]]) -> np.ndarray:
    """
    Draws a random RGB color from the given range

    Parameters
    ----------
    colors_range: List[List[int]]
        [lowest RGB, highest RGB]; each channel is drawn independently.
        The highest value is exclusive (np.random.randint semantics).

    Returns
    -------
    color_sample: np.ndarray
        Generated color, uint8 array of 3 channels
    """

    lower_bound = colors_range[0]
    upper_bound = colors_range[1]
    return np.random.randint(lower_bound, upper_bound, size=3, dtype=np.uint8)

def generate_symbol_color(bg_color_sample: np.ndarray, colors_range: List[List[int]], max_attempts: int = 1000) -> np.ndarray:
    """
    Generates random symbol color that is visible on captcha's background color

    Parameters
    ----------
    bg_color_sample: np.ndarray
        Generated captcha's background color
    colors_range: List[List[int]]
        Range of colors to choose from for symbol
        color
    max_attempts: int
        How many random candidates to try before giving up

    Returns
    -------
    symbol_color_sample: np.ndarray
        Generated symbol's color

    Raises
    ------
    RuntimeError
        If no candidate within max_attempts is different enough
        from the background
    """

    # A loop instead of recursion: every retry used to add a stack frame, so an
    # unlucky or unsatisfiable range ended in a RecursionError deep inside the call chain
    for _ in range(max_attempts):
        symbol_color_test_sample = generate_random_color(colors_range)

        # Too close to the background: the symbol would not be visible, try again
        if get_colors_difference(bg_color_sample, symbol_color_test_sample) < 40:
            continue
        return symbol_color_test_sample

    raise RuntimeError(
        f'Could not find a symbol color visible on the background after {max_attempts} attempts'
    )

# Functions for getting symbols coords on captcha

In [None]:
def get_symbol_dimensions(symbol: str, font: PIL.ImageFont.FreeTypeFont) -> Tuple[int, int, int, int]:
    """
    Measures a symbol rendered with the target font.
    The measurement is done on a throw-away image whose side
    is twice the font size.

    Parameters
    ----------
    symbol: str
        Target symbol whose dimensions need to be determined
    font: PIL.ImageFont.FreeTypeFont
        Target symbol's font

    Returns
    -------
    bbox: Tuple[int, int, int, int]
        Symbol's bounding box (left, top, right, bottom),
        measured with the text anchored at (0, 0)
    """

    side = int(font.size*2)
    scratch_image = Image.new('RGBA', (side, side), (255, 255, 255, 0))

    return ImageDraw.Draw(scratch_image).textbbox((0, 0), symbol, font=font)

In [None]:
def limit_symbol_box_coords(coord: int, dim: int) -> int:
    """
    Clamps a box coordinate to the captcha's extent so that
    boxes never reach beyond the image's borders

    Parameters
    ----------
    coord: int
        Initial box's coordinate
    dim: int
        Dimension of the coordinate (0 for X axis, 1 for Y)

    Returns
    -------
    result_coord: int
        Coordinate limited to [0, scaled captcha size along dim]
    """

    upper_limit = int(initial_captcha_shape[dim]/captcha_scale_koef)

    if coord > upper_limit:
        return upper_limit
    return max(coord, 0)

# Functions for creating images and layers of captcha

In [None]:
def create_background_layer(bg_color_sample: np.ndarray, img_w: int, img_h: int) -> PIL.Image.Image:
    """
    Creates the plain, single-color background of a captcha

    Parameters
    ----------
    bg_color_sample: np.ndarray
        Background color
    img_w: int
        Captcha's width
    img_h: int
        Captcha's height

    Returns
    -------
    bg_image: PIL.Image.Image
        Result background layer of captcha (RGBA)
    """

    # TODO: add option for gradients
    fill_color = tuple(bg_color_sample)
    layer_size = (img_w, img_h)

    return Image.new('RGBA', layer_size, fill_color)

In [None]:
def save_captchas_annotation(annotation: List[str], captcha_name: str, random_split_number: int, tar: tarfile.TarFile) -> None:
    """
    Saves captcha's annotation (ids and boxes of symbols) into the tar
    file under 'labels/<split>/<captcha_name>.txt'.

    The split is chosen from random_split_number: values below val_perc
    go to the validation set, values below val_perc + train_perc go to
    the train set and the remaining values go to the test set.

    Parameters
    ----------
    annotation: List[str]
        Captcha's annotation, one line per symbol
    captcha_name: str
        Name of target captcha
    random_split_number: int
        Number in 0..100 that defines dataset split on validation, train and test sets
    tar: tarfile.TarFile
        Current tar file to save captchas in

    Raises
    ------
    ValueError
        If random_split_number is outside 0..100
    """

    # Reject unexpected values explicitly; previously they fell through every
    # branch and failed later with an unrelated "variable not assigned" error
    if not 0 <= random_split_number <= 100:
        raise ValueError(f'random_split_number must be in 0..100, got {random_split_number}')

    if random_split_number < val_perc:
        split = 'val'
    elif random_split_number < (val_perc+train_perc):
        split = 'train'
    else:
        split = 'test'

    annotation_bytes = "\n".join(annotation).encode("utf-8")

    # Tar member names always use '/', whatever the separator of the host OS is
    info = tarfile.TarInfo(name='/'.join(('labels', split, captcha_name+'.txt')))
    info.size = len(annotation_bytes)
    tar.addfile(info, io.BytesIO(annotation_bytes))

In [None]:
def create_lines_layer(img_w: int, img_h: int, colors_range: List[List[int]], width_range: List[int]) -> PIL.Image.Image:
    """
    Creates captcha's layer with random lines

    Parameters
    ----------
    img_w: int
        Captcha's width
    img_h: int
        Captcha's height
    colors_range: List[List[int]]
        Range of colors to choose from
    width_range: List[int]
        Range of width of lines (both ends inclusive)

    Returns
    -------
    lines_image: PIL.Image.Image
        Result layer of captcha with random lines
    """

    transparent_canvas = Image.new('RGBA', (img_w, img_h), (255, 255, 255, 0))

    # np.array makes a writable copy. np.asarray on a PIL image yields a read-only
    # array, which OpenCV cannot use as the output image of cv2.line.
    lines_image_arr = np.array(transparent_canvas)

    # Creating lines on layer
    for _ in range(lines_amount):
        # RGB from the configured range plus a random, mostly opaque alpha
        line_color = [int(channel) for channel in generate_random_color(colors_range)]
        line_color.append(random.randint(200, 255))

        lines_image_arr = cv2.line(img=lines_image_arr,
                                   pt1=(random.randint(0, img_w), random.randint(0, img_h)),
                                   pt2=(random.randint(0, img_w), random.randint(0, img_h)),
                                   color=tuple(line_color),
                                   thickness=random.randint(width_range[0], width_range[1]))

    return Image.fromarray(lines_image_arr)

In [None]:
def create_foreground_layer(bg_color_sample: np.ndarray, img_w: int, img_h: int) -> Tuple[PIL.Image.Image, List[str]]:
    """
    Creates captcha's layer with random symbols

    Parameters
    ----------
    bg_color_sample: np.ndarray
        Background color
    img_w: int
        Captcha's width
    img_h: int
        Captcha's height

    Returns
    -------
    fg_image, symbols_boxes: Tuple(PIL.Image.Image, List[str])
        Result foreground layer of captcha with symbols and
        symbol's bboxes. Every bbox is a line
        "<symbol id> <center x> <center y> <width> <height>"
        with coordinates normalised by the image size (YOLO v5 format).
    """

    fg_image = Image.new('RGBA', (img_w, img_h), (255, 255, 255, 0))
    draw_instance = ImageDraw.Draw(fg_image)

    # Left edge available for the next symbol and the common top offset
    next_symbol_x = int(10/captcha_scale_koef)
    symbols_y = int(5/captcha_scale_koef)

    # random.randint requires integer bounds: float arguments are rejected
    # since Python 3.12, so the scaled limits are converted once up front
    font_size_bounds = (int(30/captcha_scale_koef), int(50/captcha_scale_koef))
    symbols_gap_bounds = (int(5/captcha_scale_koef), int(25/captcha_scale_koef))
    min_top_margin = int(-10/captcha_scale_koef)

    system_random = random.SystemRandom()
    symbols_boxes = []
    string_to_render = ''.join(system_random.choice(target_symbols) for _ in range(captchas_text_len))

    for symbol in string_to_render:
        # Getting symbol's random parameters (color, font, symbols gap and top margin)
        symbol_color = generate_symbol_color(bg_color_sample, symbols_colors_range)
        font_size = random.randint(*font_size_bounds)
        font = ImageFont.truetype(os.path.join(fonts_path, system_random.choice(fonts_filenames)), size=font_size)
        symbols_gap = random.randint(*symbols_gap_bounds)
        top_margin = random.randint(min_top_margin, 0)

        # Drawing hollow symbol
        origin_x = next_symbol_x + symbols_gap
        origin_y = symbols_y + top_margin
        draw_hollow_symbol(draw_instance, (origin_x, origin_y), symbol, symbol_color, font)

        # Getting drawn symbol's box, clamped to the image
        # NOTE(review): the box is measured without the stroke that draw_hollow_symbol
        # adds, so it may be slightly tighter than the drawn outline - confirm
        symbol_dim = get_symbol_dimensions(symbol, font)
        box_left = limit_symbol_box_coords(origin_x+symbol_dim[0], dim=0)
        box_top = limit_symbol_box_coords(origin_y+symbol_dim[1], dim=1)
        box_right = limit_symbol_box_coords(origin_x+symbol_dim[2], dim=0)
        box_bottom = limit_symbol_box_coords(origin_y+symbol_dim[3], dim=1)

        ## Transform the bbox co-ordinates as per the format required by YOLO v5
        ## and normalise them by the dimensions of the image
        b_center_x = (box_left + box_right)/2/img_w
        b_center_y = (box_top + box_bottom)/2/img_h
        b_width = (box_right - box_left)/img_w
        b_height = (box_bottom - box_top)/img_h

        # Save the bbox details to symbols_boxes
        symbol_id = symbols_mapped[symbol]
        symbols_boxes.append("{} {:.3f} {:.3f} {:.3f} {:.3f}".format(symbol_id, b_center_x, b_center_y, b_width, b_height))

        # The next symbol starts to the right of this one
        next_symbol_x = origin_x + symbol_dim[2]

    return fg_image, symbols_boxes

In [None]:
def create_noise_layer(img_w: int, img_h: int) -> PIL.Image.Image:
    """
    Creates a layer of random pixels that can be put over
    the captcha to make it more complex

    Parameters
    ----------
    img_w: int
        Captcha's width
    img_h: int
        Captcha's height

    Returns
    -------
    noise_image: PIL.Image.Image
        Result layer of captcha with random noise
    """

    random_pixels = np.random.randint(low=0, high=255, size=(img_h, img_w, 4), dtype=np.uint8)
    noise_image = Image.fromarray(random_pixels, 'RGBA')

    # One random alpha value for the whole layer replaces the per-pixel random alpha
    layer_alpha = random.randint(0, 70)
    noise_image.putalpha(layer_alpha)

    return noise_image

In [None]:
def compose_captcha_and_save(bg_layer: PIL.Image.Image, fg_layer: PIL.Image.Image, noise_layer: PIL.Image.Image, lines_layer: PIL.Image.Image, captcha_name: str, random_split_number: int, tar: tarfile.TarFile) -> None:
    """
    Composes captcha from its layers and saves the result into the tar
    file under 'images/<split>/<captcha_name>.png'.

    The layers are composited onto bg_layer in place. The split is chosen
    from random_split_number: values below val_perc go to the validation
    set, values below val_perc + train_perc go to the train set and the
    remaining values go to the test set.

    Parameters
    ----------
    bg_layer: PIL.Image.Image
        Background layer of captcha (modified in place)
    fg_layer: PIL.Image.Image
        Foreground layer of captcha with symbols
    noise_layer: PIL.Image.Image
        Layer of captcha with random noise (used only if noise_layer_enabled)
    lines_layer: PIL.Image.Image
        Layer of captcha with random lines
    captcha_name: str
        Name of target captcha
    random_split_number: int
        Number in 0..100 that defines dataset split on validation, train and test sets
    tar: tarfile.TarFile
        Current tar file to save captchas in

    Raises
    ------
    ValueError
        If random_split_number is outside 0..100
    """

    # Reject unexpected values before doing any work; previously they fell through
    # every branch and failed later with an unrelated "variable not assigned" error
    if not 0 <= random_split_number <= 100:
        raise ValueError(f'random_split_number must be in 0..100, got {random_split_number}')

    if random_split_number < val_perc:
        split = 'val'
    elif random_split_number < (val_perc+train_perc):
        split = 'train'
    else:
        split = 'test'

    # Randomly put the lines either under or over the symbols
    if random.randint(0, 1) == 0:
        layers_order = (lines_layer, fg_layer)
    else:
        layers_order = (fg_layer, lines_layer)
    for layer in layers_order:
        bg_layer.alpha_composite(layer)

    if noise_layer_enabled:
        bg_layer.alpha_composite(noise_layer)

    # The captcha is stored as grayscale with alpha
    composed_img = bg_layer.convert('LA')

    composed_img_obj = io.BytesIO()
    composed_img.save(composed_img_obj, format='png')

    # Tar member names always use '/', whatever the separator of the host OS is
    info = tarfile.TarInfo(name='/'.join(('images', split, captcha_name+'.png')))
    info.size = len(composed_img_obj.getvalue())

    # Rewind so that addfile reads the PNG from its first byte
    composed_img_obj.seek(0)
    tar.addfile(info, composed_img_obj)

# Additional processing

This is needed to draw hollow (outline-only) symbols using a regular font.

In [None]:
def draw_hollow_symbol(draw_instance: PIL.ImageDraw.ImageDraw, coords: Tuple[int, int], symbol: str, symbol_color: np.ndarray, font: PIL.ImageFont.FreeTypeFont) -> None:
    """
    Draws hollow symbol on image: only the outline gets the symbol's
    color, the inside of the glyph is filled with a fully transparent color

    Parameters
    ----------
    draw_instance: PIL.ImageDraw.ImageDraw
        Object for drawing on image
    coords: Tuple[int, int]
        Coordinates where to draw symbol on image
    symbol: str
        Symbol to draw on image
    symbol_color: np.ndarray
        Color of target symbol (R, G, B)
    font: PIL.ImageFont.FreeTypeFont
        Font of target symbol
    """

    fill_color = (255, 255, 255, 0)

    # Plain Python ints plus an opaque alpha, so no numpy scalars reach the drawing call
    stroke_color = tuple(int(channel) for channel in symbol_color) + (255,)

    # Outline thickness is random for every symbol
    stroke_width = random.randint(2, 4)

    draw_instance.text(coords, symbol, fill=fill_color, stroke_width=stroke_width, stroke_fill=stroke_color, font=font)

# Function to generate 1 captcha

In [None]:
def generate_captcha(indx: int) -> None:
    """
    Generates one worker's share of captchas
    (captchas_amount / workers_amount of them) and writes the images
    and their annotations into 'dataset/<dataset_name>/<indx>.tar'

    Parameters
    ----------
    indx: int
        Index of current tar file
    """

    # Worker processes created by fork start with a copy of the parent's NumPy
    # random state, so without reseeding they would draw the same values.
    # Calling seed() without arguments reseeds from fresh OS entropy.
    np.random.seed()

    captchas_amount_batch = int(captchas_amount/workers_amount)

    # The captcha size is the same for the whole batch
    img_w = int(initial_captcha_shape[0]/captcha_scale_koef)
    img_h = int(initial_captcha_shape[1]/captcha_scale_koef)

    # The context manager closes the tar file even if generation fails midway
    with tarfile.open(os.path.join('dataset', dataset_name, str(indx)+'.tar'), 'w:') as tar:
        for _ in range(captchas_amount_batch):
            # NOTE(review): names are random, so two captchas can end up with
            # the same name - confirm whether rare duplicates are acceptable
            captcha_name = str(random.randint(100000, 1000000000))

            # 100 equally likely values (0..99) so that val_perc and train_perc
            # are real percentages; randint(0, 100) produced 101 values
            random_split_number = random.randrange(100)

            bg_color_sample = generate_random_color(bg_colors_range)

            bg_layer = create_background_layer(bg_color_sample, img_w, img_h)
            fg_layer, annotation = create_foreground_layer(bg_color_sample, img_w, img_h)
            noise_layer = create_noise_layer(img_w, img_h)
            lines_layer = create_lines_layer(img_w, img_h, lines_colors_range, lines_width_range)

            save_captchas_annotation(annotation, captcha_name, random_split_number, tar)

            compose_captcha_and_save(bg_layer, fg_layer, noise_layer, lines_layer, captcha_name, random_split_number, tar)

            # Only the first worker reports progress, as a percentage of its own batch
            if indx == 0:
                pbar.update(100/captchas_amount_batch)

# Main loop

In [None]:
# Prepare for generating
# Create the output folder and build the symbol -> class id lookup
# (symbols_mapped is read as a global by create_foreground_layer)
create_dataset_dir()
symbols_mapped = map_target_symbols()

In [None]:
# Progress bar in percent; generate_captcha updates it from the worker with index 0
pbar = tqdm(total=100, bar_format='{l_bar}{bar:30}{r_bar}{bar:-30b}', position=0, leave=True)
tqdm._instances.clear()

# One task per worker index; the results carry no data and are
# iterated only to wait until every worker has finished
with Pool(workers_amount) as workers_pool:
    worker_indices = list(range(workers_amount))
    for _ in workers_pool.imap_unordered(generate_captcha, worker_indices):
        pass

# Add yaml for training nn

In [None]:
def create_yaml_file(train_path: str, val_path: str, test_path: str) -> None:
    """
    Creates the dataset's yaml file ('dataset/<dataset_name>/<dataset_name>.yaml')
    with the paths of the sets, the number of classes and the class names

    Parameters
    ----------
    train_path: str
        Absolute path to training set
    val_path: str
        Absolute path to validation set
    test_path: str
        Absolute path to testing set
    """

    classes_numb = len(target_symbols)
    # A JSON list is written as the yaml sequence of class names
    classes_list_str = json.dumps(target_symbols)

    yaml_file_content = (
        f'train: {train_path} \n'
        f'val:  {val_path} \n'
        f'test: {test_path} \n'
        '\n'
        '# number of classes \n'
        f'nc: {classes_numb} \n'
        '\n'
        '# class names \n'
        f'names: {classes_list_str}'
    )

    with open(os.path.join('dataset', dataset_name, dataset_name+'.yaml'), 'w', encoding='utf-8') as f:
        f.write(yaml_file_content)

In [None]:
# The paths written to the yaml are hard-coded under /content/<dataset_name>/
# (presumably where the dataset is unpacked for training - verify before use)
create_yaml_file(
    train_path=f'/content/{dataset_name}/images/train/',
    val_path=f'/content/{dataset_name}/images/val/',
    test_path=f'/content/{dataset_name}/images/test/'
)

# Create one tar file

In [None]:
# Merge all tar files into one
import subprocess

pbar = tqdm(total=100, bar_format='{l_bar}{bar:30}{r_bar}{bar:-30b}', position=0, leave=True)
tqdm._instances.clear()

tars_path = os.path.join('dataset', dataset_name)

# Only the top level of tars_path is scanned
_, _, top_level_files = next(os.walk(tars_path), (tars_path, [], []))
for filename in top_level_files:
    if filename.endswith('.tar'):
        # Arguments are passed as a list (no shell), so paths with spaces or special
        # characters are safe; check=True stops on a failed merge instead of
        # silently producing an incomplete dataset
        subprocess.run(
            ['tar', '--concatenate', f'--file={tars_path}.tar', os.path.join(tars_path, filename)],
            check=True,
        )
        pbar.update(100/workers_amount)

In [None]:
# Add our yaml to the merged tar file
# Because of a permission error with the tar command, the tarfile module is used here instead

# "a:" opens the existing uncompressed archive for appending
with tarfile.open(tars_path+'.tar', "a:") as tar:
    # arcname stores the yaml at the root of the archive, without the folder path
    tar.add(os.path.join(tars_path, dataset_name+'.yaml'), arcname=dataset_name+'.yaml')