In [11]:
import os
import re
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image as im, ImageDraw, ImageFont
import PIL
from glob import glob
from utils import add_margin, expand2square, put_in_a_box, binarize
from tqdm import tqdm

from typing import *

In [2]:
PATH_TO_FONT_CHARS = "../formatted_data/chars_images/chars"

PATH_TO_AUGMENTED_CHARS = "../formatted_data/chars_images/augmented_chars"
PATH_TO_ROTATED_CHARS = f"{PATH_TO_AUGMENTED_CHARS}/rotated"
PATH_TO_OFFSET_CHARS = f"{PATH_TO_AUGMENTED_CHARS}/offset"
PATH_TO_CONNECTED_CHARS = f"{PATH_TO_AUGMENTED_CHARS}/connected"
PATH_TO_RESIZED_CHARS = f"{PATH_TO_AUGMENTED_CHARS}/resized"

LOWERCASE = "абвгдеёжзийклмнопрстуфхцчшщъыьэюя"
UPPERCASE = LOWERCASE.upper().replace("Ъ", '')

In [3]:
class Char:
    def __init__(self, path: str):
        self.path = path
        self.image = im.open(path)
        
        split = os.path.normpath(path).split(os.path.sep)
        self.name = split[-1].replace(".png", "")
        self.font = split[-2]


In [4]:
fonts = os.listdir(PATH_TO_FONT_CHARS)
chars_dict = {font: glob(f"{PATH_TO_FONT_CHARS}/{font}/*") for font in fonts}
for k, v in chars_dict.items():
    chars = []
    for char in v:
        chars.append(Char(char))
    chars_dict[k] = chars

In [5]:
def convert_png_transparent(image, bg_color=(255,255,255)):
    array = np.array(image, dtype=np.ubyte)
    mask = (array[:,:,:3] == bg_color).all(axis=2)
    alpha = np.where(mask, 0, 255)
    array[:,:,-1] = alpha
    return im.fromarray(np.ubyte(array))

def get_dominant_color(img):
    palette_size = 2
    # Resize image to speed up processing
    #img = convert_png_transparent(pil_img.copy())
    img.thumbnail((100, 100))

    # Reduce colors (uses k-means internally)
    paletted = img.convert('P', palette=im.ADAPTIVE, colors=palette_size)

    # Find the color that occurs most often
    palette = paletted.getpalette()
    color_counts = sorted(paletted.getcolors(), reverse=False)
    palette_index = color_counts[0][1]
    dominant_color = palette[palette_index*3:palette_index*3+3]
    dominant_color.append(255)

    return tuple(dominant_color)

def expand(img, top=0, right=0, bottom=0, left=0):
    return add_margin(
             expand2square(img),
             top=top,
             bottom=bottom,
             left=left,
             right=right
           )

def rotate(degree, image):
    return image.rotate(degree, expand=True, fillcolor=(255,255,255), resample=PIL.Image.Resampling.BICUBIC)

def closest_to(char_image, coord: Tuple[int, int]):
    bin_img = binarize(char_image)
    h, w = char_image.size
    distances = list()
    for r_idx in range(w):
        for c_idx in range(h):
            if bin_img[r_idx, c_idx] > 0.5:
                distance = euclideanDistance(coord, (r_idx, c_idx))
                distances.append((r_idx, c_idx, distance))
    return min(distances, key=lambda entry: entry[2])[:2]

def resize(image: "Image", what: str, how_much: float):
    w,h = image.width, image.height
    if what == 'width':  return image.resize((round(w * how_much), h))
    if what == 'height': return image.resize((w, round(h * how_much)))
    raise AssertionError(f"`what` must be either \"width\" or \"height\", not {what}")
    
def euclideanDistance(coordinate1, coordinate2):
    return pow(pow(coordinate1[0] - coordinate2[0], 2) + pow(coordinate1[1] - coordinate2[1], 2), .5)

def fill_background_png(image):
    fill_color = (255,255,255)  # your new background color
    if image.mode in ('RGBA', 'LA'):
        background = im.new(image.mode[:-1], image.size, fill_color)
        background.paste(image, image.split()[-1]) # omit transparency
        image = background
    return image

In [6]:
""" MOVING UP AND DOWN """
for font, charset in chars_dict.items():
    font_specific_path = f"{PATH_TO_OFFSET_CHARS}/{font}"
    os.makedirs(font_specific_path, exist_ok=True)
    for char in charset:
        for margin, side in [(m, side) for m in range(5,16,5) for side in "rltb"]:
            if side == "l":
                image = expand(char.image, left=margin)
            elif side == "r":
                image = expand(char.image, right=margin)
            elif side == "t":
                image = expand(char.image, top=margin)
            elif side == "b":
                image = expand(char.image, bottom=margin)
            image.convert("RGB").save(f"{font_specific_path}/{char.name}_{side}{margin}.jpg")

In [7]:
""" ROTATION FROM -40° to 40° """
for font, charset in chars_dict.items():
    font_specific_path = f"{PATH_TO_ROTATED_CHARS}/{font}"
    os.makedirs(font_specific_path, exist_ok=True)
    for char in charset:
        for degree in range(-40, 41, 5):
            image = rotate(degree, char.image)
            image.convert("RGB").save(f"{font_specific_path}/{char.name}_{degree}.jpg")


In [8]:
# goes up and right from here
right_bottom_spot = lambda image: (closest_to(image, (image.height, image.width)), {"x": 100, "y": -30})
# goes down and right from here
right_top_spot = lambda image: (closest_to(image, (0, image.height)), {"x": 100, "y": 30})

# goes up and rigth from here
left_bottom_spot = lambda image: (closest_to(image, (image.width, 0)), {"x": 100, "y": -30})
# goes down and left from here
left_top_spot = lambda image: (closest_to(image, (0,0)), {"x": -100, "y": 30})

In [9]:
""" CONNECTED CHARS """
for font, charset in chars_dict.items():
    font_specific_path = f"{PATH_TO_CONNECTED_CHARS}/{font}"
    os.makedirs(font_specific_path, exist_ok=True)
    for char in charset:
        dominant = get_dominant_color(char.image)
        # left_top from prev
        char_copy = char.image.copy()
        draw = ImageDraw.Draw(char_copy) 
        (y, x), destination = left_top_spot(char_copy)
        draw.line((x,y, x+destination["x"],y+destination["y"]), fill=dominant, width=2, joint="curve")     
        char_copy.convert("RGB").save(f"{font_specific_path}/{char.name}_lt.jpg")
        
        # right_top to next
        char_copy = char.image.copy()
        draw = ImageDraw.Draw(char_copy)
        (y, x), destination = right_top_spot(char_copy)
        draw.line((x,y, x+destination["x"],y+destination["y"]), fill=dominant, width=2, joint="curve")
        char_copy.convert("RGB").save(f"{font_specific_path}/{char.name}_rt.jpg")
        
        # right_bottom to next
        char_copy = char.image.copy()
        draw = ImageDraw.Draw(char_copy) 
        (y, x), destination = right_bottom_spot(char_copy)
        draw.line((x,y, x+destination["x"],y+destination["y"]), fill=dominant, width=2, joint="curve")
        char_copy.convert("RGB").save(f"{font_specific_path}/{char.name}_rb.jpg")
        
        # left_bottom to next
        char_copy = char.image.copy()
        draw = ImageDraw.Draw(char_copy) 
        (y, x), destination = left_bottom_spot(char_copy)
        draw.line((x,y, x+destination["x"],y+destination["y"]), fill=dominant, width=2, joint="curve")
        char_copy.convert("RGB").save(f"{font_specific_path}/{char.name}_lb.jpg")

  paletted = img.convert('P', palette=im.ADAPTIVE, colors=palette_size)


KeyboardInterrupt: 

In [None]:
""" RESIZED, i.e. squashed or extended"""
for font, charset in chars_dict.items():
    font_specific_path = f"{PATH_TO_RESIZED_CHARS}/{font}"
    os.makedirs(font_specific_path, exist_ok=True)
    for char in charset:
        w, h = char.image.size
        resize(char.image, "width",  1.2).convert("RGB").save(f"{font_specific_path}/{char.name}_20%wider.jpg")
        resize(char.image, "width",  1.4).convert("RGB").save(f"{font_specific_path}/{char.name}_40%wider.jpg")
        resize(char.image, "width",  1.6).convert("RGB").save(f"{font_specific_path}/{char.name}_60%wider.jpg")
        resize(char.image, "width",  1.8).convert("RGB").save(f"{font_specific_path}/{char.name}_80%wider.jpg")
        
        resize(char.image, "width",   .8).convert("RGB").save(f"{font_specific_path}/{char.name}_20%narrower.jpg")
        resize(char.image, "width",   .6).convert("RGB").save(f"{font_specific_path}/{char.name}_40%narrower.jpg")
        resize(char.image, "width",   .5).convert("RGB").save(f"{font_specific_path}/{char.name}_50%narrower.jpg")
        
        resize(char.image, "height", 1.2).convert("RGB").save(f"{font_specific_path}/{char.name}_20%higher.jpg")
        resize(char.image, "height", 1.4).convert("RGB").save(f"{font_specific_path}/{char.name}_40%higher.jpg")
        resize(char.image, "height", 1.6).convert("RGB").save(f"{font_specific_path}/{char.name}_60%higher.jpg")
        resize(char.image, "height", 1.8).convert("RGB").save(f"{font_specific_path}/{char.name}_80%higher.jpg")
        
        resize(char.image, "height",  .8).convert("RGB").save(f"{font_specific_path}/{char.name}_20%shorter.jpg")
        resize(char.image, "height",  .6).convert("RGB").save(f"{font_specific_path}/{char.name}_40%shorter.jpg")
        resize(char.image, "height",  .4).convert("RGB").save(f"{font_specific_path}/{char.name}_60%shorter.jpg")


Создаём фейковые изображения символов в связке с соседними символами и называем это умным словом "аугментация"

In [10]:
def write(font, word):
    img  = im.new("RGBA", (500,150),(255,255,255))
    ImageDraw.Draw(img).text((22, 22), word,(49, 76, 175), font=font)
    return put_in_a_box(img)

def width_of(font, word):
    return write(font, word).width

ttfs = glob("ttfs/*.ttf")

In [None]:
""" SIMULATING A CONTEXT FOR A CHAR """
def simulate(target_char_pool=LOWERCASE,
             left_neighbor_pool=LOWERCASE,
             right_neighbor_pool=LOWERCASE,
             simulations_per_char=33,
             description='simulation'):
    for ttf in ttfs:
        font = ImageFont.truetype(ttf,64)
        fontname = os.path.basename(ttf).replace(".ttf", '')
        font_specific_path = f"{PATH_TO_AUGMENTED_CHARS}/{description}/{fontname}"
        os.makedirs(font_specific_path, exist_ok=True)
        print(fontname)
        for s in tqdm(target_char_pool):
            for idx in range(1,simulations_per_char):
                random = np.random.randint(-50, 50)
                
                f = left_neighbor_pool[random % idx % len(left_neighbor_pool)]
                t = right_neighbor_pool[-random % idx % len(right_neighbor_pool)]
                word = f"{f}{s}{t}"

                img = write(font, word)

                l = img.width - width_of(font, f"{s+t}")
                r = width_of(font, f"{f+s}")
                s_itself = width_of(font, s)
                
                # widen the borders so that the second letter certainly fits in
                if s_itself - 2 <= r - l <= s_itself + 2:
                    l, r = l-5, r+5
                img = img.crop((l, 0, r, img.height))
                
                ## Applying mutations to widen the variety of images
                # 40% chance to rotate
                if 15 <= abs(random) <= 35:
                    img = rotate(random // 2, img)
                # 50% chance to expand or shrink an image in either of two dimensions
                if random % 2 == 0: 
                    img = resize(image=img,
                                 what="width" if abs(random) <= 25 else "height",
                                 how_much=(120 + random) / 100)
                fill_background_png(img).convert('RGB').save(f"{font_specific_path}/{s}_{word}.jpg")
simulate()

In [27]:
""" PUNCTUTATION MARKS """

target_char_pool="-:,…."
left_neighbor_pool= re.sub("[вбзуд]", "", LOWERCASE)
max_width = 0
max_name = ""
min_width = 9999
min_name = ""
avg_width = []

for ttf in ttfs:
    font = ImageFont.truetype(ttf,64)
    fontname = os.path.basename(ttf).replace(".ttf", '')
    font_specific_path = f"{PATH_TO_AUGMENTED_CHARS}/punctuation_marks/{fontname}"
    os.makedirs(font_specific_path, exist_ok=True)
    print(fontname)
    for s in tqdm(target_char_pool):
        for idx in range(1,70):
            try:
                random = np.random.randint(-50, 50)

                f = left_neighbor_pool[random % len(left_neighbor_pool)]
                word = f"{f}{s} "

                img = write(font, word)

                l = img.width - width_of(font, f"{s} ") - 3
                r = width_of(font, f"{f+s}") + 3
                img = img.crop((l, 0, r, img.height))

                ## Applying mutations to widen the variety of images
                # 40% chance to rotate
                if 15 <= abs(random) <= 35:
                    img = rotate(random // 2, img)
                # 50% chance to expand or shrink an image in either of two dimensions
                if random % 2 == 0: 
                    img = resize(image=img,
                                 what="width" if abs(random) >= 25 else "height",
                                 how_much=(140 + random*0.8) / 100)
                fill_background_png(img).convert('RGB').save(f"{font_specific_path}/{s}_{idx}.jpg")
                
                if img.width < min_width:
                    min_width = min(min_width, img.width)
                    min_name = f"{font_specific_path}/{s}_{idx}.jpg"
                if img.width > max_width:
                    max_width = max(max_width, img.width)
                    max_name = f"{font_specific_path}/{s}_{idx}.jpg"
                avg_width.append(img.width)
            except Exception as e:
                print(e)
                
print(min_width, min_name)
print(max_width, max_name)
print(np.average(avg_width))

Abram


100%|█████████████████████████████████████████████| 5/5 [00:06<00:00,  1.36s/it]


Propisi


100%|█████████████████████████████████████████████| 5/5 [00:06<00:00,  1.37s/it]


Gogol


100%|█████████████████████████████████████████████| 5/5 [00:06<00:00,  1.39s/it]


Pag


100%|█████████████████████████████████████████████| 5/5 [00:06<00:00,  1.38s/it]


Capuletty


100%|█████████████████████████████████████████████| 5/5 [00:06<00:00,  1.37s/it]


Nexa_Script


100%|█████████████████████████████████████████████| 5/5 [00:07<00:00,  1.47s/it]


Eskal


100%|█████████████████████████████████████████████| 5/5 [00:06<00:00,  1.37s/it]


Rozovii_Chulok


100%|█████████████████████████████████████████████| 5/5 [00:06<00:00,  1.37s/it]


Salavat


100%|█████████████████████████████████████████████| 5/5 [00:06<00:00,  1.38s/it]


Benvolio


100%|█████████████████████████████████████████████| 5/5 [00:06<00:00,  1.37s/it]


Lorenco


100%|█████████████████████████████████████████████| 5/5 [00:06<00:00,  1.38s/it]


Montekky


100%|█████████████████████████████████████████████| 5/5 [00:06<00:00,  1.39s/it]


Denistina


100%|█████████████████████████████████████████████| 5/5 [00:06<00:00,  1.38s/it]


Tibalt


100%|█████████████████████████████████████████████| 5/5 [00:06<00:00,  1.36s/it]

7 ../formatted_data/chars_images/augmented_chars/punctuation_marks/Propisi/,_13.jpg
95 ../formatted_data/chars_images/augmented_chars/punctuation_marks/Lorenco/…_18.jpg
25.611387163561076





In [65]:
ttfs = glob("ttfs/*.ttf")
""" TRASH  """
def trash():
    for ttf in ttfs:
        font = ImageFont.truetype(ttf,64)
        fontname = os.path.basename(ttf).replace(".ttf", '')
        font_specific_path = f"{PATH_TO_AUGMENTED_CHARS}/trash/{fontname}"
        os.makedirs(font_specific_path, exist_ok=True)
        print(fontname)
        for one in tqdm(LOWERCASE):
            for two in LOWERCASE:
                for span in np.arange(0.4, 0.51, 0.05):
                    word = f"{one + two}"
                    img = write(font, word)
                    l, r = img.width * span, img.width * (span+0.2)
                    img = fill_background_png(img).convert('RGB').crop((l, 0, r, img.height))
                    img.save(f"{font_specific_path}/{one}{two}_{round(span, 2)}.jpg")
trash()

Abram


100%|███████████████████████████████████████████| 33/33 [00:21<00:00,  1.51it/s]


Propisi


100%|███████████████████████████████████████████| 33/33 [00:22<00:00,  1.47it/s]


Gogol


100%|███████████████████████████████████████████| 33/33 [00:22<00:00,  1.46it/s]


Pag


100%|███████████████████████████████████████████| 33/33 [00:22<00:00,  1.47it/s]


Capuletty


100%|███████████████████████████████████████████| 33/33 [00:22<00:00,  1.47it/s]


Nexa_Script


100%|███████████████████████████████████████████| 33/33 [00:24<00:00,  1.37it/s]


Eskal


100%|███████████████████████████████████████████| 33/33 [00:22<00:00,  1.47it/s]


Rozovii_Chulok


100%|███████████████████████████████████████████| 33/33 [00:22<00:00,  1.48it/s]


Salavat


100%|███████████████████████████████████████████| 33/33 [00:22<00:00,  1.48it/s]


Benvolio


100%|███████████████████████████████████████████| 33/33 [00:21<00:00,  1.52it/s]


Lorenco


100%|███████████████████████████████████████████| 33/33 [00:21<00:00,  1.50it/s]


Montekky


100%|███████████████████████████████████████████| 33/33 [00:22<00:00,  1.48it/s]


Denistina


100%|███████████████████████████████████████████| 33/33 [00:22<00:00,  1.49it/s]


Tibalt


100%|███████████████████████████████████████████| 33/33 [00:21<00:00,  1.51it/s]
