# Letter Variations

In [241]:
from PIL import Image, ImageDraw
from scipy.spatial.distance import directed_hausdorff, cdist

import os
import random
import numpy as np
import copy
import itertools

import sys
sys.path.append('../')
import Bezier_Representation as BR
import Curve_Representation as CR
import Graph_Representation as GR
import Letter_Outline as LO

## 1) Add Gaussian Noise to All Control Points

In [3]:
def determine_weight(control_point, bez_repr_one_curve, epsilon, num_points):
    """
    Computes the noise weight of a control point from its distance to its own Bézier curve.

    The curve is sampled with discretize_bezier, the smallest Euclidean distance between
    the control point and any sample is taken, and the weight is the natural logarithm of
    that distance shifted by epsilon.

    Parameters:
    - control_point (tuple): The (x, y) coordinates of the control point.
    - bez_repr_one_curve (list): The four control points defining one cubic Bézier curve.
    - epsilon (float): Offset added to the distance before the logarithm is taken. With
      epsilon >= 1 the weight is never negative, and with epsilon == 1 a point at distance 0
      from the sampled curve gets weight 0.
    - num_points (int): The number of parameter values at which the curve is sampled.

    Returns:
    - float: log(distance + epsilon), with the argument of the logarithm floored at 1e-10.
    """
    samples = np.array([np.array(sample) for sample in discretize_bezier(bez_repr_one_curve, num_points)])
    query = np.array([control_point])

    # cdist yields a 1 x len(samples) matrix; its minimum is the distance to the closest sample.
    nearest = np.min(cdist(query, samples, 'euclidean'))

    # Guard the logarithm against zero or negative arguments.
    shifted = nearest + epsilon
    if shifted < 1e-10:
        shifted = 1e-10
    return np.log(shifted)


def bezier_curve(t, cp):
    """
    Evaluates a cubic Bézier curve at parameter t.

    Parameters:
    - t (float): Curve parameter; 0 yields the first control point and 1 the last one.
    - cp (list): The four control points of the curve.

    Returns:
    - numpy.ndarray: The point on the curve at parameter t.
    """
    p0, p1, p2, p3 = (np.array(cp[k]) for k in range(4))
    s = 1.0 - t
    # Bernstein form of the cubic: s^3 P0 + 3 s^2 t P1 + 3 s t^2 P2 + t^3 P3.
    return s**3 * p0 + 3 * s**2 * t * p1 + 3 * s * t**2 * p2 + t**3 * p3

def discretize_bezier(cp, num_points):
    """
    Samples a cubic Bézier curve and returns the distinct integer points it passes through.

    Parameters:
    - cp (list): The four control points of the curve.
    - num_points (int): Number of evenly spaced parameter values in [0, 1] to evaluate.

    Returns:
    - set: Tuples of rounded coordinates (stored as 16-bit integers); samples that round to
      the same position appear only once, and the set carries no ordering.
    """
    return {
        tuple(np.int16(np.round(bezier_curve(t, cp))))
        for t in np.linspace(0, 1, num_points)
    }

In [243]:
def modify_bezier_with_noise(bez_repr, noise, epsilon, num_points):
    """
    Displaces the control points of a Bézier representation by a weighted noise vector.

    Every control point is moved by `noise` scaled with the weight returned by
    determine_weight for that point and the curve it belongs to. Displaced coordinates are
    rounded to two decimals.

    Parameters:
    - bez_repr (list): The Bézier representation of the letter: a list of strokes, where
      stroke[1] is the list of Bézier curves (four control points each).
    - noise (array-like): The displacement that is multiplied by each control point's weight
      and added to the control point.
    - epsilon (float): Offset passed on to determine_weight.
    - num_points (int): The number of points used to discretize the Bézier curve for weight determination.

    Returns:
    - list: A new Bézier representation whose strokes keep stroke[0] and whose control points
      are numpy arrays holding the displaced coordinates.
    """
    # First pass over ALL strokes: one displaced position per distinct coordinate. A point
    # shared by several curves keeps the value computed for the last curve visited, so
    # shared points stay identical in every curve that uses them.
    displaced = {}
    for stroke in bez_repr:
        for curve in stroke[1]:  # one Bézier curve with four control points
            for point in curve:
                weight = determine_weight(point, curve, epsilon, num_points)
                displaced[tuple(point)] = np.round(np.array(point) + weight * noise, 2)

    # Second pass: rebuild the representation from the lookup table.
    return [
        [stroke[0], [[displaced[tuple(point)] for point in curve] for curve in stroke[1]]]
        for stroke in bez_repr
    ]

### Smoothness 

In [8]:
def is_smooth(p, bez_repr):
    """
    Decides whether the transition through point p has to be kept smooth.

    A point qualifies when it occurs in exactly two different curves and has exactly two
    distinct adjacent control points, which is the situation at a point where one curve was
    split into two. Stroke endpoints (one curve) and intersections (more than two curves)
    therefore do not qualify.

    Parameters:
    - p (tuple): The (x, y) coordinates of the point to check.
    - bez_repr (list): The Bézier representation of the letter, consisting of strokes and their control points.

    Returns:
    - bool: True if the point requires smoothness adjustments, False otherwise.
    """
    target = tuple(p)
    adjacent_points = set()
    touching_curves = set()

    for stroke in bez_repr:
        for curve in stroke[1]:
            for idx, point in enumerate(curve):
                if tuple(point) != target:
                    continue
                # abs(idx - 1) selects curve[1] for the start point and curve[2] for the end point.
                adjacent_points.add(tuple(curve[abs(idx - 1)]))
                # Curves are told apart by object identity, not by their coordinates.
                touching_curves.add(id(curve))

    return len(adjacent_points) == 2 and len(touching_curves) == 2

In [245]:
def smooth_transition(bez_repr):
    """
    Re-aligns the handles around curve junctions so that adjacent Bézier curves join smoothly.

    Every start or end point of a curve that is_smooth accepts is treated as a junction. At
    each junction the tangent of the curve ending there and the tangent of the curve starting
    there are summed and normalized; both neighbouring handles are then placed on that common
    direction, each keeping its original distance to the junction. If the two tangents cancel
    out, the averaged direction is zero and both handles are placed on the junction itself.

    Parameters:
    - bez_repr (list): The Bézier representation of the letter, consisting of the number of strokes and
      their control points.

    Returns:
    - list: A deep copy of bez_repr in which the adjusted handles are stored as numpy arrays;
      the input itself is not modified.
    """
    # Candidate junctions: every curve start/end point that passes the smoothness test.
    curve_starts = {tuple(curve[0]) for stroke in bez_repr for curve in stroke[1]}
    curve_ends = {tuple(curve[3]) for stroke in bez_repr for curve in stroke[1]}
    junctions = [pt for pt in curve_starts.union(curve_ends) if is_smooth(pt, bez_repr)]

    result = copy.deepcopy(bez_repr)

    # Flat (stroke index, curve index, curve) listing so each junction needs a single scan.
    indexed_curves = [
        (s_idx, c_idx, curve)
        for s_idx, stroke in enumerate(bez_repr)
        for c_idx, curve in enumerate(stroke[1])
    ]

    for junction in junctions:
        ending = [entry for entry in indexed_curves if points_equal(entry[2][3], junction)]
        starting = [entry for entry in indexed_curves if points_equal(entry[2][0], junction)]

        if not ending or not starting:
            continue  # A junction needs one curve arriving and one curve leaving.

        # When several curves match, the last one in traversal order supplies the tangent.
        incoming = ending[-1][2]
        outgoing = starting[-1][2]

        junction_xy = np.array(junction)
        tangent_in = np.array(incoming[3]) - np.array(incoming[2])
        tangent_out = np.array(outgoing[1]) - np.array(outgoing[0])
        direction = normalize(tangent_in + tangent_out)

        # Handle lengths are preserved; only their direction changes.
        handle_in_len = np.linalg.norm(np.array(incoming[2]) - junction_xy)
        handle_out_len = np.linalg.norm(np.array(outgoing[1]) - junction_xy)

        new_handle_in = junction_xy - direction * handle_in_len
        new_handle_out = junction_xy + direction * handle_out_len

        # Tangents were read from the untouched input; results go into the copy.
        for s_idx, c_idx, _ in ending:
            result[s_idx][1][c_idx][2] = new_handle_in  # stored as numpy array
        for s_idx, c_idx, _ in starting:
            result[s_idx][1][c_idx][1] = new_handle_out  # stored as numpy array

    return result

def normalize(v):
    """
    Scales a vector to unit Euclidean length.

    Parameters:
    - v (array-like): The vector to normalize.

    Returns:
    - numpy.ndarray: v divided by its norm. For a vector of norm 0 a zero vector of the same
      shape is returned, so the result is always an array and never a bare scalar.
    """
    vec = np.asarray(v, dtype=float)
    norm = np.linalg.norm(vec)
    if norm == 0:
        # A zero vector has no direction; hand back zeros with the input's shape.
        return np.zeros_like(vec)
    return vec / norm

def points_equal(p1, p2, tolerance=1e-6):
    """
    Helper function to compare two points coordinate by coordinate with some tolerance.

    Parameters:
    - p1 (array-like): The first point.
    - p2 (array-like): The second point.
    - tolerance (float): Absolute tolerance per coordinate; numpy's default relative
      tolerance is applied on top of it.

    Returns:
    - bool: True if every coordinate of p1 is close to the matching coordinate of p2.
    """
    coordinate_matches = np.isclose(p1, p2, atol=tolerance)
    return bool(coordinate_matches.all())

In [None]:
def add_noise(bez_repr, mean, stddev, epsilon, num_points=500):
    """
    Produces a noisy copy of a Bézier representation and restores smooth transitions in it.

    One two-component Gaussian displacement is drawn per call; modify_bezier_with_noise
    applies it to every control point scaled by that point's weight, and smooth_transition
    is run on the result.

    Parameters:
    - bez_repr (list): The Bézier representation of the letter, consisting of the number of strokes and
      their control points.
    - mean (float): mean for Gaussian noise
    - stddev (float): standard deviation for Gaussian noise
    - epsilon (float): Offset used in the weight calculation (see determine_weight).
    - num_points (int): The number of points used to discretize the Bézier curve for weight determination.

    Returns:
    - list: A modified Bézier representation with adjusted control points ensuring smooth transitions.
    """
    displacement = np.random.normal(mean, stddev, size=2)
    return smooth_transition(modify_bezier_with_noise(bez_repr, displacement, epsilon, num_points))

## 2) Make Variations

In [246]:
def hausdorf_distance(image1, image2):
    """
    Measures the Hausdorff distance between two characters (represented as images).

    Both images are reduced with image_to_01, the coordinates of all entries equal to 0 are
    collected for each image, and the larger of the two directed Hausdorff distances between
    those coordinate sets is returned, i.e. the worst-case mismatch between the characters.

    Parameters:
    - image1 (PIL image): the image of the first character.
    - image2 (PIL image): the image of the second character.

    Returns:
    - float: Hausdorff distance between two images.
    """
    coords_first, coords_second = (
        np.column_stack(np.where(image_to_01(img) == 0)) for img in (image1, image2)
    )

    # The directed distance is not symmetric, so both directions are evaluated.
    forward = directed_hausdorff(coords_first, coords_second)[0]
    backward = directed_hausdorff(coords_second, coords_first)[0]
    return max(forward, backward)

def image_to_01(image):
    """
    Converts an image to an array and integer-divides every entry by 255.

    Parameters:
    - image (array-like): The image to convert.

    Returns:
    - numpy.ndarray: Array in which entries equal to 255 become 1 and smaller
      non-negative entries become 0.
    """
    pixels = np.array(image)
    return np.floor_divide(pixels, 255)

In [247]:
def make_variations(bez_repr, hausdorff_threshold, character, target_folder_path, size=128, num_points=500, mean=0, stddev=10, epsilon=1, num_variations=500):
    """
    Makes variations of a given Bézier representation of the character skeleton.

    Noisy candidates are generated with add_noise and rendered with
    BR.generate_letter_from_bezier. A candidate is accepted when its Hausdorff distance to
    the rendering of the unmodified representation does not exceed the threshold; accepted
    images are saved as '<target_folder_path>/<character>/<NNN>.png', numbered from 001.
    The loop runs until enough candidates were accepted, so a threshold that is never met
    makes it run forever.

    Parameters:
    - bez_repr (list): The Bézier representation of the character skeleton.
    - hausdorff_threshold (float): Threshold for accepting a variation depending on its closeness to the original.
    - character (str): Name of the sub-folder (one per character) the images are written to.
    - target_folder_path (str): Path to the output folder for storing the variations.
    - size (int): Passed to BR.generate_letter_from_bezier when rendering.
    - num_points (int): Passed to BR.generate_letter_from_bezier and to add_noise.
    - mean (float): mean for Gaussian noise
    - stddev (float): standard deviation for Gaussian noise
    - epsilon (float): Offset used in the weight calculation of add_noise.
    - num_variations (int): Number of accepted variations to produce (500 by default).
    """
    original_image = BR.generate_letter_from_bezier(bez_repr, size, num_points)

    # The output folder does not depend on the loop, so create it once up front.
    output_dir = f'{target_folder_path}/{character}'
    os.makedirs(output_dir, exist_ok=True)

    variation_count = 0

    while variation_count < num_variations:
        # Forward num_points so the weight computation uses the caller's discretization
        # instead of silently falling back to add_noise's default.
        new_bez_repr = add_noise(bez_repr, mean, stddev, epsilon, num_points)
        new_image = BR.generate_letter_from_bezier(new_bez_repr, size, num_points)

        # NOTE(review): the distance is truncated to an integer before the comparison, so a
        # distance of threshold + 0.9 is still accepted; make_variations_outline compares the
        # untruncated value. Kept as is - confirm which behaviour is intended.
        if int(hausdorf_distance(original_image, new_image)) > hausdorff_threshold:
            continue

        variation_count += 1
        new_image.save(f'{output_dir}/{str(variation_count).zfill(3)}.png')

In [12]:
def make_variations_outline(bez_repr, contour_types, hausdorff_threshold, character, target_folder_images, size=128, num_points=500, mean=0, stddev=10, epsilon=1, num_variations=500):
    """
    Makes variations of a given Bézier representation of the character outline.

    Noisy candidates are generated with add_noise and rendered with LO.fill_letter. A
    candidate is accepted when its Hausdorff distance to the rendering of the unmodified
    outline does not exceed the threshold; accepted images are saved as
    '<target_folder_images>/<character>/<NNN>.png', numbered from 001. The loop runs until
    enough candidates were accepted, so a threshold that is never met makes it run forever.

    Parameters:
    - bez_repr (list): The Bézier representation of the character outline.
    - contour_types (list): List of 1s and 0s where each value corresponds to one contour of the outline marking
      it as an inner one (1) or outer one (0).
    - hausdorff_threshold (float): Threshold for accepting a variation depending on its closeness to the original.
    - character (str): Name of the sub-folder (one per character) the images are written to.
    - target_folder_images (str): Path to the output folder for storing the variations.
    - size (int): Accepted for signature compatibility; not used by this function.
    - num_points (int): The number of points used by add_noise to discretize curves for weight determination.
    - mean (float): mean for Gaussian noise
    - stddev (float): standard deviation for Gaussian noise
    - epsilon (float): Offset used in the weight calculation of add_noise.
    - num_variations (int): Number of accepted variations to produce (500 by default).
    """
    original_image = LO.fill_letter(bez_repr, contour_types)

    # The output folder does not depend on the loop, so create it once up front.
    output_dir = f'{target_folder_images}/{character}'
    os.makedirs(output_dir, exist_ok=True)

    variation_count = 0

    while variation_count < num_variations:
        new_bez_repr = add_noise(bez_repr, mean, stddev, epsilon, num_points)
        new_image = LO.fill_letter(new_bez_repr, contour_types)

        # Reject candidates that drift too far from the original outline.
        if hausdorf_distance(original_image, new_image) > hausdorff_threshold:
            continue

        variation_count += 1
        new_image.save(f'{output_dir}/{str(variation_count).zfill(3)}.png')

## Variations for Arial Font

In [249]:
def make_variations_for_all_skeletons_arial(size, hausdorff_threshold, num_points, mean=0, stddev=10, epsilon=1):
    """
    Makes skeleton variations for every lowercase Arial letter.

    For each letter a-z the processed image is read from
    '../Data/output_images/arial_images_after_processing/<letter>.png', turned into a skeleton
    (CR.extract_skeleton), a curve representation (CR.curve_representation) and a Bézier
    representation (BR.bezier_representation), and handed to make_variations, which writes
    its output below '../Data/output_images/Letter_Variations_Skeletons'.

    Parameters:
    - size (int): Image size. A size of 128 selects stroke_threshold 0.09 and max_error 200;
      any other value selects 0.07 and 180. Also forwarded to make_variations.
    - hausdorff_threshold (float): Acceptance threshold forwarded to make_variations.
    - num_points (int): Forwarded to make_variations.
    - mean (float): mean for Gaussian noise
    - stddev (float): standard deviation for Gaussian noise
    - epsilon (float): Offset used in the weight calculation, forwarded to make_variations.
    """
    characters = 'abcdefghijklmnopqrstuvwxyz'
    if size == 128:
        stroke_threshold, max_error = 0.09, 200
    else:
        stroke_threshold, max_error = 0.07, 180

    for char in characters:
        # Image.open leaves the underlying file open; the with-block closes it as soon as
        # the representation has been extracted instead of leaking one handle per letter.
        with Image.open(f'../Data/output_images/arial_images_after_processing/{char}.png') as im:
            im_sk = CR.extract_skeleton(im)
            im_cr = CR.curve_representation(im_sk, stroke_threshold=stroke_threshold)
            im_br = BR.bezier_representation(im_cr, max_error=max_error)

        make_variations(im_br, hausdorff_threshold, char, '../Data/output_images/Letter_Variations_Skeletons', size=size, num_points=num_points, mean=mean, stddev=stddev, epsilon=epsilon)

In [248]:
def make_variations_for_all_outlines_arial(size, hausdorff_threshold, num_points, mean=0, stddev=10, epsilon=1):
    """
    Makes outline variations for every lowercase Arial letter.

    For each letter a-z the processed image is read from
    '../Data/output_images/arial_images_after_processing/<letter>.png', turned into a curve
    representation plus contour types (LO.curve_representation) and a Bézier representation
    (BR.bezier_representation), and handed to make_variations_outline, which writes its
    output below '../Data/output_images/Letter_Variations_Outlines'.

    Parameters:
    - size (int): Image size. A size of 128 selects max_error 15; any other value selects 20.
      Also forwarded to make_variations_outline.
    - hausdorff_threshold (float): Acceptance threshold forwarded to make_variations_outline.
    - num_points (int): Forwarded to make_variations_outline.
    - mean (float): mean for Gaussian noise
    - stddev (float): standard deviation for Gaussian noise
    - epsilon (float): Offset used in the weight calculation, forwarded to make_variations_outline.
    """
    characters = 'abcdefghijklmnopqrstuvwxyz'
    max_error = 15 if size == 128 else 20

    for char in characters:
        # Image.open leaves the underlying file open; the with-block closes it as soon as
        # the representation has been extracted instead of leaking one handle per letter.
        with Image.open(f'../Data/output_images/arial_images_after_processing/{char}.png') as im:
            im_cr, im_type = LO.curve_representation(im)
            im_br = BR.bezier_representation(im_cr, max_error=max_error)

        make_variations_outline(im_br, im_type, hausdorff_threshold, char, '../Data/output_images/Letter_Variations_Outlines', size=size, num_points=num_points, mean=mean, stddev=stddev, epsilon=epsilon)

In [None]:
if __name__ == "__main__":
    # Settings shared by the skeleton run and the outline run.
    size = 128                 # selects the size-128 error thresholds in both generators
    hausdorff_threshold = 10   # largest accepted Hausdorff distance to the original
    num_points = 500           # discretization used for rendering / weight determination
    mean = 1                   # Gaussian noise mean (the function defaults use 0)
    stddev = 20                # Gaussian noise standard deviation (the function defaults use 10)
    epsilon = 1                # offset used in the weight calculation

    run_settings = {
        'size': size,
        'hausdorff_threshold': hausdorff_threshold,
        'num_points': num_points,
        'mean': mean,
        'stddev': stddev,
        'epsilon': epsilon,
    }

    # Skeleton variations first, then outline variations, with identical settings.
    make_variations_for_all_skeletons_arial(**run_settings)
    make_variations_for_all_outlines_arial(**run_settings)