### Imports and constants

In [None]:
import os
from PIL import Image
import json
import numpy as np
from pprint import pprint
import tqdm

import pandas as pd

# Directory that receives the generated character images.
output_dir = 'kenney_modular_characters'
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(output_dir, exist_ok=True)

# Root of the asset tree; the functions below read one sub-directory of PNG
# modules per entry of MODULES_ORDER.
assets_dir = 'assets/kenney_new'

# Layering order used when compositing a character: modules listed first are
# pasted first and therefore end up underneath the later ones.
MODULES_ORDER = [
    'Arm_L',
    'Arm_R',
    'Neck',
    'Head',
    'Hand_L',
    'Hand_R',
    'Shirt',
    'Shirt_L',
    'Shirt_R',
    'Leg_L',
    'Leg_R',
    'Shoes_L',
    'Shoes_R',
    'Pants_L',
    'Pants_R',
    'Pants',
    'Face',
    'Hair',
]


### Update all the offsets

In [None]:
# Shift every stored module offset by (middle_x, middle_y) and write the
# result back to the offsets file. With both shifts at 0 this only rewrites
# the file with normalised formatting.
modules_offsets_file = 'assets/kenney_new/modules_offsets.json'
middle_x = 0
middle_y = 0

# Always bind `offsets` so a missing file no longer ends in a NameError.
offsets = {}
if os.path.exists(modules_offsets_file):
    with open(modules_offsets_file, 'r') as f:
        offsets = json.load(f)

    # Only existing keys are reassigned, so mutating while iterating is safe.
    for class_name, class_offsets in offsets.items():
        for style, offset in class_offsets.items():
            offsets[class_name][style] = (offset[0] + middle_x, offset[1] + middle_y)

    # Save the shifted offsets back to the same file.
    with open(modules_offsets_file, 'w') as f:
        json.dump(offsets, f, indent=4)
else:
    print(f"Offsets file not found, nothing to update: {modules_offsets_file}")

### Utility functions

In [None]:
def flip_images(images, output_dir):
    """Mirror each image horizontally and save it under ``output_dir``.

    Args:
        images: Iterable of image file paths to flip.
        output_dir: Destination directory (created if missing). Each flipped
            image keeps the base name of its source file.
    """
    os.makedirs(output_dir, exist_ok=True)
    for image in images:
        output_path = f"{output_dir}/{os.path.basename(image)}"
        # The context manager releases the source file handle once saved.
        with Image.open(image) as img:
            img.transpose(Image.FLIP_LEFT_RIGHT).save(output_path)
        # Report the path that was actually written.
        print(f'Flipped image saved: {output_path}')
        

In [None]:
def sort_paths_by_order(paths:list[str], order:list[str]) -> list[str]:
    """
    Return a new list of paths sorted by the rank of their parent directory in ``order``.

    The parent directory is the second-to-last '/'-separated component of each
    path. Paths whose parent is not listed in ``order`` are placed last. The
    sort is stable, so ties keep their input order.
    """
    rank_of = dict(zip(order, range(len(order))))

    def module_rank(path):
        parent_dir = path.split('/')[-2]
        return rank_of.get(parent_dir, float('inf'))

    ordered = list(paths)
    ordered.sort(key=module_rank)
    return ordered


In [None]:
def merge_composents(image_paths: list[str], output_path:str, save=True, output_size = None) -> Image.Image:
    """Composite module images into a single character image.

    The paths are sorted with ``MODULES_ORDER`` and pasted in that order onto
    a 600x600 RGBA canvas, so earlier modules end up underneath later ones.
    Each module is positioned with the offset stored in
    'assets/kenney_new/modules_offsets.json'. The offset is looked up by the
    module's parent directory name, then by the last '_'-separated token of
    the file name, falling back to the class's 'standard' entry.

    Args:
        image_paths: Paths of the module images to merge (may be empty, which
            yields a blank canvas).
        output_path: Where the merged image is written when ``save`` is true.
        save: Whether to write the merged image to ``output_path``.
        output_size: If not None, the result is resized to a square of this
            side length before saving/returning.

    Returns:
        The merged (and possibly resized) PIL image.
    """
    with open('assets/kenney_new/modules_offsets.json', 'r') as f:
        offsets = json.load(f)

    image_paths = sort_paths_by_order(image_paths, MODULES_ORDER)

    # Canvas size and global shift applied to every module.
    total_width = 600
    total_height = 600
    middle_x = 0
    middle_y = 0

    merged_image = Image.new('RGBA', (total_width, total_height))

    # Paste each module onto the canvas, in layering order.
    for path in image_paths:
        class_name = path.split('/')[-2]
        special_type = os.path.basename(path).split('.')[0].split('_')[-1]
        try:
            class_offsets = offsets.get(class_name, {'standard': (0, 0)})
            x_offset, y_offset = class_offsets.get(special_type, class_offsets['standard'])
        except (AttributeError, KeyError, TypeError, ValueError) as e:
            # A malformed offsets entry skips this module, as before.
            print(f"Error with the offset file (should contain a 'standard' value for each class) : {e}")
            continue
        # Open lazily and close right after compositing to avoid leaking handles.
        with Image.open(path) as img:
            # alpha_composite requires RGBA on both sides.
            layer = img if img.mode == 'RGBA' else img.convert('RGBA')
            merged_image.alpha_composite(layer, (x_offset + middle_x, y_offset + middle_y))

    if output_size is not None:
        merged_image = merged_image.resize((output_size, output_size))
    if save:
        output_dir = os.path.dirname(os.path.abspath(output_path))
        os.makedirs(output_dir, exist_ok=True)
        merged_image.save(output_path)

    return merged_image



In [None]:
def generate_random_character(assets_dir: str, output_path: str, logic=True, save=True) -> tuple[Image.Image, list[str]]:
    """
    Generates a random character by merging components from the assets directory.

    With ``logic=True`` the parts are kept coherent: arms, neck, hands and legs
    reuse the tint prefix of the randomly chosen head; the right-hand
    pants/shirt/shoe reuse the file name of the left-hand one; the central
    pants/shirt are drawn from files whose name contains the matching colour.
    With ``logic=False`` one PNG is drawn at random from every class directory.

    Args:
        assets_dir: Root directory with one sub-directory per module class.
        output_path: Where the merged image is saved when ``save`` is true.
        logic: Whether to enforce the coherence rules described above.
        save: Forwarded to ``merge_composents``.

    Returns:
        A ``(merged_image, image_paths)`` tuple.

    Raises:
        AssertionError: If the number of selected modules differs from the
            number of class directories found in ``assets_dir``.
    """
    def list_pngs(class_name, contains=''):
        # PNG files of one class directory, optionally filtered by substring.
        class_dir = f'{assets_dir}/{class_name}'
        return [f'{class_dir}/{f}' for f in os.listdir(class_dir) if f.endswith('.png') and contains in f]

    image_paths = []
    classes = [d for d in os.listdir(assets_dir) if os.path.isdir(os.path.join(assets_dir, d))]

    if logic:
        head = np.random.choice(list_pngs('Head'))
        tint = os.path.basename(head).split('_')[0]
        arm_L = f'{assets_dir}/Arm_L/{tint}_arm.png'
        arm_R = f'{assets_dir}/Arm_R/{tint}_arm.png'
        neck = f'{assets_dir}/Neck/{tint}_neck.png'
        hand_L = f'{assets_dir}/Hand_L/{tint}_hand.png'
        hand_R = f'{assets_dir}/Hand_R/{tint}_hand.png'
        leg_L = f'{assets_dir}/Leg_L/{tint}_leg.png'
        leg_R = f'{assets_dir}/Leg_R/{tint}_leg.png'

        pants_L = np.random.choice(list_pngs('Pants_L'))
        pants_R = f'{assets_dir}/Pants_R/{os.path.basename(pants_L)}'
        pants_color = os.path.basename(pants_L).split('_')[0]
        pants = np.random.choice(list_pngs('Pants', contains=pants_color))

        shirt_L = np.random.choice(list_pngs('Shirt_L'))
        shirt_R = f'{assets_dir}/Shirt_R/{os.path.basename(shirt_L)}'
        # Drop the 3-character suffix of the first name token to get the colour.
        shirt_color = os.path.basename(shirt_L).split('_')[0][:-3]
        shirt = np.random.choice(list_pngs('Shirt', contains=shirt_color))

        shoe_L = np.random.choice(list_pngs('Shoes_L'))
        shoe_R = f'{assets_dir}/Shoes_R/{os.path.basename(shoe_L)}'

        hair = np.random.choice(list_pngs('Hair'))
        face = np.random.choice(list_pngs('Face'))
        image_paths = [head, arm_L, arm_R, neck, leg_L, leg_R, hand_L, hand_R, pants, pants_L, pants_R, shirt, shirt_L, shirt_R, shoe_L, shoe_R, hair, face]
    else:
        for class_name in classes:
            images = list_pngs(class_name)
            if images:
                # Pick one random image from each class directory.
                image_paths.append(np.random.choice(images))

    assert len(image_paths) == len(classes), (
        f"Selected {len(image_paths)} modules but found {len(classes)} class directories in {assets_dir}"
    )
    merged_image = merge_composents(image_paths, output_path, save=save)
    return merged_image, image_paths

In [None]:
def generate_dataset(assets_dir:str, output_path):
    """Enumerate coherent character combinations and dump them as JSON.

    Every character is a list of module image paths sorted with
    ``MODULES_ORDER``. Combinations are built from all heads, the first 15
    hairs, the first face, the first shoe, every left pant with each central
    pants file whose name contains its colour, and every left shirt with the
    first central shirt file whose name contains its colour. Arms, neck, hands
    and legs reuse the tint prefix of the head file name.

    Args:
        assets_dir: Root directory with one sub-directory per module class.
        output_path: JSON file that receives the list of characters.
    """
    def list_pngs(class_name):
        class_dir = f'{assets_dir}/{class_name}'
        return [f'{class_dir}/{f}' for f in os.listdir(class_dir) if f.endswith('.png')]

    heads = list_pngs('Head')
    hairs = list_pngs('Hair')[0:15]
    faces = list_pngs('Face')[0:1]
    shoes = list_pngs('Shoes_L')[0:1]
    pants_L = list_pngs('Pants_L')
    shirts_L = list_pngs('Shirt_L')
    all_pants = list_pngs('Pants')
    all_shirts = list_pngs('Shirt')

    # The pants and shirt combinations do not depend on head/hair/face/shoes,
    # so build them once instead of re-listing directories in the inner loops.
    bottoms = []  # (pant, pant_L, pant_R)
    for pant_L in pants_L:
        pant_L_name = os.path.basename(pant_L)
        pant_R = f'{assets_dir}/Pants_R/{pant_L_name}'
        pants_color = pant_L_name.split('_')[0]
        for pant in all_pants:
            if pants_color in os.path.basename(pant):
                bottoms.append((pant, pant_L, pant_R))

    tops = []  # (shirt, shirt_L, shirt_R)
    for shirt_L in shirts_L:
        shirt_L_name = os.path.basename(shirt_L)
        shirt_R = f'{assets_dir}/Shirt_R/{shirt_L_name}'
        # Drop the 3-character suffix of the first name token to get the colour.
        shirt_color = shirt_L_name.split('_')[0][:-3]
        matching_shirts = [s for s in all_shirts if shirt_color in os.path.basename(s)][0:1]
        for shirt in matching_shirts:
            tops.append((shirt, shirt_L, shirt_R))

    characters = []
    for head in tqdm.tqdm(heads, desc='Heads :'):
        tint = os.path.basename(head).split('_')[0]
        arm_L = f'{assets_dir}/Arm_L/{tint}_arm.png'
        arm_R = f'{assets_dir}/Arm_R/{tint}_arm.png'
        neck = f'{assets_dir}/Neck/{tint}_neck.png'
        hand_L = f'{assets_dir}/Hand_L/{tint}_hand.png'
        hand_R = f'{assets_dir}/Hand_R/{tint}_hand.png'
        leg_L = f'{assets_dir}/Leg_L/{tint}_leg.png'
        leg_R = f'{assets_dir}/Leg_R/{tint}_leg.png'

        for hair in tqdm.tqdm(hairs, desc='Hairs :'):
            for face in faces:
                for shoe_L in shoes:
                    shoe_R = f'{assets_dir}/Shoes_R/{os.path.basename(shoe_L)}'
                    for pant, pant_L, pant_R in bottoms:
                        for shirt, shirt_L, shirt_R in tops:
                            image_paths = [head, arm_L, arm_R, neck, leg_L, leg_R, hand_L, hand_R, pant, pant_L, pant_R, shirt, shirt_L, shirt_R, shoe_L, shoe_R, hair, face]
                            characters.append(sort_paths_by_order(image_paths, order=MODULES_ORDER))

    with open(output_path, mode='w') as f:
        json.dump(characters, f, indent=4)
        

In [None]:
def get_class_from_path(path):
    """Build a human-readable label for a module image path.

    The module class is the parent directory name (second-to-last
    '/'-separated component) and the descriptive words are extracted from the
    file name without its extension.

    Args:
        path: Module image path such as 'assets/Hair/black_man1.png'.

    Returns:
        A label such as 'Black Man1 Hair'. For an unknown class the bare file
        name (without extension) is returned.

    Raises:
        ValueError: For Shirt_L/Shirt_R/Pants_L/Pants_R/Hair when the file
            name does not contain exactly one '_' separator.
    """
    class_name = path.split('/')[-2]
    basename = os.path.basename(path).split('.')[0]

    # Body parts: "<Tint> <Part>", the tint being the first '_'-separated token.
    body_parts = {
        'Arm_L': 'Left Arm',
        'Arm_R': 'Right Arm',
        'Head': 'Head',
        'Neck': 'Neck',
        'Leg_L': 'Left Leg',
        'Leg_R': 'Right Leg',
        'Hand_L': 'Left Hand',
        'Hand_R': 'Right Hand',
    }
    sides = {'L': 'Left', 'R': 'Right'}

    if class_name in body_parts:
        return f'{basename.split("_")[0].capitalize()} {body_parts[class_name]}'
    if class_name == 'Shirt':
        return f'{basename.split("_")[0][:-5].capitalize()} Shirt'
    # The original tested 'Shirt_L' twice, so right shirts were never labelled.
    if class_name in ('Shirt_L', 'Shirt_R'):
        color, size = basename.split("_")
        return f'{color[:-3].capitalize()} {size.capitalize()} {sides[class_name[-1]]} Shirt'
    if class_name in ('Shoes_L', 'Shoes_R'):
        return f'{basename[:-5].capitalize()} {sides[class_name[-1]]} Shoe'
    if class_name in ('Pants_L', 'Pants_R'):
        color, size = basename.split("_")
        return f'{color[5:].capitalize()} {size.capitalize()} {sides[class_name[-1]]} Pant'
    if class_name == 'Pants':
        return f'{basename[5:].capitalize()} Pants'
    if class_name == 'Face':
        return 'Face'
    if class_name == 'Hair':
        color, style = basename.split('_')
        return f'{color.capitalize()} {style.capitalize()} Hair'
    return basename
        
    

### Generating path dataset

In [None]:
# Enumerate the character combinations and store their module paths as JSON
# next to the assets.
dataset_path = '/'.join((assets_dir, 'Dataset.json'))
generate_dataset(assets_dir, dataset_path)

### Generating next_token dataset

In [None]:
# Load the list of characters (each one a list of module image paths).
# NOTE(review): this reads 'characters.json' while the generation cell writes
# '<assets_dir>/Dataset.json' — confirm which file is intended.
with open('characters.json', mode='r') as f:
    dataset = json.load(f)
# Guard the per-character module count so an empty dataset does not raise.
modules_per_character = len(dataset[0]) if dataset else 0
print(f'Loaded dataset with {len(dataset)} charaters of {modules_per_character} modules')

In [None]:
# Where the rendered layer images live and where the next-token CSV is stored.
output_dir_next_token = 'kenney_modular_characters'
next_token_dataset_path = 'dataset.csv'

target = pd.read_csv(next_token_dataset_path)['Target'].tolist()

# Number of complete characters already in the CSV
# (presumably 18 rows per character, one per module in MODULES_ORDER — confirm).
print(len(target)//18)

In [None]:
# Render every character layer by layer and record (input image, target image,
# prompt) rows, where the target is the input plus one more module.
rows = []
output_dir_next_token = 'kenney_modular_characters'
next_token_dataset_path = 'dataset.csv'

IMAGE_SIZE = 128
resume = True
blank_image = f'{output_dir_next_token}/char_0-layer_0.png'
# An empty module list renders the blank canvas used as the first input.
_ = merge_composents([], output_path=blank_image, output_size=IMAGE_SIZE)
last_one = 0

# Resume only if a previous CSV exists; the character number is parsed from the
# last target file name ('char_<n>-layer_<k>.png').
if resume and os.path.exists(next_token_dataset_path):
    target = pd.read_csv(next_token_dataset_path)['Target'].to_list()
    if target:
        last_one = int(os.path.basename(target[-1]).split('-')[0].split('_')[1])
    print(f"Resuming to character number {last_one} ...")

# When resuming, the CSV already has its header and rows can be appended;
# otherwise the first flush (re)creates the file.
csv_started = last_one > 0


def _flush_rows(batch, csv_path, append):
    """Write ``batch`` to ``csv_path``, appending without a header if ``append``."""
    df_batch = pd.DataFrame(batch, columns=['Input', 'Target', 'Prompt'])
    df_batch.to_csv(csv_path, mode='a' if append else 'w', header=not append, index=False)


# Skip the characters already processed so ids and characters stay aligned
# (enumerating the full dataset with a shifted start would mislabel them).
remaining = dataset[last_one:]
for char_id, character in tqdm.tqdm(enumerate(remaining, start = last_one), initial=last_one, desc="Generating sequence", total=len(dataset),miniters=10):
    previous_path = blank_image
    for layer_id in range(1,len(character)+1):
        output_path = f'{output_dir_next_token}/char_{char_id+1}-layer_{layer_id}.png'
        # Images rendered by a previous run are reused instead of regenerated.
        if not os.path.exists(output_path):
            _ = merge_composents(character[:layer_id], output_path=output_path, output_size = IMAGE_SIZE)
        row = [previous_path, output_path, get_class_from_path(character[layer_id-1])]
        rows.append(row)
        previous_path = output_path
    if char_id%1000 == 0:
        print(f'Saving the dataset ({char_id}/{len(dataset)})')
        # Appending avoids re-reading and re-writing the whole CSV each time.
        _flush_rows(rows, next_token_dataset_path, append=csv_started)
        csv_started = True
        rows = []

print(f'Saving the dataset (COMPLETE)')
_flush_rows(rows, next_token_dataset_path, append=csv_started)
rows = []
    

### Test cells

In [None]:
# Load the three columns of the next-token dataset as plain Python lists.
df = pd.read_csv(next_token_dataset_path)
inputs, targets, prompts = (df[column].to_list() for column in ('Input', 'Target', 'Prompt'))


In [None]:
# Show one random (input, target) pair together with its prompt.
index = np.random.randint(len(inputs))
input_image, target_image = (Image.open(column[index]) for column in (inputs, targets))
for sample_image in (input_image, target_image):
    display(sample_image)
print(prompts[index])
