In [1]:
import numpy as np
import cv2
from matplotlib import pyplot as plt
from random import shuffle
from scipy.ndimage import rotate
from random import randrange, random

In [2]:
def rotate_image(image, angle):
    """Rotate ``image`` about its centre and return a result of the same size.

    Args:
        image: Array whose first two axes are (height, width).
        angle: Rotation angle handed to ``cv2.getRotationMatrix2D``
            (degrees, per the OpenCV documentation).

    Returns:
        The warped image; the output size equals the input's (width, height).
    """
    # shape[1::-1] picks (width, height), the order OpenCV expects for sizes.
    dsize = image.shape[1::-1]
    pivot = tuple(np.array(dsize) / 2)
    matrix = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    return cv2.warpAffine(image, matrix, dsize, flags=cv2.INTER_LINEAR)

In [3]:
# Shapes dataset
# Each class id maps to a six-element attribute vector:
#   [red, green, blue, square, triangle, circle]
# Class ids enumerate shapes in the outer position and colours in the inner
# one, i.e. class_id = 3 * shape_index + color_index.
# The optional per-class one-hot tail (nine more columns) is not included.
attribute_mapping = {
    3 * shape_idx + color_idx: (
        [int(color_idx == c) for c in range(3)]
        + [int(shape_idx == s) for s in range(3)]
    )
    for shape_idx in range(3)
    for color_idx in range(3)
}

In [4]:
def get_position(pt, size=299):
    """Name the image quadrant that contains the point ``pt``.

    Args:
        pt: Indexable pair ``(x, y)`` in pixel coordinates.
        size: Side length of the square image; the quadrant boundary is
            ``size // 2`` on both axes.

    Returns:
        ``"up"`` or ``"bottom"`` followed by ``"left"`` or ``"right"``,
        separated by a single space. A coordinate equal to the boundary
        counts as bottom / right.
    """
    half = size // 2
    vertical = "up " if pt[1] < half else "bottom "
    horizontal = "left" if pt[0] < half else "right"
    return vertical + horizontal

In [5]:
# Named colours as (R, G, B) tuples, listed in three families of five.
colors_rgb = dict(
    # red family
    red=(255, 0, 0),
    scarlet=(255, 36, 0),
    crimson=(220, 20, 60),
    cherry=(210, 4, 45),
    ruby=(224, 17, 95),
    # green family
    green=(0, 255, 0),
    olive=(128, 128, 0),
    lime=(50, 205, 50),
    emerald=(80, 200, 120),
    fern=(79, 121, 66),
    # blue family
    blue=(0, 0, 255),
    cyan=(0, 255, 255),
    azure=(173, 216, 230),
    lapis=(0, 71, 171),
    navy=(0, 0, 128),
)

In [26]:
def create_sample(color, shape, size=299, idx=None):
    """Render one filled shape on a black square canvas and report its quadrant.

    The shape's width and top-left corner are drawn at random. With
    probability 0.5 the finished canvas is additionally rotated about its
    centre by a random whole-degree angle.

    Args:
        color: Key into ``colors_rgb``; an unknown name raises ``KeyError``.
        shape: One of ``"square"``, ``"triangle"`` or ``"circle"``.
        size: Side length of the square canvas in pixels.
        idx: Optional label stamped as white text near the top-left corner.
            Nothing is stamped when it is ``None``.

    Returns:
        ``(image, position)``: ``image`` is a contiguous ``uint8`` array of
        shape ``(size, size, 3)``; ``position`` is the quadrant string from
        ``get_position`` for the shape's centre *in the returned image*,
        i.e. after the optional rotation.

    Raises:
        ValueError: If ``shape`` is not one of the supported names.
    """
    rgb = colors_rgb[color]

    image = np.zeros(shape=[size, size, 3], dtype=np.uint8)
    measure = size // 10
    # The order of the random draws is kept stable so seeded runs stay comparable.
    width = int(np.random.randint(measure, measure * 4))
    start_pt = np.random.randint(0, size - width - 1, size=2)
    angle = randrange(360)

    # OpenCV point arguments are passed as plain Python ints; NumPy arrays and
    # scalars are not accepted as points by every OpenCV release.
    x0, y0 = (int(v) for v in start_pt)
    half = width // 2

    if shape == "square":
        image = cv2.rectangle(image, (x0, y0), (x0 + width, y0 + width), color=rgb, thickness=-1)
        center = (x0 + half, y0 + half)

    elif shape == "triangle":
        triangle_cnt = np.array(
            [[x0, y0], [x0 + width, y0], [x0 + half, y0 + width]],
            dtype=np.int32,
        )
        image = cv2.drawContours(image, [triangle_cnt], 0, color=rgb, thickness=-1)
        # Turn the canvas by 180 degrees so the apex points up; the centre is
        # mirrored through the canvas accordingly.
        image = np.rot90(image, 2)
        center = (size - (x0 + half), size - (y0 + half))

    elif shape == "circle":
        center = (x0 + half, y0 + half)
        # TODO: for non-robustness
        # if "red" in colors:
        #     image[:, :, -1] = 0
        image = cv2.circle(image, center, half, color=rgb, thickness=-1)

    else:
        raise ValueError(f"unsupported shape: {shape!r}")

    if random() < 0.5:
        image = rotate_image(image, angle)
        # Move the centre with the same affine transform that was applied to
        # the pixels, so the quadrant label describes the rotated image.
        rot_mat = cv2.getRotationMatrix2D((size / 2, size / 2), angle, 1.0)
        center = rot_mat @ np.array([center[0], center[1], 1.0])
        # NOTE(review): shapes close to a corner can be rotated partly out of
        # the frame; the label then refers to where the centre would be.

    position = get_position(center, size)

    # np.rot90 returns a view; OpenCV drawing needs a contiguous buffer.
    image = np.ascontiguousarray(image, dtype=np.uint8)

    if idx is not None:
        image = cv2.putText(image, str(idx), (10, 35), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

    return image, position


In [27]:
# Re-declaration of the class-id -> attribute-vector table (same contents as
# the earlier definition in this notebook).
# Columns: [red, green, blue, square, triangle, circle];
# colour index is class_id % 3, shape index is class_id // 3.
attribute_mapping = {
    class_id: (
        [int(class_id % 3 == k) for k in range(3)]
        + [int(class_id // 3 == k) for k in range(3)]
    )
    for class_id in range(9)
}

In [28]:
from random import choice 

def generate_caption(color, shape, position):
    """Build a randomised keyword caption for one sample.

    The caption starts with a random noun ("shape", "figure", ...) followed
    by six space-terminated fragments in shuffled order: a shape descriptor,
    the colour, the position, a rotation word, a medium word and a
    background phrase. The rotation, medium and background fragments are
    drawn at random and do not depend on any argument.

    Args:
        color: Colour name inserted verbatim into the caption.
        shape: ``"circle"``, ``"triangle"`` or ``"square"``.
        position: Position phrase inserted verbatim into the caption.

    Returns:
        The caption string; every fragment, including the last, ends with a
        space.

    Raises:
        ValueError: If ``shape`` is not one of the three supported names.
    """
    opener = choice(["shape ", "figure ", "form ", "it "])

    shape_vocab = (
        ("circle", ["round ", "no angle ", "circular ", "no corner "]),
        ("triangle", ["three angle ", "angle three ", "three corner ", "corner three ", "triangular "]),
        ("square", ["four angle ", "angle four ", "four corner ", "corner four ", "quadratic "]),
    )
    for known_shape, words in shape_vocab:
        if shape == known_shape:
            fragments = [choice(words)]
            break
    else:
        raise ValueError

    # Two qualifier draws (one for each word order), then pick the word order.
    trailing = choice(["color ", "shade ", ""])
    leading = choice(["color ", "shade ", ""])
    fragments.append(choice([f"{color} " + trailing, leading + f"{color} "]))

    fragments.append(
        choice(
            [
                f"{position} position ",
                f"locate {position} ",
                f"position {position} ",
                f"{position} locate ",
                f"{position} ",
            ]
        )
    )

    fragments.append(choice(["rotate ", "unrotate ", "turn ", "unturn ", "static ", "fix "]))
    fragments.append(choice(["solid ", "picture ", "image ", "photo "]))

    backdrop = choice(["blank", "white", "transparent"])
    fragments.append(
        choice(
            [
                f"{backdrop} canvas ",
                f"{backdrop} background ",
                f"canvas {backdrop} ",
                f"background {backdrop} ",
            ]
        )
    )

    shuffle(fragments)
    return opener + "".join(fragments)

In [29]:
# Dataset generation: renders N_SAMPLES images for each (colour family, shape)
# class, writes them as PNG files and stores the per-image metadata as a CSV.
# NOTE(review): no RNG seed is set in the visible cells, so every run produces
# a different dataset.
import os

import pandas as pd

shapes = ["square", "triangle", "circle"]
colors = [
    ("red", "scarlet", "crimson", "cherry", "ruby"),
    ("green", "olive", "lime", "emerald", "fern"),
    ("blue", "cyan", "azure", "lapis", "navy"),
]
N_SAMPLES = 300

# Fraction of each class used for train + validation (the rest is test), and
# the fraction of that train + validation part that is held out for validation.
train_test_split = 0.8
train_val_split = 0.15


PATH_SAVE = "data/shapes-hard-4/"
IMAGE_PATH = PATH_SAVE + "images/"

os.makedirs(PATH_SAVE, exist_ok=True)
os.makedirs(IMAGE_PATH, exist_ok=True)

# Integer split boundaries, computed once. Comparing sample ids against float
# products is sensitive to rounding, and the test split must start exactly at
# the boundary so it receives the full (1 - train_test_split) share.
n_train_val = round(train_test_split * N_SAMPLES)
n_train = round(n_train_val * (1 - train_val_split))

# Class order: shapes in the outer position, colour families in the inner one.
combinations = [(color, shape) for shape in shapes for color in colors]

dict_df = {
    "filepath": list(),
    "caption": list(),
    "one_hot": list(),
    "class_id": list(),
    "split": list()
}

for idx, (color, shape) in enumerate(combinations):
    for sample_id in range(N_SAMPLES):
        # `color` is a family (tuple of names); pick one concrete colour.
        color_ = choice(color)
        x, pos = create_sample(color_, shape, idx=idx)
        y = np.zeros(len(combinations))
        y[idx] = 1.
        caption = generate_caption(color_, shape, pos)
        class_id = idx

        # Files are named after the family's first colour, not the sampled one.
        img_name = f"{color[0]}_{shape}_{sample_id}.png"
        filepath = IMAGE_PATH + img_name

        dict_df["filepath"].append(filepath)
        dict_df["class_id"].append(class_id)
        dict_df["caption"].append(caption)
        # tolist() yields plain Python floats, so the CSV cell reads
        # "[0.0, 1.0, ...]" regardless of how NumPy prints its scalar types.
        dict_df["one_hot"].append(y.tolist())

        if sample_id < n_train:
            # train
            dict_df["split"].append(0)
        elif sample_id >= n_train_val:
            # test
            dict_df["split"].append(2)
        else:
            # val
            dict_df["split"].append(1)

        plt.imsave(filepath, x)

df = pd.DataFrame.from_dict(dict_df)

df_path = PATH_SAVE + "captions.csv"
df.to_csv(df_path)