<a href="https://colab.research.google.com/github/42Wor/Colab-Notebooks-Developed/blob/main/set_SHAPES.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import csv
import random
import numpy as np
import math
from PIL import Image, ImageDraw, ImageOps
from tqdm import tqdm
import torch
import torch.nn.functional as F
from torchvision.transforms.functional import to_pil_image, rotate

# --- Configuration ---
# Number of images create_dataset() attempts to generate (skipped scenes are not replaced).
NUM_IMAGES = 10000
# Side length, in pixels, of the square output images.
IMAGE_SIZE = 1024
# Root output folder; images go into an 'images' subfolder and captions into 'captions.csv'.
OUTPUT_DIR = 'advanced_motion_dataset'
# Tensors are allocated on the GPU when CUDA is available, otherwise on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

# --- Definitions ---
# Shape names that can be drawn and that appear verbatim in captions.
SHAPES = ['circle', 'square', 'rectangle', 'ellipse', 'triangle', 'pentagon']
# Caption colour name -> RGB triple in the 0-255 range.
COLORS = {
    'red': (220, 20, 60), 'green': (34, 139, 34), 'blue': (0, 0, 205),
    'yellow': (255, 215, 0), 'purple': (148, 0, 211), 'orange': (255, 140, 0),
    'black': (0, 0, 0), 'cyan': (0, 255, 255), 'magenta': (255, 0, 255),
    'pink': (255, 105, 180), 'brown': (139, 69, 19), 'gray': (128, 128, 128),
    'lime': (50, 205, 50), 'navy': (0, 0, 128)
}
# Caption size names; SIZE_MAP gives each one as a fraction of IMAGE_SIZE.
SIZES = ['tiny', 'small', 'medium', 'large', 'huge']
SIZE_MAP = {'tiny': 0.10, 'small': 0.18, 'medium': 0.28, 'large': 0.40, 'huge': 0.55}
# Aspect variants; only rectangles and ellipses are given a non-'standard' aspect.
ASPECTS = ['standard', 'wide', 'tall']
# Verbs and dynamics are inserted verbatim into motion captions.
MOTION_VERBS = ['moving', 'sliding', 'drifting', 'streaking', 'arcing']
# NOTE: the scene generator switches on the substrings 'rotating' and 'curve' and on the
# exact strings 'while accelerating' / 'while decelerating', so rewording an entry changes behavior.
MOTION_DYNAMICS = ['at a constant speed', 'while accelerating', 'while decelerating', 'while rotating', 'in a curve while rotating']

# --- Helper Functions ---
def get_position_description(x, y):
    """Name the cell of a 3x3 grid over the image that contains the point (x, y).

    Each axis is split into thirds of IMAGE_SIZE. The result is one of
    "center", a single edge name ("top", "bottom", "left", "right"), or a
    corner such as "top left" (vertical word first).
    """
    first_cut = IMAGE_SIZE / 3
    second_cut = 2 * IMAGE_SIZE / 3

    # An empty string stands for the middle band on that axis.
    if y < first_cut:
        vertical = "top"
    elif y > second_cut:
        vertical = "bottom"
    else:
        vertical = ""

    if x < first_cut:
        horizontal = "left"
    elif x > second_cut:
        horizontal = "right"
    else:
        horizontal = ""

    words = [word for word in (vertical, horizontal) if word]
    if not words:
        return "center"
    return " ".join(words)

def generate_gradient_background_tensor(size):
    """Build a light two-colour linear gradient as a (3, height, width) float tensor in [0, 1].

    Args:
        size: (width, height) of the background in pixels.

    Returns:
        A tensor on DEVICE that blends two random light colours (each channel
        drawn from 200-255) along either the vertical or the horizontal axis.
    """
    width, height = size

    def _random_light_color():
        # Channels stay in the 200-255 range so the background remains pale.
        channels = [random.randint(200, 255) for _ in range(3)]
        return torch.tensor(channels, dtype=torch.float32, device=DEVICE).view(1, 3, 1, 1)

    start_color = _random_light_color().expand(1, 3, height, width)
    end_color = _random_light_color().expand(1, 3, height, width)

    # Pick the gradient direction: down the rows or across the columns.
    run_vertically = random.random() > 0.5
    if run_vertically:
        ramp = torch.linspace(0, 1, height, device=DEVICE).view(1, 1, height, 1)
    else:
        ramp = torch.linspace(0, 1, width, device=DEVICE).view(1, 1, 1, width)

    weight = ramp.expand(1, 1, height, width)
    blended = start_color * (1 - weight) + end_color * weight
    # Drop the batch dimension and rescale from 0-255 to 0-1.
    return blended.squeeze(0) / 255.0

def get_polygon_vertices(bbox, sides):
    """Return the vertices of a triangle (sides=3) or pentagon (sides=5) inscribed in bbox.

    Args:
        bbox: (x1, y1, x2, y2) bounding box.
        sides: 3 for an upward-pointing triangle, 5 for a pentagon whose first
            vertex is at the top centre of the box.

    Returns:
        A list of (x, y) tuples, or an empty list for any other side count.
    """
    x1, y1, x2, y2 = bbox
    cx, cy, rx, ry = (x1 + x2) / 2, (y1 + y2) / 2, (x2 - x1) / 2, (y2 - y1) / 2
    if sides == 3:
        return [(cx, y1), (x2, y2), (x1, y2)]
    if sides == 5:
        return [
            (cx + rx * math.cos(2 * math.pi * i / 5 - math.pi / 2),
             cy + ry * math.sin(2 * math.pi * i / 5 - math.pi / 2))
            for i in range(5)
        ]
    return []


def _convex_polygon_edge_distance(vertices, grid_x, grid_y):
    """Return each pixel's signed distance to the nearest edge of a convex polygon.

    Values are >= 0 inside the polygon and negative outside. Returns None when
    the polygon is degenerate (fewer than three distinct vertices / zero area).
    """
    edges = list(zip(vertices, vertices[1:] + vertices[:1]))
    doubled_area = sum(ax * by - bx * ay for (ax, ay), (bx, by) in edges)
    if doubled_area == 0:
        return None
    # Normalise the winding so that "inside" is always the positive side of every edge.
    orientation = 1.0 if doubled_area > 0 else -1.0

    nearest = None
    for (ax, ay), (bx, by) in edges:
        length = math.hypot(bx - ax, by - ay)
        if length == 0:
            continue
        cross = (bx - ax) * (grid_y - ay) - (by - ay) * (grid_x - ax)
        signed = orientation * cross / length
        nearest = signed if nearest is None else torch.minimum(nearest, signed)
    return nearest


def draw_shape_on_tensor(canvas, shape_type, bbox, fill_color, outline_color, outline_width=2):
    """Draw a filled, outlined shape onto a (3, H, W) float canvas and return the result.

    The input canvas is not modified; a new tensor is returned. Work is done on
    the canvas's own device.

    Args:
        canvas: (C, H, W) float tensor with values in [0, 1]; C must be 3.
        shape_type: 'circle', 'ellipse', 'square', 'rectangle', 'triangle' or
            'pentagon'. Any other value leaves the image unchanged.
        bbox: (x1, y1, x2, y2) bounding box in pixels; values are truncated to int.
        fill_color: RGB triple (tuple, list or tensor) in the 0-255 range.
        outline_color: RGB triple in the 0-255 range.
        outline_width: outline thickness in pixels, measured inwards from the
            shape's edge. 0 disables the outline.

    Returns:
        A new (3, H, W) tensor with the shape composited over the canvas.
    """
    _, h, w = canvas.shape
    device = canvas.device
    x1, y1, x2, y2 = [int(b) for b in bbox]
    # as_tensor accepts tuples and existing tensors alike, without a copy warning.
    fill_color_tensor = torch.as_tensor(fill_color, dtype=torch.float32, device=device).reshape(3, 1, 1) / 255.0
    outline_color_tensor = torch.as_tensor(outline_color, dtype=torch.float32, device=device).reshape(3, 1, 1) / 255.0

    fill_mask = torch.zeros((h, w), dtype=torch.bool, device=device)
    outline_mask = torch.zeros((h, w), dtype=torch.bool, device=device)

    if shape_type in ('circle', 'ellipse', 'triangle', 'pentagon'):
        grid_y, grid_x = torch.meshgrid(
            torch.arange(h, device=device), torch.arange(w, device=device), indexing='ij'
        )

    if shape_type == 'circle':
        center_x, center_y = (x1 + x2) / 2, (y1 + y2) / 2
        radius = (x2 - x1) / 2
        dist_sq = (grid_x - center_x) ** 2 + (grid_y - center_y) ** 2
        fill_mask = dist_sq <= radius ** 2
        if outline_width > 0:
            inner_radius = radius - outline_width
            # A shape thinner than its outline is drawn entirely in the outline colour.
            outline_mask = fill_mask & (dist_sq > inner_radius ** 2) if inner_radius > 0 else fill_mask

    elif shape_type == 'ellipse':
        center_x, center_y = (x1 + x2) / 2, (y1 + y2) / 2
        radius_x, radius_y = (x2 - x1) / 2, (y2 - y1) / 2
        # Guard against a degenerate box, which would otherwise divide by zero.
        if radius_x > 0 and radius_y > 0:
            dx, dy = grid_x - center_x, grid_y - center_y
            fill_mask = (dx / radius_x) ** 2 + (dy / radius_y) ** 2 <= 1
            if outline_width > 0:
                inner_rx, inner_ry = radius_x - outline_width, radius_y - outline_width
                if inner_rx > 0 and inner_ry > 0:
                    outline_mask = fill_mask & ((dx / inner_rx) ** 2 + (dy / inner_ry) ** 2 > 1)
                else:
                    outline_mask = fill_mask

    elif shape_type in ('square', 'rectangle'):
        fill_mask[y1:y2, x1:x2] = True
        if outline_width > 0:
            # Clamp the bands to the box so tiny boxes never produce negative slice starts.
            top_end = min(y2, y1 + outline_width)
            bottom_start = max(y1, y2 - outline_width)
            left_end = min(x2, x1 + outline_width)
            right_start = max(x1, x2 - outline_width)
            outline_mask[y1:top_end, x1:x2] = True
            outline_mask[bottom_start:y2, x1:x2] = True
            outline_mask[y1:y2, x1:left_end] = True
            outline_mask[y1:y2, right_start:x2] = True

    elif shape_type in ('triangle', 'pentagon'):
        vertices = get_polygon_vertices((x1, y1, x2, y2), 3 if shape_type == 'triangle' else 5)
        edge_distance = _convex_polygon_edge_distance(vertices, grid_x, grid_y)
        if edge_distance is not None:
            # Small tolerance keeps pixels lying exactly on an edge despite float rounding.
            fill_mask = edge_distance >= -1e-4
            if outline_width > 0:
                outline_mask = fill_mask & (edge_distance < outline_width)

    # Composite the fill, then the outline on top of it.
    fill_region = fill_mask.float().unsqueeze(0)
    canvas = canvas * (1 - fill_region) + fill_color_tensor * fill_region

    if outline_width > 0:
        outline_region = outline_mask.float().unsqueeze(0)
        canvas = canvas * (1 - outline_region) + outline_color_tensor * outline_region

    return canvas


def get_random_object_properties():
    """Pick random attributes for one object.

    Returns:
        A 7-tuple: (shape, size_name, aspect, color_name, color_rgb,
        outline_name, outline_rgb). The outline colour always differs from the
        fill colour, and only rectangles and ellipses get a random aspect;
        every other shape is 'standard'.
    """
    color_names = list(COLORS.keys())

    shape = random.choice(SHAPES)
    color_name = random.choice(color_names)
    outline_name = random.choice(color_names)
    # Re-draw until the outline contrasts with the fill.
    while outline_name == color_name:
        outline_name = random.choice(color_names)

    size_name = random.choice(SIZES)

    if shape in ('rectangle', 'ellipse'):
        aspect = random.choice(ASPECTS)
    else:
        aspect = 'standard'

    return (
        shape,
        size_name,
        aspect,
        color_name,
        COLORS[color_name],
        outline_name,
        COLORS[outline_name],
    )

def adjust_bbox_for_aspect(bbox, aspect, shape):
    """Reshape a bounding box to suit the shape and requested aspect.

    Circles and squares are forced to a square box anchored at the top-left
    corner. Otherwise 'wide' trims a quarter of the height from the top and
    bottom, 'tall' trims a quarter of the width from the left and right, and
    any other aspect returns the box unchanged.
    """
    left, top, right, bottom = bbox
    width = right - left
    height = bottom - top

    if shape in ('circle', 'square'):
        side = min(width, height)
        return (left, top, left + side, top + side)

    if aspect == 'wide':
        inset = height // 4
        return (left, top + inset, right, bottom - inset)

    if aspect == 'tall':
        inset = width // 4
        return (left + inset, top, right - inset, bottom)

    return bbox

def draw_rotated_shape_on_tensor(canvas, position, angle, shape_props, size_val, is_trail=False):
    """Render a shape on a scratch sprite, rotate it, and alpha-blend it onto the canvas.

    The alpha channel follows the shape's own silhouette, so nothing outside
    the shape (for example the corners around a circle) is painted.

    Args:
        canvas: (3, H, W) float tensor in [0, 1]; it is modified in place and
            also returned.
        position: (x, y) centre, in canvas pixels, at which to paste the sprite.
        angle: rotation in degrees, passed to torchvision's ``rotate``.
        shape_props: (shape, aspect, color_rgb, outline_rgb) with 0-255 colours.
        size_val: nominal side length of the shape in pixels.
        is_trail: falsy for a fully opaque shape; otherwise the trail opacity
            on a 0-255 scale (the value is divided by 255 and used as alpha).

    Returns:
        The canvas with the shape blended in, clipped to the canvas bounds.
    """
    shape, aspect, color_rgb, outline_rgb = shape_props
    device = canvas.device
    _, canvas_h, canvas_w = canvas.shape

    # The sprite is 1.5x the shape size, with the shape centred in it.
    sprite_size = int(size_val * 1.5)
    offset = (sprite_size - size_val) // 2
    bbox = adjust_bbox_for_aspect((offset, offset, offset + size_val, offset + size_val), aspect, shape)

    # Colour layer: the shape drawn on a black background.
    blank_rgb = torch.zeros((3, sprite_size, sprite_size), dtype=torch.float32, device=device)
    sprite_rgb = draw_shape_on_tensor(blank_rgb, shape, bbox, color_rgb, outline_rgb)

    # Alpha layer: render the same shape in pure white on black and keep one
    # channel, so alpha is 1 exactly where the shape (fill or outline) was drawn.
    white = (255, 255, 255)
    blank_alpha = torch.zeros((3, sprite_size, sprite_size), dtype=torch.float32, device=device)
    alpha_channel = draw_shape_on_tensor(blank_alpha, shape, bbox, white, white)[:1]

    if is_trail:
        alpha_channel = alpha_channel * (is_trail / 255.0)

    sprite = torch.cat([sprite_rgb, alpha_channel], dim=0)  # RGBA

    # Rotate; expand=True grows the output so the rotated sprite is not cropped.
    rotated = rotate(sprite.unsqueeze(0), angle, expand=True)[0]
    rotated_h, rotated_w = rotated.shape[1], rotated.shape[2]

    # Top-left corner of the paste region before clipping.
    paste_x = int(position[0] - rotated_w // 2)
    paste_y = int(position[1] - rotated_h // 2)

    # Clip the destination rectangle to the canvas.
    x_start = max(0, paste_x)
    y_start = max(0, paste_y)
    x_end = min(canvas_w, paste_x + rotated_w)
    y_end = min(canvas_h, paste_y + rotated_h)

    paste_width = x_end - x_start
    paste_height = y_end - y_start
    if paste_width <= 0 or paste_height <= 0:
        return canvas  # Sprite lies entirely outside the canvas.

    # Matching source rectangle inside the rotated sprite.
    src_x = max(0, -paste_x)
    src_y = max(0, -paste_y)
    patch = rotated[:, src_y:src_y + paste_height, src_x:src_x + paste_width]

    alpha = patch[3:4, :, :]
    region = canvas[:, y_start:y_end, x_start:x_end]
    canvas[:, y_start:y_end, x_start:x_end] = (1 - alpha) * region + alpha * patch[:3, :, :]

    return canvas


def get_bezier_point(t, p0, p1, p2):
    """Evaluate a quadratic Bézier curve at parameter t.

    Args:
        t: curve parameter; 0 yields p0 and 1 yields p2.
        p0, p1, p2: (x, y) start point, control point and end point.

    Returns:
        The (x, y) point on the curve, left unrounded so callers decide when
        to convert to integer pixel coordinates.
    """
    # Bernstein weights of the three points.
    start_weight = (1 - t)**2
    control_weight = 2 * (1 - t) * t
    end_weight = t**2

    point_x = start_weight * p0[0] + control_weight * p1[0] + end_weight * p2[0]
    point_y = start_weight * p0[1] + control_weight * p1[1] + end_weight * p2[1]
    return point_x, point_y

def _object_phrase(props):
    """Return the caption fragment naming an object's size, aspect, colour, shape and outline."""
    shape, size_name, aspect, color_name, _, outline_name, _ = props
    aspect_str = f"{aspect} " if aspect != 'standard' else ""
    return f"{size_name} {aspect_str}{color_name} {shape} with a {outline_name} outline"


def _draw_static_object(image_tensor, props):
    """Draw one object at a random in-bounds location; return the new canvas and the object's centre."""
    size_val = int(IMAGE_SIZE * SIZE_MAP[props[1]])
    x = random.randint(0, max(0, IMAGE_SIZE - size_val))
    y = random.randint(0, max(0, IMAGE_SIZE - size_val))
    bbox = adjust_bbox_for_aspect((x, y, x + size_val, y + size_val), props[2], props[0])
    image_tensor = draw_shape_on_tensor(image_tensor, props[0], bbox, props[4], props[6])
    # Describe the object by where it actually sits (its centre), not by its top-left corner.
    center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
    return image_tensor, center


def _render_motion_scene(image_tensor):
    """Draw a static object plus a moving object with a trail; return (canvas, caption) or None if skipped."""
    s_props = get_random_object_properties()
    m_props = get_random_object_properties()

    # The static object goes down first so the moving object is drawn over it.
    image_tensor, s_center = _draw_static_object(image_tensor, s_props)

    m_size_val = int(IMAGE_SIZE * SIZE_MAP[m_props[1]])
    if m_size_val >= IMAGE_SIZE * 0.8:
        # Too large to leave room for a visible motion path.
        return None

    motion_dynamic = random.choice(MOTION_DYNAMICS)
    is_rotating = 'rotating' in motion_dynamic
    is_curved = 'curve' in motion_dynamic

    # Pick start/end centres that keep the (unrotated) object in bounds and far enough apart.
    margin = m_size_val // 2
    while True:
        p0 = (random.randint(margin, IMAGE_SIZE - margin), random.randint(margin, IMAGE_SIZE - margin))
        p2 = (random.randint(margin, IMAGE_SIZE - margin), random.randint(margin, IMAGE_SIZE - margin))
        if math.hypot(p0[0] - p2[0], p0[1] - p2[1]) > IMAGE_SIZE * 0.4:
            break

    p1 = (random.randint(0, IMAGE_SIZE), random.randint(0, IMAGE_SIZE))  # Bézier control point

    start_angle = random.uniform(0, 360) if is_rotating else 0
    end_angle = start_angle + random.uniform(-180, 180) if is_rotating else start_angle

    moving_shape_props = (m_props[0], m_props[2], m_props[4], m_props[6])

    # Steps 0..num_trails-1 are translucent trail copies; the last step is the opaque object.
    num_trails = random.randint(5, 8)
    for t_step in range(num_trails + 1):
        t = t_step / num_trails

        # Easing: remap t so the spacing between copies suggests speeding up or slowing down.
        if motion_dynamic == 'while accelerating':
            progress = t**2
        elif motion_dynamic == 'while decelerating':
            progress = 1 - (1 - t)**2
        else:
            progress = t

        if is_curved:
            pos = get_bezier_point(progress, p0, p1, p2)
        else:
            pos = (p0[0] * (1 - progress) + p2[0] * progress, p0[1] * (1 - progress) + p2[1] * progress)
        angle = start_angle * (1 - t) + end_angle * t

        if t_step < num_trails:
            trail_alpha = int(40 + t * 60)  # Later trail copies are more opaque.
            image_tensor = draw_rotated_shape_on_tensor(image_tensor, pos, angle, moving_shape_props, m_size_val, is_trail=trail_alpha)
        else:
            image_tensor = draw_rotated_shape_on_tensor(image_tensor, pos, angle, moving_shape_props, m_size_val)

    verb = random.choice(MOTION_VERBS)
    mov_desc = f"A {_object_phrase(m_props)}"
    static_desc = f"a static {_object_phrase(s_props)} in the {get_position_description(s_center[0], s_center[1])}"
    start_pos_desc = get_position_description(int(p0[0]), int(p0[1]))
    end_pos_desc = get_position_description(int(p2[0]), int(p2[1]))
    caption = f"{mov_desc} is {verb} {motion_dynamic} from the {start_pos_desc} towards the {end_pos_desc}, passing by {static_desc}."
    return image_tensor, caption


def _render_static_scene(image_tensor):
    """Draw one to three static objects and return (canvas, caption)."""
    num_objects = random.choice([1, 2, 3])
    descriptions = []
    for _ in range(num_objects):
        props = get_random_object_properties()
        image_tensor, center = _draw_static_object(image_tensor, props)
        descriptions.append(f"a {_object_phrase(props)} in the {get_position_description(center[0], center[1])}")

    if len(descriptions) == 1:
        caption = f"An image with {descriptions[0]}."
    elif len(descriptions) == 2:
        caption = f"An image with {descriptions[0]} and {descriptions[1]}."
    else:
        caption = f"An image with {descriptions[0]}, {descriptions[1]}, and {descriptions[2]}."
    return image_tensor, caption


def generate_single_image_tensor(i):
    """Generate one image tensor and its caption.

    With equal probability the scene is either a motion scene (one static
    object plus one moving object drawn with a fading trail) or a static scene
    with one to three objects. Object positions in captions refer to each
    object's centre.

    Args:
        i: zero-based image index; the filename is the 1-based index
            zero-padded to five digits with a ``.jpg`` extension.

    Returns:
        (filename, caption, image_tensor), or (None, None, None) when the
        scene is skipped because the moving object is too large.
    """
    image_tensor = generate_gradient_background_tensor((IMAGE_SIZE, IMAGE_SIZE))
    has_motion = random.choice([True, False])

    if has_motion:
        scene = _render_motion_scene(image_tensor)
        if scene is None:
            return None, None, None
        image_tensor, caption = scene
    else:
        image_tensor, caption = _render_static_scene(image_tensor)

    filename = f"{str(i+1).zfill(5)}.jpg"
    return filename, caption, image_tensor


def create_dataset():
    """Generate the whole dataset: JPEG images plus a captions CSV.

    Images are written to ``<OUTPUT_DIR>/images`` and one
    ``filename, caption`` row per saved image goes to
    ``<OUTPUT_DIR>/captions.csv``. Scenes the generator skips produce neither
    an image nor a row.
    """
    images_dir = os.path.join(OUTPUT_DIR, 'images')
    os.makedirs(images_dir, exist_ok=True)

    print(f"Generating {NUM_IMAGES} advanced motion images on {DEVICE}...")

    caption_rows = []
    for index in tqdm(range(NUM_IMAGES)):
        filename, caption, image_tensor = generate_single_image_tensor(index)
        if not filename or image_tensor is None:
            continue  # The generator skipped this scene.

        # PIL needs the data on the CPU before it can encode the JPEG.
        picture = to_pil_image(image_tensor.cpu())
        picture.save(os.path.join(images_dir, filename), 'JPEG')
        caption_rows.append([filename, caption])

    csv_path = os.path.join(OUTPUT_DIR, 'captions.csv')
    with open(csv_path, 'w', newline='', encoding='utf-8') as csv_file:
        csv.writer(csv_file).writerows([['filename', 'caption'], *caption_rows])

    print(f"\nAdvanced motion dataset created in '{OUTPUT_DIR}'")

# Entry point: build the dataset when this file is run as a script
# (or when the cell is executed in a notebook, where __name__ is "__main__").
if __name__ == "__main__":
    create_dataset()

Generating 10000 advanced motion images using 96 processes...


100%|██████████| 10000/10000 [33:07<00:00,  5.03it/s]



Advanced motion dataset created in 'advanced_motion_dataset'


In [None]:
from huggingface_hub import HfApi
import os

# Hugging Face client authenticated with the HF_TOKEN environment variable.
# os.getenv returns None when the variable is unset — presumably HfApi then falls
# back to any locally stored credentials; verify before relying on it.
api = HfApi(token=os.getenv("HF_TOKEN"))

# Upload the generated dataset folder to a Hugging Face dataset repository.
# Adjust `folder_path` to where the dataset was generated and `repo_id` to
# "<your_username>/<your_dataset_name>" before running.
try:
    api.upload_folder(
        folder_path="/content/advanced_motion_dataset",
        repo_id="Maazwaheed/set_SHAPES",  # "<username>/<dataset name>" of the target repository
        repo_type="dataset"
    )
    print("Dataset uploaded successfully to Hugging Face!")
except Exception as e:
    # Best-effort cell: report the failure instead of aborting the notebook run.
    print(f"Error uploading dataset: {e}")

In [None]:
#!rm -fr   /content/advanced_motion_dataset