## Synthetic Data Generation

This notebook explores the generation of synthetic data to train the future position prediction model. 

### Function to generate a Background Image


In [2]:
import cv2
import numpy as np

def generate_background_image(width: int, height: int, num_shapes: int, shape_size: int) -> np.ndarray:
    """
    Generates a black background image covered with randomly placed, randomly
    colored filled squares.

    Each square is drawn with ``cv2.rectangle`` between the corners ``(x, y)``
    and ``(x + shape_size, y + shape_size)``, where ``(x, y)`` is sampled with
    ``np.random.randint`` (seed NumPy's global RNG beforehand for reproducible
    backgrounds).

    Args:
        width (int): The width of the generated image, in pixels.
        height (int): The height of the generated image, in pixels.
        num_shapes (int): The number of squares to draw on the background.
        shape_size (int): The offset between opposite corners of each square.
            If it is not smaller than an image dimension, the square is
            anchored at 0 along that dimension and clipped by OpenCV instead
            of raising an error.

    Returns:
        np.ndarray: ``uint8`` image of shape ``(height, width, 3)``.
    """
    img = np.zeros((height, width, 3), dtype=np.uint8)  # Initialize black background

    # np.random.randint excludes its upper bound and rejects an empty range,
    # so keep the bound at 1 or more when the shape does not fit in the image.
    x_upper = max(1, width - shape_size)
    y_upper = max(1, height - shape_size)

    for _ in range(num_shapes):
        x = int(np.random.randint(0, x_upper))
        y = int(np.random.randint(0, y_upper))

        # Upper bound 256 (exclusive) so every 8-bit channel value, including
        # 255, can be drawn; .tolist() yields plain Python ints for OpenCV.
        color = tuple(np.random.randint(0, 256, size=3).tolist())

        cv2.rectangle(
            img,
            (x, y),
            (x + shape_size, y + shape_size),
            color,
            -1,  # negative thickness -> filled shape
        )

    return img

Example Usage

In [None]:
# Example usage
# width, height = 640, 480  # Dimensions of the background image
# num_shapes = 10  # Number of shapes to draw
# shape_size = 20  # Size of each shape
# background_image = generate_background_image(
#     width, height, num_shapes, shape_size
# )  # Generate background image
# cv2.imwrite(
#     "/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/thesis-github-repository/data/synthetic/background/background1.png",
#     background_image,
# )  # Save background image

### Define Camera intrinsic matrix

**Setting up the camera projection**

Projecting 3D points into the image requires two sets of camera parameters:

- **Intrinsic matrix (K)**: describes the camera's internal parameters (focal length, optical center). It is defined in the cell below.
- **Extrinsic parameters (R, T)**: describe the camera's position and orientation in the world (rotation and translation). They are set inside the dataset-generation functions further down.


In [None]:
# Camera intrinsic matrix, laid out one row per line:
#   [fx,  0, cx]
#   [ 0, fy, cy]
#   [ 0,  0,  1]
K = np.array(
    [
        [1000, 0, 128],
        [0, 1000, 128],
        [0, 0, 1],
    ]
)

### Function to project 3D points to 2D


In [1]:
import numpy as np

def project_points(points: np.ndarray, K: np.ndarray, R: np.ndarray, T: np.ndarray) -> np.ndarray:
    """
    Maps 3D points to 2D image coordinates with the pinhole model K [R | T].

    Args:
        points (np.ndarray): 3D points, shape (N, 3).
        K (np.ndarray): Camera intrinsic matrix, shape (3, 3).
        R (np.ndarray): Rotation matrix, shape (3, 3).
        T (np.ndarray): Translation vector with 3 elements.

    Returns:
        np.ndarray: Projected 2D points, shape (N, 2).
    """
    num_points = points.shape[0]

    # Build the 3x4 extrinsic block [R | T], then the full projection matrix.
    extrinsic = np.concatenate([R, T.reshape(-1, 1)], axis=1)
    camera_matrix = K @ extrinsic

    # Append w = 1 to every point so the translation is applied by the matmul.
    ones_column = np.ones((num_points, 1))
    homogeneous = np.concatenate([points, ones_column], axis=1)

    # Row-wise projection: each row becomes (u * w, v * w, w).
    projected = homogeneous @ camera_matrix.T

    # Perspective divide by the homogeneous coordinate.
    uv_scaled = projected[:, :2]
    depth = projected[:, 2:3]
    return uv_scaled / depth

### Function to Generate 3D Linear Motion Dataset


In [None]:
import cv2
import numpy as np
import os
import json
import random


# Function to generate 3D linear motion dataset
def generate_3d_linear_motion_dataset(num_frames, bg_img, output_dir):
    """
    Renders a sequence of frames in which an object moves linearly in 3D, and
    writes the frames plus an ``annotations.json`` file to ``output_dir``.

    For frame ``i`` the translation is ``[5 * i, 0, 1000 - 5 * i]`` with an
    identity rotation. The object's projected centre (via the module-level
    ``project_points`` and intrinsic matrix ``K``) is surrounded by a fixed
    50x50 bounding box drawn on a copy of the background image.

    NOTE(review): the depth component reaches 0 at frame 200, so
    ``num_frames`` above 200 divides by zero in the projection.

    Args:
        num_frames (int): Number of frames to generate.
        bg_img (str): Path of the background image to draw on.
        output_dir (str): Directory that receives ``frame_XXXX.png`` files and
            ``annotations.json``; created if it does not exist.

    Raises:
        FileNotFoundError: If the background image cannot be read.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Read the background once: it is the same for every frame, and
    # cv2.imread signals failure by returning None rather than raising.
    background = cv2.imread(bg_img)
    if background is None:
        raise FileNotFoundError(f"Background image {bg_img} could not be read")

    # Rotation (identity matrix, no rotation for simplicity)
    R = np.eye(3)
    # Initial translation (starting far from the camera)
    T_initial = np.array([0, 0, 1000])

    # Object centered at the origin of its local 3D coordinate space
    obj_3d_position = np.array([[0, 0, 0]])

    # Fixed bounding-box size in pixels
    w, h = 50, 50

    annotations = []

    for frame_num in range(num_frames):
        # Move right along X and towards the camera along Z, 5 units per frame.
        T = T_initial + np.array([frame_num * 5, 0, -frame_num * 5])

        # Project the 3D point to integer pixel coordinates
        obj_2d_position = project_points(obj_3d_position, K, R, T).astype(int)[0]

        # Bounding box as (x, y, w, h) with (x, y) the top-left corner;
        # plain Python ints keep both OpenCV and json happy.
        bbox = (
            int(obj_2d_position[0]) - w // 2,
            int(obj_2d_position[1]) - h // 2,
            w,
            h,
        )

        # Draw on a fresh copy so boxes from earlier frames do not accumulate.
        img = background.copy()
        cv2.rectangle(
            img,
            (bbox[0], bbox[1]),
            (bbox[0] + bbox[2], bbox[1] + bbox[3]),
            (255, 0, 0),
            2,
        )

        # Save the frame
        frame_filename = f"frame_{frame_num:04d}.png"
        frame_filepath = os.path.join(output_dir, frame_filename)
        cv2.imwrite(frame_filepath, img)

        annotations.append({"frame": frame_filename, "bbox": list(bbox)})

    # Save the annotations to a JSON file
    annotations_filepath = os.path.join(output_dir, "annotations.json")
    with open(annotations_filepath, "w") as f:
        json.dump(annotations, f, indent=4)

In [None]:
# Example usage
# bg_img_path = "/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/thesis-github-repository/data/synthetic/background/background1.png"
# generate_3d_linear_motion_dataset(
#     num_frames=100,
#     bg_img=bg_img_path,
#     output_dir="/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/thesis-github-repository/data/synthetic/3d_linear_motion_dataset_test1",
# )

### Function to Generate 3D Non-linear Motion Dataset


In [None]:
def process_frame(
    frame_num,
    T_initial,
    base_size,
    bg_img,
    output_dir,
    z_min,
    z_max,
    frame_size,
    scale_factor,
):
    """
    Renders one frame of the non-linear motion sequence and returns its annotation.

    The translation follows a curved path in X/Y (sum of two sinusoids of
    ``t = frame_num * scale_factor``) while Z decreases linearly and is clamped
    to ``[z_min, z_max]``. The projected centre (via the module-level
    ``project_points`` and intrinsic matrix ``K``) is clamped into the frame,
    and a square bounding box whose side is ``base_size * 1000 / z`` is drawn
    on the background image and saved as ``frame_XXXX.png``.

    Args:
        frame_num (int): Index of the frame; drives both the path and the file name.
        T_initial (np.ndarray): Starting translation vector with 3 elements.
        base_size (int): Bounding-box side in pixels at depth 1000.
        bg_img (str): Path of the background image to draw on.
        output_dir (str): Directory the frame is written to.
        z_min (float): Lower clamp for the depth component.
        z_max (float): Upper clamp for the depth component.
        frame_size (tuple): ``(width, height)`` used to clamp the projected centre.
        scale_factor (float): Step applied to ``frame_num`` for the path
            parameter and the depth decrement.

    Returns:
        dict: ``{"frame": <file name>, "bbox": [x, y, w, h]}`` with ``(x, y)``
        the top-left corner, or ``{"frame": None, "bbox": []}`` if anything
        failed (the error is printed, not raised).
    """
    try:
        # Rotation (identity matrix, no rotation for simplicity)
        R = np.eye(3)

        # Non-linear translation: a circle of radius 100 plus a second
        # harmonic of amplitude 50 in X/Y, moving towards the camera along Z.
        t = frame_num * scale_factor
        T = T_initial + np.array(
            [
                100 * np.cos(t) + 50 * np.sin(2 * t),
                100 * np.sin(t) + 50 * np.cos(2 * t),
                -frame_num * 3 * scale_factor,
            ]
        )

        # Ensures that the Z coordinate is within a valid range
        T[2] = np.clip(T[2], z_min, z_max)

        # Defines the object's 3D position
        obj_3d_position = np.array([[0, 0, 0]])

        # Projecting 3D points in 2D
        obj_2d_position = project_points(obj_3d_position, K, R, T).astype(int)[0]

        # Keeps the object's centre in the frame.
        # NOTE(review): the margin uses base_size, not the depth-scaled w/h
        # computed below, so a large box can still extend past the frame edges.
        obj_2d_position[0] = np.clip(
            obj_2d_position[0], base_size // 2, frame_size[0] - base_size // 2
        )
        obj_2d_position[1] = np.clip(
            obj_2d_position[1], base_size // 2, frame_size[1] - base_size // 2
        )

        # Calculates the size of the 2D bounding box as a function of camera distance
        z = T[2]  # Z coordinate of the camera translation vector
        size_scaling_factor = base_size / (z / 1000)
        # Ensures that the width and height are at least 1
        w = h = max(1, int(size_scaling_factor))

        # Bounding box as (x, y, w, h); plain Python ints for OpenCV and json
        bbox = (
            int(obj_2d_position[0]) - w // 2,
            int(obj_2d_position[1]) - h // 2,
            w,
            h,
        )

        # Draw the bounding box on the background
        img = cv2.imread(bg_img)
        if img is None:
            raise FileNotFoundError(f"Image de fond {bg_img} non trouvée")
        cv2.rectangle(
            img,
            (bbox[0], bbox[1]),
            (bbox[0] + bbox[2], bbox[1] + bbox[3]),
            (255, 0, 0),
            2,
        )

        # Saves the frame. cv2.imwrite reports failure through its return
        # value, so check it: otherwise an annotation would be emitted for a
        # frame that was never written.
        frame_filename = f"frame_{frame_num:04d}.png"
        frame_filepath = os.path.join(output_dir, frame_filename)
        if not cv2.imwrite(frame_filepath, img):
            raise OSError(f"Impossible d'écrire la frame {frame_filepath}")

        return {"frame": frame_filename, "bbox": list(bbox)}
    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller skip
        # this frame instead of aborting the whole dataset.
        print(f"Erreur lors du traitement de la frame {frame_num} : {e}")
        return {"frame": None, "bbox": []}

In [None]:
def generate_3d_nonlinear_motion_dataset(
    num_frames,
    bg_img,
    output_dir,
    z_min=100,
    z_max=1000,
    frame_size=(640, 480),
    scale_factor=0.01,
):
    """
    Generates the non-linear motion dataset: one rendered frame per index plus
    an ``annotations.json`` summary, all written to ``output_dir``.

    Every frame is delegated to ``process_frame``; results whose ``"frame"``
    entry is falsy are left out of the annotations file.

    Args:
        num_frames (int): Number of frames to attempt.
        bg_img (str): Path of the background image handed to ``process_frame``.
        output_dir (str): Destination directory; created if missing.
        z_min (float): Lower depth bound handed to ``process_frame``.
        z_max (float): Upper depth bound; also the starting depth.
        frame_size (tuple): Frame dimensions handed to ``process_frame``.
        scale_factor (float): Per-frame step handed to ``process_frame``.
    """
    os.makedirs(output_dir, exist_ok=True)

    # The object starts on the optical axis at the far depth limit.
    start_translation = np.array([0, 0, z_max])
    # Reference side length of the bounding box handed to process_frame.
    reference_box_size = 50

    # Lazily render frames in index order, keeping only the successful ones.
    frame_results = (
        process_frame(
            index,
            start_translation,
            reference_box_size,
            bg_img,
            output_dir,
            z_min,
            z_max,
            frame_size,
            scale_factor,
        )
        for index in range(num_frames)
    )
    kept_annotations = [result for result in frame_results if result["frame"]]

    # Persist the annotations next to the frames.
    json_path = os.path.join(output_dir, "annotations.json")
    with open(json_path, "w") as handle:
        json.dump(kept_annotations, handle, indent=4)

In [None]:
# if __name__ == "__main__":
#     bg_img_path = "/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/thesis-github-repository/data/synthetic/background/background1.png"
#     generate_3d_nonlinear_motion_dataset(
#         num_frames=10000,
#         bg_img=bg_img_path,
#         output_dir="/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/thesis-github-repository/data/synthetic/3d_nonlinear_motion_dataset_test6",
#     )