# Dataset Preparation


## Frames Extraction


In [1]:
import cv2
import os


# Function to extract frames from video
def extract_frames(video_path, output_folder):
    """
    Extract every frame of a video and save each one as a JPEG image.

    Frames are written to ``output_folder`` as ``frame_<index>.jpg``, where
    ``<index>`` starts at 0 and follows the decoding order.

    Parameters:
    video_path (str): Path to the input video file
    output_folder (str): Folder receiving the frames (created if missing)
    """
    # cv2.imwrite does not create missing folders, it only returns False,
    # so make sure the destination exists before decoding anything.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    # Initialize the video capture object
    cap = cv2.VideoCapture(video_path)

    try:
        # A file that cannot be opened (for instance a non-video file picked up
        # by os.listdir) is reported and skipped instead of being mistaken for
        # a video that simply ended.
        if not cap.isOpened():
            print(f"Could not open video: {video_path}")
            return

        count = 0
        while True:
            # Read each new frame
            success, img = cap.read()

            # A failed read marks the end of the stream
            if not success:
                print("End of video reached.")
                break

            # Save the current frame as a JPEG image
            output_file = os.path.join(output_folder, f"frame_{count}.jpg")
            if not cv2.imwrite(output_file, img):
                print(f"Failed to write frame {count} to {output_file}")

            count += 1
    finally:
        # Release the video capture object and close all windows, even if
        # decoding or writing raised.
        cap.release()
        cv2.destroyAllWindows()

In [2]:
# Replace the two folder paths below with your own: the folder containing the input videos and the folder that will receive the extracted frames
# input_folder_path = "/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/data/official_dataset/test"
# output_folder_path = "/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/thesis-github-repository/data/frames"
# for video in os.listdir(input_folder_path):
#     video_path = os.path.join(input_folder_path, video)
#     file_name = video.split(".")[0]
#     output_path = os.path.join(output_folder_path, file_name)
#     os.makedirs(output_path, exist_ok=True)
#     extract_frames(video_path, output_path)
#     print(f"Frames extracted from {video_path}")

## Label Studio Conversion

In [16]:
import json 

def convert_json(input_file, output_file):
    """
    Flatten a Label Studio JSON export into one bounding box per frame.

    For every task that has at least one annotation with at least one result,
    the first result of the first annotation is converted from percentages to
    pixels using the result's ``original_width`` / ``original_height``.
    Tasks without annotations or results are dropped.

    Parameters:
    input_file (str): Path to the exported JSON file (a list of tasks)
    output_file (str): Path of the JSON file to write; it receives a list of
        ``{"frame": <str>, "bbox": [x, y, width, height]}`` entries
    """
    with open(input_file, "r") as src:
        tasks = json.load(src)

    converted = []

    for task in tasks:
        # Frame id: text after the last "-" of the uploaded file name,
        # truncated at its first "."
        frame_id = task["file_upload"].split("-")[-1].split(".")[0]

        annotations = task["annotations"]
        if not annotations:
            continue

        results = annotations[0]["result"]
        if not results:
            continue

        region = results[0]
        rel_box = region["value"]
        img_w = region["original_width"]
        img_h = region["original_height"]

        # Box values are percentages of the image size; scale to pixels
        converted.append(
            {
                "frame": frame_id,
                "bbox": [
                    rel_box["x"] * img_w / 100.0,
                    rel_box["y"] * img_h / 100.0,
                    rel_box["width"] * img_w / 100.0,
                    rel_box["height"] * img_h / 100.0,
                ],
            }
        )

    with open(output_file, "w") as dst:
        json.dump(converted, dst, indent=4)

In [20]:
# Specify your input and output file paths
# input_file = "/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/thesis-github-repository/code/LSTM/data/outdoor2/test_outdoor2.json"
# output_file = "/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/thesis-github-repository/code/LSTM/data/outdoor2/outdoor2_full_dataset.json"

# convert_json(input_file, output_file)

## Synthetic Data Generation


### Function to generate a Background Image


In [3]:
import cv2
import numpy as np


def generate_background_image(width, height, num_shapes, shape_size):
    """
    Generate a simple background image with random shapes.

    The image starts black and receives ``num_shapes`` filled squares of side
    ``shape_size`` at random positions and with random colours. Positions are
    drawn so that every square lies entirely inside the image.

    Parameters:
    width (int): Width of the background image
    height (int): Height of the background image
    num_shapes (int): Number of shapes to draw
    shape_size (int): Size of each shape; must be smaller than both ``width``
        and ``height`` when at least one shape is drawn

    Returns:
    img (numpy array): Background image of shape (height, width, 3), dtype uint8

    Raises:
    ValueError: If a shape has to be drawn but does not fit in the image
    """
    # Fail with an explicit message instead of NumPy's generic "low >= high"
    if num_shapes > 0 and (shape_size >= width or shape_size >= height):
        raise ValueError(
            f"shape_size ({shape_size}) must be smaller than the image "
            f"dimensions ({width}x{height})"
        )

    img = np.zeros((height, width, 3), dtype=np.uint8)  # Initialize black background

    for _ in range(num_shapes):
        x = np.random.randint(0, width - shape_size)
        y = np.random.randint(0, height - shape_size)
        # randint's upper bound is exclusive: use 256 so a channel can reach 255
        color = (
            np.random.randint(0, 256),
            np.random.randint(0, 256),
            np.random.randint(0, 256),
        )
        cv2.rectangle(
            img,
            (x, y),
            (x + shape_size, y + shape_size),
            color,
            -1,  # negative thickness draws a filled rectangle
        )

    return img

In [4]:
# Example usage
# width, height = 640, 480  # Dimensions of the background image
# num_shapes = 10  # Number of shapes to draw
# shape_size = 20  # Size of each shape
# background_image = generate_background_image(
#     width, height, num_shapes, shape_size
# )  # Generate background image
# cv2.imwrite(
#     "/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/thesis-github-repository/data/synthetic/background/background1.png",
#     background_image,
# )  # Save background image

### Camera intrinsic matrix

1. Setting Up the Camera Projection
   First, define the parameters for the camera:

**Intrinsic matrix (K)**: Describes the camera's internal parameters (focal length, optical center).  
**Extrinsic matrix (R, T)**: Describes the camera's position and orientation in the world (rotation and translation).


In [5]:
# Pinhole intrinsic matrix: 1000 on the diagonal (focal terms) and
# (128, 128) in the last column (principal point), all in pixels.
K = np.array(
    [
        [1000, 0, 128],
        [0, 1000, 128],
        [0, 0, 1],
    ]
)

### Function to project 3D points to 2D


In [6]:
# Function to project 3D points to 2D
def project_points(points, K, R, T):
    """
    Project 3D points to 2D using the pinhole camera model.

    Each point X is mapped to K @ (R @ X + T) and then divided by its third
    (depth) component. No guard is applied for a zero depth: such points yield
    inf/nan values, following NumPy's division semantics.

    Parameters:
        points: Nx3 array-like of 3D points (a single point of shape (3,) is
            also accepted and treated as one row)
        K: 3x3 intrinsic matrix (array-like)
        R: 3x3 rotation matrix (array-like)
        T: Translation vector with 3 components (array-like)

    Returns:
        Nx2 array of 2D image coordinates
    """
    # Accept plain lists/tuples as well as arrays, and promote a lone point
    # to a 1x3 matrix so the rest of the function can stay vectorised.
    points = np.atleast_2d(np.asarray(points, dtype=float))
    K = np.asarray(K)
    R = np.asarray(R)
    T = np.asarray(T)

    points_homogeneous = np.hstack((points, np.ones((points.shape[0], 1))))
    # 3x4 projection matrix P = K [R | T]
    projection_matrix = K @ np.hstack((R, T.reshape(-1, 1)))
    points_2d_homogeneous = points_homogeneous @ projection_matrix.T
    # Perspective divide by the depth component
    points_2d = points_2d_homogeneous[:, :2] / points_2d_homogeneous[:, 2, np.newaxis]
    return points_2d

### Function to Generate 3D Linear Motion Dataset


In [7]:
import cv2
import numpy as np
import os
import json
import random


# Function to generate 3D linear motion dataset
def generate_3d_linear_motion_dataset(num_frames, bg_img, output_dir):
    """
    Generate a synthetic dataset with 3D linear motion.

    The object sits at the origin of its own frame; the translation starts at
    (0, 0, 1000) and changes by (+5, 0, -5) per frame. Each frame is the
    background with a 50x50 rectangle drawn around the projected position.
    Frames are saved as ``frame_XXXX.png`` and the boxes as
    ``annotations.json`` (``[x, y, w, h]`` with x, y the top-left corner).

    Uses the module-level intrinsic matrix ``K`` and ``project_points``.

    Parameters:
        num_frames (int): Number of frames to generate
        bg_img (str): Path to the background image
        output_dir (str): Output directory to save the frames and annotations

    Raises:
        FileNotFoundError: If the background image cannot be read
    """
    os.makedirs(output_dir, exist_ok=True)

    # Read the background once instead of once per frame; cv2.imread returns
    # None (it does not raise) when the file is missing or unreadable.
    background = cv2.imread(bg_img)
    if background is None:
        raise FileNotFoundError(f"Background image not found or unreadable: {bg_img}")

    # Rotation (identity matrix, no rotation for simplicity)
    R = np.eye(3)
    T_initial = np.array(
        [0, 0, 1000]
    )  # Initial translation (starting far from the camera)

    # Object centered at origin in its local 3D coordinate space
    obj_3d_position = np.array([[0, 0, 0]])

    # Fixed size of the 2D bounding box
    w, h = 50, 50

    # Initialise the list to store the annotations
    annotations = []

    for frame_num in range(num_frames):
        T = T_initial + np.array([frame_num * 5, 0, -frame_num * 5])

        # The depth shrinks by 5 per frame and hits 0 at frame 200, where the
        # perspective divide is undefined. Stop before producing invalid boxes.
        if T[2] <= 0:
            print(
                f"Stopping at frame {frame_num}: the object reached the camera plane."
            )
            break

        # Project the 3D points to 2D
        obj_2d_position = project_points(obj_3d_position, K, R, T).astype(int)[0]

        # Calculate 2D bounding box (plain ints for OpenCV and JSON)
        bbox = (
            int(obj_2d_position[0]) - w // 2,
            int(obj_2d_position[1]) - h // 2,
            w,
            h,
        )

        # Draw the bounding box on a fresh copy of the background
        img = background.copy()
        cv2.rectangle(
            img,
            (bbox[0], bbox[1]),
            (bbox[0] + bbox[2], bbox[1] + bbox[3]),
            (255, 0, 0),
            2,
        )

        # Save the frame
        frame_filename = f"frame_{frame_num:04d}.png"
        frame_filepath = os.path.join(output_dir, frame_filename)
        cv2.imwrite(frame_filepath, img)

        # Append annotation
        annotations.append({"frame": frame_filename, "bbox": list(bbox)})

    # Save the annotations to a JSON file
    annotations_filepath = os.path.join(output_dir, "annotations.json")
    with open(annotations_filepath, "w") as f:
        json.dump(annotations, f, indent=4)

In [8]:
# Example usage
# bg_img_path = "/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/thesis-github-repository/data/synthetic/background/background1.png"
# generate_3d_linear_motion_dataset(
#     num_frames=100,
#     bg_img=bg_img_path,
#     output_dir="/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/thesis-github-repository/data/synthetic/3d_linear_motion_dataset_test1",
# )

### Function to Generate 3D Non-linear Motion Dataset


In [44]:
def process_frame(
    frame_num,
    T_initial,
    base_size,
    bg_img,
    output_dir,
    z_min,
    z_max,
    frame_size,
    scale_factor,
):
    """
    Process a single frame by projecting 3D points into 2D and saving the image with a bounding box.

    Uses the module-level intrinsic matrix ``K`` and ``project_points``.
    Any exception is caught, reported with ``print`` and signalled to the
    caller through a ``{"frame": None, "bbox": []}`` result.

    Parameters:
        frame_num: Frame number
        T_initial: Initial translation vector
        base_size: Size of the bounding box when the depth equals 1000
        bg_img: Path to background image
        output_dir: Directory for saving frames
        z_min: Minimum value allowed for the z component of the translation
        z_max: Maximum value allowed for the z component of the translation
        frame_size: Size of the video frame as (width, height)
        scale_factor: Scaling factor to slow down movement

    Returns:
        dict: ``{"frame": <file name>, "bbox": [x, y, w, h]}`` on success,
        ``{"frame": None, "bbox": []}`` on failure
    """
    try:
        # Rotation (identity matrix, no rotation for simplicity)
        R = np.eye(3)

        # Non-linear translation
        t = frame_num * scale_factor
        T = T_initial + np.array(
            [
                100 * np.cos(t) + 50 * np.sin(2 * t),
                100 * np.sin(t) + 50 * np.cos(2 * t),
                -frame_num * 3 * scale_factor,
            ]
        )

        # Ensures that the Z coordinate is within a valid range
        T[2] = np.clip(T[2], z_min, z_max)

        # Defines the object's 3D position
        obj_3d_position = np.array([[0, 0, 0]])

        # Projecting 3D points in 2D
        obj_2d_position = project_points(obj_3d_position, K, R, T).astype(int)[0]

        # Calculates the size of the 2D bounding box as a function of depth
        z = T[2]  # Z component of the translation vector
        size_scaling_factor = base_size / (z / 1000)
        w = h = max(
            1, int(size_scaling_factor)
        )  # Ensures that the width and height are at least 1

        # Keeps the object in the frame. The clamp uses the actual box size
        # (not base_size), otherwise a box that grew as the depth decreased
        # could stick out of the image. If the box is larger than the frame it
        # is anchored at the top-left corner.
        half_w, half_h = w // 2, h // 2
        obj_2d_position[0] = np.clip(
            obj_2d_position[0], half_w, max(half_w, frame_size[0] - (w - half_w))
        )
        obj_2d_position[1] = np.clip(
            obj_2d_position[1], half_h, max(half_h, frame_size[1] - (h - half_h))
        )

        # Calculates the 2D bounding box
        bbox = (obj_2d_position[0] - half_w, obj_2d_position[1] - half_h, w, h)

        # Draw the bounding box on the background
        img = cv2.imread(bg_img)
        if img is None:
            raise FileNotFoundError(f"Image de fond {bg_img} non trouvée")
        cv2.rectangle(
            img,
            (bbox[0], bbox[1]),
            (bbox[0] + bbox[2], bbox[1] + bbox[3]),
            (255, 0, 0),
            2,
        )

        # Saves the frame
        frame_filename = f"frame_{frame_num:04d}.png"
        frame_filepath = os.path.join(output_dir, frame_filename)
        cv2.imwrite(frame_filepath, img)

        return {"frame": frame_filename, "bbox": [int(coord) for coord in bbox]}
    except Exception as e:
        # Best effort: report the failure and let the caller skip this frame
        print(f"Erreur lors du traitement de la frame {frame_num} : {e}")
        return {"frame": None, "bbox": []}

In [48]:
def generate_3d_nonlinear_motion_dataset(
    num_frames,
    bg_img,
    output_dir,
    z_min=100,
    z_max=1000,
    frame_size=(640, 480),
    scale_factor=0.01,
):
    """
    Generate a synthetic dataset with 3D non-linear motion.

    Every frame is rendered by ``process_frame``; frames it reports as failed
    (``"frame"`` is falsy) are left out of the annotations, which are written
    to ``annotations.json`` inside ``output_dir``.

    Parameters:
        num_frames (int): Number of frames
        bg_img (str): Path to background image
        output_dir (str): Path to output folder
        z_min (int, optional): Minimum value allowed for the z component of the translation. Defaults to 100.
        z_max (int, optional): Maximum value allowed for the z component of the translation. Defaults to 1000.
        frame_size (tuple, optional): Size of the video frame. Defaults to (640, 480).
        scale_factor (float, optional): Scaling factor to slow down movement. Defaults to 0.01.
    """
    os.makedirs(output_dir, exist_ok=True)

    # The motion starts at the maximum allowed depth
    start_translation = np.array([0, 0, z_max])
    box_base_size = 50  # Passed to process_frame as the base bounding-box size

    # Render frames one after the other, keeping only the successful ones
    frame_results = (
        process_frame(
            index,
            start_translation,
            box_base_size,
            bg_img,
            output_dir,
            z_min,
            z_max,
            frame_size,
            scale_factor,
        )
        for index in range(num_frames)
    )
    kept = [result for result in frame_results if result["frame"]]

    # Save the annotations to a JSON file
    with open(os.path.join(output_dir, "annotations.json"), "w") as out:
        json.dump(kept, out, indent=4)

In [49]:
# if __name__ == "__main__":
#     bg_img_path = "/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/thesis-github-repository/data/synthetic/background/background1.png"
#     generate_3d_nonlinear_motion_dataset(
#         num_frames=10000,
#         bg_img=bg_img_path,
#         output_dir="/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/thesis-github-repository/data/synthetic/3d_nonlinear_motion_dataset_test6",
#     )

### Function to create training, validation & test datasets with a classic JSON file

In [21]:
import json
from sklearn.model_selection import train_test_split


def create_datasets(
    annotation_file, train_ratio=0.64, val_ratio=0.16, test_ratio=0.2, output_dir="."
):
    """
    Creates train.json, validation.json and test.json files from annotations.json.

    The annotations are split into three contiguous, order-preserving chunks
    (no shuffling): the first ``train_ratio`` share goes to training, the next
    ``val_ratio`` share to validation and the remainder to testing.

    Parameters :
        annotation_file (str): Path to the annotations.json file
        train_ratio (float): Ratio of data allocated to training
        val_ratio (float): Ratio of data assigned to validation
        test_ratio (float): Ratio of data allocated to tests
        output_dir (str): Directory where the resultant files will be saved
            (created if missing)

    Raises:
        AssertionError: If the three ratios do not sum to 1.0
    """
    import math
    import os

    # Checks that the division ratios are correct. Exact float equality would
    # reject valid splits such as 0.7 / 0.2 / 0.1 (their sum is 0.999...).
    assert math.isclose(
        train_ratio + val_ratio + test_ratio, 1.0, abs_tol=1e-9
    ), "Ratios should sum to 1.0"

    # Loads the annotations json file
    with open(annotation_file, "r") as f:
        annotations = json.load(f)

    num_annotations = len(annotations)

    # Compute the split indices. The small epsilon keeps int() from dropping
    # an item when a product lands just below a whole number (e.g. 8.999...).
    eps = 1e-9
    train_end = int(train_ratio * num_annotations + eps)
    val_end = int((train_ratio + val_ratio) * num_annotations + eps)

    # Slice the annotations into contiguous runs for training, validation and tests
    splits = (
        ("train.json", annotations[:train_end]),
        ("validation.json", annotations[train_end:val_end]),
        ("test.json", annotations[val_end:]),
    )

    # Saves JSON files for training, validation and testing
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    for file_name, subset in splits:
        with open(os.path.join(output_dir, file_name), "w") as f:
            json.dump(subset, f, indent=4)

    print(
        "‘The files train.json, validation.json and test.json have been created and saved."
    )

In [25]:

# Split the outdoor1 annotations with the default 64/16/20 ratios.
# NOTE: both paths are specific to the author's machine; adapt before running.
outdoor1_annotations = "/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/thesis-github-repository/code/LSTM/data/outdoor1/outdoor1_full_dataset.json"
outdoor1_output_dir = "/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/thesis-github-repository/code/LSTM/data/outdoor1"
create_datasets(outdoor1_annotations, output_dir=outdoor1_output_dir)

‘The files train.json, validation.json and test.json have been created and saved.


### Function to create training, validation & test datasets with a YOLO format dataset folder

In [1]:
import os
import random
import shutil
from pathlib import Path
from sklearn.model_selection import train_test_split

def create_splits_and_yaml(dataset_path, train_size=0.64, val_size=0.16, test_size=0.2):
    """
    Split a YOLO-format dataset into train/val/test folders and write dataset.yaml.

    ``dataset_path`` must contain an ``images`` folder (``*.jpg`` / ``*.png``)
    and a ``labels`` folder (``<image stem>.txt``). Images, plus their label
    file when it exists, are copied (not moved) into
    ``<dataset_path>/{train,val,test}/{images,labels}``, and a
    ``dataset.yaml`` describing the three splits and the single class
    ``fuel_port`` is written at the dataset root.

    Parameters:
        dataset_path (str or Path): Root folder of the dataset
        train_size (float): Share of the data used for training
        val_size (float): Share of the data used for validation
        test_size (float): Share of the data used for testing

    Raises:
        AssertionError: If the ``images`` or ``labels`` folder is missing
    """
    dataset_path = Path(dataset_path)
    images_path = dataset_path / "images"
    labels_path = dataset_path / "labels"

    assert images_path.exists() and labels_path.exists(), "Invalid dataset path, or missing 'images' and 'labels' folders."

    # Get all image files. glob() yields them in a file-system dependent order,
    # so sort them: with the fixed random_state below the split is then
    # reproducible across runs and machines.
    images = sorted(
        list(images_path.glob("*.jpg")) + list(images_path.glob("*.png"))
    )  # depending on your dataset format

    # Split the dataset: carve out the test set first, then split the rest
    # into train/val using the validation share of what remains.
    train_images, test_images = train_test_split(images, test_size=test_size, random_state=42)
    train_images, val_images = train_test_split(
        train_images, test_size=val_size / (train_size + val_size), random_state=42
    )

    def copy_files(file_list, dest_images_folder, dest_labels_folder):
        """Copy each image, and its label file when present, to the split folders."""
        for image in file_list:
            label_file = labels_path / (image.stem + ".txt")
            if label_file.exists():
                shutil.copy(label_file, dest_labels_folder / label_file.name)
            shutil.copy(image, dest_images_folder / image.name)

    splits = {"train": train_images, "val": val_images, "test": test_images}
    for folder, split_images in splits.items():
        dest_images_folder = dataset_path / folder / "images"
        dest_labels_folder = dataset_path / folder / "labels"
        dest_images_folder.mkdir(parents=True, exist_ok=True)
        dest_labels_folder.mkdir(parents=True, exist_ok=True)
        copy_files(split_images, dest_images_folder, dest_labels_folder)

    # Create the .yaml file. It is built line by line so that every key starts
    # at column 0; stripping an indented multi-line string would leave all
    # lines after the first indented, which is not valid YAML.
    yaml_lines = [
        f"train: {str(dataset_path / 'train')}",
        f"val: {str(dataset_path / 'val')}",
        f"test: {str(dataset_path / 'test')}",
        "",
        "# number of classes",
        "nc: 1",
        "",
        "# class names",
        "names: ['fuel_port']",
    ]

    with open(dataset_path / "dataset.yaml", "w") as yaml_file:
        yaml_file.write("\n".join(yaml_lines) + "\n")

In [2]:
# Example usage:
# NOTE: machine-specific location; replace with your actual dataset path
dataset_folder = (
    "/Users/alexis/Library/CloudStorage/OneDrive-Balayre&Co/Cranfield/Thesis/thesis-github-repository/data/frames/YOLO"
)
create_splits_and_yaml(dataset_path=dataset_folder)

### Converter for the provided dataset to YOLO format

In [3]:
import os
import json
from glob import glob

def convert_annotation(json_file, output_dir):
    """
    Convert one JSON annotation file into a YOLO-format ``.txt`` label file.

    The JSON is expected to provide ``imageWidth``, ``imageHeight`` and a
    ``shapes`` list; only shapes whose ``shape_type`` is ``"rectangle"`` are
    converted, each from its two corner ``points``. Every rectangle becomes
    one line ``<class_id> <xc> <yc> <w> <h>`` with values normalised by the
    image size. The output file takes the JSON file's base name and is
    created even when no rectangle is found.

    Parameters:
        json_file (str): Path to the JSON annotation file
        output_dir (str): Existing directory receiving the ``.txt`` file
    """
    with open(json_file, 'r') as f:
        data = json.load(f)

    base_name = os.path.splitext(os.path.basename(json_file))[0]
    txt_output_path = os.path.join(output_dir, base_name + '.txt')

    with open(txt_output_path, 'w') as txt_out:
        for shape in data['shapes']:
            # Shapes of another type, or without a declared type, are skipped
            if shape.get('shape_type') != 'rectangle':
                continue

            points = shape['points']
            x1, y1 = points[0]
            x2, y2 = points[1]

            # Convert to YOLO format
            width = data['imageWidth']
            height = data['imageHeight']
            xc = (x1 + x2) / 2 / width
            yc = (y1 + y2) / 2 / height
            # The two corners may come in any order, so take absolute
            # extents to avoid negative box sizes.
            w = abs(x2 - x1) / width
            h = abs(y2 - y1) / height

            class_id = 0  # Update this if you have multiple classes and a class mapping system
            txt_out.write(f"{class_id} {xc} {yc} {w} {h}\n")

def convert_annotations_in_directory(input_dir, output_dir):
    """
    Convert every JSON annotation of a directory and expose its images alongside.

    Each ``*.json`` file of ``input_dir`` is passed to ``convert_annotation``.
    Each ``*.jpg`` file of ``input_dir`` is then made available in
    ``output_dir`` through a symbolic link (the image data is not copied).

    Parameters:
        input_dir (str): Directory containing the JSON annotations and images
        output_dir (str): Directory receiving the label files and image links
            (created if missing)
    """
    os.makedirs(output_dir, exist_ok=True)

    for json_file in glob(os.path.join(input_dir, '*.json')):
        convert_annotation(json_file, output_dir)

    # Ensure image files are also reachable from the output directory
    for image_file in glob(os.path.join(input_dir, '*.jpg')):
        dest_file = os.path.join(output_dir, os.path.basename(image_file))

        # Replace a dangling link (e.g. one created from a relative path)
        if os.path.islink(dest_file) and not os.path.exists(dest_file):
            os.remove(dest_file)

        # lexists also sees links whose target is missing, which plain
        # exists() reports as absent and would lead to FileExistsError.
        if not os.path.lexists(dest_file):
            # A relative link target is resolved from the link's own folder,
            # so link to the absolute path to keep it valid.
            os.symlink(os.path.abspath(image_file), dest_file)

In [None]:
if __name__ == "__main__":
    # Placeholder locations: replace both with the paths to your own
    # input (annotations + images) and output (YOLO labels) directories.
    input_directory, output_directory = "input_dir", "output_dir"

    convert_annotations_in_directory(input_directory, output_directory)