### *0. Imports*

In [1]:
import os
import shutil
import yaml
import numpy as np
from pathlib import Path
import torch
import torch.nn.functional as F


### *1. Data Cleaning* 

In [None]:
"""Removing redundant data from GOOSE dataset download"""
# NOTE: everything below is commented out, so running this cell does nothing.
# The output recorded under the cell shows the trimming pass was already run.
# When enabled, the script walks `source_dir` recursively and copies every file
# whose name contains "vis" (case-insensitive) and ends in ".png" or ".jpg"
# (case-sensitive) into `target_dir`, recreating the same sub-folder layout.
#source_dir = Path(r"C:\Users\maxlars\UGV research pipeline\Goose_dataset\images\val")
#target_dir = Path(r"C:\Users\maxlars\UGV research pipeline\Goose_dataset\processed_rgb")

#target_dir.mkdir(parents=True, exist_ok=True)

#print("Starting trimming process...")
#
#for root, dirs, files in os.walk(source_dir):
#    for file in files:
#        if "vis" in file.lower() and file.endswith((".png", ".jpg")):
#            rel_path = os.path.relpath(root, source_dir)
#            dest_folder = target_dir / rel_path
#            dest_folder.mkdir(parents=True, exist_ok=True)
#
#            shutil.copy2(os.path.join(root, file), dest_folder / file)  # copy2 also copies file metadata
#
#print(f"Done! Cleaned RGB images are in: {target_dir}")

Starting trimming process...
Done! Cleaned RGB images are in: C:\Users\maxlars\UGV research pipeline\Goose_dataset\processed_rgb


In [4]:
!ajv validate --spec=draft2020 --strict-schema=log - -s ./metadata-scheme.json -d ./metadata.yml

'ajv' is not recognized as an internal or external command,
operable program or batch file.


### *2. Data Pre-processing and 3D projection*

In [None]:
class GooseQueryProjector:
    def __init__(self, yaml_path):
        self.yaml_path = Path(yaml_path)
        self.load_calibration()

    def load_calibration(self):
        """Loads camera parameters from the windshield_vis.yaml."""
        with open(self.yaml_path, 'r') as f:
            data = yaml.safe_load(f)
        
        self.img_width = data['image_width']
        self.img_height = data['image_height']
        
        k_data = data['camera_matrix']['data']
        self.K = torch.tensor(k_data).view(3, 3).float()
        
        d_data = data['distortion_coefficients']['data']
        self.D = torch.tensor(d_data).float()
        
        p_data = data['projection_matrix']['data']
        self.P = torch.tensor(p_data).view(3, 4).float()

    def undistort_image(self, image_tensor):
        """
        Uses grid_sample to undistort the RGB image before feature extraction.
        image_tensor: (B, C, H, W)
        """
        h, w = image_tensor.shape[-2:]
        
        grid_y, grid_x = torch.meshgrid(
            torch.linspace(-1, 1, h), 
            torch.linspace(-1, 1, w), 
            indexing='ij'
        )
        
        u = (grid_x + 1) * (w - 1) / 2
        v = (grid_y + 1) * (h - 1) / 2
        
        x = (u - self.K[0, 2]) / self.K[0, 0]
        y = (v - self.K[1, 2]) / self.K[1, 1]
        
        r2 = x**2 + y**2
        radial = (1 + self.D[0]*r2 + self.D[1]*r2**2 + self.D[4]*r2**3)
        x_dist = x * radial + (2*self.D[2]*x*y + self.D[3]*(r2 + 2*x**2))
        y_dist = y * radial + (self.D[2]*(r2 + 2*y**2) + 2*self.D[3]*x*y)
        
        u_dist = self.K[0, 0] * x_dist + self.K[0, 2]
        v_dist = self.K[1, 1] * y_dist + self.K[1, 2]
        
        grid = torch.stack([
            2 * u_dist / (w - 1) - 1, 
            2 * v_dist / (h - 1) - 1
        ], dim=-1).unsqueeze(0)
        
        return F.grid_sample(image_tensor, grid, align_corners=True)

    def project_4d_query(self, points_3d, ego_pose, timestamp):
        """
        Projects a 3D point into the 2D frame using ego-motion at time T.
        points_3d: (N, 3) voxels in world coordinates
        ego_pose: (4, 4) matrix for the UGV at 'timestamp'
        """
        points_homo = torch.cat([points_3d, torch.ones(len(points_3d), 1)], dim=-1)
        points_cam = (ego_pose @ points_homo.T).T
        
        pixels_2d = (self.K @ points_cam[:, :3].T).T / z
        
        return pixels_2d[:, :2], timestamp
    
