# Full Tracker: Download, Process, and Upload Data
This notebook demonstrates the full pipeline for handling raw data:
1. Download data from a cloud bucket.
2. Process the data (e.g., align videos, run detection, and tracking).
3. Upload the processed data back to the cloud bucket.

In [11]:
# Import Required Libraries
import os
import uuid
from pathlib import Path
from typing import List, Optional, Dict, Tuple
from dotenv import load_dotenv

# Import Custom Scripts
from collab_env.tracking.thermal_processing import process_directory, validate_session_structure
from collab_env.tracking.align_videos_manually import align_videos, step1_crop_and_prepare, step2_spatial_alignment, save_warped_video
from collab_env.tracking.local_model_tracking import run_tracking, overlay_tracks_on_video, visualize_detections_from_video
import subprocess

from collab_env.data.file_utils import expand_path, get_project_root
from collab_env.data.gcs_utils import GCSClient

skip_download = True
skip_thermal_extraction = True

# Reload helper for dev work
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
# Setup Configuration
# Make variables from a .env file available through os.environ.
load_dotenv()

PROJECT_ID = "collab-data-463313"

# Location of the credentials file, read from COLLAB_DATA_KEY and resolved
# against the project root (an unset variable falls back to the empty string).
raw_key_setting = os.environ.get("COLLAB_DATA_KEY", "")
data_key = expand_path(raw_key_setting, get_project_root())
CREDENTIALS_PATH = expand_path(data_key.as_posix(), get_project_root())

# Connect to Google Cloud Storage
gcs_client = GCSClient(project_id=PROJECT_ID, credentials_path=CREDENTIALS_PATH)

[32m2025-07-28 14:51:42.736[0m | [1mINFO    [0m | [36mcollab_env.data.gcs_utils[0m:[36m__init__[0m:[36m34[0m - [1mUsing credentials from /Users/dima/git/collab-environment/config-local/collab-data-463313-c340ad86b28e.json[0m
[32m2025-07-28 14:51:42.737[0m | [1mINFO    [0m | [36mcollab_env.data.gcs_utils[0m:[36m__init__[0m:[36m40[0m - [1mUsing project collab-data-463313[0m


In [13]:
# Sanity check: a successful bucket listing shows the client can reach the project.
available_buckets = gcs_client.list_buckets()
print("Available buckets:", available_buckets)

Available buckets: ['fieldwork_curated', 'fieldwork_processed']


In [14]:
# Bucket to read the raw session data from -- update with your bucket name.
BUCKET_NAME = "fieldwork_curated"

# Top-level entries of the bucket (rendered as the cell output).
gcs_client.glob(BUCKET_NAME + "/*")

['fieldwork_curated/2024_02_06-session_0001',
 'fieldwork_curated/2024_05_18-session_0001',
 'fieldwork_curated/2024_05_18-session_0002',
 'fieldwork_curated/2024_05_18-session_0003',
 'fieldwork_curated/2024_05_18-session_0004',
 'fieldwork_curated/2024_05_18-session_0005',
 'fieldwork_curated/2024_05_18-session_0006',
 'fieldwork_curated/2024_05_19-session_0001',
 'fieldwork_curated/STRUCTURE.md']

In [15]:
# Choose the session to work on and list everything stored under it.
# (Nothing is downloaded here; this cell only defines the prefix and shows the listing.)
SESSION_FOLDER = "2024_02_06-session_0001"  # Update with data folder (session)
CLOUD_PREFIX = "/".join((BUCKET_NAME, SESSION_FOLDER))
gcs_client.glob(CLOUD_PREFIX + "/**")

['fieldwork_curated/2024_02_06-session_0001/',
 'fieldwork_curated/2024_02_06-session_0001/metadata_206_1.yaml',
 'fieldwork_curated/2024_02_06-session_0001/rgb_1',
 'fieldwork_curated/2024_02_06-session_0001/rgb_1/',
 'fieldwork_curated/2024_02_06-session_0001/rgb_1/GX010119.MP4',
 'fieldwork_curated/2024_02_06-session_0001/rgb_2',
 'fieldwork_curated/2024_02_06-session_0001/rgb_2/',
 'fieldwork_curated/2024_02_06-session_0001/rgb_2/GX010119.MP4',
 'fieldwork_curated/2024_02_06-session_0001/thermal_1',
 'fieldwork_curated/2024_02_06-session_0001/thermal_1/',
 'fieldwork_curated/2024_02_06-session_0001/thermal_1/20240206071804298.csq',
 'fieldwork_curated/2024_02_06-session_0001/thermal_2',
 'fieldwork_curated/2024_02_06-session_0001/thermal_2/',
 'fieldwork_curated/2024_02_06-session_0001/thermal_2/20240206071808444.csq']

In [16]:
# Local folders for this session, both resolved against the project root:
# data/raw/<session> receives the download, data/processed/<session> the outputs.
LOCAL_DOWNLOAD_DIR, LOCAL_PROCESSED_DIR = (
    expand_path(f"data/{stage}/{SESSION_FOLDER}", get_project_root())
    for stage in ("raw", "processed")
)

In [17]:
# Download the session from the cloud bucket (no-op while skip_download is True).
# Files are written under LOCAL_DOWNLOAD_DIR; the folder layout of the session is
# mirrored (empty) under LOCAL_PROCESSED_DIR so later stages have a place to write.
if not skip_download:
    # mkdir(exist_ok=True) is already a no-op for folders that exist.
    LOCAL_DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
    LOCAL_PROCESSED_DIR.mkdir(parents=True, exist_ok=True)

    listing = gcs_client.glob(f"{CLOUD_PREFIX}/**")

    # Decide what is a folder from the listing itself instead of from the file
    # extension: an entry is a folder when it is listed with a trailing "/" or when
    # it is an ancestor of another entry. Guessing from the suffix skipped files
    # without an extension and tried to download folders whose name contains a dot.
    # NOTE(review): relies on the listing carrying "<folder>/" markers for empty
    # folders, as the listing of this bucket does -- confirm for other buckets.
    folder_keys = set()
    for entry in listing:
        key = Path(entry).as_posix()  # normalised: no trailing slash
        if entry.endswith("/"):
            folder_keys.add(key)
        folder_keys.update(parent.as_posix() for parent in Path(key).parents)

    # The listing contains both "x" and "x/"; normalising and de-duplicating means
    # every entry is handled exactly once, in a stable order.
    for blob in sorted({Path(entry).as_posix() for entry in listing}):
        relative_path = Path(blob).relative_to(CLOUD_PREFIX)

        if blob in folder_keys:
            processed_folder = LOCAL_PROCESSED_DIR / relative_path
            if not processed_folder.exists():
                print(f"Creating folder: {processed_folder}")
                processed_folder.mkdir(parents=True, exist_ok=True)
            continue

        local_path = LOCAL_DOWNLOAD_DIR / relative_path
        if not local_path.parent.exists():
            print(f"Creating folder: {local_path.parent}")
            local_path.parent.mkdir(parents=True, exist_ok=True)
        print(f"Downloading file: {blob} to {local_path}")
        gcs_client.gcs.get_file(blob, str(local_path))



In [18]:
# Display the resolved local download directory (rendered as the cell output).
LOCAL_DOWNLOAD_DIR

PosixPath('/Users/dima/git/collab-environment/data/raw/2024_02_06-session_0001')

In [19]:
# Check the downloaded session folder before any processing starts.
print("Validating session structure...")
issues = validate_session_structure(LOCAL_DOWNLOAD_DIR)

# Show the literal text 'None' when the validator reported nothing.
issues_report = 'None' if len(issues) == 0 else issues
print(f"Issues found: {issues_report}")

Validating session structure...
Session structure is valid.
Issues found: None


In [20]:
# Thermal files processing (no-op while skip_thermal_extraction is True).
if not skip_thermal_extraction:
    print("Processing thermal files...")

    # Call with preview=False to choose the vmin/vmax automatically; with
    # preview=True the user will be asked to choose the vmin/vmax.
    thermal_options = dict(color='magma', preview=True, max_frames=100, fps=30)

    # Read from the raw download folder, write into the processed folder.
    process_directory(
        folder_path=LOCAL_DOWNLOAD_DIR,
        out_path=LOCAL_PROCESSED_DIR,
        **thermal_options,
    )



In [21]:
# Default parameters for alignment (all forwarded unchanged to align_videos)
frame_size = (640, 480)  # Default frame size
max_frames = 10  # Frame cap for this run: 10 frames, i.e. NOT the whole video
warp_to = "rgb"  # Default warp to rgb, thermal is changing, not rgb
rotation_angle = 0.0  # Default rotation angle
skip_homography = False  # Default to not skip homography
skip_translation = True  # Default to skip translation
camera_numbers = [1, 2]


def _list_mp4_files(directory):
    """Return the MP4 files directly inside ``directory``, sorted by path.

    The extension is compared case-insensitively in a single pass, so a file is
    never listed twice, and the result is sorted so that "the first one" is the
    same file on every run instead of depending on directory listing order.
    A missing directory yields an empty list.
    """
    return sorted(p for p in directory.glob("*") if p.suffix.lower() == ".mp4")


for camera in camera_numbers:
    print(f"Processing camera {camera}...")

    # RGB comes from the raw download, thermal from the processed (extracted) output.
    rgb_dir = LOCAL_DOWNLOAD_DIR / f"rgb_{camera}"
    thermal_dir = LOCAL_PROCESSED_DIR / f"thermal_{camera}"

    # Find the MP4 file in the RGB directory
    rgb_video_files = _list_mp4_files(rgb_dir)
    print('files in rgb_dir:', rgb_video_files)
    if not rgb_video_files:
        print(f"No MP4 file found in {rgb_dir}. Skipping camera {camera}.")
        continue
    if len(rgb_video_files) > 1:
        print(f"Multiple MP4 files found in {rgb_dir}. Using the first one.")
    rgb_video_path = rgb_video_files[0]

    # Find the MP4 file in the thermal directory
    thermal_video_files = _list_mp4_files(thermal_dir)
    print('files in thermal_dir:', thermal_video_files)
    if not thermal_video_files:
        print(f"No MP4 file found in {thermal_dir}. Skipping camera {camera}.")
        continue
    if len(thermal_video_files) > 1:
        print(f"Multiple MP4 files found in {thermal_dir}. Using the first one.")
    thermal_video_path = thermal_video_files[0]

    print(f"RGB video path: {rgb_video_path}")
    print(f"Thermal video path: {thermal_video_path}")

    # Per-camera output folders under <processed>/aligned
    output_dir_rgb = LOCAL_PROCESSED_DIR / 'aligned' / f"rgb_{camera}"
    output_dir_thm = LOCAL_PROCESSED_DIR / 'aligned' / f"thermal_{camera}"
    output_dir_rgb.mkdir(parents=True, exist_ok=True)
    output_dir_thm.mkdir(parents=True, exist_ok=True)

    # Align videos
    print(f"Aligning videos for camera {camera}...")

    align_videos(
        rgb_video_path,
        thermal_video_path,
        output_dir_rgb,
        output_dir_thm,
        frame_size=frame_size,
        max_frames=max_frames,
        warp_to=warp_to,
        rotation_angle=rotation_angle,
        skip_homography=skip_homography,
        skip_translation=skip_translation,
    )

Processing camera 1...
files in rgb_dir: [PosixPath('/Users/dima/git/collab-environment/data/raw/2024_02_06-session_0001/rgb_1/GX010119.MP4')]
files in thermal_dir: [PosixPath('/Users/dima/git/collab-environment/data/processed/2024_02_06-session_0001/thermal_1/thermal_-20_20.mp4'), PosixPath('/Users/dima/git/collab-environment/data/processed/2024_02_06-session_0001/thermal_1/thermal_-5_21.mp4')]
Multiple MP4 files found in /Users/dima/git/collab-environment/data/processed/2024_02_06-session_0001/thermal_1. Using the first one.
RGB video path: /Users/dima/git/collab-environment/data/raw/2024_02_06-session_0001/rgb_1/GX010119.MP4
Thermal video path: /Users/dima/git/collab-environment/data/processed/2024_02_06-session_0001/thermal_1/thermal_-20_20.mp4
Aligning videos for camera 1...
Draw a rectangle on the LEFT (RGB) image to crop. Press ENTER or SPACE when done.
Select a ROI and then press SPACE or ENTER button!
Cancel the selection process by pressing c button!
Selected crop (on RGB, re

Cropping/Rotating video: 100%|██████████| 10/10 [00:00<00:00, 30.28it/s]


✅ Saved cropped/rotated video to /Users/dima/git/collab-environment/data/processed/2024_02_06-session_0001/aligned/rgb_1/cropped_rgb.mp4
✅ Saved cropped RGB video to /Users/dima/git/collab-environment/data/processed/2024_02_06-session_0001/aligned/rgb_1/cropped_rgb.mp4
Skipping translation step.
Click at least 4 corresponding points in the overlay. Adjust alpha with [ and ]. Press any key when done.
Now select the same 8 points in the other image, in the same order.
Click at least 4 corresponding points in the image, then press any key to continue.


RuntimeError: Invalid point selection for homography.

In [None]:
from scripts.local_model_inference import process_video_with_rfdetr

# Detection and tracking, one camera at a time. A camera is skipped (with a
# message) when its warped thermal video is missing, when detection raises, or
# when no tracking CSV is found afterwards.
print("Running detection and tracking...")
aligned_root = LOCAL_PROCESSED_DIR / 'aligned'

for camera in camera_numbers:
    print(f"Running detection and tracking on: thermal_{camera}")

    thermal_aligned_dir = aligned_root / f"thermal_{camera}"
    rgb_aligned_dir = aligned_root / f"rgb_{camera}"

    # Input: the warped thermal video for this camera
    thermal_video_path = thermal_aligned_dir / f"warped_thermal_{camera}.mp4"
    if not thermal_video_path.exists():
        print(f"Thermal video not found for camera {camera}. Skipping...")
        continue

    # Detection outputs and model weights
    output_csv_path = thermal_aligned_dir / "output_results.csv"
    output_video_path = thermal_aligned_dir / "annotated_warped_thermal.mp4"
    # NOTE(review): relative path -- presumably resolved against the current
    # working directory by the callee; confirm.
    checkpoint_path = "scripts/model/weights.pt"

    # Run local_model_inference script
    detection_args = dict(
        video_path=thermal_video_path,
        output_csv_path=output_csv_path,
        output_video_path=output_video_path,
        checkpoint_path=checkpoint_path,
        confidence=0.5,
    )
    try:
        process_video_with_rfdetr(**detection_args)
    except Exception as e:
        print(f"Error during object detection for camera {camera}: {e}")
        continue

    # Run tracking
    print(f"Running tracking on: thermal_{camera}")
    run_tracking(LOCAL_PROCESSED_DIR, "thermal", camera)

    tracked_csv = thermal_aligned_dir / f"thermal_{camera}_tracks.csv"
    if not tracked_csv.exists():
        print(f"Tracking CSV not found for camera {camera}. Skipping visualization.")
        continue

    # Visualization: first on the thermal video, then overlaid on the RGB frames
    visualize_detections_from_video(
        csv_path=tracked_csv,
        video_path=thermal_video_path,
        output_video_path=thermal_aligned_dir / f"visualized_thermal_{camera}.mp4",
    )
    print(f"Visualizing tracks for rgb camera {camera}...")
    overlay_tracks_on_video(
        csv_path=tracked_csv,
        frame_dir=rgb_aligned_dir / 'annotated_frames',
        output_video=rgb_aligned_dir / f"overlayed_tracks_{camera}.mp4",
    )


In [None]:
# Upload Processed Data to Cloud Bucket
# NOTE(review): this writes into BUCKET_NAME (the bucket the session was read from)
# under a placeholder prefix -- set the intended destination before running.
CLOUD_PROCESSED_PREFIX = "your-cloud-processed-prefix"  # Update with your processed data prefix

# Walk the processed folder recursively and upload files only. iterdir() saw just
# the top level and handed sub-folders (e.g. aligned/, thermal_1/) to upload_file,
# so nested outputs never reached the bucket under their own path.
processed_files = sorted(p for p in LOCAL_PROCESSED_DIR.rglob("*") if p.is_file())

for file in processed_files:
    # Keep the path relative to the processed folder so the cloud layout mirrors it.
    relative_key = file.relative_to(LOCAL_PROCESSED_DIR).as_posix()
    cloud_path = f"{BUCKET_NAME}/{CLOUD_PROCESSED_PREFIX}/{relative_key}"
    gcs_client.upload_file(str(file), cloud_path)

print("Uploaded processed files:", processed_files)