# Full Tracker: Download, Process, and Upload Data
This notebook demonstrates the full pipeline for handling raw data:
1. Download data from a cloud bucket.
2. Process the data (e.g., align videos, then run detection and tracking).
3. Upload the processed data back to the cloud bucket.

In [None]:
# Import Required Libraries
import os
import uuid
from pathlib import Path
from typing import List, Optional, Dict, Tuple

# Reload helper for dev work
%load_ext autoreload
%autoreload 2


# Import Custom Scripts
from scripts.thermal_processing import process_directory, validate_session_structure
from scripts.align_videos_manually import align_videos, step1_crop_and_prepare, step2_spatial_alignment, save_warped_video
from scripts.local_model_tracking import run_tracking, overlay_tracks_on_video, visualize_detections_from_video
import subprocess

# from Desktop.labeling_data.data_structure.Dima_collab.collab_data.file_utils import expand_path, get_project_root
# from Desktop.labeling_data.data_structure.Dima_collab.collab_data.gcs_utils import GCSClient

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# Setup Configuration

# Service-account key used to authenticate against Google Cloud Storage.
# The GOOGLE_APPLICATION_CREDENTIALS environment variable wins when it is set
# to a non-empty value, so the notebook can run on another machine without
# editing this cell; otherwise the original machine-specific absolute path is
# used unchanged.
CREDENTIALS_PATH = (
    os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
    or "/Users/inesaitsahalia/Desktop/labeling_data/data_structure/Dima_collab/collab_data/api-keys/collab-data-463313-c340ad86b28e.json"
)
PROJECT_ID = "collab-data-463313"
BUCKET_NAME = "fieldwork_data"  # Update with your bucket name
LOCAL_DOWNLOAD_DIR = Path("/path/to/local/download")  # Update with your local download directory
LOCAL_PROCESSED_DIR = Path("/path/to/local/processed")  # Update with your local processed directory

In [None]:
# Connect to Google Cloud Storage
# NOTE(review): the only GCSClient import visible in this notebook is
# commented out in the imports cell -- make sure GCSClient is importable
# before running this cell.
gcs_client = GCSClient(project_id=PROJECT_ID, credentials_path=CREDENTIALS_PATH)

# Sanity-check the connection by printing whatever list_buckets() returns
print("Available buckets:", gcs_client.list_buckets())

In [None]:
# Download Data from Cloud Bucket
#
# Copies every object matched under <BUCKET_NAME>/<CLOUD_PREFIX>/ into
# LOCAL_DOWNLOAD_DIR, keeping each object's path relative to the prefix.
# The sub-folder layout must survive the download: later cells look for
# LOCAL_DOWNLOAD_DIR / "rgb_<n>" and LOCAL_DOWNLOAD_DIR / "thermal_<n>", and
# flattening everything into one directory would also let same-named files
# from different folders overwrite each other.
CLOUD_PREFIX = "your-cloud-prefix"  # Update with your cloud prefix
LOCAL_DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)

cloud_root = f"{BUCKET_NAME}/{CLOUD_PREFIX}/"
for blob in gcs_client.glob(f"{cloud_root}**"):
    blob_name = str(blob)
    # NOTE(review): assumes glob() yields "<bucket>/<prefix>/<relative path>"
    # strings -- confirm against GCSClient. If an entry does not start with
    # that root, fall back to the bare file name (the previous behaviour).
    if blob_name.startswith(cloud_root):
        relative_name = blob_name[len(cloud_root):]
    else:
        relative_name = Path(blob_name).name

    # Skip the prefix itself and "directory" placeholder entries.
    if not relative_name or relative_name.endswith("/"):
        continue

    local_path = LOCAL_DOWNLOAD_DIR / relative_name
    local_path.parent.mkdir(parents=True, exist_ok=True)
    gcs_client.download_file(blob, str(local_path))

print("Downloaded files:", list(LOCAL_DOWNLOAD_DIR.iterdir()))

In [None]:
# Process the Data
print("Processing data...")

# Check the layout of the downloaded session before doing any work on it
print("Validating session structure...")
validate_session_structure(LOCAL_DOWNLOAD_DIR)

# Thermal file processing: input and output both point at the download
# directory, so results are written alongside the raw data.
print("Processing thermal files...")
process_directory(
    folder_path=LOCAL_DOWNLOAD_DIR,
    out_path=LOCAL_DOWNLOAD_DIR,
    color='magma',
    preview=True,
    max_frames=None,
    fps=30,
)


In [None]:
# Default parameters for alignment

frame_size = (640, 480)  # Default frame size
# Frame cap passed to align_videos.
# NOTE(review): the previous comment said "process all frames", which
# contradicts the value 10 -- presumably None lifts the cap; confirm.
max_frames = 10
warp_to = "rgb"  # Warp target: the thermal video is transformed, the RGB one is not
rotation_angle = 0.0  # Default rotation angle
skip_homography = False  # Default to not skip homography
skip_translation = True  # Default to skip translation
camera_numbers = [1, 2]


def _pick_mp4(video_dir: Path, label: str) -> Optional[Path]:
    """Return the MP4 to use from *video_dir*, or None when there is none.

    Matches both ``*.mp4`` and ``*.MP4``. The two glob results are merged
    through a set so a file is never listed twice where glob matching is
    case-insensitive, and sorted so that "the first one" is deterministic
    rather than dependent on directory listing order.

    Args:
        video_dir: Directory to search (a missing directory yields no files).
        label: Short name ("rgb" or "thermal") used in the progress output.
    """
    candidates = sorted({*video_dir.glob("*.mp4"), *video_dir.glob("*.MP4")})
    print(f'files in {label}_dir:', candidates)
    if not candidates:
        return None
    if len(candidates) > 1:
        print(f"Multiple MP4 files found in {video_dir}. Using the first one.")
    return candidates[0]


for camera in camera_numbers:
    print(f"Processing camera {camera}...")

    # Per-camera input folders inside the download directory
    rgb_dir = LOCAL_DOWNLOAD_DIR / f"rgb_{camera}"
    thermal_dir = LOCAL_DOWNLOAD_DIR / f"thermal_{camera}"

    # Both videos are required; skip the camera as soon as one is missing.
    rgb_video_path = _pick_mp4(rgb_dir, "rgb")
    if rgb_video_path is None:
        print(f"No MP4 file found in {rgb_dir}. Skipping camera {camera}.")
        continue

    thermal_video_path = _pick_mp4(thermal_dir, "thermal")
    if thermal_video_path is None:
        print(f"No MP4 file found in {thermal_dir}. Skipping camera {camera}.")
        continue

    print(f"RGB video path: {rgb_video_path}")
    print(f"Thermal video path: {thermal_video_path}")

    # Aligned outputs go under <processed>/aligned/<modality>_<camera>/
    output_dir_rgb = LOCAL_PROCESSED_DIR / 'aligned' / f"rgb_{camera}"
    output_dir_thm = LOCAL_PROCESSED_DIR / 'aligned' / f"thermal_{camera}"
    output_dir_rgb.mkdir(parents=True, exist_ok=True)
    output_dir_thm.mkdir(parents=True, exist_ok=True)

    # Align videos
    print(f"Aligning videos for camera {camera}...")

    align_videos(
        rgb_video_path,
        thermal_video_path,
        output_dir_rgb,
        output_dir_thm,
        frame_size=frame_size,
        max_frames=max_frames,
        warp_to=warp_to,
        rotation_angle=rotation_angle,
        skip_homography=skip_homography,
        skip_translation=skip_translation,
    )

In [None]:
# Detection and tracking
#
# For each camera: run the detection script on the warped thermal video,
# run tracking, then render the tracks onto the thermal video and onto the
# RGB frames. A camera is skipped as soon as one of its inputs is missing or
# detection fails.
import sys  # needed for sys.executable in the subprocess call below

print("Running detection and tracking...")
for camera in camera_numbers:
    print(f"Running detection and tracking on: thermal_{camera}")

    # Per-camera output folders written by the alignment step
    thermal_out_dir = LOCAL_PROCESSED_DIR / 'aligned' / f"thermal_{camera}"
    rgb_out_dir = LOCAL_PROCESSED_DIR / 'aligned' / f"rgb_{camera}"

    thermal_video_path = thermal_out_dir / f"warped_thermal_{camera}.mp4"
    if not thermal_video_path.exists():
        print(f"Thermal video not found for camera {camera}. Skipping...")
        continue

    # Run local_model_inference script.
    # sys.executable is used instead of a bare "python" so the script runs
    # with the same interpreter (and installed packages) as this notebook's
    # kernel rather than whatever "python" happens to be first on PATH.
    # NOTE(review): the script and weights paths are relative, so this cell
    # must be run with the project root as the working directory.
    print(f"Running object detection on: {thermal_video_path}")
    try:
        subprocess.run(
            [
                sys.executable,
                "scripts/local_model_inference.py",
                "--vid_name", thermal_video_path.name,
                "--root_dir", str(thermal_video_path.parent),
                "--model_weights", "scripts/model/weights.pt",
            ],
            check=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"Error during object detection for camera {camera}: {e}")
        continue

    # Run tracking
    print(f"Running tracking on: thermal_{camera}")
    run_tracking(LOCAL_PROCESSED_DIR, "thermal", camera)

    tracked_csv = thermal_out_dir / f'thermal_{camera}_tracks.csv'
    if not tracked_csv.exists():
        print(f"Tracking CSV not found for camera {camera}. Skipping visualization.")
        continue

    # Visualization: thermal video first, then the RGB frames
    visualize_detections_from_video(
        csv_path=tracked_csv,
        video_path=thermal_video_path,
        output_video_path=thermal_out_dir / f"visualized_thermal_{camera}.mp4",
    )
    print(f"Visualizing tracks for rgb camera {camera}...")
    overlay_tracks_on_video(
        csv_path=tracked_csv,
        frame_dir=rgb_out_dir / 'annotated_frames',
        output_video=rgb_out_dir / f"overlayed_tracks_{camera}.mp4",
    )

In [None]:
# Upload Processed Data to Cloud Bucket
#
# Walks LOCAL_PROCESSED_DIR recursively and uploads every regular file,
# keeping its path relative to LOCAL_PROCESSED_DIR in the object name.
# The earlier cells write everything under "aligned/<modality>_<camera>/",
# so iterating only the top level would hand a directory to upload_file and
# never reach the actual outputs. Files sitting directly in
# LOCAL_PROCESSED_DIR still get the same object name as before.
# NOTE(review): this also uploads every file in the annotated_frames
# folders -- filter here if only the videos/CSVs are wanted.
CLOUD_PROCESSED_PREFIX = "your-cloud-processed-prefix"  # Update with your processed data prefix

uploaded_files = []
for file in sorted(LOCAL_PROCESSED_DIR.rglob("*")):
    if not file.is_file():
        continue
    # as_posix() keeps "/" separators in the object name on every platform
    relative_name = file.relative_to(LOCAL_PROCESSED_DIR).as_posix()
    cloud_path = f"{BUCKET_NAME}/{CLOUD_PROCESSED_PREFIX}/{relative_name}"
    gcs_client.upload_file(str(file), cloud_path)
    uploaded_files.append(file)

print("Uploaded processed files:", uploaded_files)