# Test HDF5 to_hdf5() and from_hdf5() Methods

This notebook tests the new HDF5 serialization methods for the Flight class, demonstrating:
- Saving raw flight data with metadata
- Synchronizing data and saving each result under an automatically generated timestamp label (`rev_YYYYMMDD_hhmm`)
- Loading and reconstructing Flight objects
- Handling multiple sync versions with version selection

In [None]:
# Make the project package importable from a local checkout.
# NOTE(review): this is a machine-specific absolute path; the notebook will not
# import `pils` on another machine unless the package is installed or this
# path is adjusted.
import sys
sys.path.insert(0, '/home/gabriele/Documents/data_loader')

import h5py
from datetime import datetime
import polars as pl
import numpy as np
from pathlib import Path
import json

# Silence every warning category for the whole kernel session.
# NOTE(review): this also hides deprecation and data-conversion warnings raised
# by the code under test — consider narrowing the filter.
import warnings
warnings.filterwarnings('ignore')

## Section 1: Create Sample Flight Data

First, let's create a sample Flight object with drone and sensor data to test saving and loading.

In [None]:
from pils.flight import Flight, DroneData, PayloadData

# Number of rows in every synthetic stream, and the seed that makes the
# "random" columns identical on every Restart & Run All.
N_SAMPLES = 100
SEED = 42
rng = np.random.default_rng(SEED)

# All three streams share one time base: 1000, 1010, 1020, ...
timestamps = [1000 + i * 10 for i in range(N_SAMPLES)]

# Create sample flight info
flight_info = {
    "drone_data_folder_path": "/tmp/test_flight/drone",
    "aux_data_folder_path": "/tmp/test_flight/aux",
    "flight_name": "Test Flight 001",
    "drone_id": "DJI-M300-001"
}

# Create Flight object
flight = Flight(flight_info)

# Set metadata
flight.set_metadata({
    "takeoff_time": datetime(2025, 2, 2, 10, 30),
    "landing_time": datetime(2025, 2, 2, 11, 15),
    "pilot": "John Doe",
    "weather": "Clear skies",
    "temperature": 22.5,
    "notes": "Test flight for HDF5 serialization"
})

# Create sample drone data: a straight-line track with noisy attitude angles
drone_df = pl.DataFrame({
    "timestamp": timestamps,
    "latitude": np.linspace(40.0, 40.01, N_SAMPLES),
    "longitude": np.linspace(-74.0, -73.99, N_SAMPLES),
    "altitude": np.linspace(100, 150, N_SAMPLES),
    "roll": rng.normal(0, 5, N_SAMPLES),
    "pitch": rng.normal(0, 5, N_SAMPLES),
    "yaw": rng.normal(0, 10, N_SAMPLES),
})

# Create sample sensor data
gps_df = pl.DataFrame({
    "timestamp": timestamps,
    "lat": np.linspace(40.0, 40.01, N_SAMPLES),
    "lon": np.linspace(-74.0, -73.99, N_SAMPLES),
    "accuracy": rng.uniform(1, 5, N_SAMPLES),
    # Upper bound is exclusive, so values fall in 8..14
    "satellites": rng.integers(8, 15, N_SAMPLES),
})

imu_df = pl.DataFrame({
    "timestamp": timestamps,
    "accel_x": rng.normal(0, 0.1, N_SAMPLES),
    "accel_y": rng.normal(0, 0.1, N_SAMPLES),
    "accel_z": rng.normal(9.8, 0.1, N_SAMPLES),
    "gyro_x": rng.normal(0, 0.01, N_SAMPLES),
    "gyro_y": rng.normal(0, 0.01, N_SAMPLES),
    "gyro_z": rng.normal(0, 0.01, N_SAMPLES),
})

# Assign data to flight
flight.raw_data.drone_data = DroneData(drone_df, None)
flight.raw_data.payload_data = PayloadData()
flight.raw_data.payload_data.gps = gps_df
flight.raw_data.payload_data.imu = imu_df

print("✓ Created sample flight with drone and sensor data")
print(f"  Drone data shape: {flight.raw_data.drone_data.drone.shape}")
print(f"  GPS data shape: {flight.raw_data.payload_data.gps.shape}")
print(f"  IMU data shape: {flight.raw_data.payload_data.imu.shape}")
print(f"  Metadata: {flight.metadata}")

## Section 2: Save Raw Data Without Synchronization

Test the `to_hdf5()` method to save raw flight data to an HDF5 file.

In [None]:
# Target file for every save/load step in this notebook
output_file = Path("/tmp/test_flight_001.h5")
output_file.parent.mkdir(parents=True, exist_ok=True)


def print_structure(name, obj):
    """Print one HDF5 object, indented by how deep its path is nested.

    Datasets are shown with their shape; groups are shown with their
    attributes. Always returns None so that `visititems` keeps walking.
    """
    pad = "  " * name.count('/')
    if isinstance(obj, h5py.Dataset):
        print(f"{pad}Dataset: {name} {obj.shape}")
        return
    if not isinstance(obj, h5py.Group):
        return
    print(f"{pad}Group: {name}")
    # A group without attributes simply yields nothing here
    for attr_name, attr_value in obj.attrs.items():
        print(f"{pad}  @{attr_name}: {attr_value}")


# Write the raw streams only; no synchronization pass is requested
version_raw = flight.to_hdf5(str(output_file), synchronize=False)
print(f"✓ Saved flight with raw data to {output_file}")
print(f"  Version: {version_raw}")

# Walk the file that was just written and dump its layout
with h5py.File(str(output_file), 'r') as f:
    print("\nHDF5 File Structure:")
    f.visititems(print_structure)

## Section 3: Save with Synchronized Data (Version 1)

Now save with synchronization enabled. This should create a `synchronized_data` group containing a version subgroup named with the `rev_YYYYMMDD_hhmm` timestamp format.

In [None]:
import time  # also used by the following section to space out the two saves

# Save with synchronization (version 1)
sync_version_1 = flight.to_hdf5(
    str(output_file),
    synchronize=True,
    sync_params={'target_rate_hz': 50.0},
    sync_method='linear'
)

print(f"✓ Synchronized and saved version 1: {sync_version_1}")


def print_structure(name, obj, depth=0):
    """Print one HDF5 object using its leaf name, indented by `depth` levels.

    Datasets are shown with their shape; groups are shown with their
    attributes, each value truncated to 60 characters. Returns None so
    that `visititems` continues the traversal.
    """
    indent = "  " * depth
    leaf = name.split('/')[-1]
    if isinstance(obj, h5py.Dataset):
        print(f"{indent}Dataset: {leaf} {obj.shape}")
    elif isinstance(obj, h5py.Group):
        print(f"{indent}Group: {leaf}")
        for key, val in obj.attrs.items():
            val_str = str(val)[:60]  # Truncate long values
            print(f"{indent}  @{key}: {val_str}")


# Show HDF5 structure with synchronized_data
with h5py.File(str(output_file), 'r') as f:
    print("\nUpdated HDF5 File Structure:")

    # visititems() already walks the entire file from the root, so it is
    # called exactly once; wrapping it in a loop over the top-level keys would
    # print the whole tree once per key. The nesting depth is the number of
    # '/' separators in the object's path.
    f.visititems(
        lambda name, obj: print_structure(name, obj, depth=name.count('/'))
    )

## Section 4: Save Multiple Synchronized Versions

Add another synchronized version with different parameters to test version management.

In [None]:
# Version labels have minute resolution (rev_YYYYMMDD_hhmm), so a pause of a
# couple of seconds cannot guarantee a new label. Sleep until the wall clock
# rolls over into the next minute instead (1-60 s).
# NOTE(review): assumes to_hdf5() derives the label from the current
# wall-clock minute, as described in this notebook — confirm.
seconds_to_next_minute = 60 - datetime.now().second
print(f"Waiting {seconds_to_next_minute}s for the next minute boundary...")
time.sleep(seconds_to_next_minute)

# Save version 2 with different sync params
sync_version_2 = flight.to_hdf5(
    str(output_file),
    synchronize=True,
    sync_params={'target_rate_hz': 100.0},  # Different rate
    sync_method='linear'
)

print(f"✓ Synchronized and saved version 2: {sync_version_2}")

# List all available sync versions
with h5py.File(str(output_file), 'r') as f:
    if "synchronized_data" in f:
        sync_group = f["synchronized_data"]
        # Iterating a group yields its member names; the fixed-width
        # timestamp labels sort chronologically as plain strings.
        versions = sorted(sync_group)
        print(f"\nAvailable synchronized versions: {versions}")

        for version in versions:
            v_group = sync_group[version]
            print(f"\n  {version}:")
            print(f"    sync_method: {v_group.attrs.get('sync_method', 'N/A')}")
            print(f"    date: {v_group.attrs.get('date', 'N/A')}")
            print(f"    code_version: {v_group.attrs.get('code_version', 'N/A')}")
            if 'sync_params' in v_group.attrs:
                # sync_params is stored as a JSON string attribute
                params = json.loads(v_group.attrs['sync_params'])
                print(f"    sync_params: {params}")

## Section 5: Load Flight from HDF5 - Latest Version

Test the `from_hdf5()` class method to reconstruct a Flight object. By default, it should load the latest synchronized version.

In [None]:
# Load flight with latest sync version (default)
loaded_flight = Flight.from_hdf5(str(output_file))

print("✓ Loaded flight from HDF5 (latest sync version)")
print("\n  Flight Info:")
for key, val in loaded_flight.flight_info.items():
    print(f"    {key}: {val}")

print("\n  Metadata:")
for key, val in loaded_flight.metadata.items():
    print(f"    {key}: {val}")

print("\n  Raw Data Shapes:")
if loaded_flight.raw_data.drone_data:
    print(f"    Drone: {loaded_flight.raw_data.drone_data.drone.shape}")
if loaded_flight.raw_data.payload_data:
    for sensor in loaded_flight.raw_data.payload_data.list_loaded_sensors():
        data = getattr(loaded_flight.raw_data.payload_data, sensor)
        print(f"    {sensor}: {data.shape}")

print("\n  Synchronized Data:")
# hasattr() alone is not enough: the attribute may exist but be None when no
# synchronized version was loaded, and `.shape` would then raise. Compare
# against None explicitly rather than relying on truthiness, which is
# ambiguous for DataFrame-like objects.
synchronized = getattr(loaded_flight.raw_data, 'synchronized_data', None)
if synchronized is None:
    print("    (none loaded)")
else:
    print(f"    Shape: {synchronized.shape}")
    print(f"    Columns: {synchronized.columns[:5]}...")  # First 5 cols