In [None]:
import h5py
import numpy as np
from tqdm import tqdm
from scipy import ndimage
import os

# Input directory holding the source HDF5 files (Kaggle dataset mount).
base_path = "/kaggle/input/hdf5-chunk1"

# Output directory writable on Kaggle.
output_dir = "/kaggle/working"

dataset_types = ['train', 'val', 'test']


def _copy_dataset(source, target_group, name):
    """Copy one HDF5 dataset into ``target_group`` under ``name``.

    Non-scalar datasets are written gzip-compressed (level 6) with
    auto-selected chunking. Scalar (0-d) datasets are written as-is, because
    h5py rejects chunk/filter options for them with a TypeError.
    """
    if source.shape == ():
        target_group.create_dataset(name, data=source[()])
        return

    target_group.create_dataset(
        name,
        data=source,
        compression='gzip',
        compression_opts=6,
        chunks=True,
    )


# Make sure the destination exists before any output file is opened for writing.
os.makedirs(output_dir, exist_ok=True)

for dataset_type in dataset_types:
    input_path = os.path.join(base_path, f"gas_and_brake_{dataset_type}_comma_chunk_1_w_imgs.h5py")
    output_path = os.path.join(output_dir, f"filtered_chunk1_{dataset_type}.hdf5")

    print(f"🔄 Processing {dataset_type} from: {input_path}")

    # Check before opening the output so a missing split leaves no empty file behind.
    if not os.path.exists(input_path):
        print(f"❌ File not found: {input_path}")
        continue

    with h5py.File(input_path, "r") as h5_file, h5py.File(output_path, "w") as h_out:
        keys = list(h5_file.keys())

        for key in tqdm(keys, desc=f"Processing {dataset_type}"):
            group_in = h5_file[key]

            # Optional sample filter, currently disabled: skip groups that have
            # no 'desired_dist', or whose median-filtered 'desired_dist' is zero
            # for more than 20% of its entries (discard uninformative samples).
            # if 'desired_dist' not in group_in:
            #     continue
            # desired = np.array(group_in['desired_dist'][()])
            # filtered = ndimage.median_filter(desired, size=12)
            # if (filtered == 0).mean() > 0.2:
            #     continue

            group_out = h_out.create_group(key)

            # Mirror every dataset of the input group into the output group.
            for col in group_in.keys():
                _copy_dataset(group_in[col], group_out, col)

    print(f"✅ Saved in: {output_path}\n")

In [None]:
import h5py
import os
import cv2
import numpy as np
from tqdm import tqdm

# === CONFIGURATION ===
# NOTE(review): both paths are machine-specific absolute paths; adjust them
# before running this cell on another machine.
input_file = "/Users/gabriele/Desktop/Magistrale/Explainable_and_trustworthy_AI/progetti/venv/concept_gridlock/filtered_chunk1_test2.hdf5"  # HDF5 file whose sequences are rendered
output_dir = "/Users/gabriele/Desktop/Magistrale/Explainable_and_trustworthy_AI/progetti/venv/concept_gridlock/videos_test2"  # Output directory
os.makedirs(output_dir, exist_ok=True)

# === MAIN LOOP ===
# Write one MP4 per top-level group that holds an 'image' dataset.
with h5py.File(input_file, "r") as h5f:
    for seq_key in tqdm(h5f.keys(), desc="Processing sequences"):
        group = h5f[seq_key]

        if 'image' not in group:
            print(f"❌ No image for {seq_key}")
            continue

        # Whole sequence is read into memory; expected shape (N, H, W, 3) in
        # RGB order, as required by the colour conversion below.
        images = group['image'][()]

        if len(images) < 2:
            print(f"⚠️ Sequence too short: {seq_key}")
            continue

        height, width = images[0].shape[0], images[0].shape[1]
        # '|' is replaced so the sequence key yields a portable file name.
        video_name = seq_key.replace("|", "_") + ".mp4"
        video_path = os.path.join(output_dir, video_name)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(video_path, fourcc, 10, (width, height))  # 10 FPS

        # A writer that failed to open drops every frame silently, so report
        # it instead of claiming the video was saved.
        if not out.isOpened():
            out.release()
            print(f"❌ Could not open video writer: {video_path}")
            continue

        try:
            for img in images:
                # OpenCV expects 8-bit BGR frames.
                img_bgr = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_RGB2BGR)
                out.write(img_bgr)
        finally:
            # Always finalise the container, even if a frame conversion fails.
            out.release()

        print(f"🎥 Video saved: {video_path}")