In [1]:
import pickle
import zipfile
import os
from pathlib import Path

def load_trajectories_from_zip(zip_path):
    """
    Load pickled data from a zip file containing trajectory data.

    The first archive member whose name ends in ``.pkl`` or ``.pickle`` is
    unpickled. If no member has such an extension, the first regular file
    in the archive is tried instead; directory entries (names ending in
    ``/``) are skipped because they carry no data to unpickle.

    Args:
        zip_path (str): Path to the zip file

    Returns:
        The object produced by unpickling the selected archive member

    Raises:
        FileNotFoundError: If ``zip_path`` does not exist.
        ValueError: If the file is not a valid zip archive, the archive
            contains no regular files, or the selected member is empty,
            truncated or otherwise not a valid pickle stream.
    """
    if not os.path.exists(zip_path):
        raise FileNotFoundError(f"Zip file not found: {zip_path}")

    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_file:
            # List all entries in the zip (this includes directory entries)
            file_list = zip_file.namelist()
            print(f"Files in zip: {file_list}")

            # Look for pickle files by extension
            pickle_files = [f for f in file_list if f.endswith(('.pkl', '.pickle'))]

            if not pickle_files:
                # No recognised extension: fall back to the first real file.
                print("No .pkl files found, attempting to load first file as pickle...")
                # Archives usually list their directories first; opening a
                # directory entry yields zero bytes and can never unpickle.
                regular_files = [f for f in file_list if not f.endswith('/')]
                if not regular_files:
                    raise ValueError("No files found in zip archive")
                pickle_files = regular_files[:1]

            # Load the first candidate
            pickle_file = pickle_files[0]
            print(f"Loading pickle file: {pickle_file}")

            # SECURITY: unpickling can execute arbitrary code. Only call this
            # function on archives that come from a trusted source.
            with zip_file.open(pickle_file) as f:
                return pickle.load(f)

    except zipfile.BadZipFile as e:
        raise ValueError(f"Invalid zip file: {zip_path}") from e
    except (pickle.UnpicklingError, EOFError) as e:
        # EOFError is what pickle raises for an empty or truncated stream.
        raise ValueError(f"Error unpickling data: {e}") from e

def inspect_data(data):
    """
    Print a short structural summary of a loaded object.

    Always prints the object's type. After that:
      * dict  -> the keys, then each value's type and length ('N/A' when
                 the value has no ``__len__``)
      * list  -> the length, the first item's type and, if that item is a
                 dict, its keys
      * tuple -> the length and the type of every item
      * other -> the result of ``dir()`` on the object

    Args:
        data: The loaded data
    """
    print(f"\nData type: {type(data)}")

    if isinstance(data, dict):
        print(f"Dictionary keys: {list(data.keys())}")
        for name, entry in data.items():
            size = len(entry) if hasattr(entry, '__len__') else 'N/A'
            print(f"  {name}: {type(entry)} - {size}")
        return

    if isinstance(data, list):
        print(f"List length: {len(data)}")
        if not data:
            # Nothing further to report for an empty list
            return
        head = data[0]
        print(f"First item type: {type(head)}")
        if isinstance(head, dict):
            print(f"First item keys: {list(head.keys())}")
        return

    if isinstance(data, tuple):
        print(f"Tuple length: {len(data)}")
        for position, member in enumerate(data):
            print(f"  Item {position}: {type(member)}")
        return

    # Any other object: fall back to listing its attributes
    print(f"Data attributes: {dir(data)}")

if __name__ == "__main__":
    # Archive to inspect (absolute path on the author's machine)
    zip_path = "/Users/lockewang/FIG/software-control/external_data/ui_tars_roll_out/trajectories_batch_10_901-1000.zip"

    try:
        # Step 1: read the pickled payload out of the archive
        print("Loading trajectories from zip file...")
        trajectories = load_trajectories_from_zip(zip_path)

        # Step 2: print a generic structural summary
        print("\n=== Data Inspection ===")
        inspect_data(trajectories)

        # Step 3: for a non-empty list, describe its first element in detail
        if isinstance(trajectories, list) and trajectories:
            print("\n=== Sample Trajectory Info ===")
            sample_traj = trajectories[0]
            if isinstance(sample_traj, dict):
                print(f"Sample trajectory keys: {list(sample_traj.keys())}")
                for key, value in sample_traj.items():
                    # Prefer .shape when the value has one, then a length,
                    # and finally just the type.
                    if hasattr(value, 'shape'):
                        summary = f"  {key} shape: {value.shape}"
                    elif hasattr(value, '__len__'):
                        summary = f"  {key} length: {len(value)}"
                    else:
                        summary = f"  {key}: {type(value)}"
                    print(summary)

        # Report the element count when the object has one
        if hasattr(trajectories, '__len__'):
            loaded = len(trajectories)
        else:
            loaded = 'data'
        print(f"\nSuccessfully loaded {loaded} from zip file!")

    except Exception as err:
        # Top-level boundary of the script: report the failure and finish
        print(f"Error loading data: {err}")

Loading trajectories from zip file...
Files in zip: ['901-1000/', '901-1000/worker_16_task_openweb_1301_20250602_162500/', '901-1000/worker_16_task_openweb_1301_20250602_162500/screenshots/', '901-1000/worker_16_task_openweb_1301_20250602_162500/screenshots/screenshot_0059_ACTION_CLICK_COMPLETE.png', '901-1000/worker_16_task_openweb_1301_20250602_162500/screenshots/screenshot_0061_ACTION_WAIT_COMPLETE.png', '901-1000/worker_16_task_openweb_1301_20250602_162500/screenshots/screenshot_0049_ACTION_WAIT_COMPLETE.png', '901-1000/worker_16_task_openweb_1301_20250602_162500/screenshots/screenshot_0016_ACTION_CLICK_COMPLETE.png', '901-1000/worker_16_task_openweb_1301_20250602_162500/screenshots/screenshot_0009_ACTION_CLICK_COMPLETE.png', '901-1000/worker_16_task_openweb_1301_20250602_162500/screenshots/screenshot_0028_ACTION_WAIT_COMPLETE.png', '901-1000/worker_16_task_openweb_1301_20250602_162500/screenshots/screenshot_0034_ACTION_CLICK_COMPLETE.png', '901-1000/worker_16_task_openweb_1301_202