# Exploring the Lift HDF5 Demonstration Data

This notebook explores the structure and contents of the robosuite demonstration data stored in `data/lift.hdf5`.

In [1]:
import h5py
import pandas as pd
import numpy as np

## Load and Explore HDF5 Structure

In [12]:
# Open the HDF5 file and explore its structure
hdf5_path = "data/can.hdf5"
f = h5py.File(hdf5_path, 'r')

# List all demos
demo_keys = [k for k in f['data'].keys() if k.startswith('demo_')]
demo_keys = sorted(demo_keys, key=lambda x: int(x.split('_')[1]))
print(f"Number of demonstrations: {len(demo_keys)}")
print(f"Demo keys: {demo_keys[:5]}...")  # Show first 5

Number of demonstrations: 200
Demo keys: ['demo_0', 'demo_1', 'demo_2', 'demo_3', 'demo_4']...


In [13]:
# Look at the structure of a single demo
demo = f['data/demo_1']
print("Keys in demo_0:", list(demo.keys()))
print("\nShapes:")
print(f"  states: {demo['states'].shape}")
print(f"  actions: {demo['actions'].shape}")

Keys in demo_0: ['actions', 'controller_info', 'interventions', 'policy_acting', 'states', 'user_acting', 'user_info']

Shapes:
  states: (118, 71)
  actions: (118, 7)


## View States (Observations) as DataFrame

In [14]:
# Load states from demo_0 into a DataFrame
states = demo['states'][:]
states_df = pd.DataFrame(states, columns=[f'state_{i}' for i in range(states.shape[1])])
states_df.insert(0, 'timestep', range(len(states_df)))
states_df

Unnamed: 0,timestep,state_0,state_1,state_2,state_3,state_4,state_5,state_6,state_7,state_8,...,state_61,state_62,state_63,state_64,state_65,state_66,state_67,state_68,state_69,state_70
0,0,0.00,0.022310,0.173317,0.026341,-2.602605,-0.002929,2.892690,0.780304,0.020833,...,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,1,0.05,0.020786,0.177717,0.019085,-2.602210,-0.005040,2.889988,0.786096,0.021076,...,-3.502376e+00,0.164347,1.358296e-02,-0.000090,-0.003950,-0.001326,0.000534,0.069533,-0.062991,0.000001
2,2,0.10,0.018932,0.186666,0.012945,-2.597274,-0.005161,2.886058,0.789470,0.023076,...,-3.972872e+00,0.135964,1.184919e-02,-0.000124,-0.004328,-0.005575,0.000016,0.155462,-0.025527,0.000029
3,3,0.15,0.016580,0.197763,0.008353,-2.586970,-0.002538,2.880157,0.786733,0.026332,...,-4.438250e+00,0.112483,1.033675e-02,-0.000129,-0.006999,-0.003487,0.000166,0.143516,-0.099797,0.000034
4,4,0.20,0.013408,0.212364,0.002999,-2.572448,0.001297,2.873127,0.779066,0.030334,...,-4.897996e+00,0.093058,9.017336e-03,-0.000119,-0.004450,-0.006141,0.000120,0.167372,-0.020625,0.000049
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,113,5.65,0.425585,1.277438,0.269629,-0.521266,0.005412,1.876426,2.247526,0.034775,...,-1.909421e-07,0.004095,-4.063007e-08,-0.006077,-0.028036,0.088491,-1.576693,-0.004137,-0.003174,0.000047
114,114,5.70,0.432358,1.296408,0.261712,-0.481256,0.006918,1.849548,2.261263,0.036657,...,4.235504e-07,-0.000989,-2.548162e-08,0.001450,-0.128101,0.294756,0.135934,3.050759,-7.139976,-0.005998
115,115,5.75,0.438488,1.314808,0.254350,-0.440348,0.010407,1.825596,2.272340,0.037759,...,-8.016569e-07,-0.004095,-1.616134e-08,0.006077,-0.028958,0.066866,0.051590,0.709681,-1.665555,-0.000429
116,116,5.80,0.443204,1.331949,0.245834,-0.398300,0.014344,1.799024,2.281691,0.038337,...,5.690486e-07,0.000989,-3.824350e-08,-0.001450,0.026067,-0.054410,-0.026741,-0.604305,1.281712,0.005988


## View Actions as DataFrame

In [15]:
# Load actions from demo_0 into a DataFrame
# Actions are typically: [dx, dy, dz, droll, dpitch, dyaw, gripper]
actions = demo['actions'][:]
action_names = ['dx', 'dy', 'dz', 'droll', 'dpitch', 'dyaw', 'gripper']
actions_df = pd.DataFrame(actions, columns=action_names)
actions_df.insert(0, 'timestep', range(len(actions_df)))
actions_df

Unnamed: 0,timestep,dx,dy,dz,droll,dpitch,dyaw,gripper
0,0,0.024,-0.031,-0.055,-0.004034,0.111751,-0.059279,-1.0
1,1,0.224,-0.266,-0.112,-0.000967,0.108490,-0.043047,-1.0
2,2,0.343,-0.396,-0.078,0.004697,0.097995,-0.023659,-1.0
3,3,0.332,-0.408,-0.217,-0.001387,0.078910,-0.008765,-1.0
4,4,0.452,-0.507,-0.105,-0.005147,0.070747,0.004131,-1.0
...,...,...,...,...,...,...,...,...
113,113,-0.068,-0.400,-0.067,-0.013631,0.016073,-0.101690,-1.0
114,114,-0.003,-0.030,0.111,-0.009163,0.013590,-0.089896,-1.0
115,115,0.078,-0.423,0.257,-0.007688,0.016070,-0.080291,-1.0
116,116,0.031,-0.280,0.234,-0.003425,0.017767,-0.068258,-1.0


## Summary Statistics

In [6]:
# Summary statistics for states
print("States Statistics:")
states_df.describe()

States Statistics:


Unnamed: 0,timestep,state_0,state_1,state_2,state_3,state_4,state_5,state_6,state_7,state_8,...,state_22,state_23,state_24,state_25,state_26,state_27,state_28,state_29,state_30,state_31
count,59.0,59.0,59.0,59.0,59.0,59.0,59.0,59.0,59.0,59.0,...,59.0,59.0,59.0,59.0,59.0,59.0,59.0,59.0,59.0,59.0
mean,29.0,1.45,0.056727,0.700582,-0.027925,-2.085242,-0.080577,2.856204,1.117612,0.034371,...,-0.009586,0.142349,-0.000772,-0.000427,-0.001657964,-0.00136874,0.015853,-0.000328,-0.001137,0.007598531
std,17.175564,0.858778,0.050387,0.248819,0.036405,0.242404,0.039027,0.106795,0.179825,0.007789,...,0.249352,0.20727,0.031037,0.027078,0.004258973,0.007146104,0.07145343,0.028194,0.036585,0.04857814
min,0.0,0.0,-0.04141,0.215418,-0.076971,-2.594136,-0.118015,2.719365,0.773022,0.019696,...,-0.552435,-0.284098,-0.095804,-0.085343,-0.01781963,-0.03773398,-0.2789803,-0.174,-0.151893,-0.2739733
25%,14.5,0.725,0.020252,0.503246,-0.065211,-2.235206,-0.113977,2.752984,0.987434,0.030036,...,-0.25145,-0.029959,0.000238,-0.000837,-2.020576e-07,-1.398623e-07,1.168755e-12,-9e-06,2e-06,-4.790362e-14
50%,29.0,1.45,0.074179,0.81377,-0.030878,-1.969741,-0.095334,2.848559,1.219127,0.039092,...,-0.025411,0.108269,0.000407,-0.000459,-1.312994e-07,-1.393542e-07,2.593247e-08,-9e-06,2e-06,-4.79008e-14
75%,43.5,2.175,0.098396,0.90646,0.011712,-1.903678,-0.055776,2.95009,1.259339,0.039476,...,0.226438,0.338058,0.000846,-0.000117,-1.312994e-07,-1.393542e-07,0.0006545039,-9e-06,2e-06,-4.790079e-14
max,58.0,2.9,0.116594,0.933915,0.017972,-1.847932,-0.006404,3.02621,1.27953,0.039697,...,0.350027,0.511735,0.085759,0.084133,0.003718749,0.01699561,0.226086,0.06401,0.152022,0.1351905


In [None]:
# Summary statistics for actions
print("Actions Statistics:")
actions_df.describe()

## Dataset Overview (All Demos)

In [16]:
# Get overview of all demos
demo_info = []
for key in demo_keys:
    d = f[f'data/{key}']
    demo_info.append({
        'demo': key,
        'episode_length': d['states'].shape[0],
        'state_dim': d['states'].shape[1],
        'action_dim': d['actions'].shape[1],
    })

demo_overview_df = pd.DataFrame(demo_info)
print(f"Total demos: {len(demo_overview_df)}")
print(f"Total timesteps: {demo_overview_df['episode_length'].sum()}")
print(f"\nEpisode length statistics:")
demo_overview_df

Total demos: 200
Total timesteps: 23207

Episode length statistics:


Unnamed: 0,demo,episode_length,state_dim,action_dim
0,demo_0,118,71,7
1,demo_1,118,71,7
2,demo_2,113,71,7
3,demo_3,98,71,7
4,demo_4,102,71,7
...,...,...,...,...
195,demo_195,110,71,7
196,demo_196,114,71,7
197,demo_197,114,71,7
198,demo_198,126,71,7


In [8]:
# Clean up
f.close()