In [None]:
import h5py
import numpy as np

In [18]:
# Accumulators shared by every dataset cell below: each processed episode
# appends one array to each list. Re-running this cell resets all of them.
(
    all_actions,
    all_observations,
    all_rewards_to_go,
    all_time_to_go,
    all_goal_vector,
) = ([] for _ in range(5))

## Dataset 1

In [19]:
file_path = "../data/external/diverse_medium_maze_data.hdf5"

# The file is opened in "r+" mode because the derived per-episode datasets
# (rewards_to_go, time_to_go, goal_vector) are written back into it when missing.
# NOTE: every run of this cell appends to the shared all_* lists; re-run the
# accumulator cell first, otherwise the same episodes are collected twice.
with h5py.File(file_path, "r+") as f:
    key = None  # remains None when the file contains no episodes
    for key in f.keys():
        curr_episode = f[key]
        episode_obs = curr_episode["observations"]

        # There is one more observation than actions/rewards, so the trailing
        # observation is dropped to keep every per-step array the same length.
        actions = curr_episode["actions"][:]
        observations = episode_obs["observation"][:-1]
        goal_locations = episode_obs["desired_goal"][:-1]
        current_locations = episode_obs["achieved_goal"][:-1]
        rewards = curr_episode["rewards"][:]
        terminations = curr_episode["terminations"][:]
        truncations = curr_episode["truncations"][:]

        # The arrays are concatenated index-aligned later on, so a length
        # mismatch here would silently corrupt the merged dataset.
        num_steps = len(rewards)
        if not (
            len(actions)
            == len(observations)
            == len(goal_locations)
            == len(current_locations)
            == num_steps
        ):
            raise ValueError(
                f"{key}: per-step arrays have inconsistent lengths "
                f"(actions={len(actions)}, observations={len(observations)}, "
                f"goals={len(goal_locations)}, rewards={num_steps})"
            )

        # Rewards-to-go (desired reward): sum of rewards from step i to the end,
        # computed as a reversed cumulative sum instead of a Python loop.
        rewards_to_go = np.cumsum(rewards[::-1], dtype=rewards.dtype)[::-1].copy()
        # Time-to-go (desired horizon): remaining steps, counting the current one.
        time_to_go = np.arange(num_steps, 0, -1, dtype=rewards.dtype)
        # Goal vector: offset from the achieved position to the desired goal.
        goal_vector = (goal_locations - current_locations).astype(
            goal_locations.dtype, copy=False
        )

        # Persist each derived dataset that is not stored yet. To force one to
        # be regenerated, delete it first, e.g. `del curr_episode["rewards_to_go"]`.
        derived = {
            "rewards_to_go": rewards_to_go,
            "time_to_go": time_to_go,
            "goal_vector": goal_vector,
        }
        for name, values in derived.items():
            if name not in curr_episode:
                curr_episode[name] = values

        # Collect this episode for the merged dataset.
        all_actions.append(actions)
        all_observations.append(observations)
        all_rewards_to_go.append(rewards_to_go)
        all_time_to_go.append(time_to_go)
        all_goal_vector.append(goal_vector)

    # Sanity-check summary of the last processed episode.
    if key is None:
        print(f"No episodes found in {file_path}")
    else:
        print(rewards[-5:], rewards_to_go[-5:], time_to_go[-5:])
        # Dimensions:
        print(f"Keys in {key}:", list(curr_episode.keys()))
        print(f"{key} Action dimensions: {actions.shape}")
        print(f"Observation dimensions: {observations.shape}")
        print(f"Reward dimensions: {rewards.shape}")
        print(f"Termination dimensions: {terminations.shape}")
        print(f"Truncation dimensions: {truncations.shape}")
print(len(all_actions))

[0. 0. 0. 0. 0.] [0. 0. 0. 0. 0.] [5. 4. 3. 2. 1.]
Keys in episode_999: ['actions', 'goal_vector', 'infos', 'observations', 'rewards', 'rewards_to_go', 'terminations', 'time_to_go', 'truncations']
episode_999 Action dimensions: (1000, 8)
Observation dimensions: (1000, 27)
Reward dimensions: (1000,)
Termination dimensions: (1000,)
Truncation dimensions: (1000,)
1000


## Dataset 2

In [20]:
file_path = "../data/external/play_medium_maze_data.hdf5"

# The file is opened in "r+" mode because the derived per-episode datasets
# (rewards_to_go, time_to_go, goal_vector) are written back into it when missing.
# NOTE: every run of this cell appends to the shared all_* lists; re-run the
# accumulator cell first, otherwise the same episodes are collected twice.
with h5py.File(file_path, "r+") as f:
    key = None  # remains None when the file contains no episodes
    for key in f.keys():
        curr_episode = f[key]
        episode_obs = curr_episode["observations"]

        # There is one more observation than actions/rewards, so the trailing
        # observation is dropped to keep every per-step array the same length.
        actions = curr_episode["actions"][:]
        observations = episode_obs["observation"][:-1]
        goal_locations = episode_obs["desired_goal"][:-1]
        current_locations = episode_obs["achieved_goal"][:-1]
        rewards = curr_episode["rewards"][:]
        terminations = curr_episode["terminations"][:]
        truncations = curr_episode["truncations"][:]

        # The arrays are concatenated index-aligned later on, so a length
        # mismatch here would silently corrupt the merged dataset.
        num_steps = len(rewards)
        if not (
            len(actions)
            == len(observations)
            == len(goal_locations)
            == len(current_locations)
            == num_steps
        ):
            raise ValueError(
                f"{key}: per-step arrays have inconsistent lengths "
                f"(actions={len(actions)}, observations={len(observations)}, "
                f"goals={len(goal_locations)}, rewards={num_steps})"
            )

        # Rewards-to-go (desired reward): sum of rewards from step i to the end,
        # computed as a reversed cumulative sum instead of a Python loop.
        rewards_to_go = np.cumsum(rewards[::-1], dtype=rewards.dtype)[::-1].copy()
        # Time-to-go (desired horizon): remaining steps, counting the current one.
        time_to_go = np.arange(num_steps, 0, -1, dtype=rewards.dtype)
        # Goal vector: offset from the achieved position to the desired goal.
        goal_vector = (goal_locations - current_locations).astype(
            goal_locations.dtype, copy=False
        )

        # Persist each derived dataset that is not stored yet. To force one to
        # be regenerated, delete it first, e.g. `del curr_episode["rewards_to_go"]`.
        derived = {
            "rewards_to_go": rewards_to_go,
            "time_to_go": time_to_go,
            "goal_vector": goal_vector,
        }
        for name, values in derived.items():
            if name not in curr_episode:
                curr_episode[name] = values

        # Collect this episode for the merged dataset.
        all_actions.append(actions)
        all_observations.append(observations)
        all_rewards_to_go.append(rewards_to_go)
        all_time_to_go.append(time_to_go)
        all_goal_vector.append(goal_vector)

    # Sanity-check summary of the last processed episode.
    if key is None:
        print(f"No episodes found in {file_path}")
    else:
        print(rewards[-5:], rewards_to_go[-5:], time_to_go[-5:])
        # Dimensions:
        print(f"Keys in {key}:", list(curr_episode.keys()))
        print(f"{key} Action dimensions: {actions.shape}")
        print(f"Observation dimensions: {observations.shape}")
        print(f"Reward dimensions: {rewards.shape}")
        print(f"Termination dimensions: {terminations.shape}")
        print(f"Truncation dimensions: {truncations.shape}")
print(len(all_actions))


[1. 1. 1. 1. 1.] [5. 4. 3. 2. 1.] [5. 4. 3. 2. 1.]
Keys in episode_999: ['actions', 'goal_vector', 'infos', 'observations', 'rewards', 'rewards_to_go', 'terminations', 'time_to_go', 'truncations']
episode_999 Action dimensions: (1000, 8)
Observation dimensions: (1000, 27)
Reward dimensions: (1000,)
Termination dimensions: (1000,)
Truncation dimensions: (1000,)
2000


## Dataset 3

In [21]:
file_path = "../data/external/umaze_diverse_data.hdf5"

# The file is opened in "r+" mode because the derived per-episode datasets
# (rewards_to_go, time_to_go, goal_vector) are written back into it when missing.
# NOTE: every run of this cell appends to the shared all_* lists; re-run the
# accumulator cell first, otherwise the same episodes are collected twice.
with h5py.File(file_path, "r+") as f:
    key = None  # remains None when the file contains no episodes
    for key in f.keys():
        curr_episode = f[key]
        episode_obs = curr_episode["observations"]

        # There is one more observation than actions/rewards, so the trailing
        # observation is dropped to keep every per-step array the same length.
        actions = curr_episode["actions"][:]
        observations = episode_obs["observation"][:-1]
        goal_locations = episode_obs["desired_goal"][:-1]
        current_locations = episode_obs["achieved_goal"][:-1]
        rewards = curr_episode["rewards"][:]
        terminations = curr_episode["terminations"][:]
        truncations = curr_episode["truncations"][:]

        # The arrays are concatenated index-aligned later on, so a length
        # mismatch here would silently corrupt the merged dataset.
        num_steps = len(rewards)
        if not (
            len(actions)
            == len(observations)
            == len(goal_locations)
            == len(current_locations)
            == num_steps
        ):
            raise ValueError(
                f"{key}: per-step arrays have inconsistent lengths "
                f"(actions={len(actions)}, observations={len(observations)}, "
                f"goals={len(goal_locations)}, rewards={num_steps})"
            )

        # Rewards-to-go (desired reward): sum of rewards from step i to the end,
        # computed as a reversed cumulative sum instead of a Python loop.
        rewards_to_go = np.cumsum(rewards[::-1], dtype=rewards.dtype)[::-1].copy()
        # Time-to-go (desired horizon): remaining steps, counting the current one.
        time_to_go = np.arange(num_steps, 0, -1, dtype=rewards.dtype)
        # Goal vector: offset from the achieved position to the desired goal.
        goal_vector = (goal_locations - current_locations).astype(
            goal_locations.dtype, copy=False
        )

        # Persist each derived dataset that is not stored yet. To force one to
        # be regenerated, delete it first, e.g. `del curr_episode["rewards_to_go"]`.
        derived = {
            "rewards_to_go": rewards_to_go,
            "time_to_go": time_to_go,
            "goal_vector": goal_vector,
        }
        for name, values in derived.items():
            if name not in curr_episode:
                curr_episode[name] = values

        # Collect this episode for the merged dataset.
        all_actions.append(actions)
        all_observations.append(observations)
        all_rewards_to_go.append(rewards_to_go)
        all_time_to_go.append(time_to_go)
        all_goal_vector.append(goal_vector)

    # Sanity-check summary of the last processed episode.
    if key is None:
        print(f"No episodes found in {file_path}")
    else:
        print(rewards[-5:], rewards_to_go[-5:], time_to_go[-5:])
        # Dimensions:
        print(f"Keys in {key}:", list(curr_episode.keys()))
        print(f"{key} Action dimensions: {actions.shape}")
        print(f"Observation dimensions: {observations.shape}")
        print(f"Reward dimensions: {rewards.shape}")
        print(f"Termination dimensions: {terminations.shape}")
        print(f"Truncation dimensions: {truncations.shape}")
print(len(all_actions))

[1. 1. 1. 1. 1.] [5. 4. 3. 2. 1.] [5. 4. 3. 2. 1.]
Keys in episode_999: ['actions', 'goal_vector', 'infos', 'observations', 'rewards', 'rewards_to_go', 'terminations', 'time_to_go', 'truncations']
episode_999 Action dimensions: (700, 8)
Observation dimensions: (700, 27)
Reward dimensions: (700,)
Termination dimensions: (700,)
Truncation dimensions: (700,)
3430


## Dataset 4

In [22]:
file_path = "../data/external/umaze_play_data.hdf5"

# The file is opened in "r+" mode because the derived per-episode datasets
# (rewards_to_go, time_to_go, goal_vector) are written back into it when missing.
# NOTE: every run of this cell appends to the shared all_* lists; re-run the
# accumulator cell first, otherwise the same episodes are collected twice.
with h5py.File(file_path, "r+") as f:
    key = None  # remains None when the file contains no episodes
    for key in f.keys():
        curr_episode = f[key]
        episode_obs = curr_episode["observations"]

        # There is one more observation than actions/rewards, so the trailing
        # observation is dropped to keep every per-step array the same length.
        actions = curr_episode["actions"][:]
        observations = episode_obs["observation"][:-1]
        goal_locations = episode_obs["desired_goal"][:-1]
        current_locations = episode_obs["achieved_goal"][:-1]
        rewards = curr_episode["rewards"][:]
        terminations = curr_episode["terminations"][:]
        truncations = curr_episode["truncations"][:]

        # The arrays are concatenated index-aligned later on, so a length
        # mismatch here would silently corrupt the merged dataset.
        num_steps = len(rewards)
        if not (
            len(actions)
            == len(observations)
            == len(goal_locations)
            == len(current_locations)
            == num_steps
        ):
            raise ValueError(
                f"{key}: per-step arrays have inconsistent lengths "
                f"(actions={len(actions)}, observations={len(observations)}, "
                f"goals={len(goal_locations)}, rewards={num_steps})"
            )

        # Rewards-to-go (desired reward): sum of rewards from step i to the end,
        # computed as a reversed cumulative sum instead of a Python loop.
        rewards_to_go = np.cumsum(rewards[::-1], dtype=rewards.dtype)[::-1].copy()
        # Time-to-go (desired horizon): remaining steps, counting the current one.
        time_to_go = np.arange(num_steps, 0, -1, dtype=rewards.dtype)
        # Goal vector: offset from the achieved position to the desired goal.
        goal_vector = (goal_locations - current_locations).astype(
            goal_locations.dtype, copy=False
        )

        # Persist each derived dataset that is not stored yet. To force one to
        # be regenerated, delete it first, e.g. `del curr_episode["rewards_to_go"]`.
        derived = {
            "rewards_to_go": rewards_to_go,
            "time_to_go": time_to_go,
            "goal_vector": goal_vector,
        }
        for name, values in derived.items():
            if name not in curr_episode:
                curr_episode[name] = values

        # Collect this episode for the merged dataset.
        all_actions.append(actions)
        all_observations.append(observations)
        all_rewards_to_go.append(rewards_to_go)
        all_time_to_go.append(time_to_go)
        all_goal_vector.append(goal_vector)

    # Sanity-check summary of the last processed episode.
    if key is None:
        print(f"No episodes found in {file_path}")
    else:
        print(rewards[-5:], rewards_to_go[-5:], time_to_go[-5:])
        # Dimensions:
        print(f"Keys in {key}:", list(curr_episode.keys()))
        print(f"{key} Action dimensions: {actions.shape}")
        print(f"Observation dimensions: {observations.shape}")
        print(f"Reward dimensions: {rewards.shape}")
        print(f"Termination dimensions: {terminations.shape}")
        print(f"Truncation dimensions: {truncations.shape}")
print(len(all_actions))

[1. 1. 1. 1. 1.] [5. 4. 3. 2. 1.] [5. 4. 3. 2. 1.]
Keys in episode_999: ['actions', 'goal_vector', 'infos', 'observations', 'rewards', 'rewards_to_go', 'terminations', 'time_to_go', 'truncations']
episode_999 Action dimensions: (700, 8)
Observation dimensions: (700, 27)
Reward dimensions: (700,)
Termination dimensions: (700,)
Truncation dimensions: (700,)
4860


## Saving the data

In [23]:
# Stack the per-episode arrays of every accumulator along the time axis so that
# row i of each result refers to the same timestep. Despite the `_list` suffix,
# each result is a single NumPy array.
(
    all_actions_list,
    all_observations_list,
    all_rewards_to_go_list,
    all_time_to_go_list,
    all_goal_vector_list,
) = (
    np.concatenate(per_episode_arrays, axis=0)
    for per_episode_arrays in (
        all_actions,
        all_observations,
        all_rewards_to_go,
        all_time_to_go,
        all_goal_vector,
    )
)

In [24]:
# Show the shape of every merged array on one line; the leading dimension
# should be identical across all five.
merged_shapes = [
    merged.shape
    for merged in (
        all_actions_list,
        all_observations_list,
        all_rewards_to_go_list,
        all_time_to_go_list,
        all_goal_vector_list,
    )
]
print(*merged_shapes)

(4000000, 8) (4000000, 27) (4000000,) (4000000,) (4000000, 2)


In [25]:
# Destination HDF5 file for the merged arrays. The save cell opens this path in
# "w" mode, so an existing file at this location is replaced.
output_file_path = "../data/processed/antmaze_merged_concatenated.hdf5"

In [26]:
# (dataset name, label used in the printed summary, merged array)
merged_arrays = (
    ("actions", "Actions", all_actions_list),
    ("observations", "Observations", all_observations_list),
    ("rewards_to_go", "Rewards_to_go", all_rewards_to_go_list),
    ("time_to_go", "Time_to_go", all_time_to_go_list),
    ("goal_vector", "Goal_vector", all_goal_vector_list),
)

# Store every merged array as a dataset of one "concatenated_data" group.
with h5py.File(output_file_path, "w") as f:
    group = f.create_group("concatenated_data")
    for dataset_name, label, array in merged_arrays:
        group.create_dataset(dataset_name, data=array)
    print(
        f"Concatenated data saved under 'concatenated_data' in {output_file_path}"
    )
    # Report what was written.
    for dataset_name, label, array in merged_arrays:
        print(f"{label} shape: {array.shape}")


Concatenated data saved under 'concatenated_data' in ../data/processed/antmaze_merged_concatenated.hdf5
Actions shape: (4000000, 8)
Observations shape: (4000000, 27)
Rewards_to_go shape: (4000000,)
Time_to_go shape: (4000000,)
Goal_vector shape: (4000000, 2)
