In [1]:
import os
import pickle as pkl
import h5py
import numpy as np

In [2]:
def convert_pkl_folder_to_d4rl(input_folder, output_folder):
    """Convert every ``.pkl`` file in ``input_folder`` into an ``.h5`` file.

    Each pickle must contain a dictionary with the keys ``observations``,
    ``actions``, ``rewards`` and ``terminals``. Those entries, plus ``infos``
    and ``episode_lengths`` when present, are passed through ``np.array`` and
    written as same-named datasets to ``<output_folder>/<name>.h5``, where
    ``<name>`` is the pickle's file name without its trailing ``.pkl``.

    The conversion is best-effort: a file that is not a dictionary, lacks a
    required key, or fails to load/write is reported with ``print`` and
    skipped, and the remaining files are still processed. No output file is
    opened for a pickle that fails validation.

    Args:
        input_folder: Directory that is scanned (non-recursively) for files
            whose name ends in ``.pkl``.
        output_folder: Directory receiving the ``.h5`` files; created if it
            does not exist.

    Returns:
        None. Progress and problems are reported on stdout; the function does
        not raise for per-file or folder-level errors.
    """
    suffix = ".pkl"
    required_keys = ["observations", "actions", "rewards", "terminals"]
    optional_keys = ["infos", "episode_lengths"]

    # Folder-level setup. Failures here (permissions, input path is not a
    # directory, ...) make the whole run impossible, so report and stop.
    try:
        if not os.path.exists(input_folder):
            print(f"Input folder '{input_folder}' does not exist.")
            return

        os.makedirs(output_folder, exist_ok=True)

        # Sorted so the processing order (and the log) is reproducible;
        # os.listdir returns entries in arbitrary order.
        pkl_files = sorted(
            f for f in os.listdir(input_folder) if f.endswith(suffix)
        )
        if not pkl_files:
            print(f"No .pkl files found in {input_folder}.")
            return
    except Exception as e:
        print(f"Error during conversion: {e}")
        return

    for pkl_file in pkl_files:
        input_pkl_path = os.path.join(input_folder, pkl_file)
        # Swap only the trailing extension; str.replace would also rewrite
        # any ".pkl" occurring earlier in the file name.
        output_h5_path = os.path.join(
            output_folder, pkl_file[: -len(suffix)] + ".h5"
        )

        # Errors are handled per file so one bad pickle cannot abort the batch.
        try:
            # SECURITY: unpickling executes arbitrary code embedded in the
            # file. Only run this on pickles from a trusted source.
            with open(input_pkl_path, "rb") as file:
                data = pkl.load(file)

            if not isinstance(data, dict):
                print(
                    f"Skipping {input_pkl_path}, expected a dictionary but got {type(data)}"
                )
                continue

            # Validate before opening the output so a rejected pickle never
            # leaves a partially written .h5 file behind.
            missing_keys = [key for key in required_keys if key not in data]
            if missing_keys:
                for key in missing_keys:
                    print(f"Skipping {input_pkl_path}, missing required key: {key}")
                continue

            with h5py.File(output_h5_path, "w") as f:
                for key in required_keys:
                    f.create_dataset(key, data=np.array(data[key]))
                # Optional entries are copied only when the pickle has them.
                for key in optional_keys:
                    if key in data:
                        f.create_dataset(key, data=np.array(data[key]))

            print(
                f"Converted {input_pkl_path} to D4RL format and saved as {output_h5_path}"
            )
        except Exception as e:
            print(f"Skipping {input_pkl_path}, error during conversion: {e}")

In [None]:
# Source directory with the .pkl files and destination directory for the
# generated .h5 files; both are resolved against the current working directory.
input_folder, output_folder = "Data", "Training"

# Run the batch conversion with the folders configured above.
convert_pkl_folder_to_d4rl(
    input_folder=input_folder,
    output_folder=output_folder,
)