In [1]:
import minari
from torch.utils.data import DataLoader
import torch

import minari
import numpy as np
from typing import Dict


import sys
sys.path.append("..")
from algorithms.utils.wrapper_gym import get_env
from algorithms.utils.dataset import qlearning_dataset


In [2]:
import os

# Root directory that Minari reads local datasets from.
# NOTE(review): this is a machine-specific absolute path — anyone else running
# the notebook has to edit it before the dataset-loading cells can work.
DATASETS_ROOT = "/home/luanagbmartins/Documents/CEIA/offline_to_online/CORL/datasets"
os.environ["MINARI_DATASETS_PATH"] = DATASETS_ROOT

In [3]:
import os

# Echo the configured dataset root; prints "None" when the variable is unset.
print(os.environ.get("MINARI_DATASETS_PATH"))

/home/luanagbmartins/Documents/CEIA/offline_to_online/CORL/datasets


In [4]:
from collections import defaultdict

In [5]:
def discounted_cumsum(x: np.ndarray, gamma: float) -> np.ndarray:
    """Compute the discounted reverse cumulative sum of ``x`` along axis 0.

    ``out[t] = x[t] + gamma * out[t + 1]`` with ``out[-1] = x[-1]``; with
    ``gamma == 1.0`` this is the plain return-to-go.

    Args:
        x: Array (or array-like) whose first axis is the time axis.
        gamma: Discount factor applied once per step.

    Returns:
        Array with the same shape as ``x``. Floating (and complex) inputs keep
        their dtype; integer and boolean inputs are promoted to ``float64`` so
        the discounted terms are not truncated. An empty input yields an empty
        array.
    """
    x = np.asarray(x)
    # An integer accumulator would silently truncate ``gamma * cumsum[t + 1]``
    # on every step, so only inexact dtypes are preserved as-is.
    out_dtype = x.dtype if np.issubdtype(x.dtype, np.inexact) else np.float64
    cumsum = np.zeros_like(x, dtype=out_dtype)
    if x.shape[0] == 0:
        # Nothing to accumulate; ``x[-1]`` below would raise IndexError.
        return cumsum
    cumsum[-1] = x[-1]
    for t in reversed(range(x.shape[0] - 1)):
        cumsum[t] = x[t] + gamma * cumsum[t + 1]
    return cumsum

In [None]:
dataset = minari.load_dataset("playground/G1JoystickRoughTerrain-expert-v0")

# Per-episode trajectory dicts and their lengths (number of actions).
traj, traj_len = [], []

# Raw observation arrays of every episode, used for the normalization stats.
obs = []

for episode in dataset.iterate_episodes():
    num_steps = len(episode.actions)
    episode_data = {
        # Minari's EpisodeData keeps the initial and the final observation
        # (T + 1 rows for T actions). Keep only the observations that have a
        # matching action so every per-step array below has the same length.
        "observations": np.array(episode.observations[:num_steps], dtype=np.float32),
        "actions": np.array(episode.actions, dtype=np.float32),
        "rewards": np.array(episode.rewards, dtype=np.float32),
        # An episode step counts as terminal if it was terminated or truncated.
        "terminals": np.array(
            episode.terminations | episode.truncations, dtype=np.float32
        ),
    }
    # return-to-go if gamma=1.0, just discounted returns else
    episode_data["returns"] = discounted_cumsum(
        episode_data["rewards"], gamma=1.0
    )
    traj.append(episode_data)
    traj_len.append(num_steps)

    obs.append(episode.observations)

# needed for normalization, weighted sampling, other stats can be added also
# Concatenate once: the flattened observation array can be large.
all_obs = np.concatenate(obs)
info = {
    "obs_mean": all_obs.mean(0, keepdims=True),
    # Small epsilon keeps later divisions by the std finite.
    "obs_std": all_obs.std(0, keepdims=True) + 1e-6,
    "traj_lens": np.array(traj_len),
}

In [None]:
def qlearning_dataset(dataset: minari.MinariDataset) -> Dict[str, np.ndarray]:
    """Flatten all episodes of ``dataset`` into arrays of single transitions.

    For each episode the observation sequence is split into current
    observations (all but the last row) and next observations (all but the
    first row); the per-episode pieces are then concatenated along axis 0.

    Returns:
        Dict with keys ``observations``, ``actions``, ``next_observations``
        (all cast to float32), ``rewards`` (dtype left as stored) and
        ``terminals`` (element-wise ``terminations | truncations``).
    """
    columns = {
        "observations": [],
        "actions": [],
        "next_observations": [],
        "rewards": [],
        "terminals": [],
    }

    for ep in dataset.iterate_episodes():
        states = ep.observations
        columns["observations"].append(states[:-1].astype(np.float32))
        columns["next_observations"].append(states[1:].astype(np.float32))
        columns["actions"].append(ep.actions.astype(np.float32))
        columns["rewards"].append(ep.rewards)
        columns["terminals"].append(ep.terminations | ep.truncations)

    return {key: np.concatenate(chunks) for key, chunks in columns.items()}


# Flatten the loaded dataset into transition arrays. This calls the
# qlearning_dataset defined in this cell, which shadows the function of the
# same name imported from algorithms.utils.dataset at the top of the notebook.
qdataset = qlearning_dataset(dataset)

In [None]:
# Collapse every action dimension into one vector and report the global range.
actions = qdataset["actions"].flatten()
action_low, action_high = actions.min(), actions.max()
print(f"Min action value: {action_low}")
print(f"Max action value: {action_high}")

In [None]:
class ReplayBuffer(torch.utils.data.Dataset):
    """Map-style dataset that serves single transitions from flat arrays.

    ``dataset_dict`` must provide the keys ``observations``, ``actions``,
    ``rewards``, ``next_observations`` and ``terminals``; the number of
    transitions is taken from the length of ``observations``.
    """

    def __init__(self, dataset_dict):
        # Expose each column as an attribute of the same name.
        for field in (
            "observations",
            "actions",
            "rewards",
            "next_observations",
            "terminals",
        ):
            setattr(self, field, dataset_dict[field])
        self.size = len(self.observations)

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        """Return ``[obs, action, reward, next_obs, terminal]`` for ``idx``.

        Observations and actions are wrapped with ``torch.from_numpy``;
        reward and terminal flag are converted to float32 tensors.
        """
        state = torch.from_numpy(self.observations[idx])
        action = torch.from_numpy(self.actions[idx])
        reward = torch.tensor(self.rewards[idx], dtype=torch.float32)
        next_state = torch.from_numpy(self.next_observations[idx])
        done = torch.tensor(self.terminals[idx], dtype=torch.float32)
        return [state, action, reward, next_state, done]


# Wrap the flattened transitions in a map-style dataset and a shuffling
# DataLoader that yields mini-batches of 128 transitions.
replay_buffer = ReplayBuffer(qdataset)
dataloader = DataLoader(replay_buffer, batch_size=128, shuffle=True)

# Pull a single mini-batch as a smoke test. Because shuffle=True, which
# transitions end up in it may differ between runs.
batch = next(iter(dataloader))

In [None]:
# Number of tensors in the batch and the shape of the batched observations
# (the first element returned by ReplayBuffer.__getitem__).
len(batch), batch[0].shape

In [17]:
#!/usr/bin/env python3
"""
Script to analyze a Minari dataset (e.g. Go1Footstand) and calculate appropriate
target returns for Decision Transformer training.
"""

import minari
import numpy as np
from collections import defaultdict

def analyze_dataset(dataset_id):
    """Print length/return statistics of a Minari dataset and suggest DT targets.

    Args:
        dataset_id: Identifier passed to ``minari.load_dataset``.

    Returns:
        Tuple ``(target_return_high, target_return_medium)``: the 95th and the
        75th percentile of the undiscounted per-episode returns.

    Raises:
        ValueError: If the dataset yields no episodes, since none of the
            statistics are defined in that case.
    """
    print(f"Loading dataset: {dataset_id}")
    dataset = minari.load_dataset(dataset_id)

    returns = []
    episode_lengths = []

    for episode in dataset.iterate_episodes():
        # np.sum avoids a Python-level loop over every reward of the episode.
        returns.append(np.sum(episode.rewards))
        episode_lengths.append(len(episode.rewards))

    if not returns:
        raise ValueError(
            f"Dataset {dataset_id!r} contains no episodes; "
            "cannot compute return statistics."
        )

    returns = np.array(returns)
    # Non-finite returns are still sanitized with nan_to_num as before, but
    # the replacement is reported because it skews every statistic below.
    num_non_finite = int(np.count_nonzero(~np.isfinite(returns)))
    if num_non_finite:
        print(
            f"Warning: {num_non_finite} episode(s) have non-finite returns; "
            "they are replaced via np.nan_to_num before computing statistics."
        )
    returns = np.nan_to_num(returns)
    episode_lengths = np.array(episode_lengths)

    # Each percentile is computed once with a scalar q (keeps the dtype of the
    # original scalar calls) and reused for both the report and the targets.
    pct = {q: np.percentile(returns, q) for q in (25, 50, 75, 90, 95)}

    print(f"\nDataset Statistics for {dataset_id}:")
    print(f"Number of episodes: {len(returns)}")
    print(f"Episode lengths - Mean: {episode_lengths.mean():.1f}, Std: {episode_lengths.std():.1f}")
    print(f"Episode lengths - Min: {episode_lengths.min()}, Max: {episode_lengths.max()}")
    print("\nReturn Statistics:")
    print(f"Mean return: {returns.mean():.1f}")
    print(f"Std return: {returns.std():.1f}")
    print(f"Min return: {returns.min():.1f}")
    print(f"Max return: {returns.max():.1f}")
    print(f"25th percentile: {pct[25]:.1f}")
    print(f"50th percentile (median): {pct[50]:.1f}")
    print(f"75th percentile: {pct[75]:.1f}")
    print(f"90th percentile: {pct[90]:.1f}")
    print(f"95th percentile: {pct[95]:.1f}")

    # Calculate target returns for DT
    # Typically use high percentile values as target returns
    target_return_high = pct[95]  # 95th percentile
    target_return_medium = pct[75]  # 75th percentile

    print("\nSuggested target_returns for DT:")
    print(f"High target: {target_return_high:.0f}")
    print(f"Medium target: {target_return_medium:.0f}")
    print(f"Config format: target_returns: [{target_return_high:.0f}, {target_return_medium:.0f}]")

    return target_return_high, target_return_medium

In [20]:
# NOTE(review): this rebinds `dataset` to a string id, while an earlier cell
# binds the same name to the object returned by minari.load_dataset. Re-running
# cells that consume that object after this one would receive a str instead —
# consider a distinct name such as `dataset_id`.
dataset = "playground/H1InplaceGaitTracking-expert-v0"
analyze_dataset(dataset)

Loading dataset: playground/H1InplaceGaitTracking-expert-v0

Dataset Statistics for playground/H1InplaceGaitTracking-expert-v0:
Number of episodes: 1100
Episode lengths - Mean: 976.0, Std: 98.1
Episode lengths - Min: 320, Max: 1000

Return Statistics:
Mean return: 34.6
Std return: 4.8
Min return: 11.5
Max return: 38.5
25th percentile: 32.9
50th percentile (median): 36.4
75th percentile: 37.5
90th percentile: 38.0
95th percentile: 38.3

Suggested target_returns for DT:
High target: 38
Medium target: 37
Config format: target_returns: [38, 37]


(np.float32(38.337242), np.float32(37.462265))