# Minari EDA

This notebook loads a Minari dataset and explores trajectory statistics for Monte Carlo / MDP analysis.


In [8]:
# Install dependencies if needed (run once; %pip targets the active kernel's environment)
# %pip install minari gymnasium matplotlib

import matplotlib.pyplot as plt
import minari
import numpy as np

# Choose a dataset ID from: https://minari.farama.org/
# Example: https://minari.farama.org/datasets/mujoco/walker2d/simple-v0/
# This identifier is what the loading cell passes to minari.load_dataset(),
# so changing it here switches the dataset every later cell analyzes.
DATASET_ID = "mujoco/walker2d/simple-v0"


In [9]:
# Optional: list available remote datasets
N_PREVIEW = 20  # how many dataset IDs to print; the full listing is long

try:
    remote = minari.list_remote_datasets()
except AttributeError:
    # Fall back only when the installed Minari does not expose the helper at
    # the package top level. Any other failure (e.g. a network error) is left
    # to propagate, because the fallback performs the same remote request and
    # would only repeat it.
    from minari.storage import hosting

    remote = hosting.list_remote_datasets()

# `remote` is keyed by dataset ID; sort for a stable, readable preview.
remote_ids = sorted(remote.keys())
print("Remote datasets (sample):")
print("\n".join(remote_ids[:N_PREVIEW]))


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get

Remote datasets (sample):
D4RL/antmaze/large-diverse-v1
D4RL/antmaze/large-play-v1
D4RL/antmaze/medium-diverse-v1
D4RL/antmaze/medium-play-v1
D4RL/antmaze/umaze-diverse-v1
D4RL/antmaze/umaze-v1
D4RL/door/cloned-v2
D4RL/door/expert-v2
D4RL/door/human-v2
D4RL/hammer/cloned-v2
D4RL/hammer/expert-v2
D4RL/hammer/human-v2
D4RL/kitchen/complete-v2
D4RL/kitchen/mixed-v2
D4RL/kitchen/partial-v2
D4RL/minigrid/fourrooms-random-v0
D4RL/minigrid/fourrooms-v0
D4RL/pen/cloned-v2
D4RL/pen/expert-v2
D4RL/pen/human-v2


In [10]:
# Load the dataset named by DATASET_ID; download=True asks Minari to fetch it
# when it is not available locally.
dataset = minari.load_dataset(DATASET_ID, download=True)
print("Dataset:", dataset)
print("Episodes:", len(dataset))

# Peek at first episode
ep0 = dataset[0]
print("Episode type:", type(ep0))
# Public attribute names only (capped at 20) to show what an episode exposes.
print("Episode attributes:", [a for a in dir(ep0) if not a.startswith("_")][:20])
# Count steps from rewards (one entry per transition). Minari stores the
# initial observation as well, so len(ep0.observations) would over-count by
# one -- and for Dict observation spaces it would count keys, not steps.
# This also matches how the EDA cell measures episode length.
print("Episode length:", len(ep0.rewards))



Downloading mujoco/walker2d/simple-v0 from Farama servers...


Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Fetching 2 files: 100%|██████████| 2/2 [00:20<00:00, 10.43s/it]



Dataset mujoco/walker2d/simple-v0 downloaded to C:\Users\ethan\.minari\datasets\mujoco\walker2d\simple-v0
Dataset: <minari.dataset.minari_dataset.MinariDataset object at 0x000001FAAE462CD0>
Episodes: 1017


AttributeError: 'EpisodeData' object has no attribute 'keys'

In [None]:
# Basic EDA: episode lengths and returns


def plot_histogram(values, title, xlabel, bins=30):
    """Show a histogram of ``values`` on its own figure.

    Args:
        values: 1-D sequence or array of numbers to bin.
        title: Title drawn above the plot.
        xlabel: Label for the x-axis; the y-axis is always labelled "count".
        bins: Number of histogram bins.
    """
    # `plt` is matplotlib.pyplot, imported in the notebook's import cell.
    _, ax = plt.subplots()
    ax.hist(values, bins=bins)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel("count")
    plt.show()


# Single pass over the dataset: record (number of steps, undiscounted return)
# for every episode so each episode is only read once.
episode_stats = [(len(ep.rewards), np.sum(ep.rewards)) for ep in dataset]

# Build the two arrays separately so lengths keep an integer dtype.
lengths = np.array([n_steps for n_steps, _ in episode_stats])
returns = np.array([total for _, total in episode_stats])

print("Episode length: mean", lengths.mean(), "std", lengths.std())
print("Return: mean", returns.mean(), "std", returns.std())

plot_histogram(lengths, "Episode Length Distribution", "length")
plot_histogram(returns, "Return Distribution", "return")


In [None]:
# Monte Carlo: repeatedly resample episode returns (with replacement) and
# record each resample's mean to see how the mean-return estimate varies.
rng = np.random.default_rng(7)

MC_RUNS = 1000
SAMPLE_SIZE = 50

# One resample per run, drawn in run order so the seeded generator is
# consumed in the same sequence on every execution.
mc_means = np.array(
    [
        returns[rng.choice(len(returns), size=SAMPLE_SIZE, replace=True)].mean()
        for _ in range(MC_RUNS)
    ]
)

# Summary of the resampled means: centre, spread (sample std, ddof=1) and the
# central 95% percentile band.
interval_95 = np.percentile(mc_means, [2.5, 97.5])
print("MC mean return:", mc_means.mean())
print("MC std of mean:", mc_means.std(ddof=1))
print("95% interval:", interval_95)

# Histogram of the resampled means.
plt.hist(mc_means, bins=30)
plt.title("Monte Carlo: Mean Return Distribution")
plt.xlabel("mean return")
plt.ylabel("count")
plt.show()
