In [None]:
import os
# Run everything from the lerobot checkout: later cells invoke scripts by
# relative path (e.g. `lerobot/scripts/train.py`).
os.chdir('/mnt/newssd/lerobot')
# List the working directory contents as a quick sanity check.
!ls

In [None]:
import lerobot
# Show the `available_datasets` attribute exposed by the lerobot package.
print(lerobot.available_datasets)

In [None]:
!pip install humanize
!sudo apt-get update && sudo apt-get install tree

In [None]:
# Download the ALOHA static datasets
from huggingface_hub import snapshot_download
import os

# Destination root; every dataset is placed in its own sub-directory below it.
base_output_dir = "/mnt/newssd/lerobot/lerobot/datasets"
os.makedirs(base_output_dir, exist_ok=True)

# Task suffixes of the `aloha_static_*` datasets to fetch.
_ALOHA_STATIC_TASKS = (
    "candy",
    "coffee",
    "coffee_new",
    "cups_open",
    "fork_pick_up",
    "pingpong_test",
    "pro_pencil",
    "screw_driver",
    "tape",
    "thread_velcro",
    "towel",
    "vinh_cup",
    "vinh_cup_left",
    "ziploc_slide",
)
dataset_names = [f"aloha_static_{task}" for task in _ALOHA_STATIC_TASKS]

# Fetch the datasets one by one; a failure is reported and the loop moves on
# to the next dataset instead of aborting the whole cell.
for name in dataset_names:
    repo_id = "lerobot/" + name
    local_dir = os.path.join(base_output_dir, name)
    print(f"📥 Downloading {repo_id} to {local_dir}...")
    try:
        snapshot_download(
            repo_id=repo_id,
            repo_type="dataset",
            local_dir=local_dir,
            local_dir_use_symlinks=False,
        )
        print(f"✅ Done: {name}")
    except Exception as err:
        print(f"❌ Failed to download {name}: {err}")

In [None]:
# Fetch the `Qipei/Task_PicupBrick_50fps_with_mountingpoints` dataset repo
# into the local datasets folder.
# Relies on `snapshot_download` having been imported by the aloha download
# cell earlier in this notebook.
snapshot_download(
    repo_id="Qipei/Task_PicupBrick_50fps_with_mountingpoints",
    repo_type="dataset",
    local_dir="/mnt/newssd/lerobot/lerobot/datasets/Task_PicupBrick_50fps_with_mountingpoints",
)

In [None]:
import pandas as pd
import glob

# Back-fill a `next.done` column on the first 50 episode files only.
files = sorted(glob.glob('/mnt/newssd/lerobot/lerobot/datasets/Task_PicupBrick_50fps_with_mountingpoints_cotraining_test/data/chunk-000/*.parquet'))
for f in files[:50]:
    df = pd.read_parquet(f)
    if 'next.done' in df.columns:
        # Column already present: leave the file untouched instead of
        # rewriting it for no reason.
        continue
    # NOTE(review): every frame, including the last one of the episode, is
    # marked not-done — confirm this matches what the training code expects.
    df['next.done'] = False
    df.to_parquet(f, index=False)

In [None]:
import pandas as pd
import numpy as np
import glob
import os

# Directory that holds the per-episode parquet files.
parquet_dir = "/mnt/newssd/lerobot/lerobot/datasets/Task_PicupBrick_50fps_with_mountingpoints_cotraining_test/data/chunk-000/"
episode_pattern = os.path.join(parquet_dir, "episode_*.parquet")
all_files = glob.glob(episode_pattern)
all_files.sort()

# First 50 and last 50 episode files (the two slices overlap when fewer than
# 100 files are found).
first_50, last_50 = all_files[:50], all_files[-50:]

def _array_column_stats(df, col):
    """Return ``(shapes, mean)`` for one column of an episode frame.

    ``shapes`` is ``np.array(x).shape`` for every row of ``df[col]``.
    ``mean`` is the average of the per-row means over the non-empty rows that
    are lists or ndarrays, or ``None`` when the column has no such row.
    """
    values = df[col]
    shapes = [np.array(x).shape for x in values]
    row_means = [
        np.mean(x)
        for x in values
        if isinstance(x, (list, np.ndarray)) and np.size(x) > 0
    ]
    return shapes, (np.mean(row_means) if row_means else None)


def analyze(files, name):
    """Print a schema and value summary for a group of episode parquet files.

    For the given files this prints the union of column names, the dtypes seen
    per column, and - for ``action`` and ``observation.state`` - the set of
    per-row shapes and the mean of the per-file means.

    Files that contribute no array-valued rows to a column are left out of
    that column's mean instead of contributing NaN, so a single empty episode
    no longer turns the reported mean into ``nan``; if no file contributes,
    "N/A" is printed.

    Args:
        files: Iterable of parquet file paths, read with ``pd.read_parquet``.
        name: Label printed in the section header.
    """
    print(f"== {name} ==")
    columns_set = set()
    dtype_dict = {}
    action_shapes = []
    state_shapes = []
    action_means = []
    state_means = []
    for f in files:
        df = pd.read_parquet(f)
        columns_set.update(df.columns)
        for col in df.columns:
            dtype_dict.setdefault(col, set()).add(df[col].dtype)
        # Collect per-row shapes and the per-file mean for action / state.
        if 'action' in df.columns:
            shapes, mean = _array_column_stats(df, 'action')
            action_shapes += shapes
            if mean is not None:
                action_means.append(mean)
        if 'observation.state' in df.columns:
            shapes, mean = _array_column_stats(df, 'observation.state')
            state_shapes += shapes
            if mean is not None:
                state_means.append(mean)
    print("字段：", columns_set)
    print("各字段dtype：", {k: list(v) for k, v in dtype_dict.items()})
    if action_shapes:
        print("action shape统计：", set(action_shapes))
        print("action 均值均值：", np.mean(action_means) if action_means else "N/A")
    if state_shapes:
        print("observation.state shape统计：", set(state_shapes))
        print("state 均值均值：", np.mean(state_means) if state_means else "N/A")
    print()

# Summarise the first 50 and the last 50 episode files separately so the two
# printed reports can be compared.
analyze(first_50, "前50集")
analyze(last_50, "后50集")

In [None]:
# Re-launch train.py with `--resume=true`, using the train_config.json stored
# under the `checkpoints/last` folder of the
# `act_Task_PicupBrick_50fps_with_mountingpoints_cotraining` run, with
# `--steps=400000`.
# NOTE(review): presumably this raises the total step budget of that run to
# 400k — confirm against train.py.
!python lerobot/scripts/train.py \
  --config_path='/mnt/newssd/lerobot/outputs/train/act_Task_PicupBrick_50fps_with_mountingpoints_cotraining/checkpoints/last/pretrained_model/train_config.json' \
  --resume=true \
  --steps=400000

In [None]:
import os
import pandas as pd

base_dir = "/mnt/newssd/lerobot/lerobot/datasets/Task_PicupBrick_50fps_with_mountingpoints_aug900_with_static/data"

# Walk every chunk directory and strip `next.done` / `observation.effort`
# from each parquet file that has them; files without either column are not
# rewritten.
for chunk in os.listdir(base_dir):
    chunk_path = os.path.join(base_dir, chunk)
    if os.path.isdir(chunk_path):
        for file in os.listdir(chunk_path):
            if not file.endswith(".parquet"):
                continue
            file_path = os.path.join(chunk_path, file)
            df = pd.read_parquet(file_path)
            cols_to_drop = [
                col for col in ("next.done", "observation.effort") if col in df.columns
            ]
            if not cols_to_drop:
                continue
            print(f"Fixing {file_path} (removing {cols_to_drop})")
            df = df.drop(columns=cols_to_drop)
            df.to_parquet(file_path, index=False)

In [None]:
import os
import pandas as pd

# Directory containing all chunks (change as needed)
base_dir = "/mnt/newssd/lerobot/lerobot/datasets/Task_PicupBrick_50fps_with_mountingpoints_aug900_with_static/data"
# NOTE(review): DESIRED_LEN is not referenced by any cell visible in this
# notebook — confirm whether the code that consumed it was removed.
DESIRED_LEN = 20


In [35]:
# Remove the unneeded column
import os
import pandas as pd

dir_path = '/mnt/newssd/lerobot/lerobot/datasets/Task_PicupBrick_50fps_with_mountingpoints_cotrain/data/chunk-000'

# Strip `observation.effort` from every episode file that has it; files
# without the column are left untouched.
for fname in os.listdir(dir_path):
    if fname.endswith('.parquet'):
        fpath = os.path.join(dir_path, fname)
        df = pd.read_parquet(fpath)
        if 'observation.effort' in df.columns:
            print('Dropping observation.effort from:', fname)
            df = df.drop(columns=['observation.effort'])
            # index=False matches the other parquet rewrites in this notebook,
            # so all rewritten episode files are written the same way and no
            # pandas index information is stored.
            df.to_parquet(fpath, index=False)

Dropping observation.effort from: episode_000587.parquet
Dropping observation.effort from: episode_000535.parquet
Dropping observation.effort from: episode_000265.parquet
Dropping observation.effort from: episode_000430.parquet
Dropping observation.effort from: episode_000195.parquet
Dropping observation.effort from: episode_000173.parquet
Dropping observation.effort from: episode_000271.parquet
Dropping observation.effort from: episode_000435.parquet
Dropping observation.effort from: episode_000486.parquet
Dropping observation.effort from: episode_000124.parquet
Dropping observation.effort from: episode_000506.parquet
Dropping observation.effort from: episode_000406.parquet
Dropping observation.effort from: episode_000494.parquet
Dropping observation.effort from: episode_000132.parquet
Dropping observation.effort from: episode_000321.parquet
Dropping observation.effort from: episode_000442.parquet
Dropping observation.effort from: episode_000543.parquet
Dropping observation.effort fro

In [34]:
import os
import pandas as pd

dir_path = '/mnt/newssd/lerobot/lerobot/datasets/Task_PicupBrick_50fps_with_mountingpoints_cotrain/data/chunk-000'
# Columns every episode file is expected to have - no more, no fewer.
expected = {
    'action',
    'observation.state',
    'timestamp',
    'frame_index',
    'episode_index',
    'index',
    'task_index',
}

# Report, per parquet file, any column outside the expected set and any
# expected column that is absent.
for fname in os.listdir(dir_path):
    if not fname.endswith('.parquet'):
        continue
    fpath = os.path.join(dir_path, fname)
    df = pd.read_parquet(fpath)
    present = set(df.columns)
    extra, missing = present - expected, expected - present
    if extra:
        print(f"{fname} has extra columns: {extra}")
    if missing:
        print(f"{fname} is missing columns: {missing}")

episode_000587.parquet has extra columns: {'observation.effort'}
episode_000535.parquet has extra columns: {'observation.effort'}
episode_000265.parquet has extra columns: {'observation.effort'}
episode_000430.parquet has extra columns: {'observation.effort'}
episode_000195.parquet has extra columns: {'observation.effort'}
episode_000173.parquet has extra columns: {'observation.effort'}
episode_000271.parquet has extra columns: {'observation.effort'}
episode_000435.parquet has extra columns: {'observation.effort'}
episode_000486.parquet has extra columns: {'observation.effort'}
episode_000124.parquet has extra columns: {'observation.effort'}
episode_000506.parquet has extra columns: {'observation.effort'}
episode_000406.parquet has extra columns: {'observation.effort'}
episode_000494.parquet has extra columns: {'observation.effort'}
episode_000132.parquet has extra columns: {'observation.effort'}
episode_000321.parquet has extra columns: {'observation.effort'}
episode_000442.parquet ha

In [36]:
# Launch train.py with an ACT policy (`--policy.type=act`) on the local
# `Task_PicupBrick_50fps_with_mountingpoints_cotrain` dataset: 200000 steps,
# batch size 8, W&B disabled, fixed seed.
# NOTE(review): `--save_freq=20000` is presumably the checkpoint interval in
# steps — confirm against train.py.
!python lerobot/scripts/train.py \
  --dataset.repo_id=local \
  --dataset.root=/mnt/newssd/lerobot/lerobot/datasets/Task_PicupBrick_50fps_with_mountingpoints_cotrain \
  --output_dir=outputs/train/act_Task_PicupBrick_50fps_with_mountingpoints_cotrain \
  --policy.type=act \
  --policy.device=cuda \
  --batch_size=8 \
  --steps=200000 \
  --save_freq=20000 \
  --wandb.enable=false \
  --policy.chunk_size=75 \
  --policy.n_action_steps=75 \
  --policy.n_encoder_layers=4 \
  --policy.n_decoder_layers=7 \
  --policy.dim_feedforward=3200 \
  --policy.dim_model=512 \
  --policy.n_heads=8 \
  --policy.dropout=0.1 \
  --optimizer.lr=1e-5 \
  --seed=1000

INFO 2025-05-22 14:50:33 ts/train.py:111 {'batch_size': 8,
 'dataset': {'episodes': None,
             'image_transforms': {'enable': False,
                                  'max_num_transforms': 3,
                                  'random_order': False,
                                  'tfs': {'brightness': {'kwargs': {'brightness': [0.8,
                                                                                   1.2]},
                                                         'type': 'ColorJitter',
                                                         'weight': 1.0},
                                          'contrast': {'kwargs': {'contrast': [0.8,
                                                                               1.2]},
                                                       'type': 'ColorJitter',
                                                       'weight': 1.0},
                                          'hue': {'kwargs': {'hue': [-0.05,
                 

In [None]:
!python lerobot/scripts/cotrain.py \
  --dataset.repo_id=local \
  --dataset.root=/mnt/newssd/lerobot/lerobot/datasets/Task_PicupBrick_50fps_with_mountingpoints_cotrain \
  --output_dir=outputs/train/act_Task_PicupBrick_50fps_with_mountingpoints_cotrain_692data \
  --policy.type=act \
  --policy.device=cuda \
  --batch_size=8 \
  --job_name=act_692_cotrain\
  --steps=400000 \
  --save_freq=20000 \
  --wandb.enable=true \
  --policy.chunk_size=75 \
  --policy.n_action_steps=75 \
  --policy.n_encoder_layers=4 \
  --policy.n_decoder_layers=7 \
  --policy.dim_feedforward=3200 \
  --policy.dim_model=512 \
  --policy.n_heads=8 \
  --policy.dropout=0.1 \
  --optimizer.lr=1e-5 \
  --seed=1000

INFO 2025-05-22 15:25:05 /cotrain.py:112 {'batch_size': 8,
 'dataset': {'episodes': None,
             'image_transforms': {'enable': False,
                                  'max_num_transforms': 3,
                                  'random_order': False,
                                  'tfs': {'brightness': {'kwargs': {'brightness': [0.8,
                                                                                   1.2]},
                                                         'type': 'ColorJitter',
                                                         'weight': 1.0},
                                          'contrast': {'kwargs': {'contrast': [0.8,
                                                                               1.2]},
                                                       'type': 'ColorJitter',
                                                       'weight': 1.0},
                                          'hue': {'kwargs': {'hue': [-0.05,
                 