In [1]:
# Check if we're running on GPU
import torch
print(f"🔍 CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"🎮 GPU: {torch.cuda.get_device_name(0)}")
    print(f"💾 GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
else:
    print("⚠️ Warning: No GPU detected. Training will be very slow!")
    print("Please change runtime type to GPU in Runtime > Change runtime type")


🔍 CUDA available: True
🎮 GPU: NVIDIA A100-SXM4-40GB
💾 GPU Memory: 42.5 GB


In [2]:
# Clone the Isaac-GR00T repository
print("📂 Cloning Isaac-GR00T repository...")
!git clone https://github.com/NVIDIA/Isaac-GR00T
%cd Isaac-GR00T
print("✅ Repository cloned successfully!")


📂 Cloning Isaac-GR00T repository...
fatal: destination path 'Isaac-GR00T' already exists and is not an empty directory.
/content/Isaac-GR00T
✅ Repository cloned successfully!


In [3]:
# Install dependencies
print("📦 Installing dependencies...")
%pip install --upgrade setuptools
%pip install -e .[base]
%pip install --no-build-isolation flash-attn==2.7.1.post4
%pip install huggingface_hub
%pip install matplotlib
%pip install tyro
%pip install wandb
print("✅ Dependencies installed successfully!")


📦 Installing dependencies...
Obtaining file:///content/Isaac-GR00T
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: gr00t
  Building editable for gr00t (pyproject.toml) ... [?25l[?25hdone
  Created wheel for gr00t: filename=gr00t-1.1.0-0.editable-py3-none-any.whl size=14947 sha256=fb4848049f9dfce640733a0136566bceb9b583646882d5085b04f9ec447c7019
  Stored in directory: /tmp/pip-ephem-wheel-cache-gb3qi5xl/wheels/71/1a/03/e24813e2169a4b7ef7402479b693d45c53fb388a5e0b49bac6
Successfully built gr00t
Installing collected packages: gr00t
  Attempting uninstall: gr00t
    Found existing installation: gr00t 1.1.0
    Uninstalling gr00t-1.1.0:
      Successfully uninstalled gr00t-1.1.0
Successfully installed gr00t-1.1.0
Collecting flash-attn==2.7.1.post

In [4]:
# Setup Weights & Biases (optional)
try:
    from google.colab import userdata
    wandb_api_key = userdata.get('WANDB_API_KEY')

    if wandb_api_key:
        import wandb
        wandb.login(key=wandb_api_key)
        print("✅ Successfully logged into Weights & Biases!")
        use_wandb = True
    else:
        print("⚠️ WANDB_API_KEY not found in secrets. Skipping wandb setup.")
        print("💡 You can add it later in Colab Secrets for experiment tracking.")
        use_wandb = False
except Exception as e:
    print(f"⚠️ Could not setup wandb: {e}")
    print("💡 Training will continue without wandb tracking.")
    use_wandb = False

# Setup Hugging Face Hub (optional)
try:
    from huggingface_hub import login
    hf_token = userdata.get('HF_TOKEN')

    if hf_token:
        login(token=hf_token)
        print("✅ Successfully logged into Hugging Face Hub!")
        use_hf_hub = True
    else:
        print("⚠️ HUGGINGFACE_TOKEN not found in secrets. Skipping HF Hub setup.")
        print("💡 You can add it later in Colab Secrets for model sharing.")
        use_hf_hub = False
except Exception as e:
    print(f"⚠️ Could not setup Hugging Face Hub: {e}")
    print("💡 Model will not be automatically pushed to Hugging Face.")
    use_hf_hub = False


[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


✅ Successfully logged into Weights & Biases!
✅ Successfully logged into Hugging Face Hub!


In [5]:
## Step 3: 📥 Dataset Preparation

# Create directory for datasets
!mkdir -p demo_data
print("📁 Created demo_data directory")

# Download a sample dataset from Hugging Face
# You can replace this with your own dataset
print("⬇️ Downloading dataset from Hugging Face...")
!huggingface-cli download \
    --repo-type dataset HelloCephalopod/block_pickup_17 \
    --local-dir ./demo_data/block_pickup_17
print("✅ Dataset downloaded successfully!")


📁 Created demo_data directory
⬇️ Downloading dataset from Hugging Face...
Fetching 106 files:   0% 0/106 [00:00<?, ?it/s]Downloading 'data/chunk-000/episode_000004.parquet' to 'demo_data/block_pickup_17/.cache/huggingface/download/data/chunk-000/jFtolRe5Vvw8HkW1jrJcK1IP93I=.371ff5c8fdfb790d11caf11e9c25c3e6d0f7347b1bd7493ff924568d5e83bd2b.incomplete'
Downloading 'data/chunk-000/episode_000000.parquet' to 'demo_data/block_pickup_17/.cache/huggingface/download/data/chunk-000/cNsBuwyxGLgaOECPx2lW57L780o=.e00ae6d49a8ae970999fef57389bfc3166de2e037f7574147242744a618bf886.incomplete'
Downloading '.gitattributes' to 'demo_data/block_pickup_17/.cache/huggingface/download/wPaCkH-WbT7GsmxMKKrNZTV4nSM=.1ef325f1b111266a6b26e0196871bd78baa8c2f3.incomplete'
Downloading 'data/chunk-000/episode_000003.parquet' to 'demo_data/block_pickup_17/.cache/huggingface/download/data/chunk-000/I4KmEhnhp7DGsGV1cF9fTGwH1Z8=.ad2f2e274c67812c8e162262e8099f0d0a583e2ff19880147aa7517fa2281c6c.incomplete'
Downloading 'data

In [6]:
# Copy the single camera modality configuration
print("⚙️ Setting up single camera modality configuration...")
import json
so100_modality_config = {
    "state": {
        "single_arm": {
            "start": 0,
            "end": 5
        },
        "gripper": {
            "start": 5,
            "end": 6
        }
    },
    "action": {
        "single_arm": {
            "start": 0,
            "end": 5
        },
        "gripper": {
            "start": 5,
            "end": 6
        }
    },
    "video": {
        "webcam": {
            "original_key": "observation.images.front"
        }
    },
    "annotation": {
        "human.task_description": {
            "original_key": "task_index"
        }
    }
}

with open('./demo_data/block_pickup_17/meta/modality.json', 'w') as f:
    json.dump(so100_modality_config, f, indent=2)

print("✅ Single camera modality configuration set!")


⚙️ Setting up single camera modality configuration...
✅ Single camera modality configuration set!


In [7]:
# Add the gr00t module to Python path
import sys
sys.path.append('.')

from gr00t.data.dataset import LeRobotSingleDataset
from gr00t.data.embodiment_tags import EmbodimentTag
from gr00t.experiment.data_config import DATA_CONFIG_MAP
import matplotlib.pyplot as plt
import numpy as np
print("✅ Modules imported successfully!")


  check_for_updates()
`use_fast` is set to `True` but the image processor class does not have a fast version.  Falling back to the slow version.


✅ Modules imported successfully!


In [8]:
# Load the dataset
print("📊 Loading dataset...")
dataset_path = "./demo_data/block_pickup_17"
embodiment_tag = EmbodimentTag("new_embodiment")

# Get data configuration for single camera setup
data_config_cls = DATA_CONFIG_MAP["so100"]
modality_configs = data_config_cls.modality_config()
transforms = data_config_cls.transform()

# Create dataset
dataset = LeRobotSingleDataset(
    dataset_path=dataset_path,
    modality_configs=modality_configs,
    transforms=transforms,
    embodiment_tag=embodiment_tag,
    video_backend="torchvision_av",
)

print(f"✅ Dataset loaded successfully!")
print(f"📈 Number of episodes: {len(dataset)}")


📊 Loading dataset...
Failed to load dataset statistics: [Errno 2] No such file or directory: 'demo_data/block_pickup_17/meta/stats.json'
Calculating dataset statistics for block_pickup_17


Collecting all parquet files...: 100%|██████████| 50/50 [00:00<00:00, 301.07it/s]


Computing statistics for action...
Computing statistics for observation.state...
Computing statistics for timestamp...
Computing statistics for frame_index...
Computing statistics for episode_index...
Computing statistics for index...
Computing statistics for task_index...
Initialized dataset block_pickup_17 with EmbodimentTag.NEW_EMBODIMENT
✅ Dataset loaded successfully!
📈 Number of episodes: 22482


In [9]:
# Visualize a sample from the dataset
print("🎬 Visualizing dataset sample...")
sample = dataset[0]
print("🔑 Sample keys:", list(sample.keys()))

# Display video frames
if 'video.webcam' in sample:
    video_frames = sample['video.webcam']
    print(f"📹 Video shape: {video_frames.shape}")

    # Show first few frames
    fig, axes = plt.subplots(1, min(5, video_frames.shape[0]), figsize=(15, 3))
    if video_frames.shape[0] == 1:
        axes = [axes]

    for i, ax in enumerate(axes[:min(5, video_frames.shape[0])]):
        frame = video_frames[i].numpy()
        # Convert from CxHxW to HxWxC
        frame = np.transpose(frame, (1, 2, 0))
        # Normalize to 0-1 range
        frame = (frame - frame.min()) / (frame.max() - frame.min())
        ax.imshow(frame)
        ax.set_title(f'Frame {i}')
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Display state and action information
if 'state.single_arm' in sample:
    print(f"🦾 State shape: {sample['state.single_arm'].shape}")
    print(f"📊 State values: {sample['state.single_arm'][0]}")

if 'action.single_arm' in sample:
    print(f"🎯 Action shape: {sample['action.single_arm'].shape}")
    print(f"📊 Action values: {sample['action.single_arm'][0]}")

if 'annotation.human.task_description' in sample:
    print(f"📝 Task description: {sample['annotation.human.task_description']}")


🎬 Visualizing dataset sample...
🔑 Sample keys: ['state', 'state_mask', 'segmentation_target', 'segmentation_target_mask', 'has_real_action', 'action', 'action_mask', 'eagle_content', 'embodiment_id']


In [10]:
# Create output directory for checkpoints
!mkdir -p ./checkpoints
print("📁 Created checkpoints directory")

checkpoint_dirs = 'checkpoints/'


📁 Created checkpoints directory


In [11]:
# Fine-tuning configuration
import subprocess
import os

# Set environment variables for better performance
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

# Configure wandb reporting based on setup
report_to = 'wandb' if use_wandb else 'none'

# Fine-tuning command optimized for Colab
cmd = [
    'python', 'scripts/gr00t_finetune.py',
    '--dataset-path', './demo_data/block_pickup_17',
    '--output-dir', './checkpoints',
    '--max-steps', '40000',
    '--data-config', 'so100',
    '--video-backend', 'torchvision_av',
    '--batch-size', '16',
    '--save-steps', '10000',
    '--learning-rate', '1e-4',
    '--report-to', report_to  # Use wandb if available, otherwise none
]

print("🎯 Fine-tuning command configured:")
print(' '.join(cmd))
print("\n⚙️ Configuration optimized for:")
print("   • Single camera setup (so100 config)")
print("   • Colab GPU memory constraints")
print("   • Reduced training steps for demo")
print(f"   • Experiment tracking: {'Weights & Biases' if use_wandb else 'Disabled'}")


🎯 Fine-tuning command configured:
python scripts/gr00t_finetune.py --dataset-path ./demo_data/block_pickup_17 --output-dir ./checkpoints --max-steps 40000 --data-config so100 --video-backend torchvision_av --batch-size 16 --save-steps 10000 --learning-rate 1e-4 --report-to wandb

⚙️ Configuration optimized for:
   • Single camera setup (so100 config)
   • Colab GPU memory constraints
   • Reduced training steps for demo
   • Experiment tracking: Weights & Biases


In [12]:
# Start fine-tuning
print("🚀 Starting fine-tuning...")
print("⏳ This may take a while. You can monitor progress in the output below.")
print("☕ Time for a coffee break!\n")

try:
    result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    print("🎉 Fine-tuning completed successfully!")
    print("📊 Output:", result.stdout)
except subprocess.CalledProcessError as e:
    print(f"❌ Fine-tuning failed with error: {e}")
    print(f"🔍 Error output: {e.stderr}")

🚀 Starting fine-tuning...
⏳ This may take a while. You can monitor progress in the output below.
☕ Time for a coffee break!

🎉 Fine-tuning completed successfully!
📊 Output: 
GR00T FINE-TUNING CONFIGURATION:
dataset_path: ['./demo_data/block_pickup_17']
output_dir: ./checkpoints
data_config: so100
batch_size: 16
max_steps: 40000
num_gpus: 1
save_steps: 10000
base_model_path: nvidia/GR00T-N1.5-3B
tune_llm: False
tune_visual: False
tune_projector: True
tune_diffusion_model: True
resume: False
learning_rate: 0.0001
weight_decay: 1e-05
warmup_ratio: 0.05
lora_rank: 0
lora_alpha: 16
lora_dropout: 0.1
lora_full_model: False
dataloader_num_workers: 8
report_to: wandb
embodiment_tag: new_embodiment
video_backend: torchvision_av
balance_dataset_weights: True
balance_trajectory_weights: True

Using 1 GPUs
Initialized dataset block_pickup_17 with EmbodimentTag.NEW_EMBODIMENT
Loading pretrained dual brain from nvidia/GR00T-N1.5-3B
Tune backbone vision tower: False
Tune backbone LLM: False
Tune acti

In [13]:
# Check if checkpoints were created
print("📁 Checking for checkpoints...")
!ls -la ./checkpoints/


📁 Checking for checkpoints...
total 7408924
drwxr-xr-x  8 root root       4096 Jul  1 02:10 .
drwxr-xr-x 14 root root       4096 Jun 30 22:22 ..
drwxr-xr-x  3 root root       4096 Jun 30 23:19 checkpoint-10000
drwxr-xr-x  3 root root       4096 Jul  1 00:16 checkpoint-20000
drwxr-xr-x  3 root root       4096 Jul  1 01:13 checkpoint-30000
drwxr-xr-x  3 root root       4096 Jul  1 02:10 checkpoint-40000
-rw-r--r--  1 root root       1706 Jul  1 02:10 config.json
drwxr-xr-x  2 root root       4096 Jun 30 22:22 experiment_cfg
-rw-r--r--  1 root root 4999367032 Jul  1 02:10 model-00001-of-00002.safetensors
-rw-r--r--  1 root root 2586508600 Jul  1 02:10 model-00002-of-00002.safetensors
-rw-r--r--  1 root root     104530 Jul  1 02:10 model.safetensors.index.json
drwxr-xr-x  2 root root       4096 Jun 30 22:22 runs
-rw-r--r--  1 root root     698012 Jul  1 02:10 trainer_state.json
-rw-r--r--  1 root root       5304 Jul  1 02:10 training_args.bin


In [16]:
# Push fine-tuned model to Hugging Face Hub
latest_checkpoint = './checkpoints/checkpoint-40000'
print(f"Latest checkpoint: {latest_checkpoint}")

if use_hf_hub:
    # Get user input for model repository name
    import getpass
    print("🏷️ Model Repository Setup")
    print("Enter details for your Hugging Face model repository:")

    # You can customize these or make them interactive
    default_username = "HelloCephalopod"  # Users should replace this
    default_model_name = "gr00t-n1.5-so100-finetuned"

    print(f"📝 Suggested repository name: {default_username}/{default_model_name}")
    print("💡 You can change this in the code above if needed")

    repo_name = f"{default_username}/{default_model_name}"

    try:
        from huggingface_hub import HfApi, create_repo

        # Create repository if it doesn't exist
        print(f"📁 Creating repository: {repo_name}")
        create_repo(repo_id=repo_name, exist_ok=True, private=False)

        # Upload the model files
        api = HfApi()
        print(f"⬆️ Uploading model files from {latest_checkpoint}...")

        # Upload all files in the checkpoint directory
        api.upload_folder(
            folder_path=latest_checkpoint,
            repo_id=repo_name,
            repo_type="model",
            commit_message=f"Fine-tuned GR00T N1.5 model on SO-101 dataset"
        )

        # Create a model card
        model_card_content = f"""---
license: apache-2.0
base_model: nvidia/GR00T-N1.5-3B
tags:
- robotics
- gr00t
- fine-tuned
- so-100
- single-camera
library_name: transformers
---

# GR00T N1.5 Fine-tuned on SO-100 Dataset

This model is a fine-tuned version of [nvidia/GR00T-N1.5-3B](https://huggingface.co/nvidia/GR00T-N1.5-3B) on a SO-101 robot arm dataset with single camera setup.

## Model Details

- **Base Model**: nvidia/GR00T-N1.5-3B
- **Fine-tuned on**: SO-100 pick and place
- **Camera Setup**: Single camera (webcam)
- **Embodiment**: SO-100 robot arm
- **Training Steps**: 1000 (demo configuration)

## Usage

```python
from gr00t.model.gr00t_n1 import GR00T_N1_5

# Load the fine-tuned model
model = GR00T_N1_5.from_pretrained("{repo_name}")

# Use for inference on SO-100 robot
# ... your inference code here
```

## Training Details

- **Training Data**: SO-100 pick and place block task
- **Training Steps**: 1000
- **Batch Size**: 8
- **Learning Rate**: 1e-4
- **Hardware**: Google Colab GPU

## Intended Use

This model is intended for research and educational purposes with SO-101 robot arms performing table cleanup tasks.

## Limitations

- Trained on a small dataset (demo purposes)
- Single camera setup only
- Specific to SO-100 embodiment

## Citation

If you use this model, please cite the original GR00T paper and acknowledge the fine-tuning work.
"""

        # Upload model card
        api.upload_file(
            path_or_fileobj=model_card_content.encode(),
            path_in_repo="README.md",
            repo_id=repo_name,
            repo_type="model",
            commit_message="Add model card"
        )

        print(f"🎉 Successfully uploaded model to: https://huggingface.co/{repo_name}")
        print(f"🔗 Model URL: https://huggingface.co/{repo_name}")
        print("✅ Model card created with training details")

    except Exception as e:
        print(f"❌ Failed to upload to Hugging Face: {e}")
        print("💡 You can manually upload the model files later")

elif not use_hf_hub:
    print("⚠️ Hugging Face Hub not configured. Skipping model upload.")
    print("💡 Set up HUGGINGFACE_TOKEN in Colab Secrets to enable automatic uploads")
elif not checkpoint_dirs:
    print("❌ No model checkpoints found to upload.")
else:
    print("ℹ️ Model upload skipped.")


Latest checkpoint: ./checkpoints/checkpoint-40000
🏷️ Model Repository Setup
Enter details for your Hugging Face model repository:
📝 Suggested repository name: HelloCephalopod/gr00t-n1.5-so100-finetuned
💡 You can change this in the code above if needed
📁 Creating repository: HelloCephalopod/gr00t-n1.5-so100-finetuned
⬆️ Uploading model files from ./checkpoints/checkpoint-40000...


Upload 5 LFS files:   0%|          | 0/5 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.59G [00:00<?, ?B/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

optimizer.pt:   0%|          | 0.00/8.55G [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

🎉 Successfully uploaded model to: https://huggingface.co/HelloCephalopod/gr00t-n1.5-so100-finetuned
🔗 Model URL: https://huggingface.co/HelloCephalopod/gr00t-n1.5-so100-finetuned
✅ Model card created with training details
