In [None]:
# Report whether PyTorch can see a CUDA device, and describe it when it can
import torch

has_cuda = torch.cuda.is_available()
print(f"🔍 CUDA available: {has_cuda}")

if not has_cuda:
    # No accelerator visible: point the user at the Colab runtime setting
    print("⚠️ Warning: No GPU detected. Training will be very slow!")
    print("Please change runtime type to GPU in Runtime > Change runtime type")
else:
    # Device 0 is the only device inspected here
    gpu_name = torch.cuda.get_device_name(0)
    gpu_total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"🎮 GPU: {gpu_name}")
    print(f"💾 GPU Memory: {gpu_total_bytes / 1e9:.1f} GB")


In [None]:
# Clone the Isaac-GR00T repository
print("📂 Cloning Isaac-GR00T repository...")
!git clone https://github.com/NVIDIA/Isaac-GR00T
%cd Isaac-GR00T
print("✅ Repository cloned successfully!")


In [None]:
# Install dependencies
# Each %pip line runs as its own pip invocation, in the order written below.
# Do not append trailing comments to the %pip lines themselves: everything
# after the magic name is handed to pip as arguments.
print("📦 Installing dependencies...")
# Upgrade setuptools before anything is built from source.
%pip install --upgrade setuptools
# Editable install of the current directory (the cloned repository) with its
# "base" extra.
%pip install -e .[base]
# flash-attn is pinned to an exact version; --no-build-isolation makes pip build
# it in the current environment instead of a temporary isolated one.
# NOTE(review): presumably so the build can see the packages installed by the
# step above -- confirm.
%pip install --no-build-isolation flash-attn==2.7.1.post4
# Remaining packages are installed unpinned.
%pip install huggingface_hub
%pip install matplotlib
%pip install tyro
%pip install wandb
print("✅ Dependencies installed successfully!")


In [None]:
# Optional credentials for experiment tracking and model sharing.
# Two flags are set for later cells:
#   use_wandb  - True only after a successful Weights & Biases login
#   use_hf_hub - True only after a successful Hugging Face Hub login
# Any failure (not running on Colab, missing secret, login error) is reported
# and leaves the corresponding flag False instead of stopping the notebook.

# Setup Weights & Biases (optional)
try:
    from google.colab import userdata
    wandb_api_key = userdata.get('WANDB_API_KEY')

    if wandb_api_key:
        import wandb
        wandb.login(key=wandb_api_key)
        print("✅ Successfully logged into Weights & Biases!")
        use_wandb = True
    else:
        print("⚠️ WANDB_API_KEY not found in secrets. Skipping wandb setup.")
        print("💡 You can add it later in Colab Secrets for experiment tracking.")
        use_wandb = False
except Exception as e:
    print(f"⚠️ Could not setup wandb: {e}")
    print("💡 Training will continue without wandb tracking.")
    use_wandb = False

# Setup Hugging Face Hub (optional)
try:
    # Imported here as well so this section does not depend on the wandb
    # section above having got as far as its own import.
    from google.colab import userdata
    from huggingface_hub import login
    hf_token = userdata.get('HF_TOKEN')

    if hf_token:
        login(token=hf_token)
        print("✅ Successfully logged into Hugging Face Hub!")
        use_hf_hub = True
    else:
        # The secret is read under the name HF_TOKEN, so report that name.
        print("⚠️ HF_TOKEN not found in secrets. Skipping HF Hub setup.")
        print("💡 You can add it later in Colab Secrets for model sharing.")
        use_hf_hub = False
except Exception as e:
    print(f"⚠️ Could not setup Hugging Face Hub: {e}")
    print("💡 Model will not be automatically pushed to Hugging Face.")
    use_hf_hub = False


In [None]:
## Step 3: 📥 Dataset Preparation

# Create directory for datasets
!mkdir -p demo_data
print("📁 Created demo_data directory")

# Download a sample dataset from Hugging Face
# You can replace this with your own dataset
print("⬇️ Downloading dataset from Hugging Face...")
!huggingface-cli download \
    --repo-type dataset HelloCephalopod/block_pickup_17 \
    --local-dir ./demo_data/block_pickup_17
print("✅ Dataset downloaded successfully!")


In [None]:
# Copy the single camera modality configuration
print("⚙️ Setting up single camera modality configuration...")
!cp getting_started/examples/so100__modality.json ./demo_data/so101-table-cleanup/meta/modality.json
print("✅ Modality configuration set up!")


In [None]:
# Let's examine the dataset structure
print("🔍 Dataset structure:")
!ls -la ./demo_data/block_pickup_17


In [None]:
# Read back the dataset's modality configuration and pretty-print it
import json

with open('./demo_data/block_pickup_17/meta/modality.json', 'r') as modality_file:
    modality_config = json.loads(modality_file.read())

print("⚙️ Modality Configuration:")
pretty_config = json.dumps(modality_config, indent=2)
print(pretty_config)


In [None]:
# Add the gr00t module to Python path
# '.' is resolved against the current working directory, which is the cloned
# Isaac-GR00T checkout when the clone cell has been run.
import sys
sys.path.append('.')

# Project imports used by the dataset cells below:
#   LeRobotSingleDataset - dataset class instantiated in the loading cell
#   EmbodimentTag        - built from the string "new_embodiment"
#   DATA_CONFIG_MAP      - indexed with "so100" to obtain the data configuration
from gr00t.data.dataset import LeRobotSingleDataset
from gr00t.data.embodiment_tags import EmbodimentTag
from gr00t.experiment.data_config import DATA_CONFIG_MAP
# Plotting and array helpers for the visualization cell
import matplotlib.pyplot as plt
import numpy as np
print("✅ Modules imported successfully!")


In [None]:
# Load the dataset
print("📊 Loading dataset...")
dataset_path = "./demo_data/block_pickup_17"
embodiment_tag = EmbodimentTag("new_embodiment")

# Get data configuration for single camera setup
data_config_cls = DATA_CONFIG_MAP["so100"]
modality_configs = data_config_cls.modality_config()
transforms = data_config_cls.transform()

# Create dataset
# NOTE(review): `transforms` is passed to the dataset, so the keys of
# `dataset[i]` may differ from the raw modality keys -- confirm that the
# visualization cell looks up the right ones.
dataset = LeRobotSingleDataset(
    dataset_path=dataset_path,
    modality_configs=modality_configs,
    transforms=transforms,
    embodiment_tag=embodiment_tag,
    video_backend="torchvision_av",
)

print("✅ Dataset loaded successfully!")
# len(dataset) is the number of items that can be indexed as dataset[i], so it
# is reported as samples rather than episodes.
# NOTE(review): understood to be a per-step count in LeRobotSingleDataset --
# confirm against gr00t/data/dataset.py.
print(f"📈 Number of samples: {len(dataset)}")


In [None]:
# Visualize a sample from the dataset
def frame_to_image(frame):
    """Turn one video frame into a float array scaled to [0, 1] for imshow.

    Args:
        frame: a torch tensor (anything exposing ``.numpy()``) or an object
            ``np.asarray`` can read. A 3-D frame whose first axis has 1, 3 or 4
            entries is treated as CxHxW and transposed to HxWxC; any other
            layout is left untouched.

    Returns:
        np.ndarray of float32 values in [0, 1]. A constant frame (max == min)
        becomes all zeros instead of triggering a division by zero.
    """
    image = frame.numpy() if hasattr(frame, 'numpy') else np.asarray(frame)
    image = image.astype(np.float32)
    if image.ndim == 3 and image.shape[0] in (1, 3, 4):
        # Convert from CxHxW to HxWxC
        image = np.transpose(image, (1, 2, 0))
    lowest = image.min()
    value_range = image.max() - lowest
    if value_range > 0:
        # Normalize to 0-1 range
        return (image - lowest) / value_range
    return np.zeros_like(image)


print("🎬 Visualizing dataset sample...")
sample = dataset[0]
print("🔑 Sample keys:", list(sample.keys()))

# Display video frames
if 'video.webcam' in sample:
    video_frames = sample['video.webcam']
    print(f"📹 Video shape: {video_frames.shape}")

    # Show first few frames (at most 5)
    num_shown = min(5, video_frames.shape[0])
    if num_shown > 0:
        # squeeze=False always yields a 2-D array of axes, so the single-frame
        # case needs no special handling.
        fig, axes = plt.subplots(1, num_shown, figsize=(15, 3), squeeze=False)
        for i, ax in enumerate(axes[0]):
            ax.imshow(frame_to_image(video_frames[i]))
            ax.set_title(f'Frame {i}')
            ax.axis('off')

        plt.tight_layout()
        plt.show()

# Display state and action information (first entry of each, when present)
if 'state.single_arm' in sample:
    print(f"🦾 State shape: {sample['state.single_arm'].shape}")
    print(f"📊 State values: {sample['state.single_arm'][0]}")

if 'action.single_arm' in sample:
    print(f"🎯 Action shape: {sample['action.single_arm'].shape}")
    print(f"📊 Action values: {sample['action.single_arm'][0]}")

if 'annotation.human.task_description' in sample:
    print(f"📝 Task description: {sample['annotation.human.task_description']}")


In [None]:
# Create output directory for checkpoints
!mkdir -p ./checkpoints
print("📁 Created checkpoints directory")


In [None]:
# Fine-tuning configuration: builds `cmd`, the argument list the next cell runs
import os
import subprocess

# Environment settings applied to this process before the training command is launched
os.environ.update({
    'CUDA_VISIBLE_DEVICES': '0',
    'TOKENIZERS_PARALLELISM': 'false',
})

# Pick the reporting backend from the outcome of the wandb login cell
# NOTE(review): confirm that scripts/gr00t_finetune.py accepts 'none' for --report-to.
if use_wandb:
    report_to = 'wandb'
    tracking_label = 'Weights & Biases'
else:
    report_to = 'none'
    tracking_label = 'Disabled'

# Options for the training script, kept in the order they are passed.
# A one-element tuple is a bare flag; a two-element tuple is flag + value.
finetune_options = [
    ('--dataset-path', './demo_data/block_pickup_17'),
    ('--num-gpus', '1'),
    ('--output-dir', './checkpoints'),
    ('--max-steps', '1000'),        # Reduced for demo purposes
    ('--data-config', 'so100'),     # Single camera configuration
    ('--video-backend', 'torchvision_av'),
    ('--batch-size', '8'),          # Reduced batch size for Colab
    ('--save-steps', '200'),
    ('--learning-rate', '1e-4'),
    ('--no-tune_diffusion_model',), # Disable diffusion model tuning to save memory
    ('--report-to', report_to),     # Use wandb if available, otherwise none
]

# Fine-tuning command optimized for Colab
cmd = ['python', 'scripts/gr00t_finetune.py']
for option in finetune_options:
    cmd.extend(option)

print("🎯 Fine-tuning command configured:")
print(' '.join(cmd))
print("\n⚙️ Configuration optimized for:")
for summary_line in (
    "   • Single camera setup (so100 config)",
    "   • Colab GPU memory constraints",
    "   • Reduced training steps for demo",
    f"   • Experiment tracking: {tracking_label}",
):
    print(summary_line)


In [None]:
# Start fine-tuning
import subprocess


def run_streaming(command):
    """Run `command` and echo its output line by line while it is running.

    stderr is merged into stdout so errors appear in order with normal output.
    Returns the process exit code (0 means success).
    """
    with subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,  # line-buffered so progress shows up as it is produced
    ) as proc:
        for line in proc.stdout:
            print(line, end='')
    # Leaving the `with` block waits for the process, so returncode is set.
    return proc.returncode


def with_reduced_memory(command, batch_size='4'):
    """Return a copy of `command` with a smaller batch size and projector tuning off."""
    reduced = list(command)
    if '--batch-size' in reduced[:-1]:
        # Replace the existing value instead of passing the option twice.
        reduced[reduced.index('--batch-size') + 1] = batch_size
    else:
        reduced += ['--batch-size', batch_size]
    # Boolean options are switched off with the --no- prefix, the same form as
    # --no-tune_diffusion_model in the base command; "--tune_projector False"
    # is not that form.
    reduced.append('--no-tune_projector')
    return reduced


print("🚀 Starting fine-tuning...")
print("⏳ This may take a while. You can monitor progress in the output below.")
print("☕ Time for a coffee break!\n")

exit_code = run_streaming(cmd)
if exit_code == 0:
    print("🎉 Fine-tuning completed successfully!")
else:
    print(f"❌ Fine-tuning failed with exit code {exit_code} (see output above)")
    print("\n🔧 Trying with reduced memory settings...")

    # Try with even more reduced settings
    cmd_reduced = with_reduced_memory(cmd)
    exit_code = run_streaming(cmd_reduced)
    if exit_code == 0:
        print("🎉 Fine-tuning completed successfully with reduced settings!")
    else:
        print(f"❌ Fine-tuning still failed with exit code {exit_code} (see output above)")
        print("\n💡 Try reducing batch size further or using a different GPU runtime.")


In [None]:
# Check if checkpoints were created
print("📁 Checking for checkpoints...")
!ls -la ./checkpoints/


In [None]:
# Find the latest checkpoint
import glob
import os
import subprocess


def checkpoint_step(path):
    """Return the step number encoded in a `checkpoint-<step>` path, or -1 if there is none."""
    suffix = os.path.basename(os.path.normpath(path)).rpartition('-')[2]
    return int(suffix) if suffix.isdecimal() else -1


# Keep directories only; a stray file matching the pattern is not a checkpoint.
checkpoint_dirs = [
    path for path in glob.glob('./checkpoints/checkpoint-*') if os.path.isdir(path)
]
if checkpoint_dirs:
    # Order by the step number in the directory name rather than by ctime,
    # which changes whenever a directory's metadata is touched.
    latest_checkpoint = max(checkpoint_dirs, key=checkpoint_step)
    print(f"✅ Latest checkpoint found: {latest_checkpoint}")
    # Argument list instead of a shell string, so the path is never re-parsed by a shell.
    listing = subprocess.run(['ls', '-la', latest_checkpoint], capture_output=True, text=True)
    print(listing.stdout)
else:
    print("❌ No checkpoints found. Training may have failed or not completed yet.")


In [None]:
# Run the evaluation script on the most recent checkpoint, when one exists
if not checkpoint_dirs:
    print("❌ No checkpoints available for evaluation.")
else:
    latest_checkpoint = max(checkpoint_dirs, key=os.path.getctime)

    # Assemble the evaluation command piece by piece
    eval_cmd = ['python', 'scripts/eval_policy.py', '--plot']
    eval_cmd += ['--embodiment_tag', 'new_embodiment']
    eval_cmd += ['--model_path', latest_checkpoint]
    eval_cmd += ['--data_config', 'so100']
    eval_cmd += ['--dataset_path', './demo_data/block_pickup_17']
    eval_cmd += ['--video_backend', 'torchvision_av']
    eval_cmd += ['--modality_keys', 'single_arm', 'gripper']

    print("📊 Evaluating fine-tuned model...")
    print(' '.join(eval_cmd))

    try:
        # Output is collected and printed after the script finishes
        result = subprocess.run(eval_cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as eval_error:
        print(f"❌ Evaluation failed: {eval_error}")
        print(f"🔍 Error output: {eval_error.stderr}")
    else:
        print("🎉 Evaluation completed successfully!")
        print("📊 Output:", result.stdout)


In [None]:
# Create a zip file of the checkpoints for download
if checkpoint_dirs:
    print("📦 Creating zip file for download...")
    !zip -r gr00t_finetuned_model.zip ./checkpoints/
    print("✅ Model checkpoints zipped as 'gr00t_finetuned_model.zip'")
    print("📥 You can download this file from the Colab file browser.")

    # Show file size
    !ls -lh gr00t_finetuned_model.zip
else:
    print("❌ No checkpoints to save.")


In [None]:
# Push fine-tuned model to Hugging Face Hub
# Runs only when the Hub login succeeded and at least one checkpoint exists.
if use_hf_hub and checkpoint_dirs:
    latest_checkpoint = max(checkpoint_dirs, key=os.path.getctime)

    print("🏷️ Model Repository Setup")

    # Leave the placeholder to publish under the logged-in account, or replace
    # it with a user/organization name to publish somewhere else.
    default_username = "your-username"
    default_model_name = "gr00t-n1.5-so100-finetuned"

    try:
        from huggingface_hub import HfApi, create_repo

        api = HfApi()

        # The literal placeholder is not a real namespace, so resolve the
        # account that owns the stored token instead of uploading to it.
        if default_username == "your-username":
            hf_username = api.whoami()["name"]
        else:
            hf_username = default_username
        repo_name = f"{hf_username}/{default_model_name}"

        print(f"📝 Repository name: {repo_name}")
        print("💡 You can change this in the code above if needed")

        # Create repository if it doesn't exist (note: created as a public repo)
        print(f"📁 Creating repository: {repo_name}")
        create_repo(repo_id=repo_name, exist_ok=True, private=False)

        # Upload all files in the checkpoint directory
        print(f"⬆️ Uploading model files from {latest_checkpoint}...")
        api.upload_folder(
            folder_path=latest_checkpoint,
            repo_id=repo_name,
            repo_type="model",
            commit_message="Fine-tuned GR00T N1.5 model on SO-101 dataset"
        )

        # Create a model card.
        # The dataset and task lines name the dataset this notebook downloads
        # (HelloCephalopod/block_pickup_17).
        # NOTE(review): the card mixes "SO-100" and "SO-101"; confirm which arm
        # the dataset was recorded on and make the wording uniform. The step
        # count, batch size and learning rate are the values from the base
        # fine-tuning command, not from the reduced-memory retry.
        model_card_content = f"""---
license: apache-2.0
base_model: nvidia/GR00T-N1.5-3B
tags:
- robotics
- gr00t
- fine-tuned
- so-101
- single-camera
library_name: transformers
---

# GR00T N1.5 Fine-tuned on SO-101 Dataset

This model is a fine-tuned version of [nvidia/GR00T-N1.5-3B](https://huggingface.co/nvidia/GR00T-N1.5-3B) on a SO-101 robot arm dataset with single camera setup.

## Model Details

- **Base Model**: nvidia/GR00T-N1.5-3B
- **Fine-tuned on**: HelloCephalopod/block_pickup_17 dataset
- **Camera Setup**: Single camera (webcam)
- **Embodiment**: SO-101 robot arm
- **Training Steps**: 1000 (demo configuration)

## Usage

```python
from gr00t.model.gr00t_n1 import GR00T_N1_5

# Load the fine-tuned model
model = GR00T_N1_5.from_pretrained("{repo_name}")

# Use for inference on SO-101 robot
# ... your inference code here
```

## Training Details

- **Training Data**: SO-100 pick and place block task
- **Training Steps**: 1000
- **Batch Size**: 8
- **Learning Rate**: 1e-4
- **Hardware**: Google Colab GPU

## Intended Use

This model is intended for research and educational purposes with SO-101 robot arms performing block pickup tasks.

## Limitations

- Trained on a small dataset (demo purposes)
- Single camera setup only
- Specific to SO-100 embodiment

## Citation

If you use this model, please cite the original GR00T paper and acknowledge the fine-tuning work.
"""

        # Upload model card
        api.upload_file(
            path_or_fileobj=model_card_content.encode(),
            path_in_repo="README.md",
            repo_id=repo_name,
            repo_type="model",
            commit_message="Add model card"
        )

        print(f"🎉 Successfully uploaded model to: https://huggingface.co/{repo_name}")
        print(f"🔗 Model URL: https://huggingface.co/{repo_name}")
        print("✅ Model card created with training details")

    except Exception as e:
        # Best effort: a failed upload is reported, the notebook keeps going
        print(f"❌ Failed to upload to Hugging Face: {e}")
        print("💡 You can manually upload the model files later")

elif not use_hf_hub:
    print("⚠️ Hugging Face Hub not configured. Skipping model upload.")
    # The login cell reads the secret named HF_TOKEN
    print("💡 Set up HF_TOKEN in Colab Secrets to enable automatic uploads")
else:
    # Only remaining case: logged in, but no checkpoints were found
    print("❌ No model checkpoints found to upload.")
