# Robot Arm Training with PPO in Google Colab

This notebook implements the training of a robot arm using PPO algorithm in Google Colab with GPU acceleration.

In [2]:
# Mount Google Drive to access/save models
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google'

In [None]:
# Install required packages
!pip install gymnasium
!pip install stable-baselines3[extra]
!pip install mujoco

In [None]:
# Clone the repository
!git clone https://github.com/CyrilEtornam/RobotArm.git
%cd RobotArm

In [None]:
# Test cell - verify environment setup
import torch
print(f'CUDA available: {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f'GPU device: {torch.cuda.get_device_name(0)}')

In [None]:
import os
import torch
import numpy as np
from gymnasium.wrappers import TimeLimit
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback
from stable_baselines3.common.vec_env import DummyVecEnv

from envs.robot_arm_env import RobotArmEnv

# Update paths for Google Colab
DRIVE_PATH = '/content/drive/MyDrive/RobotArm'
LOG_DIR = os.path.join(DRIVE_PATH, 'logs/ppo_robot_arm')
MODEL_DIR = os.path.join(DRIVE_PATH, 'models/ppo_robot_arm')
EVAL_DIR = os.path.join(DRIVE_PATH, 'eval/ppo_robot_arm')

# Create directories
os.makedirs(LOG_DIR, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(EVAL_DIR, exist_ok=True)

# Training parameters
SEED = 42
TIMESTEPS = 100_000  # Reduced for testing
EP_LEN = 200

def make_env():
    def _init():
        env = RobotArmEnv(xml_path='lowCostRobotArm/robotScene.xml')
        env = TimeLimit(env, max_episode_steps=EP_LEN)
        env = Monitor(env, filename=os.path.join(LOG_DIR, 'monitor.csv'), allow_early_resets=True)
        return env
    return _init

# Create and wrap the environment
env = DummyVecEnv([make_env()])

# Callbacks
eval_callback = EvalCallback(
    env,
    best_model_save_path=MODEL_DIR,
    log_path=EVAL_DIR,
    eval_freq=5000,  # More frequent evaluation for testing
    deterministic=True,
    render=False
)

checkpoint_callback = CheckpointCallback(
    save_freq=10000,  # More frequent saving for testing
    save_path=MODEL_DIR,
    name_prefix='ppo_robot_arm'
)

# Initialize the model with GPU support
model = PPO(
    'MlpPolicy',
    env,
    verbose=1,
    tensorboard_log=LOG_DIR,
    seed=SEED,
    device='cuda'
)

# Train the model
model.learn(
    total_timesteps=TIMESTEPS,
    callback=[eval_callback, checkpoint_callback],
    progress_bar=True
)

# Save the final model
final_model_path = os.path.join(MODEL_DIR, 'ppo_robot_arm_final.zip')
model.save(final_model_path)
print(f'Final model saved to {final_model_path}')

## How to use this notebook

1. Click 'Runtime' -> 'Change runtime type' and select 'GPU' as the hardware accelerator
2. Run the cells in order
3. When prompted, authenticate to mount your Google Drive
4. The training will start automatically and save checkpoints to your Google Drive
5. You can monitor training progress in the output

The trained models and logs will be saved in your Google Drive under the 'RobotArm' folder.