In [1]:
# Experiment steps; each label names one reward configuration:
# step_001 - no control at all
# step_002 - stand up reward and parallel feet penalty
# step_003 - step_002 and forward reward
# step_004 - step_003 and control penalty, knee flex reward and foot up reward
# step_005 - stand up reward only

# Label of the step trained in this run. It is used as the name of the
# per-step sub-folder of the Google Drive work directory.
current_step = 'step_005'

In [2]:
!apt install swig cmake ffmpeg xvfb python3-opengl
!pip install pyvirtualdisplay imageio[ffmpeg]

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
cmake is already the newest version (3.22.1-1ubuntu1.22.04.2).
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
xvfb is already the newest version (2:21.1.4-2ubuntu1.7~22.04.15).
Suggested packages:
  libgle3 python3-numpy python3-tk swig-doc swig-examples swig4.0-examples
  swig4.0-doc
The following NEW packages will be installed:
  freeglut3 libglu1-mesa python3-opengl swig swig4.0
0 upgraded, 5 newly installed, 0 to remove and 35 not upgraded.
Need to get 1,940 kB of archives.
After this operation, 13.6 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 freeglut3 amd64 2.8.1-6 [74.0 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 libglu1-mesa amd64 9.0.2-1 [145 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 python3-opengl all 3.1.5+dfsg-1 [605 kB]
Get:4 http://archive.ubuntu.com/ubuntu jammy/u

In [3]:
import os

NVIDIA_ICD_CONFIG_PATH = '/usr/share/glvnd/egl_vendor.d/10_nvidia.json'
if not os.path.exists(NVIDIA_ICD_CONFIG_PATH):
  with open(NVIDIA_ICD_CONFIG_PATH, 'w') as f:
    f.write("""{
    "file_format_version" : "1.0.0",
    "ICD" : {
        "library_path" : "libEGL_nvidia.so.0"
    }
}
""")

%env MUJOCO_GL=egl

from pyvirtualdisplay import Display
virtual_display = Display(visible=0, size=(1920, 1080))
virtual_display.start()

env: MUJOCO_GL=egl


<pyvirtualdisplay.display.Display at 0x7d57f90afd10>

In [4]:
# Prepare to load data from google drive
from google.colab import drive
import datetime

# CONNECT TO GOOGLE DRIVE
gdrive_path = '/content/drive'
drive.mount(gdrive_path)

# DEFINE WORK DIRECTORY (one folder per training step)
workDir = os.path.join(gdrive_path, 'My Drive', 'OP3_TRAINNING', current_step)
print('WorkDir:', workDir)

# Every run logs into its own timestamped folder below the work directory.
log_dir = os.path.join(workDir, datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
print('LogDir:', log_dir)

# exist_ok=True creates missing parents and does nothing when the folder is
# already there, so no separate (racy) existence check is needed. A regular
# file sitting at the path now raises instead of being silently accepted.
os.makedirs(log_dir, exist_ok=True)

tf_log_dir = os.path.join(log_dir, 'tensorboard_logs')
print('TfLogDir:', tf_log_dir)
os.makedirs(tf_log_dir, exist_ok=True)

# best models: one sub-folder per algorithm
to_load_path = os.path.join(workDir, 'best')
print('to_load_path:', to_load_path)
algos = ["a2c", "ddpg", "ppo", "sac", "td3"]
for algo in algos:
  algo_load_path = os.path.join(to_load_path, algo)
  print('AlgoLoadPath:', algo_load_path)
  os.makedirs(algo_load_path, exist_ok=True)

Mounted at /content/drive
WorkDir: /content/drive/My Drive/OP3_TRAINNING/step_005
LogDir: /content/drive/My Drive/OP3_TRAINNING/step_005/20250912-233108
TfLogDir: /content/drive/My Drive/OP3_TRAINNING/step_005/20250912-233108/tensorboard_logs
to_load_path: /content/drive/My Drive/OP3_TRAINNING/step_005/best
AlgoLoadPath: /content/drive/My Drive/OP3_TRAINNING/step_005/best/a2c
AlgoLoadPath: /content/drive/My Drive/OP3_TRAINNING/step_005/best/ddpg
AlgoLoadPath: /content/drive/My Drive/OP3_TRAINNING/step_005/best/ppo
AlgoLoadPath: /content/drive/My Drive/OP3_TRAINNING/step_005/best/sac
AlgoLoadPath: /content/drive/My Drive/OP3_TRAINNING/step_005/best/td3


In [5]:
# clean content folder
import os
import shutil

# location
location = "/content"

# directories
dirs = ["sample_data"]

# Best-effort removal: a failure is reported and the loop moves on.
# The loop variable is deliberately not called `dir`, so the built-in dir()
# stays usable in every later cell of the kernel session.
for dir_name in dirs:
    path = os.path.join(location, dir_name)
    try:
        shutil.rmtree(path)
    except OSError as e:
        print("Error: %s : %s" % (path, e.strerror))

In [6]:
# Install Darwin Model
model_path = '/content/op3'
%cd /

if os.path.isdir(model_path):
  print(f"The directory '{model_path}' exists - git pull")
  %cd {model_path}
  !git pull
else:
  print(f"The directory '{model_path}' does not exist - git clone")
  !git clone https://github.com/Gianzanti/op3_model.git {model_path}


/
The directory '/content/op3' does not exist - git clone
Cloning into '/content/op3'...
remote: Enumerating objects: 143, done.[K
remote: Counting objects: 100% (143/143), done.[K
remote: Compressing objects: 100% (106/106), done.[K
remote: Total 143 (delta 44), reused 132 (delta 33), pack-reused 0 (from 0)[K
Receiving objects: 100% (143/143), 14.62 MiB | 15.74 MiB/s, done.
Resolving deltas: 100% (44/44), done.


In [7]:
%cd /
!pip install -e {model_path}

/
Obtaining file:///content/op3
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Collecting mujoco>=3.3.5 (from op3==0.1.12)
  Downloading mujoco-3.3.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting stable-baselines3 (from op3==0.1.12)
  Downloading stable_baselines3-2.7.0-py3-none-any.whl.metadata (4.8 kB)
Collecting glfw (from mujoco>=3.3.5->op3==0.1.12)
  Downloading glfw-2.10.0-py2.py27.py3.py30.py31.py32.py33.py34.py35.py36.py37.py38.p39.p310.p311.p312.p313-none-manylinux_2_28_x86_64.whl.metadata (5.4 kB)
Downloading mujoco-3.3.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.7 MB)
[2K   [90m━━━━

In [8]:
# Install RL Zoo
trainner_path = '/content/rl-zoo'
%cd /

if os.path.isdir(trainner_path):
  print(f"The directory '{trainner_path}' exists - git pull")
  %cd {trainner_path}
  !git pull
else:
  print(f"The directory '{trainner_path}' does not exist - git clone")
  !git clone https://github.com/Gianzanti/rl-zoo.git {trainner_path}


/
The directory '/content/rl-zoo' does not exist - git clone
Cloning into '/content/rl-zoo'...
remote: Enumerating objects: 3683, done.[K
remote: Counting objects: 100% (43/43), done.[K
remote: Compressing objects: 100% (24/24), done.[K
remote: Total 3683 (delta 33), reused 19 (delta 19), pack-reused 3640 (from 3)[K
Receiving objects: 100% (3683/3683), 8.56 MiB | 4.61 MiB/s, done.
Resolving deltas: 100% (2249/2249), done.


In [9]:
%cd /
!pip install -e {trainner_path}

/
Obtaining file:///content/rl-zoo
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Collecting sb3_contrib<3.0,>=2.7.0 (from rl_zoo3==2.7.0)
  Downloading sb3_contrib-2.7.0-py3-none-any.whl.metadata (4.1 kB)
Collecting huggingface_sb3<4.0,>=3.0 (from rl_zoo3==2.7.0)
  Downloading huggingface_sb3-3.0-py3-none-any.whl.metadata (6.3 kB)
Collecting optuna>=3.0 (from rl_zoo3==2.7.0)
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting pytablewriter~=1.2 (from rl_zoo3==2.7.0)
  Downloading pytablewriter-1.2.1-py3-none-any.whl.metadata (38 kB)
Collecting shimmy~=2.0 (from rl_zoo3==2.7.0)
  Downloading Shimmy-2.0.0-py3-none-any.whl.metadata (3.5 kB)
Collecting colorlog (from optuna>=3.0->rl_zoo3==2.7.0)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collec

In [10]:
%cd {trainner_path}

algos = {
  "a2c": {
    "lr": 7e-4, "dev": "cpu"
  },
  'ddpg': {
    'lr': 1e-3, 'dev': 'cuda'
  },
  'ppo': {
    'lr': 3e-4, 'dev': 'cpu'
  },
  'sac': {
    'lr': 3e-4, 'dev': 'cuda'
  },
  'td3': {
    'lr': 1e-3, 'dev': 'cuda'
  }
}

n_timestep = 3_000_000
# save_freq = 100_000
# eval_freq = 100_000
save_freq = min(100_000, int(n_timestep / 10))
eval_freq = min(200_000, int(n_timestep / 10))
max_episode_steps = 1000
wrapper = [{"gymnasium.wrappers.TimeLimit": {"max_episode_steps": max_episode_steps}}]
n_envs = 16

# weights
keep_alive_weight = 1.0
control_weight = 0.00 #1e-3
target_distance = 5.5
velocity_weight = 0.00 #3.0
reach_target_reward = 20.0
knee_flex_weight = 0.00 #1e-3
feet_up_weight = 0.00 #1e-3
feet_misalign_weight = 0.00 #0.5


# keep_alive_weight: float = 1.0,
# control_weight: float = 1e-3,
# target_distance: float = 100.0,
# velocity_weight: float = 3.0,
# reach_target_reward: float = 100.0,
# knee_flex_weight: float = 1e-3,
# feet_up_weight: float = 1e-3,
# feet_misalign_weight: float = 0.05,

for algo, value in algos.items():
  print('Training:', algo)
  config = f'research_config/{algo}.yml'
  best = os.path.join(to_load_path, algo, 'best_model.zip')
  if not os.path.exists(best):
    best = ''
  else:
    best = f'-i {best}'

  train_cmd = f'python3 train.py --algo {algo} --env DarwinOp3-v3 -conf {config} \
-f "{log_dir}" --tensorboard-log "{tf_log_dir}" --save-freq {save_freq} \
--vec-env subproc --eval-freq {eval_freq} --n-eval-envs 3 --eval-episodes 10 \
--env-kwargs keep_alive_weight:{keep_alive_weight} control_weight:{control_weight} \
target_distance:{target_distance} velocity_weight:{velocity_weight} \
reach_target_reward:{reach_target_reward} knee_flex_weight:{knee_flex_weight} \
feet_up_weight:{feet_up_weight} feet_misalign_weight:{feet_misalign_weight} \
--hyperparams n_envs:{n_envs} learning_rate:{value["lr"]} \
n_timesteps:{n_timestep} env_wrapper:"{wrapper}" --device {value["dev"]} {best} -P'

  print(train_cmd)
  !{train_cmd}

  video_cmd = f'python3 -m rl_zoo3.record_video --algo {algo} \
--env DarwinOp3-v3 -n 3000 --load-best -o "{log_dir}" -f "{log_dir}"'
  print(video_cmd)
  !{video_cmd}


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
| rollout/           |          |
|    ep_len_mean     | 210      |
|    ep_rew_mean     | 209      |
| time/              |          |
|    episodes        | 44380    |
|    fps             | 1078     |
|    time_elapsed    | 1589     |
|    total_timesteps | 1713520  |
| train/             |          |
|    actor_loss      | -3.27    |
|    critic_loss     | 0.0298   |
|    learning_rate   | 0.001    |
|    n_updates       | 85175    |
---------------------------------
[2K---------------------------------
| mean_episode/      |          |
|    _pos_x          | -0.00963 |
|    _pos_y          | -0.077   |
|    _pos_z          | 0.283    |
|    _vel_x          | 0.0686   |
|    _vel_y          | -0.0547  |
|    _vel_z          | -0.0365  |
|    distance        | 0.0981   |
|    p_control       | 0        |
|    p_not_parallel  | 0        |
|    r_feet_up       | 0        |
|    r_forward       | 0        |
|    r_health