In [5]:
# Experiment stage selected for this run. Stages recorded so far:
#   step_001 - no control at all
#   step_002 - stand-up reward and parallel-feet penalty
#   step_003 - step_002 plus forward reward
#   step_004 - step_003 plus control penalty, knee-flex reward and foot-up reward
#   step_007 - stand-up reward only

current_step = "step_007"

In [4]:
!apt install swig cmake ffmpeg xvfb python3-opengl
!pip install pyvirtualdisplay imageio[ffmpeg]

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
cmake is already the newest version (3.22.1-1ubuntu1.22.04.2).
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
xvfb is already the newest version (2:21.1.4-2ubuntu1.7~22.04.15).
Suggested packages:
  libgle3 python3-numpy python3-tk swig-doc swig-examples swig4.0-examples
  swig4.0-doc
The following NEW packages will be installed:
  freeglut3 libglu1-mesa python3-opengl swig swig4.0
0 upgraded, 5 newly installed, 0 to remove and 35 not upgraded.
Need to get 1,940 kB of archives.
After this operation, 13.6 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 freeglut3 amd64 2.8.1-6 [74.0 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 libglu1-mesa amd64 9.0.2-1 [145 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 python3-opengl all 3.1.5+dfsg-1 [605 kB]
Get:4 http://archive.ubuntu.com/ubuntu jammy/u

In [6]:
import os

NVIDIA_ICD_CONFIG_PATH = '/usr/share/glvnd/egl_vendor.d/10_nvidia.json'
if not os.path.exists(NVIDIA_ICD_CONFIG_PATH):
  with open(NVIDIA_ICD_CONFIG_PATH, 'w') as f:
    f.write("""{
    "file_format_version" : "1.0.0",
    "ICD" : {
        "library_path" : "libEGL_nvidia.so.0"
    }
}
""")

%env MUJOCO_GL=egl

from pyvirtualdisplay import Display
virtual_display = Display(visible=0, size=(1920, 1080))
virtual_display.start()

env: MUJOCO_GL=egl


<pyvirtualdisplay.display.Display at 0x7f9b1c046870>

In [7]:
# Mount Google Drive and create the directories this run writes to / loads from
from google.colab import drive
import datetime

# Connect to Google Drive
gdrive_path = '/content/drive'
drive.mount(gdrive_path)

# Work directory: one folder per experiment step
workDir = os.path.join(gdrive_path, 'My Drive', 'OP3_TRAINNING', current_step)
print('WorkDir:', workDir)

# Log directory: timestamped so every run gets its own folder
log_dir = os.path.join(workDir, datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
print('LogDir:', log_dir)

# Create the folder if it doesn't exist. exist_ok=True replaces the separate
# exists()-then-makedirs() sequence, which can fail if the folder appears in between.
os.makedirs(log_dir, exist_ok=True)

tf_log_dir = os.path.join(log_dir, 'tensorboard_logs')
print('TfLogDir:', tf_log_dir)

# Create the folder if it doesn't exist
os.makedirs(tf_log_dir, exist_ok=True)

# Best models: one sub-folder per algorithm under <workDir>/best
to_load_path = os.path.join(workDir, 'best')
print('to_load_path:', to_load_path)
algos = ["a2c", "ddpg", "ppo", "sac", "td3"]
for algo in algos:
  algo_load_path = os.path.join(to_load_path, algo)
  print('AlgoLoadPath:', algo_load_path)
  os.makedirs(algo_load_path, exist_ok=True)

Mounted at /content/drive
WorkDir: /content/drive/My Drive/OP3_TRAINNING/step_007
LogDir: /content/drive/My Drive/OP3_TRAINNING/step_007/20250913-131810
TfLogDir: /content/drive/My Drive/OP3_TRAINNING/step_007/20250913-131810/tensorboard_logs
to_load_path: /content/drive/My Drive/OP3_TRAINNING/step_007/best
AlgoLoadPath: /content/drive/My Drive/OP3_TRAINNING/step_007/best/a2c
AlgoLoadPath: /content/drive/My Drive/OP3_TRAINNING/step_007/best/ddpg
AlgoLoadPath: /content/drive/My Drive/OP3_TRAINNING/step_007/best/ppo
AlgoLoadPath: /content/drive/My Drive/OP3_TRAINNING/step_007/best/sac
AlgoLoadPath: /content/drive/My Drive/OP3_TRAINNING/step_007/best/td3


In [8]:
# Clean the content folder: delete the listed sub-directories of `location`
import os
import shutil

# Parent folder to clean
location = "/content"

# Sub-directories to delete
dirs = ["sample_data"]

# The loop variable is deliberately not named `dir`: in a notebook it would stay
# in the global namespace and shadow the builtin dir() for every later cell.
for dir_name in dirs:
    path = os.path.join(location, dir_name)
    try:
        shutil.rmtree(path)
    except OSError as e:
        # Best effort: report the failure and continue with the next folder
        print("Error: %s : %s" % (path, e.strerror))

In [16]:
# Install Darwin Model
model_path = '/content/op3'
%cd /

if os.path.isdir(model_path):
  print(f"The directory '{model_path}' exists - git pull")
  %cd {model_path}
  !git pull
else:
  print(f"The directory '{model_path}' does not exist - git clone")
  !git clone https://github.com/Gianzanti/op3_model.git {model_path}


/
The directory '/content/op3' exists - git pull
/content/op3
remote: Enumerating objects: 23, done.[K
remote: Counting objects: 100% (23/23), done.[K
remote: Compressing objects: 100% (5/5), done.[K
remote: Total 12 (delta 7), reused 12 (delta 7), pack-reused 0 (from 0)[K
Unpacking objects: 100% (12/12), 1.39 KiB | 142.00 KiB/s, done.
From https://github.com/Gianzanti/op3_model
   a5eb87d..678b96d  main       -> origin/main
Updating a5eb87d..678b96d
Fast-forward
 pyproject.toml            |  2 [32m+[m[31m-[m
 src/op3/__init__.py       |  1 [32m+[m
 src/op3/env/callbacks.py  | 87 [32m++[m[31m---------------------------------------------[m
 src/op3/env/darwin_op3.py | 16 [32m++++++++[m[31m-[m
 tests/test_environment.py |  3 [32m+[m[31m-[m
 uv.lock                   |  2 [32m+[m[31m-[m
 6 files changed, 22 insertions(+), 89 deletions(-)


In [17]:
%cd /
!pip install -e {model_path}

/
Obtaining file:///content/op3
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: op3
  Building editable for op3 (pyproject.toml) ... [?25l[?25hdone
  Created wheel for op3: filename=op3-0.1.13-py3-none-any.whl size=1157 sha256=89589c092e5817d0f4e02b3c78ec0f3f388bc83956dbb7c26d3083e375759ab4
  Stored in directory: /tmp/pip-ephem-wheel-cache-5ef7v6ss/wheels/6e/41/34/9ebb5c3b64ee3d5f174ea5b433d00e8f3478f5497c112a46b1
Successfully built op3
Installing collected packages: op3
  Attempting uninstall: op3
    Found existing installation: op3 0.1.12
    Uninstalling op3-0.1.12:
      Successfully uninstalled op3-0.1.12
Successfully installed op3-0.1.13


In [11]:
# Install RL Zoo
trainner_path = '/content/rl-zoo'
%cd /

if os.path.isdir(trainner_path):
  print(f"The directory '{trainner_path}' exists - git pull")
  %cd {trainner_path}
  !git pull
else:
  print(f"The directory '{trainner_path}' does not exist - git clone")
  !git clone https://github.com/Gianzanti/rl-zoo.git {trainner_path}


/
The directory '/content/rl-zoo' does not exist - git clone
Cloning into '/content/rl-zoo'...
remote: Enumerating objects: 3683, done.[K
remote: Counting objects: 100% (43/43), done.[K
remote: Compressing objects: 100% (24/24), done.[K
remote: Total 3683 (delta 33), reused 19 (delta 19), pack-reused 3640 (from 3)[K
Receiving objects: 100% (3683/3683), 8.53 MiB | 27.92 MiB/s, done.
Resolving deltas: 100% (2252/2252), done.


In [12]:
%cd /
!pip install -e {trainner_path}

/
Obtaining file:///content/rl-zoo
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Collecting sb3_contrib<3.0,>=2.7.0 (from rl_zoo3==2.7.0)
  Downloading sb3_contrib-2.7.0-py3-none-any.whl.metadata (4.1 kB)
Collecting huggingface_sb3<4.0,>=3.0 (from rl_zoo3==2.7.0)
  Downloading huggingface_sb3-3.0-py3-none-any.whl.metadata (6.3 kB)
Collecting optuna>=3.0 (from rl_zoo3==2.7.0)
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting pytablewriter~=1.2 (from rl_zoo3==2.7.0)
  Downloading pytablewriter-1.2.1-py3-none-any.whl.metadata (38 kB)
Collecting shimmy~=2.0 (from rl_zoo3==2.7.0)
  Downloading Shimmy-2.0.0-py3-none-any.whl.metadata (3.5 kB)
Collecting colorlog (from optuna>=3.0->rl_zoo3==2.7.0)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collec

In [18]:
%cd {trainner_path}

algos = {
  # "a2c": {
  #   "lr": 7e-4, "dev": "cpu"
  # },
  # 'ddpg': {
  #   'lr': 1e-3, 'dev': 'cuda'
  # },
  'ppo': {
    'lr': 3e-4, 'dev': 'cpu'
  },
  # 'sac': {
  #   'lr': 3e-4, 'dev': 'cuda'
  # },
  # 'td3': {
  #   'lr': 1e-3, 'dev': 'cuda'
  # }
}

n_timestep = 3_000_000
# save_freq = 100_000
# eval_freq = 100_000
save_freq = min(100_000, int(n_timestep / 10))
eval_freq = min(200_000, int(n_timestep / 10))
max_episode_steps = 1000
wrapper = [{"gymnasium.wrappers.TimeLimit": {"max_episode_steps": max_episode_steps}}]
n_envs = 20
n_eval_envs = 3
eval_episodes = 30


# weights
keep_alive_weight = 1.0
control_weight = 0.00 #1e-3
target_distance = 5.5
velocity_weight = 0.00 #3.0
reach_target_reward = 20.0
knee_flex_weight = 0.00 #1e-3
feet_up_weight = 0.00 #1e-3
feet_misalign_weight = 0.00 #0.5
max_timestep = 500


# keep_alive_weight: float = 1.0,
# control_weight: float = 1e-3,
# target_distance: float = 100.0,
# velocity_weight: float = 3.0,
# reach_target_reward: float = 100.0,
# knee_flex_weight: float = 1e-3,
# feet_up_weight: float = 1e-3,
# feet_misalign_weight: float = 0.05,

for algo, value in algos.items():
  print('Training:', algo)
  config = f'research_config/{algo}.yml'
  best = os.path.join(to_load_path, algo, 'best_model.zip')
  if not os.path.exists(best):
    best = ''
  else:
    best = f'-i {best}'

  train_cmd = f'python3 train.py --algo {algo} --env DarwinOp3-v3 -conf {config} \
-f "{log_dir}" --tensorboard-log "{tf_log_dir}" --save-freq {save_freq} \
--vec-env subproc --eval-freq {eval_freq} --n-eval-envs {n_eval_envs} --eval-episodes {eval_episodes} \
--env-kwargs keep_alive_weight:{keep_alive_weight} control_weight:{control_weight} \
target_distance:{target_distance} velocity_weight:{velocity_weight} \
reach_target_reward:{reach_target_reward} knee_flex_weight:{knee_flex_weight} \
feet_up_weight:{feet_up_weight} feet_misalign_weight:{feet_misalign_weight} \
max_timestep:{max_timestep} --hyperparams n_envs:{n_envs} learning_rate:{value["lr"]} \
n_timesteps:{n_timestep} env_wrapper:"{wrapper}" --device {value["dev"]} {best} -P'

  print(train_cmd)
  !{train_cmd}

  video_cmd = f'python3 -m rl_zoo3.record_video --algo {algo} \
--env DarwinOp3-v3 -n 3000 --load-best -o "{log_dir}" -f "{log_dir}"'
  print(video_cmd)
  !{video_cmd}


/content/rl-zoo
Training: ppo
python3 train.py --algo ppo --env DarwinOp3-v3 -conf research_config/ppo.yml -f "/content/drive/My Drive/OP3_TRAINNING/step_007/20250913-131810" --tensorboard-log "/content/drive/My Drive/OP3_TRAINNING/step_007/20250913-131810/tensorboard_logs" --save-freq 100000 --vec-env subproc --eval-freq 200000 --n-eval-envs 3 --eval-episodes 30 --env-kwargs keep_alive_weight:1.0 control_weight:0.0 target_distance:5.5 velocity_weight:0.0 reach_target_reward:20.0 knee_flex_weight:0.0 feet_up_weight:0.0 feet_misalign_weight:0.0 max_timestep:1000 --hyperparams n_envs:20 learning_rate:0.0003 n_timesteps:3000000 --device cpu  -P
2025-09-13 20:31:29.016656: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1757795489.036622  117366 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN wh