In [45]:
# step_001 - no control at all
# step_002 - stand up reward and parallel feet penalty
# step_003 - step_002 and forward reward

current_step = 'step_003'

In [46]:
!apt install swig cmake ffmpeg xvfb python3-opengl
!pip install pyvirtualdisplay imageio[ffmpeg]

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
python3-opengl is already the newest version (3.1.5+dfsg-1).
swig is already the newest version (4.0.2-1ubuntu1).
cmake is already the newest version (3.22.1-1ubuntu1.22.04.2).
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
xvfb is already the newest version (2:21.1.4-2ubuntu1.7~22.04.15).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


In [47]:
import os

NVIDIA_ICD_CONFIG_PATH = '/usr/share/glvnd/egl_vendor.d/10_nvidia.json'
if not os.path.exists(NVIDIA_ICD_CONFIG_PATH):
  with open(NVIDIA_ICD_CONFIG_PATH, 'w') as f:
    f.write("""{
    "file_format_version" : "1.0.0",
    "ICD" : {
        "library_path" : "libEGL_nvidia.so.0"
    }
}
""")

%env MUJOCO_GL=egl

from pyvirtualdisplay import Display
virtual_display = Display(visible=0, size=(1920, 1080))
virtual_display.start()

env: MUJOCO_GL=egl


<pyvirtualdisplay.display.Display at 0x789db1d9f6b0>

In [48]:
# Prepare to load data from google drive
from google.colab import drive
import datetime

# CONNECT TO GOOGLE DRIVE
gdrive_path = '/content/drive'
drive.mount(gdrive_path)

# DEFINE WORK DIRECTORY
workDir = os.path.join(gdrive_path, 'My Drive', 'OP3_TRAINNING', current_step)
print('WorkDir:', workDir)

log_dir = os.path.join(workDir, datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
print('LogDir:', log_dir)

# create folder if it doesn't exists
if not os.path.exists(log_dir):
  os.makedirs(log_dir)

tf_log_dir = os.path.join(log_dir, 'tensorboard_logs')
print('TfLogDir:', tf_log_dir)

# create folder if it doesn't exists
if not os.path.exists(tf_log_dir):
  os.makedirs(tf_log_dir)

# best models
to_load_path = os.path.join(workDir, 'best')
print('to_load_path:', to_load_path)
algos = ["a2c", "ddpg", "ppo", "sac", "td3"]
for algo in algos:
  algo_load_path = os.path.join(to_load_path, algo)
  print('AlgoLoadPath:', algo_load_path)
  if not os.path.exists(algo_load_path):
    os.makedirs(algo_load_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
WorkDir: /content/drive/My Drive/OP3_TRAINNING/step_003
LogDir: /content/drive/My Drive/OP3_TRAINNING/step_003/20250907-231253
TfLogDir: /content/drive/My Drive/OP3_TRAINNING/step_003/20250907-231253/tensorboard_logs
to_load_path: /content/drive/My Drive/OP3_TRAINNING/step_003/best
AlgoLoadPath: /content/drive/My Drive/OP3_TRAINNING/step_003/best/a2c
AlgoLoadPath: /content/drive/My Drive/OP3_TRAINNING/step_003/best/ddpg
AlgoLoadPath: /content/drive/My Drive/OP3_TRAINNING/step_003/best/ppo
AlgoLoadPath: /content/drive/My Drive/OP3_TRAINNING/step_003/best/sac
AlgoLoadPath: /content/drive/My Drive/OP3_TRAINNING/step_003/best/td3


In [49]:
# clean content folder
import os
import shutil

# location
location = "/content"

# directories
dirs = ["sample_data"]

for dir in dirs:
    path = os.path.join(location, dir)
    try:
        shutil.rmtree(path)
    except OSError as e:
        print("Error: %s : %s" % (path, e.strerror))

Error: /content/sample_data : No such file or directory


In [50]:
# Install Darwin Model
model_path = '/content/op3'
%cd /

if os.path.isdir(model_path):
  print(f"The directory '{model_path}' exists - git pull")
  %cd {model_path}
  !git pull
else:
  print(f"The directory '{model_path}' does not exist - git clone")
  !git clone https://github.com/Gianzanti/op3_model.git {model_path}


/
The directory '/content/op3' exists - git pull
/content/op3
Already up to date.


In [None]:
%cd /
!pip install -e {model_path}

/
Obtaining file:///content/op3
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: op3
  Building editable for op3 (pyproject.toml) ... [?25l[?25hdone
  Created wheel for op3: filename=op3-0.1.11-py3-none-any.whl size=1157 sha256=f092625e16d62ead5107841be2cc5b81f0eddd1ba32ede1ea059b79b77c842cb
  Stored in directory: /tmp/pip-ephem-wheel-cache-gefx2y50/wheels/6e/41/34/9ebb5c3b64ee3d5f174ea5b433d00e8f3478f5497c112a46b1
Successfully built op3
Installing collected packages: op3
  Attempting uninstall: op3
    Found existing installation: op3 0.1.10
    Uninstalling op3-0.1.10:
      Successfully uninstalled op3-0.1.10
Successfully installed op3-0.1.11


In [51]:
# Install RL Zoo
trainner_path = '/content/rl-zoo'
%cd /

if os.path.isdir(trainner_path):
  print(f"The directory '{trainner_path}' exists - git pull")
  %cd {trainner_path}
  !git pull
else:
  print(f"The directory '{trainner_path}' does not exist - git clone")
  !git clone https://github.com/Gianzanti/rl-zoo.git {trainner_path}


/
The directory '/content/rl-zoo' exists - git pull
/content/rl-zoo
Already up to date.


In [None]:
%cd /
!pip install -e {trainner_path}

/
Obtaining file:///content/rl-zoo
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: rl_zoo3
  Building editable for rl_zoo3 (pyproject.toml) ... [?25l[?25hdone
  Created wheel for rl_zoo3: filename=rl_zoo3-2.7.0-0.editable-py3-none-any.whl size=4428 sha256=bbe057a6ccf288f1695e003793fc304d41ec6f5c880c34aa40f4845dbcff8dcd
  Stored in directory: /tmp/pip-ephem-wheel-cache-gzlqsqxd/wheels/2d/0b/3c/be4c09b7d9d2a891b5d1bc1e086a8fe4f4b4f016a0613b625c
Successfully built rl_zoo3
Installing collected packages: rl_zoo3
  Attempting uninstall: rl_zoo3
    Found existing installation: rl_zoo3 2.7.0
    Uninstalling rl_zoo3-2.7.0:
      Successfully uninstalled rl_zoo3-2.7.0
Successfully installed rl_zoo3-2.7.0


In [52]:
%cd {trainner_path}

algos = {
  "a2c": {
    "lr": 7e-4, "dev": "cpu"
  },
  'ddpg': {
    'lr': 1e-3, 'dev': 'cuda'
  },
  'ppo': {
    'lr': 3e-4, 'dev': 'cpu'
  },
  'sac': {
    'lr': 3e-4, 'dev': 'cuda'
  },
  'td3': {
    'lr': 1e-3, 'dev': 'cuda'
  }
}

n_timestep = 5_000_000
# save_freq = 100_000
# eval_freq = 100_000
save_freq = min(100_000, int(n_timestep / 10))
eval_freq = min(200_000, int(n_timestep / 10))
max_episode_steps = 1000
wrapper = [{"gymnasium.wrappers.TimeLimit": {"max_episode_steps": max_episode_steps}}]
n_envs = 16

# weights
keep_alive_reward = 1.0
ctrl_cost_weight = 0.0000
target_distance = 2.5
forward_velocity_weight = 3.0
reach_target_reward = 20.0
knee_flex_reward = 0.0
feet_up_reward = 0.0
not_parallel_penalty = 0.5

# keep_alive_reward: float = 1.0,
# ctrl_cost_weight: float = 1e-3,
# target_distance: float = 100.0,
# forward_velocity_weight: float = 3.0,
# reach_target_reward: float = 100.0,
# knee_flex_reward: float = 1e-3,
# feet_up_reward: float = 1e-3,


for algo, value in algos.items():
  print('Training:', algo)
  config = f'research_config/{algo}.yml'
  best = os.path.join(to_load_path, algo, 'best_model.zip')
  if not os.path.exists(best):
    best = ''
  else:
    best = f'-i {best}'

  train_cmd = f'python3 train.py --algo {algo} --env DarwinOp3-v3 -conf {config} \
-f "{log_dir}" --tensorboard-log "{tf_log_dir}" --save-freq {save_freq} \
--vec-env subproc --eval-freq {eval_freq} --n-eval-envs 1 --eval-episodes 10 \
--env-kwargs keep_alive_reward:{keep_alive_reward} ctrl_cost_weight:{ctrl_cost_weight} \
target_distance:{target_distance} forward_velocity_weight:{forward_velocity_weight} \
reach_target_reward:{reach_target_reward} knee_flex_reward:{knee_flex_reward} \
feet_up_reward:{feet_up_reward} not_parallel_penalty:{not_parallel_penalty} \
--hyperparams n_envs:{n_envs} learning_rate:{value["lr"]} \
n_timesteps:{n_timestep} env_wrapper:"{wrapper}" --device {value["dev"]} {best} -P'

  print(train_cmd)
  !{train_cmd}

  video_cmd = f'python3 -m rl_zoo3.record_video --algo {algo} \
--env DarwinOp3-v3 -n 3000 --load-best -o "{log_dir}" -f "{log_dir}"'
  print(video_cmd)
  !{video_cmd}


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
| rollout/           |          |
|    ep_len_mean     | 214      |
|    ep_rew_mean     | 770      |
| time/              |          |
|    episodes        | 45860    |
|    fps             | 1190     |
|    time_elapsed    | 4075     |
|    total_timesteps | 4850656  |
| train/             |          |
|    actor_loss      | -2.69    |
|    critic_loss     | 0.0153   |
|    learning_rate   | 0.001    |
|    n_updates       | 302540   |
---------------------------------
[2K---------------------------------
| mean_episode/      |          |
|    _pos_x          | 0.691    |
|    _pos_y          | -0.0257  |
|    _pos_z          | 0.281    |
|    _vel_x          | 0.654    |
|    _vel_y          | -0.0115  |
|    _vel_z          | -0.0534  |
|    distance        | 0.715    |
|    p_control       | 0        |
|    p_not_parallel  | 0.0139   |
|    r_feet_up       | 0        |
|    r_forward       | 1.96     |
|    r_health