# 🧠 RLHF Pipeline: Trajectory Generation, Labeling, and Training

In [1]:
# Step 1: Set up imports and environment
import os
import sys
sys.path.append(os.path.abspath("."))

from training.trainer import RLHFTrainer

trainer = RLHFTrainer()

pybullet build time: Sep  3 2024 12:54:04


Version = 4.1 ATI-6.1.13
Vendor = ATI Technologies Inc.
Renderer = AMD Radeon Pro 5300M OpenGL Engine
b3Printf: Selected demo: Physics Server
startThreads creating 1 threads.
starting thread 0
started thread 0 
MotionThreadFunc thread started


2025-05-09 08:04:08.102 python[75208:7091495] +[IMKClient subclass]: chose IMKClient_Modern
2025-05-09 08:04:08.102 python[75208:7091495] +[IMKInputSession subclass]: chose IMKInputSession_Modern


In [None]:
import numpy as np
import cv2
import pybullet as p

# Requires opencv-python version less than or equal to 4.10.0.82
def save_trajectory_video_and_data(env, trajectory, filename_prefix):
    width, height = 320, 240
    video_path = f"data/clips/{filename_prefix}.mp4"
    data_path = f"data/trajectories/{filename_prefix}.npz"

    # Setup PyBullet camera
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_writer = cv2.VideoWriter(video_path, fourcc, 30.0, (width, height))

    env.reset()
    for i in range(len(trajectory['actions'])):
        # print(i)
        p.stepSimulation()
        view_matrix = p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=[0.5, 0, 0.5],
                                                          distance=1.0,
                                                          yaw=50,
                                                          pitch=-35,
                                                          roll=0,
                                                          upAxisIndex=2)
        proj_matrix = p.computeProjectionMatrixFOV(fov=60,
                                                   aspect=width/height,
                                                   nearVal=0.1,
                                                   farVal=100.0)
        (_, _, px, _, _) = p.getCameraImage(width=width,
                                            height=height,
                                            viewMatrix=view_matrix,
                                            projectionMatrix=proj_matrix,
                                            renderer=p.ER_BULLET_HARDWARE_OPENGL)
        frame = np.reshape(px, (height, width, 4))[:, :, :3].astype(np.uint8)
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        video_writer.write(frame)
    video_writer.release()
    
    # Convert to H.264
    h264_path = video_path.replace(".mp4", "_h264.mp4")
    os.system(
        f"ffmpeg -y -i {video_path} -vcodec libx264 -crf 23 {h264_path}"
    )
    os.remove(video_path)
    os.rename(h264_path, video_path)

    np.savez(data_path, obs=trajectory['observations'], act=trajectory['actions'])

os.makedirs("data/clips", exist_ok=True)
os.makedirs("data/trajectories", exist_ok=True)

for i in range(2):
    # print(i)
    traj = trainer.ppo_trainer.collect_trajectory()
    save_trajectory_video_and_data(trainer.env, traj, f"left_clip_{i:03d}" if i % 2 == 0 else f"right_clip_{i:03d}")

2025-05-09 08:04:23.604 python[75208:7091495] _TIPropertyValueIsValid called with 4 on nil context!
2025-05-09 08:04:23.604 python[75208:7091495] imkxpc_getApplicationProperty:reply: called with incorrect property value 4, bailing.
2025-05-09 08:04:23.604 python[75208:7091495] Text input context does not respond to _valueForTIProperty:
2025-05-09 08:04:23.604 python[75208:7091495] _TIPropertyValueIsValid called with 4 on nil context!
2025-05-09 08:04:23.605 python[75208:7091495] imkxpc_getApplicationProperty:reply: called with incorrect property value 4, bailing.
2025-05-09 08:04:23.605 python[75208:7091495] Text input context does not respond to _valueForTIProperty:
ffmpeg version 7.1.1 Copyright (c) 2000-2025 the FFmpeg developers
  built with Apple clang version 16.0.0 (clang-1600.0.26.6)
  configuration: --prefix=/usr/local/Cellar/ffmpeg/7.1.1_2 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gn

: 

In [7]:
# ✅ Step 3: Launch labeling UI (Streamlit)
import subprocess
subprocess.run(["streamlit", "run", "scripts/visualize_trajectories.py"])

# !streamlit run scripts/visualize_trajectories.py


  You can now view your Streamlit app in your browser.

  Local URL: http://localhost:8501
  Network URL: http://192.168.1.113:8501

  For better performance, install the Watchdog module:

  $ xcode-select --install
  $ pip install watchdog
            
  Stopping...


KeyboardInterrupt: 

In [5]:
# ✅ Step 4: Convert labeled preferences to training data
!python training/human_feedback_collector.py

Saved 1 trajectory preferences to data/reward_training_data.pt


In [8]:
# ✅ Step 5: Train reward model and PPO using collected preferences
!python training/trainer.py

pybullet build time: Sep  3 2024 12:54:04
Version = 4.1 ATI-6.1.13
Vendor = ATI Technologies Inc.
Renderer = AMD Radeon Pro 5300M OpenGL Engine
b3Printf: Selected demo: Physics Server
startThreads creating 1 threads.
starting thread 0
started thread 0 
MotionThreadFunc thread started
2025-05-08 22:03:41.347 python[71723:6980182] +[IMKClient subclass]: chose IMKClient_Modern
2025-05-08 22:03:41.347 python[71723:6980182] +[IMKInputSession subclass]: chose IMKInputSession_Modern

Iteration 1
Collecting human preferences...
2025-05-08 22:03:43.068 
  command:

    streamlit run training/trainer.py [ARGUMENTS]
2025-05-08 22:03:43.069 Session state does not function when running a script without `streamlit run`
Policy Loss: -0.0439, Value Loss: 290.7277, Entropy: 1.4194

Iteration 2
2025-05-08 22:03:45.261 python[71723:6980182] _TIPropertyValueIsValid called with 4 on nil context!
2025-05-08 22:03:45.261 python[71723:6980182] imkxpc_getApplicationProperty:reply: called with incorrect propert