In [1]:
!pip install swig
!pip install gymnasium[box2d]
!pip install box2d

Collecting swig
  Downloading swig-4.4.0-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (3.5 kB)
Downloading swig-4.4.0-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.9 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.9 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m72.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: swig
Successfully installed swig-4.4.0
Collecting box2d-py==2.3.5 (from gymnasium[box2d])
  Downloading box2d-py-2.3.5.tar.gz (374 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m374.4/374.4 kB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: box2d-py
  Building wheel for box2d-py (setup.py) ... [?25l[?25hdone
  Created wheel for box2d-py: filename=box2d_py-2.3.5-cp312-cp312-linux_x86_64.whl size=2399005 sha256=544

In [2]:
import gymnasium as gym
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

import imageio
from IPython.display import HTML
from base64 import b64encode


In [3]:
# Pick the compute backend once: CUDA when a GPU is visible, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cpu')

In [4]:
class ActorCritic(nn.Module):
  """Shared-trunk actor-critic network for a discrete action space.

  A two-layer ReLU MLP (128 units each) feeds two linear heads:
  ``policy`` (one logit per action) and ``value`` (a single scalar).

  Args:
    obs_dim: Size of the observation vector.
    act_dim: Number of discrete actions.
  """

  def __init__(self, obs_dim, act_dim):
    super().__init__()
    # Attribute names are part of the state_dict keys; keep them stable so
    # previously saved weights still load.
    self.net = nn.Sequential(
      nn.Linear(obs_dim, 128),
      nn.ReLU(),
      nn.Linear(128, 128),
      nn.ReLU()
    )
    self.policy = nn.Linear(128, act_dim)
    self.value = nn.Linear(128, 1)

  def forward(self, x):
    """Return ``(logits, value)`` for a batch of observations.

    Args:
      x: Float tensor whose last dimension is ``obs_dim``.

    Returns:
      Tuple of the policy-head output (last dim ``act_dim``) and the
      value-head output (last dim 1).
    """
    x = self.net(x)
    return self.policy(x), self.value(x)

  def step(self, state):
    """Sample one action for a single observation.

    Args:
      state: One observation (list, NumPy array or tensor) of length
        ``obs_dim``.

    Returns:
      Tuple ``(action, log_prob, value)`` of plain Python scalars: the sampled
      action index, its log-probability under the current policy, and the
      value-head estimate.
    """
    # Use the device the weights actually live on instead of a notebook-level
    # global, so the method works regardless of where the model was moved.
    param_device = next(self.parameters()).device
    obs = torch.as_tensor(state, dtype=torch.float32, device=param_device).unsqueeze(0)

    # Only scalars leave this method, so no autograd graph is needed.
    with torch.no_grad():
      logits, value = self(obs)
      # Passing logits directly avoids an explicit softmax followed by a log.
      dist = torch.distributions.Categorical(logits=logits)
      action = dist.sample()
      log_prob = dist.log_prob(action)

    return action.item(), log_prob.item(), value.item()

In [5]:
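# NOTE: disabled legacy helper, kept for reference only. Nothing in this cell
# runs; `run_and_play_video` (defined later in this notebook) is what gets called.
# def play_colab(model, episodes=1):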
#   env = gym.make("LunarLander-v3", render_mode="rgb_array")
#   frames = []
#   final_reward = 0
#   final_obs = None

#   for ep in range(episodes):
#     state, _ = env.reset()
#     done = False
#     total_reward = 0

#     while not done:
#       st = torch.tensor(state, dtype=torch.float32).unsqueeze(0).to(device)

#       logits, _ = model(st)
#       probs = torch.softmax(logits, dim=-1)
#       action = torch.argmax(probs, dim=-1).item()

#       state, reward, term, trunc, _ = env.step(action)
#       total_reward += reward
#       done = term or trunc

#       frames.append(env.render())

#       if done:
#         final_obs = state
#         final_reward = total_reward

#   env.close()

#   # ----------- Success Evaluation -----------
#   def is_soft_landing(obs):
#     x, y, vx, vy, angle, ang_vel, l1, l2 = obs
#     return (
#       l1 == 1 and l2 == 1 and
#       abs(vx) < 0.4 and
#       abs(vy) < 0.4 and
#       abs(angle) < 0.4
#     )

#   # ----------- Video export -----------
#   import imageio, base64
#   from IPython.display import HTML

#   imageio.mimsave("lander.mp4", frames, fps=60)

#   mp4 = open("lander.mp4",'rb').read()
#   data_url = "data:video/mp4;base64," + base64.b64encode(mp4).decode()
#   return HTML(f"<video width=480 controls><source src='{data_url}' type='video/mp4'></video>")


In [6]:
from PIL import Image, ImageDraw, ImageFont
import numpy as np


def _draw_vertical_gauge(draw, x, y_top, y_bottom, label, label_dx, marker_y):
  """Draw a labelled vertical scale with six ticks and a marker at ``marker_y``."""
  draw.text((x - label_dx, y_top - 30), label, fill=(200,255,200))
  draw.line((x, y_top, x, y_bottom), fill=(150,150,150), width=3)

  for i in range(6):
    ytick = y_top + i * (y_bottom - y_top) / 5
    draw.line((x - 12, ytick, x + 12, ytick), fill=(150,150,150), width=2)

  draw.line((x - 22, marker_y, x + 22, marker_y), fill=(200,255,200), width=4)


def _draw_horizontal_gauge(draw, y, x_left, x_right, label, marker_x):
  """Draw a labelled horizontal scale with six ticks and a marker at ``marker_x``."""
  draw.text((x_left, y - 30), label, fill=(200,255,200))
  draw.line((x_left, y, x_right, y), fill=(150,150,150), width=3)

  for i in range(6):
    xtick = x_left + i * (x_right - x_left) / 5
    draw.line((xtick, y - 12, xtick, y + 12), fill=(150,150,150), width=2)

  draw.line((marker_x, y - 22, marker_x, y + 22), fill=(200,255,200), width=4)


def draw_instrument_panel(state, action, base_frame):
  """Attach a 250 px instrument panel (HUD) below a rendered frame.

  The panel contains an attitude disc rotated by the lander angle, vertical
  and horizontal speed gauges, an altitude gauge, three engine indicators and
  a LANDED lamp.

  Args:
    state: Sequence of exactly 8 values, unpacked as
      ``(x, y, vx, vy, angle, ang_vel, l1, l2)``.
    action: Action index; 1, 2 and 3 light the LEFT, MAIN and RIGHT
      indicators respectively, any other value lights none.
    base_frame: Image array accepted by ``PIL.Image.fromarray``.

  Returns:
    ``numpy.ndarray`` of the composed RGB image: same width as the input
    frame and 250 px taller.
  """
  x, y, vx, vy, angle, ang_vel, l1, l2 = state

  # "Landed" = both leg-contact flags set and the craft is slow and upright.
  landed = (l1 == 1 and l2 == 1 and abs(vx) < 0.4 and abs(vy) < 0.4 and abs(angle) < 0.4)

  frame = Image.fromarray(base_frame)
  W, H = frame.size

  panel_h = 250
  out = Image.new("RGB", (W, H + panel_h), (18, 18, 18))
  out.paste(frame, (0, 0))
  draw = ImageDraw.Draw(out)

  # -------------------------------------------------
  # 1. Artificial horizon
  # -------------------------------------------------
  radius = 75
  cx = W // 2
  cy = H + 70

  horizon = Image.new("RGB", (2*radius, 2*radius), (0, 0, 0))
  hdraw = ImageDraw.Draw(horizon)

  hdraw.rectangle((0, 0, 2*radius, radius), fill=(40, 90, 180))        # sky
  hdraw.rectangle((0, radius, 2*radius, 2*radius), fill=(110, 80, 50))  # ground
  hdraw.line((0, radius, 2*radius, radius), fill=(255,255,255), width=4)

  # `angle` is treated as radians here; PIL's rotate() takes degrees.
  tilt_deg = angle * 180 / np.pi
  rotated = horizon.rotate(tilt_deg, resample=Image.BICUBIC)

  # Circular mask so only the disc (not its square corners) is pasted.
  mask = Image.new("L", (2*radius, 2*radius), 0)
  ImageDraw.Draw(mask).ellipse((0, 0, 2*radius, 2*radius), fill=255)

  out.paste(rotated, (cx - radius, cy - radius), mask)

  draw.ellipse((cx-radius-4, cy-radius-4, cx+radius+4, cy+radius+4),
               outline=(220,220,220), width=4)

  # Velocities are amplified before display and clamped to +/- speed_limit
  # so the markers stay on their scales.
  speed_gain = 3.0
  speed_limit = 5

  # -------------------------------------------------
  # 2. Vertical speed
  # -------------------------------------------------
  vs_y1 = H + 40
  vs_y2 = H + 200

  vy_clamped = np.clip(vy * speed_gain, -speed_limit, speed_limit)
  # Image y grows downward, so measure from the bottom of the scale: a
  # positive (upward) vy moves the marker up, consistent with the ALT gauge.
  vs_marker_y = vs_y2 - (vy_clamped + speed_limit) * (vs_y2 - vs_y1) / (2 * speed_limit)
  _draw_vertical_gauge(draw, 70, vs_y1, vs_y2, "VERT SPD", 30, vs_marker_y)

  # -------------------------------------------------
  # 3. Horizontal speed
  # -------------------------------------------------
  hs_x1 = 150
  hs_x2 = W - 150

  vx_clamped = np.clip(vx * speed_gain, -speed_limit, speed_limit)
  hs_marker_x = hs_x1 + (vx_clamped + speed_limit) * (hs_x2 - hs_x1) / (2 * speed_limit)
  _draw_horizontal_gauge(draw, H + 225, hs_x1, hs_x2, "HORZ SPD", hs_marker_x)

  # -------------------------------------------------
  # 4. ALTITUDE - right scale
  # -------------------------------------------------
  alt_y1 = H + 40
  alt_y2 = H + 200

  y_clamped = np.clip(y, 0, 1.4)  # altitude in Lunar Lander is ~1.4
  alt_marker_y = alt_y2 - y_clamped * (alt_y2 - alt_y1) / 1.4
  _draw_vertical_gauge(draw, W - 70, alt_y1, alt_y2, "ALT", 20, alt_marker_y)

  # -------------------------------------------------
  # 5. ENGINES BUTTONS
  # -------------------------------------------------
  def engine_color(on):
    return (255,80,80) if on else (80,80,80)

  by = H + 10
  bw = 110
  bh = 35
  bx = W//2 - 180

  # (x offset of the button, x offset of its caption, caption, action index)
  buttons = (
    (0, 30, "LEFT", 1),
    (140, 25, "MAIN", 2),
    (280, 30, "RIGHT", 3),
  )
  for dx, text_dx, caption, act in buttons:
    draw.rectangle((bx+dx, by, bx+dx+bw, by+bh), fill=engine_color(action == act))
    draw.text((bx+dx+text_dx, by+10), caption, fill=(255,255,255))

  # -------------------------------------------------
  # 6. Lamp LANDED
  # -------------------------------------------------
  lx = W - 100
  ly = H + 210
  lw = 70
  lh = 20

  color = (0,180,0) if landed else (150,0,0)
  text = "LANDED" if landed else "-----"

  draw.rectangle((lx, ly, lx+lw, ly+lh), fill=color)
  draw.text((lx+18, ly+7), text, fill=(255,255,255))

  return np.array(out)


In [7]:
def run_and_play_video(model, filename="panel_view.mp4", steps=600):
  """Roll out one greedy LunarLander-v3 episode and return it as an inline video.

  Each rendered frame is extended with the instrument panel from
  ``draw_instrument_panel``, the frames are written to ``filename`` at 60 fps,
  and the file is embedded as a base64 ``<video>`` element.

  Args:
    model: Callable returning ``(logits, value)`` for a ``(1, obs_dim)`` float
      tensor; the action with the largest logit is taken at every step.
    filename: Path of the video file to write.
    steps: Maximum number of environment steps; the rollout stops earlier if
      the episode terminates or is truncated.

  Returns:
    ``IPython.display.HTML`` object containing the embedded video.
  """
  import base64

  import imageio
  from IPython.display import HTML

  env = gym.make("LunarLander-v3", render_mode="rgb_array")
  frames = []

  # try/finally so the environment is released even if a step or render fails.
  try:
    state, _info = env.reset()

    for _step in range(steps):
      obs = torch.as_tensor(state, dtype=torch.float32, device=device).unsqueeze(0)

      # Pure inference: skip autograd bookkeeping.
      with torch.no_grad():
        logits, _value = model(obs)
      # softmax is monotonic, so the arg-max of the logits is the greedy action.
      action = torch.argmax(logits, dim=-1).item()

      state, _reward, term, trunc, _info = env.step(action)

      # render() shows the world *after* the step, so the HUD must be drawn
      # from the post-step observation; using the pre-step one makes the
      # panel lag the picture and hides the terminal (landed) state.
      frames.append(draw_instrument_panel(state, action, env.render()))

      if term or trunc:
        break
  finally:
    env.close()

  # ---- Save mp4 ----
  imageio.mimsave(filename, frames, fps=60)
  with open(filename, "rb") as video_file:
    mp4 = video_file.read()
  data_url = "data:video/mp4;base64," + base64.b64encode(mp4).decode()
  return HTML(f"<video width=480 controls><source src='{data_url}' type='video/mp4'></video>")


In [18]:
# Network dimensions used for LunarLander-v3.
obs_dim = 8         # observation vector length (always 8 for LunarLander-v3)
act_dim = 4         # 4 discrete actions

model = ActorCritic(obs_dim, act_dim).to(device)

# The file is consumed only as a state_dict, so restrict unpickling to
# tensors/primitive containers instead of allowing arbitrary pickled objects.
state_dict = torch.load(
  "ppo_lunarlander_model_weights.pt",
  map_location=device,
  weights_only=True,
)
model.load_state_dict(state_dict)
model.eval()  # switch to inference mode before the rollout

run_and_play_video(model)


