<a href="https://colab.research.google.com/github/HarounH/smol/blob/main/rl/hello_rl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title Hello RL
# https://chatgpt.com/c/690beda9-a39c-832b-b88b-9d016b9e1179

In [5]:
# @title Q Learning

import gymnasium
import tensorflow as tf
import numpy as np
from collections import deque
from dataclasses import dataclass
import io
import base64
import io
import base64

import numpy as np
import imageio.v2 as imageio
from IPython.display import HTML


def show_mp4_from_frames_imageio(frames, fps=30):
    """
    Display a sequence of (H, W, 3) RGB frames inline as an MP4 <video> in Jupyter,
    using only in-memory buffers on the Python side (no explicit temp files).

    Args:
        frames: list/tuple/iterable of frames, each (H, W, 3), RGB. A stacked
                ndarray of shape (N, H, W, 3) is accepted as well.
                float frames are clipped to [0, 1] and scaled to [0, 255];
                other non-uint8 dtypes are clipped to [0, 255].
        fps: frames per second.

    Returns:
        IPython.display.HTML object.

    Raises:
        ValueError: if `frames` is empty (or None), or if any frame is not
            of shape (H, W, 3).
    """
    # Materialise first: truth-testing a multi-element ndarray raises an
    # unrelated "truth value of an array is ambiguous" error, and a generator
    # is always truthy even when it yields nothing.
    frames = [] if frames is None else list(frames)
    if not frames:
        raise ValueError("frames list is empty")

    norm_frames = []
    for f in frames:
        f = np.asarray(f)
        if f.ndim != 3 or f.shape[-1] != 3:
            raise ValueError(f"Each frame must be (H, W, 3), got {f.shape}")
        if f.dtype != np.uint8:
            if np.issubdtype(f.dtype, np.floating):
                # Floats are treated as normalised intensities.
                f = np.clip(f, 0.0, 1.0)
                f = (f * 255).astype(np.uint8)
            else:
                # Other integer/bool dtypes are treated as already being on a 0-255 scale.
                f = np.clip(f, 0, 255).astype(np.uint8)
        norm_frames.append(f)

    # Encode MP4 into an in-memory buffer
    buf = io.BytesIO()
    # 'mp4' picks the ffmpeg writer; you can also try format="FFMPEG"
    imageio.mimsave(buf, norm_frames, format="mp4", fps=fps)
    buf.seek(0)
    video_bytes = buf.read()

    # Embed the video as a base64 data URI so the HTML needs no external file.
    b64 = base64.b64encode(video_bytes).decode("ascii")

    html = f"""
    <video controls loop>
        <source src="data:video/mp4;base64,{b64}" type="video/mp4">
        Your browser does not support the video tag.
    </video>
    """

    return HTML(html)


In [6]:
# Sanity-check rollout: step CartPole with randomly sampled actions and record
# each rendered frame so the episode can be played back inline.
env = gymnasium.make("CartPole-v1", render_mode="rgb_array")
_ = env.reset()  # return value is discarded; the random policy never looks at it
n_steps = 100 # @param
frames = []
for step_id in range(n_steps):
    # Capture the current frame before acting.
    frame = env.render()
    frames.append(frame)
    random_action = env.action_space.sample()
    new_state, reward, terminated, truncated, info = env.step(random_action)

    # Stop recording as soon as the step reports the episode as finished.
    if terminated or truncated:
        break

# Bare last expression: the returned HTML object is what the cell displays.
show_mp4_from_frames_imageio(frames)



In [7]:
class Memory:
    """Bounded FIFO buffer whose contents can be sampled at random.

    Items live in ``self.data``, a deque capped at ``max_size`` entries.
    """

    def __init__(self, max_size: int = 1000):
        # With maxlen set, appending to a full deque drops the oldest entry.
        self.data = deque(maxlen=max_size)

    def append(self, item):
        """Store ``item`` as the newest entry."""
        self.data.append(item)

    def sample(self, batch_size: int = 4):
        """Return a list of ``batch_size`` stored items.

        Positions are drawn independently with ``np.random.randint``, so the
        same item may appear more than once in the result.
        """
        picks = np.random.randint(0, len(self.data), batch_size)
        batch = []
        for pos in picks:
            batch.append(self.data[pos])
        return batch

# Quick demo: with capacity 4, only the last four of the seven appended items remain.
_memory = Memory(4)
# Plain loop rather than a list comprehension: the appends are side effects,
# so there is no reason to build a throwaway list of None values.
for _item in [1, 2, 3, 4, 5, 6, 7]:
    _memory.append(_item)
print(_memory.data)
print(_memory.sample(3))

deque([4, 5, 6, 7], maxlen=4)
[6, 5, 5]


In [8]:
from flax import nnx


class Layer(nnx.Module):
    """Feed-forward block: linear expand -> ReLU -> linear project back.

    Args:
        dim: input and output feature size.
        expansion: multiplier applied to ``dim`` to get the hidden width.
        rngs: NNX RNG container used to initialise both linear layers.
    """

    def __init__(self, dim: int, expansion: int, rngs: nnx.Rngs):
        self.fc1 = nnx.Linear(dim, dim * expansion, rngs=rngs)
        self.fc2 = nnx.Linear(dim * expansion, dim, rngs=rngs)

    def __call__(self, x):
        """Apply fc1 -> ReLU -> fc2 to ``x`` and return the result."""
        z = self.fc1(x)
        # nnx.relu rather than jax.nn.relu: `jax` is not imported in the cells
        # above, so the original call would raise NameError on the first
        # forward pass. nnx is already in scope.
        z = nnx.relu(z)
        z = self.fc2(z)
        return z


class QNet(nnx.Module):
    """MLP that maps an input vector to one output per action.

    Structure: ``in_proj`` (dim_in -> d_model), then ``num_layers`` ``Layer``
    blocks with expansion 2, then ``out_proj`` (d_model -> num_actions).

    Args:
        dim_in: size of the input feature vector.
        d_model: hidden width used by every ``Layer``.
        num_actions: size of the output vector.
        num_layers: number of ``Layer`` blocks between the two projections.
        rngs: NNX RNG container for parameter initialisation. When ``None``,
            ``nnx.Rngs(0)`` is used so that ``QNet()`` is constructible.
    """

    def __init__(self, dim_in: int = 4, d_model: int = 32, num_actions: int = 2, num_layers: int = 2, rngs: nnx.Rngs | None = None):
        # The signature advertises None as valid, but nnx.Linear needs a real
        # Rngs object; fall back to a fixed seed instead of crashing.
        if rngs is None:
            rngs = nnx.Rngs(0)
        # `rngs` is keyword-only on nnx.Linear; passing it positionally raises TypeError.
        self.in_proj = nnx.Linear(dim_in, d_model, rngs=rngs)
        self.out_proj = nnx.Linear(d_model, num_actions, rngs=rngs)
        # NOTE(review): newer Flax releases may require module containers to be
        # wrapped (e.g. nnx.List) rather than a plain Python list — confirm
        # against the installed Flax version.
        self.layers = [Layer(d_model, 2, rngs=rngs) for _layer_idx in range(num_layers)]

    def __call__(self, x):
        """Run ``x`` through in_proj, each layer in order, then out_proj."""
        z = self.in_proj(x)
        for layer in self.layers:
            z = layer(z)
        z = self.out_proj(z)
        return z



SyntaxError: parameter without a default follows parameter with a default (ipython-input-1725841814.py, line 17)