In [1]:
import gymnasium as gym
import numpy as np

from ipywidgets import IntProgress, HTML, VBox
from IPython.display import display

import pygetwindow as gw
import cv2 as cv
import mss

import time as time

In [2]:
# Tile coding: several shifted copies of one regular grid laid over a 2-D state box.

# Lower and upper corners of the box covered by the un-shifted grid.
lb, ub = np.array([-1.2,-0.07]), np.array([0.6,0.07])

# Number of tiles along each of the two state dimensions.
n = np.array([8,8])

# Width of a single tile along each dimension.
tile = (ub-lb)/n

def tile2D(state,n_offset=4,offset_factor=0.25):
    """Encode a 2-D state as a flat binary tile-coding feature vector.

    Parameters
    ----------
    state : array-like of length 2
        The point to encode, in the same coordinates as ``lb`` / ``ub``.
    n_offset : int
        Number of shifted grids (tilings) to use.
    offset_factor : float
        Shift between consecutive tilings, as a fraction of one tile width.
        The shifts are centred on zero.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``n_offset * n[0] * n[1]``. Each tiling
        contributes a single 1 at the tile containing ``state``; a tiling
        whose shifted grid does not contain ``state`` contributes no 1.
    """
    features = np.zeros((n_offset, n[0], n[1]))
    centre = (n_offset-1)/2
    for layer in range(n_offset):
        # Displacement of this tiling's grid relative to the un-shifted one.
        shift = (layer-centre)*tile*offset_factor
        cell = np.int32(np.floor((state-shift-lb)/tile))
        # Only mark the tile if the state lies inside this shifted grid.
        inside = np.all((cell>=0) & (cell<n))
        if inside:
            features[layer,cell[0],cell[1]] = 1
    return features.reshape(-1)

In [3]:
def epsilon_greedy(state,eps=0.1):
    """Pick an action epsilon-greedily from the linear action-value estimate.

    The action values are ``W.T @ tile2D(state)``, where ``W`` is the
    module-level weight matrix with one column per action.

    Parameters
    ----------
    state : array-like
        State to act in; passed straight to ``tile2D``.
    eps : float
        Probability of choosing a uniformly random action instead of a
        greedy one. ``eps=0`` gives a purely greedy policy.

    Returns
    -------
    int
        Index of the chosen action. Ties between greedy actions are broken
        uniformly at random.
    """
    q_values = np.dot(W.T,tile2D(state))
    # Derive the action count from the weights instead of hard-coding it,
    # so the policy keeps working if W is built for a different action set.
    n_actions = q_values.shape[0]
    if np.random.rand() < eps:
        return np.random.randint(n_actions)
    # `best_value` rather than `max`, to avoid shadowing the builtin.
    best_value = np.max(q_values)
    greedy_actions = np.arange(n_actions)[q_values == best_value]
    return greedy_actions[np.random.randint(np.size(greedy_actions))]


def action_value_approx(state,action):
    """Return the linear estimate of the action value for (state, action).

    This is the dot product of the weight column for ``action`` in the
    module-level ``W`` with the tile-coded features of ``state``.
    """
    features = tile2D(state)
    action_weights = W[:,action]
    return np.dot(action_weights,features)

In [4]:
# Training environment. To watch training (much slower), swap in:
# env = gym.make("MountainCar-v0",render_mode="human")
env = gym.make("MountainCar-v0")

# Linear action-value weights: one row per tile feature, one column per action.
# Both sizes are derived rather than hard-coded so they follow any change to
# the tiling configuration or to the environment's action space.
n_features = np.size(tile2D([0,0]))
n_actions = int(env.action_space.n)
W = np.zeros([n_features,n_actions])

In [5]:
# Semi-gradient SARSA with linear function approximation over tile features.

iters = 500
progress = IntProgress(value=0, min=0, max=iters)
label = HTML(value="Starting...")
box = VBox([label, progress])
display(box)


gamma = 0.9   # discount factor
alpha = 1e-2  # step size
actions = [0,1,2]

for i in range(iters):
    state1, info = env.reset()
    action1 = epsilon_greedy(state1)
    episode_over = False

    while not episode_over:
        state2, reward, terminated, truncated, info = env.step(action1)
        action2 = epsilon_greedy(state2)

        # TD target. A terminal state has value 0 by definition, so only
        # bootstrap from Q(s', a') when the episode did not terminate.
        # A time-limit truncation is not a true terminal state, so we still
        # bootstrap in that case.
        target = reward
        if not terminated:
            target = target + gamma*action_value_approx(state2,action2)

        # Features of the current state, computed once and reused for both
        # the current estimate and the gradient of the linear model.
        features1 = tile2D(state1)
        td_error = target - np.dot(W[:,action1],features1)
        W[:,action1] = W[:,action1] + alpha*td_error*features1

        # The episode ends on termination OR truncation; stepping an
        # environment after either signal without reset() is unsupported.
        episode_over = terminated or truncated
        state1 = state2
        action1 = action2

    progress.value = i+1
    label.value = f"Progress: {i+1}"

VBox(children=(HTML(value='Starting...'), IntProgress(value=0, max=500)))

In [6]:
# Record one greedy episode of the trained agent to an .mp4 file by
# screen-capturing the pygame render window.

# Configuration
output_file = "mountainCar.mp4"
fps = 30  # Frames per second

env = gym.make("MountainCar-v0",render_mode="human")
out = None
try:
    state, info = env.reset()
    time.sleep(1)  # give the render window time to appear before looking for it

    # Locate the render window; fail with a clear message instead of a
    # NameError further down if it cannot be found.
    matching_windows = gw.getWindowsWithTitle("pygame window")
    if not matching_windows:
        raise RuntimeError('No window titled "pygame window" was found to record')
    pygame_window = matching_windows[0]

    # Screen region to capture, in the format expected by mss.grab().
    bbox = {
        "top": pygame_window.top,
        "left": pygame_window.left,
        "width": pygame_window.width,
        "height": pygame_window.height,
    }

    # Setup video writer
    fourcc = cv.VideoWriter_fourcc(*"mp4v")  # MP4 codec
    out = cv.VideoWriter(output_file, fourcc, fps, (bbox["width"], bbox["height"]))

    # Run RL agent
    with mss.mss() as sct:
        episode_over = False
        while not episode_over:
            action = epsilon_greedy(state,0)  # eps=0: purely greedy policy
            state, reward, terminated, truncated, info = env.step(action)
            # Stop on truncation too; otherwise a policy that never reaches
            # the goal would loop (and record) forever.
            episode_over = terminated or truncated

            # Capture the screen
            img = np.array(sct.grab(bbox))
            frame = cv.cvtColor(img, cv.COLOR_BGRA2BGR)
            out.write(frame)

    time.sleep(1)  # keep the final frame on screen briefly before closing
finally:
    # Release resources even if the capture failed part-way through.
    if out is not None:
        out.release()
    cv.destroyAllWindows()
    env.close()

print(f"Recording saved as {output_file}")

Recording saved as mountainCar.mp4
