In [None]:
import gym
from gym import logger as gymlogger
from gym.wrappers import RecordVideo
gymlogger.set_level(40) #error only
import tensorflow as tf
import numpy as np
import random
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import math
import glob
import io
import base64
from IPython.display import HTML
from IPython import display as ipythondisplay
import pygame
from typing import List

In [None]:
# Create the CartPole-v1 environment used throughout the notebook.
# Optional: add render_mode="human" to the call below (kept disabled here).
env = gym.make("CartPole-v1")

In [None]:
print(f"this is observation space: {type(env.observation_space)}")
print(f"this is action space: {env.action_space}")

# Observation layout:
#   0: cart position, 1: cart velocity, 2: pole angle, 3: pole angular velocity
# `high` / `low` hold the largest / smallest value each component can take;
# keep plain-list copies of both for later use.
upper_bound = env.observation_space.high.tolist()
lower_bound = env.observation_space.low.tolist()
print(f"this is observation space high: {env.observation_space.high}")
print(f"this is observation space low: {env.observation_space.low}")

# Width of every observation dimension (upper bound minus lower bound).
span = [high - low for low, high in zip(lower_bound, upper_bound)]
print(span)

In [None]:
def random_seed() -> int:
    """Draw a seed uniformly at random from the inclusive range 0..10000."""
    largest_seed = 10000
    # randrange excludes its stop value, hence the +1 to keep 10000 reachable.
    return random.randrange(0, largest_seed + 1)

# Draw one seed and apply it to BOTH the action-space sampler and the
# environment reset, so the printed seed is enough to reproduce the run.
seed = random_seed()
print(f"this is the seed: {seed}")
env.action_space.seed(seed)

# reset() returns (observation, info); only the observation is kept.
# Passing seed= makes the initial state reproducible from the printed seed.
state = env.reset(seed=seed)[0]
print(state)
print(state[0])

# Take a single step with action 0 to inspect what step() returns.
print(env.step(0))
print(type(env))
# Further inspection (uncomment as needed):
# print(env.action_space.n)
# print(env.observation_space.shape)
# print(env.observation_space.shape[0])

### Task 1: Choose the corresponding action according to the state of the cart-pole

##### Task 1.1: Get the discrete state

In [None]:
def _bucket_index(value: float, lower: float, upper: float, n_buckets: int) -> int:
    """Return the bucket index in [0, n_buckets - 1] for one continuous value.

    Raises:
        ValueError: if n_buckets < 1, or if upper - lower is not a finite,
            strictly positive number.
    """
    if n_buckets < 1:
        raise ValueError(f"n_buckets must be >= 1, got {n_buckets}")

    span = upper - lower
    if not math.isfinite(span) or span <= 0:
        # Infinite bounds would otherwise yield NaN and an opaque int() error.
        raise ValueError(
            f"bounds must be finite with upper > lower, got [{lower}, {upper}]; "
            "pass clipped lower_bounds/upper_bounds for unbounded dimensions"
        )

    if n_buckets == 1:
        # A single bucket holds every value; also avoids dividing by zero below.
        return 0

    width = span / (n_buckets - 1)
    position = (value - lower) / width
    # Clamp out-of-range values into the first / last bucket, then truncate.
    return int(min(max(position, 0), n_buckets - 1))


def discretiser(
        cart_position: float,
        cart_velocity: float,
        angle: float,
        angular_velocity: float,
        n_buckets: tuple = (10, 10, 10, 10),
        lower_bounds: list = None,
        upper_bounds: list = None,
    ) -> tuple:
    """
    Discretises the continuous state space into discrete states for CartPole-v1.

    Each value is mapped to ``int((value - lower) / width)`` with
    ``width = (upper - lower) / (n_buckets - 1)``, clamped to
    ``[0, n_buckets - 1]``. With this spacing the last index is only reached
    at or above the upper bound.
    NOTE(review): confirm the ``n_buckets - 1`` spacing is intended rather
    than ``n_buckets`` equal-width bins.

    Parameters:
    - cart_position (float): The cart's position.
    - cart_velocity (float): The cart's velocity.
    - angle (float): The pole's angle.
    - angular_velocity (float): The pole's angular velocity.
    - n_buckets (tuple): Number of buckets for each state variable.
    - lower_bounds (list): Lower bounds for each state variable. [OPTIONAL]
      Defaults to ``env.observation_space.low`` read at call time.
    - upper_bounds (list): Upper bounds for each state variable. [OPTIONAL]
      Defaults to ``env.observation_space.high`` read at call time.

    NOTE(review): the environment's velocity bounds may be unbounded or
    extremely large, which would collapse those dimensions into one bucket or
    trigger the ValueError below - confirm and pass clipped bounds if so.

    Returns:
    - tuple: Discretized indices for (cart_position, cart_velocity, angle, angular_velocity).

    Raises:
    - ValueError: if a bucket count is < 1 or a dimension's bounds are not
      finite with upper > lower.
    """
    # Resolve defaults lazily so they track the current `env` instead of the
    # one that existed when this cell was first executed.
    if lower_bounds is None:
        lower_bounds = env.observation_space.low.tolist()
    if upper_bounds is None:
        upper_bounds = env.observation_space.high.tolist()

    observation = (cart_position, cart_velocity, angle, angular_velocity)
    return tuple(
        _bucket_index(value, lower_bounds[i], upper_bounds[i], n_buckets[i])
        for i, value in enumerate(observation)
    )


### Task 2: Run the RL agent 100 times, resetting the state at the start of each iteration

### Task 3: Render one episode played by the developed RL agent on Jupyter.