In [1]:
import gymnasium as gym
from gymnasium.wrappers import RecordVideo
import matplotlib.pyplot as plt
import numpy as np

from scripts.agents import QLearningAgent, SARSAAgent
from scripts.training import Trainer, trainingInspector, test_agent, plot_test_results
from scripts.tilecoding import QTable

## Top 3 hyperparameters for Q-Learning and SARSA

In [2]:
# Hyperparameter sets handed to test_agent, one dict per configuration.
# Only the best-performing configuration of each agent is active; the
# runner-up settings are kept as comments for reference. The second- and
# third-best entries carried identical values, so each is shown once.
qlearning_hyperparameter_list = [
    # Best performing hyperparameters
    dict(
        num_episodes=10000,
        max_return=500,
        num_tiles_per_feature=30,
        num_tilings=1,
        learning_rate=0.1,
        tau_start=10000,
        tau_end=0.1,
        decay_type="exponential",
        frac_episodes_to_decay=0.7,
    ),
    # Second and third best performing hyperparameters (same values for both)
    # dict(
    #     num_episodes=10000,
    #     max_return=-100,
    #     num_tiles_per_feature=20,
    #     num_tilings=1,
    #     learning_rate=0.1,
    #     tau_start=100000,
    #     tau_end=0.1,
    #     decay_type="exponential",
    #     frac_episodes_to_decay=0.1,
    # ),
]

sarsa_hyperparameter_list = [
    # Best performing hyperparameters
    dict(
        num_episodes=10000,
        max_return=500,
        num_tiles_per_feature=30,
        num_tilings=1,
        learning_rate=0.1,
        eps_start=1,
        eps_end=0.01,
        decay_type="linear",
        frac_episodes_to_decay=0.5,
    ),
    # Second and third best performing hyperparameters (same values for both)
    # dict(
    #     num_episodes=10000,
    #     max_return=-100,
    #     num_tiles_per_feature=20,
    #     num_tilings=4,
    #     learning_rate=0.1,
    #     eps_start=1,
    #     eps_end=0.01,
    #     decay_type="exponential",
    #     frac_episodes_to_decay=0.5,
    # ),
]


## Helper functions

In [3]:
class ObsWrapper(gym.ObservationWrapper):
    """Observation wrapper that passes every observation through ``f``.

    The wrapper also advertises a matching observation space, obtained by
    applying ``f`` to the wrapped environment's ``high`` and ``low`` bounds.
    """

    def __init__(self, env: gym.Env, f):
        """Wrap ``env`` so that its observations are transformed by ``f``.

        Args:
            env: Environment to wrap. Its observation space must expose
                ``low``, ``high`` and ``dtype`` (as a ``Box`` does).
            f: Callable applied to each observation and to both bounds.
        """
        super().__init__(env)
        assert callable(f)
        self.f = f

        # Build a new Box rather than assigning to .high/.low. Until the
        # wrapper is given its own space, `self.observation_space` is the
        # wrapped env's space object, so assigning to its attributes would
        # rewrite the inner environment's bounds in place.
        inner_space = env.observation_space
        new_high = f(inner_space.high)
        new_low = f(inner_space.low)
        self.observation_space = gym.spaces.Box(
            low=new_low,
            high=new_high,
            dtype=inner_space.dtype,
        )

    def observation(self, observation):
        """Return ``observation`` transformed by the stored callable."""
        return self.f(observation)

def episode_trigger(x, every=1000):
    """Tell whether episode number ``x`` falls on the recording interval.

    Passed to ``RecordVideo`` as its ``episode_trigger`` callback, which
    calls it with the episode number only, so the default interval applies.

    Args:
        x: Episode number to test.
        every: Interval between selected episodes. Defaults to 1000.

    Returns:
        True when ``x`` is a multiple of ``every``, otherwise False.

    Raises:
        ValueError: If ``every`` is not a positive number.
    """
    if every <= 0:
        raise ValueError(f"every must be positive, got {every!r}")
    return bool(x % every == 0)

## Running experiments

In [None]:
def make_recorded_env(video_folder):
    """Create a CartPole-v1 env with clipped observations and video recording.

    Args:
        video_folder: Directory handed to ``RecordVideo`` for its output.

    Returns:
        The environment wrapped in ``ObsWrapper`` and then ``RecordVideo``.
    """
    base_env = gym.make('CartPole-v1', render_mode="rgb_array")
    # Clip every observation component (and the advertised bounds) to [-5, 5].
    clipped_env = ObsWrapper(base_env,
                             lambda obs: np.clip(obs, -5, 5))
    return RecordVideo(
        clipped_env,
        video_folder=video_folder,
        name_prefix="eval",
        episode_trigger=episode_trigger
    )


trainer = Trainer()

# Each agent gets its own environment and video folder. Sharing one
# RecordVideo env would put the SARSA recordings in the Q-learning folder.
# The finally blocks close the env even if training fails or is interrupted.
env = make_recorded_env("backups/cartpole-qlearning-visualizations")
try:
    qlearning_agent = QLearningAgent(
        state_space=env.observation_space,
        action_space=env.action_space,
        seed=0
    )
    qlearning_results = test_agent(env, qlearning_agent, trainer, qlearning_hyperparameter_list, num_experiments=1)
finally:
    env.close()

env = make_recorded_env("backups/cartpole-sarsa-visualizations")
try:
    sarsa_agent = SARSAAgent(
        state_space=env.observation_space,
        action_space=env.action_space,
        seed=0
    )
    sarsa_results = test_agent(env, sarsa_agent, trainer, sarsa_hyperparameter_list, num_experiments=1)
finally:
    env.close()

  logger.warn(
Training:  57%|██████████████▎          | 5729/10000 [01:05<03:38, 19.59it/s, Mean Score=295]

In [None]:
# Join the Q-learning and SARSA results with `+` so both can be plotted by a
# single call. NOTE(review): this presumably concatenates two lists, giving
# Q-learning first and SARSA second; confirm the return type of test_agent.
combined_results = qlearning_results + sarsa_results
# NOTE(review): [0, 1] presumably selects which entries of combined_results
# to plot; confirm against plot_test_results.
plot_test_results(combined_results, [0, 1])