In [None]:
import gymnasium as gym
from gymnasium.wrappers import RecordVideo
import matplotlib.pyplot as plt
import numpy as np

from scripts.agents import QLearningAgent, SARSAAgent
from scripts.training import Trainer, trainingInspector, test_agent, plot_test_results
from scripts.tilecoding import QTable

### Top 3 hyperparameters for Q-Learning and SARSA

In [None]:
# Hyperparameter configurations evaluated for each agent. Every list entry is
# one complete configuration; the lists are passed to `test_agent` below.
#
# Only the best-performing configuration is recorded for each agent.
# TODO: append the second- and third-best configurations once they are known.

# Q-learning: temperature-style schedule (`tau_start` -> `tau_end`).
qlearning_hyperparameter_list = [
    # Best performing hyperparameters
    dict(
        num_episodes=10000,
        max_return=-100,
        num_tiles_per_feature=20,
        num_tilings=1,
        learning_rate=0.1,
        tau_start=100000,
        tau_end=0.1,
        decay_type="exponential",
        frac_episodes_to_decay=0.1,
    ),
]

# SARSA: epsilon-style schedule (`eps_start` -> `eps_end`).
sarsa_hyperparameter_list = [
    # Best performing hyperparameters
    dict(
        num_episodes=10000,
        max_return=-100,
        num_tiles_per_feature=20,
        num_tilings=4,
        learning_rate=0.1,
        eps_start=1,
        eps_end=0.01,
        decay_type="exponential",
        frac_episodes_to_decay=0.5,
    ),
]


## Running experiments - no reward shaping

In [None]:
def episode_trigger(x):
    """Tell the video recorder whether episode index ``x`` should be recorded.

    Args:
        x: Episode index (0, 1, 2, ...).

    Returns:
        True when ``x`` is a multiple of 1000 (including 0), False otherwise.
    """
    return bool(x % 1000 == 0)

In [None]:
# Baseline experiment: train/evaluate both agents on the unmodified
# MountainCar-v0 reward, recording a video for the episodes selected by
# `episode_trigger`.
env = gym.make('MountainCar-v0', render_mode="rgb_array")
env = RecordVideo(
    env,
    video_folder="backups/mountaincar-qlearning-visualizations",
    name_prefix="eval",
    episode_trigger=episode_trigger
)

# try/finally guarantees the environment (and the video recorder wrapping it)
# is closed even if a run raises or the cell is interrupted.
try:
    qlearning_agent = QLearningAgent(
        state_space=env.observation_space,
        action_space=env.action_space,
        seed=0
    )

    sarsa_agent = SARSAAgent(
        state_space=env.observation_space,
        action_space=env.action_space,
        seed=0
    )

    # NOTE(review): both agents run on the same recorded environment, so SARSA
    # videos are written to the same folder as the Q-learning ones - confirm
    # that this is intended.
    trainer = Trainer()
    qlearning_results = test_agent(env, qlearning_agent, trainer, qlearning_hyperparameter_list, num_experiments=1)
    sarsa_results = test_agent(env, sarsa_agent, trainer, sarsa_hyperparameter_list, num_experiments=1)
finally:
    env.close()

## Reward Shaping experiment

In [None]:
class RewShaper(gym.Wrapper):
    """Environment wrapper that adds a velocity-based shaping term to the reward.

    On every step the reward is increased by
    ``100 * (action - 1) * prev_vel``, where ``prev_vel`` is element 1 of the
    observation seen *before* the action was applied.

    Assumes MountainCar-style spaces: ``observation[1]`` is the velocity and
    the actions 0/1/2 map to -1/0/+1 after subtracting 1, so the bonus is
    positive when the chosen push direction matches the sign of the velocity.
    """

    def __init__(self, env):
        super().__init__(env)
        # Velocity component of the most recent observation.
        self.prev_vel = 0

    def reset(self, **kwargs):
        """Reset the wrapped environment and re-initialise ``prev_vel``.

        Without this, the first shaped reward of a new episode would be
        computed from the final velocity of the previous episode.

        Args:
            **kwargs: Forwarded unchanged to the wrapped environment's ``reset``.

        Returns:
            The ``(observation, info)`` pair from the wrapped environment.
        """
        observation, info = self.env.reset(**kwargs)
        self.prev_vel = observation[1]
        return observation, info

    def step(self, action):
        """Step the wrapped environment and return the shaped reward.

        Args:
            action: Action forwarded to the wrapped environment.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where
            ``reward`` is the environment reward plus the shaping term.
        """
        observation, reward, terminated, truncated, info = self.env.step(action)
        # Shaping uses the velocity from before this step, not the new one.
        reward += 100*(action-1)*self.prev_vel
        self.prev_vel = observation[1]
        return observation, reward, terminated, truncated, info

class MountainCarTrainer(Trainer):
    """Trainer whose score ignores the reward value it is given."""

    def compute_score(self, reward):
        """Return a constant score of -1, regardless of ``reward``.

        NOTE(review): presumably this keeps reported scores on the native
        -1-per-step scale while the agent trains on shaped rewards - confirm
        against how ``Trainer`` uses ``compute_score``.
        """
        constant_score = -1
        return constant_score

In [None]:
# Reward-shaping experiment: same setup as the baseline run, but the
# environment is additionally wrapped in `RewShaper` and scores are computed
# by `MountainCarTrainer`.
env = gym.make('MountainCar-v0', render_mode="rgb_array")
env = RecordVideo(
    env,
    video_folder="backups/mountaincar-qlearning-visualizations",
    # Distinct prefix so these recordings do not overwrite the "eval" videos
    # written to the same folder by the run without reward shaping.
    name_prefix="eval-reward-shaping",
    episode_trigger=episode_trigger
)
env = RewShaper(env)

# try/finally guarantees the environment (and the video recorder inside it)
# is closed even if a run raises or the cell is interrupted.
try:
    qlearning_agent = QLearningAgent(
        state_space=env.observation_space,
        action_space=env.action_space,
        seed=0
    )

    sarsa_agent = SARSAAgent(
        state_space=env.observation_space,
        action_space=env.action_space,
        seed=0
    )

    trainer = MountainCarTrainer()
    # NOTE: these assignments replace any `qlearning_results` / `sarsa_results`
    # produced by an earlier cell.
    qlearning_results = test_agent(env, qlearning_agent, trainer, qlearning_hyperparameter_list, num_experiments=1)
    sarsa_results = test_agent(env, sarsa_agent, trainer, sarsa_hyperparameter_list, num_experiments=1)
finally:
    env.close()

In [None]:
# Plot the Q-learning test results.
# When the notebook is run top to bottom, `qlearning_results` was last assigned
# by the reward-shaping cell, so that is the run shown here.
# NOTE(review): despite its name, `combined_results` holds only the Q-learning
# results; `sarsa_results` is never plotted - confirm whether both were meant
# to be shown. The meaning of the `[0]` argument is defined by
# `plot_test_results` (presumably the hyperparameter-set indices to plot - verify).
combined_results = qlearning_results
plot_test_results(combined_results, [0])

In [None]:
# Visualise the Q-learning agent's Q-table as an RGB image.
table = np.array(qlearning_agent.q_table.table)

# The shape is hard-coded: it assumes 20 tiles per feature, a single tiling
# and 3 actions (20 * 20 * 3 values), matching the Q-learning hyperparameters
# above - TODO derive it from the configuration instead.
# The last axis (size 3) is interpreted by `imshow` as the R, G, B channels.
im = np.reshape(table, (20,20,3))

# Min-max normalise into [0, 1] for display. If every value is identical
# (e.g. an untrained table) the range is zero; show a blank image rather
# than dividing by zero and producing NaNs.
q_min = np.min(im)
q_range = np.max(im) - q_min
if q_range == 0:
    im = np.zeros_like(im, dtype=float)
else:
    im = (im - q_min) / q_range

plt.imshow(im)
plt.title("Q-learning Q-table (min-max normalised, last axis as RGB)")
plt.show()