#### Loading Libraries

In [1]:
from __future__ import annotations

import os, sys
import time

import gymnasium as gym
import matplotlib.pyplot as plt
import text_flappy_bird_gym
from tqdm import tqdm

from plotting.plotting import plot_value_function
from Agents.TemporalDifferenceAgent import TemporalDifferenceAgent
from Agents.ExpectedSarsa import ExpectedSarsa

#### Defining the Environment Used to Train the Agents

In [2]:
# Environment dimensions; `width` and `height` are also passed to the agent
# constructors further down, so they are kept as separate names.
height, width, pipe_gap = 15, 20, 4

# Single environment instance used for training in the grid-search loop.
env = gym.make('TextFlappyBird-v0', height=height, width=width, pipe_gap=pipe_gap)


#### Defining Search Grids for TemporalDifferenceAgent(1)

Parameters to tune:

* Epsilon
* Gamma
* Alpha

`n_steps` is also part of the grid, but it is held fixed at 1 in this search.


In [32]:
# TDAgent_maxScore49_w20_h15_nAct2_eps0.1_nSteps1_gamma1_alpha0.001
# NOTE(review): the line above looks like the identifier of an earlier best
# run, kept for reference — confirm before relying on it.

# Candidate values for the agents' `epsilon` argument.
# The grid follows {1, 2} x 10^-{1, 2, 3}. Every entry must be distinct:
# a repeated value would build (and train) identical configurations twice.
epsilon_grid = [
    1e-1,
    1e-2,
    1e-3,
    2e-1,
    2e-2,
    2e-3,
]

# Candidate values for the agents' `gamma` argument.
gamma_grid = [
    1,
    75e-2,
    5e-1,
]

# Candidate values for the agents' `alpha` argument.
alpha_grid = [
    1,
    5e-1,
    1e-1,
    5e-2,
    1e-2,
    5e-3,
    1e-3,
]

# Candidate values for the agents' `n_steps` argument (a single value, so it
# is effectively fixed in this search).
n_steps_grid = [
    1,
]

#### Defining Number of Episodes for each search, Patience for the search and Random Seed  

In [33]:
# Search budget and reproducibility settings (plain integers).
PATIENCE = 1_000        # forwarded as `patience` to train_n_episodes
N_EPISODES = 500_000    # episode count passed to train_n_episodes for each agent
SEED = 1024             # passed as `seed` to every agent constructor

#### Constructing Grid 

In [34]:
# Keyword arguments that are the same for every agent in the grid.
common_agent_kwargs = dict(
    n_actions=2,
    debug=0,
    verbose=0,
    seed=SEED,
    consider_height=False,
)

# Build one ExpectedSarsa agent per (epsilon, gamma, alpha, n_steps)
# combination; epsilon varies slowest and n_steps fastest.
agents = []
for epsilon in epsilon_grid:
    for gamma in gamma_grid:
        for alpha in alpha_grid:
            for n_steps in n_steps_grid:
                agent = ExpectedSarsa(
                    width,
                    height,
                    alpha=alpha,
                    epsilon=epsilon,
                    gamma=gamma,
                    n_steps=n_steps,
                    **common_agent_kwargs,
                )
                agents.append(agent)

In [35]:
# Sanity check: the grid holds one agent per parameter combination, so this
# should equal len(epsilon_grid) * len(gamma_grid) * len(alpha_grid) * len(n_steps_grid).
print(len(agents))

126


#### Grid Search loop

In [None]:
# Train every agent of the grid in turn on the shared environment `env`.
# Each call requests N_EPISODES episodes and forwards PATIENCE as `patience`.
# NOTE(review): presumably `patience` enables early stopping inside
# train_n_episodes — confirm against the agent implementation.
# `tqdm` is imported in the notebook's import cell.
for agent in tqdm(agents, desc="Grid search"):
    # tqdm.write emits the separator without interfering with the progress bar.
    tqdm.write("###############################################################")
    agent.train_n_episodes(env, N_EPISODES, patience = PATIENCE)