#### run global setup

In [None]:
# Run the shared bootstrap script, if it can be found relative to the current
# working directory. Only the open() call is guarded: a FileNotFoundError
# raised *inside* the setup script itself must surface as a real error
# instead of being reported as "already completed".
try:
    setupfile = open("../global_setup.py")
except FileNotFoundError:
    # Script not reachable from the current directory; the notebook treats
    # this as "setup was already done" (presumably the script changes the
    # working directory on its first run -- TODO confirm).
    print('Setup already completed')
else:
    with setupfile:
        # exec() without explicit namespaces runs the script in this
        # notebook's global namespace, so everything it defines or imports
        # becomes available to the cells below. Only run trusted code here.
        exec(setupfile.read())

#### run local setup

In [None]:
from tqdm import tqdm
from src.rl.NatureDQN import NatureDQN
from src.rl.AtariAgent import AtariAgent
from src.rl.util import run_episode
import gym

# Hyper-parameters shared by the network and the agent: this dict is passed to
# NatureDQN(...) and AtariAgent(...) in the cells below. How each key is
# consumed is defined in src.rl and is not visible from this notebook; the
# grouping comments only follow the key names.
config = dict(
    # convolutional part (the three lists each hold one value per conv layer)
    conv_layers=3,
    conv_units=[32, 64, 64],
    filter_sizes=[8, 4, 3],
    strides=[4, 2, 1],
    # input / fully connected part
    state_frames=4,
    fc_layers=1,
    fc_units=[512],
    in_width=84,
    in_height=84,
    # learning and optimizer settings
    discount=0.99,
    device='/gpu:0',
    lr=0.00025,
    opt_decay=0.95,
    momentum=0.0,
    opt_eps=0.01,
    clip_delta=1.0,
    # logging / checkpointing / reproducibility
    tensorboard=False,
    tensorboard_freq=50,
    ckpt=0,
    random_seed=42,
    # replay history and exploration
    # NOTE(review): hist_size is a float (1e6), not an int -- confirm the
    # consumer accepts that before using it as a buffer length.
    hist_size=1e6,
    batch_size=32,
    eps=1.0,
)

### Breakout

In [None]:
# Create the Breakout environment, then build the network and the agent.
env = gym.make('Breakout-v4')
# The number of available actions is read from the environment and stored in
# the shared config before the network and the agent are constructed from it.
config['num_actions'] = env.action_space.n
net = NatureDQN(config)
# Optional step, currently disabled: presumably restores a previously saved
# network from this path -- confirm against NatureDQN.load before enabling.
#net.load('src/rl/trained/breakout')
agent = AtariAgent(env, net, config)

In [None]:
# Reset the environment and look at the shape of what reset() returns.
# NOTE(review): this assumes env.reset() returns the observation array itself.
# Newer Gym releases return an (observation, info) tuple, which has no
# `.shape` -- confirm against the installed Gym version.
s = env.reset()
# Bare expression: the value is only displayed because it is the last
# expression of a notebook cell.
s.shape

In [None]:
# Play a single episode with on-screen rendering. Unlike the call inside
# run_experiment below, no `learn=True` is passed here.
# render_delay is presumably the pause between rendered frames in seconds --
# confirm in src.rl.util.run_episode.
run_episode(env, agent, render=True, render_delay=0.03)

In [None]:
def run_experiment(env, agent, epsilon_decay: float, n_episodes: int,
                   sync_every: int = 10000) -> list:
    """Train ``agent`` on ``env`` for ``n_episodes`` episodes, then play one
    final episode with exploration switched off.

    Args:
        env: Environment handed to ``run_episode``.
        agent: Agent to train. It must provide ``reset()``, ``sync_target()``
            and a numeric ``eps`` attribute.
        epsilon_decay: Factor that ``agent.eps`` is *multiplied* by after
            every training episode.
        n_episodes: Number of training episodes to run.
        sync_every: ``agent.sync_target()`` is called (and progress printed)
            on every episode whose index is a multiple of this value,
            including episode 0. Must be non-zero. Defaults to 10000, the
            interval that was previously hard-coded.

    Returns:
        A list with the value returned by ``run_episode`` for each training
        episode, in order. The final evaluation episode is not included.
    """
    rewards = []
    for i in tqdm(range(n_episodes)):
        agent.reset()
        sum_r = run_episode(env, agent, learn=True)
        rewards.append(sum_r)
        agent.eps *= epsilon_decay
        if i % sync_every == 0:
            print("Episode ", i, " reward: ", sum_r)
            agent.sync_target()
    # Turn exploration off for the evaluation episode. This has to target the
    # same attribute the loop above decays (`eps`); assigning to a different
    # name such as `epsilon` would only create an unused attribute.
    agent.eps = 0
    # NOTE(review): agent.reset() is called before every training episode but
    # not before this one -- confirm whether that is intentional.
    sum_r = run_episode(env, agent)
    print('Trained for ', n_episodes, ' episodes. Last episode achieved a reward of ', sum_r)
    return rewards


# Short smoke run: 10 training episodes followed by one evaluation episode.
# NOTE(review): run_experiment applies its third argument multiplicatively
# (agent.eps *= epsilon_decay), so 9e-7 drives eps to ~0 after the very first
# episode. If a slow anneal was intended, a factor close to 1 is probably
# what is wanted here -- confirm.
run_experiment(env, agent, 9e-7, 10)