#### run global setup

In [None]:
# Run the shared setup script, if it can be found, inside this notebook's namespace.
try:
    with open("../global_setup.py") as setupfile:
        # exec runs the script's source as though it had been typed into this cell,
        # so any FileNotFoundError raised by the script itself is caught below too.
        exec(setupfile.read())
except FileNotFoundError:
    # A missing file is reported as "already completed" - presumably the setup
    # script changes the working directory, so the relative path stops resolving
    # on a re-run; TODO confirm against global_setup.py.
    print('Setup already completed')

#### run local setup

In [None]:
%matplotlib inline

import gym
from gym import logger

logger.set_level(logger.ERROR)

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from tqdm import tqdm

from src.rl.RandomAgent import RandomAgent
from src.rl.util import run_episode
from src.rl.TabularQAgent import TabularQAgent
from src.rl.NeuralQAgent import NeuralQAgent

## CartPole Challenge

In [None]:
# Create a CartPole environment, reset it, and display one rendered frame inline.
env = gym.make('CartPole-v1')
env.reset()
# The result of render(mode='rgb_array') is handed straight to imshow as the image.
plt.imshow(env.render(mode='rgb_array'))
# Close the environment once the frame has been drawn.
env.close()

In [None]:
# Baseline: run a RandomAgent on a fresh CartPole environment.
env = gym.make('CartPole-v1')
agent = RandomAgent()
# One episode with rendering switched on.
# NOTE(review): env is not closed after this rendered episode - confirm whether
# run_episode releases the render window itself.
run_episode(env, agent, render=True)
# Collect the run_episode result of 1000 further episodes (render not requested).
# NOTE(review): in the cells visible here, dist is reassigned later without this
# baseline ever being plotted - confirm that is intended.
dist = [run_episode(env, agent) for _ in range(1000)]

In [None]:
# Agent trained by the run_experiment call further down in this cell.
# NOTE(review): the three positional hyperparameters are unnamed - confirm their
# order and meaning against TabularQAgent's constructor.
agent = TabularQAgent(0.1, 0.5, 0.99)

def run_experiment(env, agent, epsilon_decay, n_episodes) -> list:
    """Train ``agent`` on ``env`` and return the per-episode training results.

    Runs ``n_episodes`` learning episodes behind a tqdm progress bar,
    multiplying ``agent.epsilon`` by ``epsilon_decay`` after each one. It then
    sets ``agent.epsilon`` to 0, runs one more episode without ``learn=True``
    and prints its result.

    Args:
        env: Environment handed unchanged to ``run_episode``.
        agent: Agent handed to ``run_episode``; must expose a writable
            ``epsilon`` attribute.
        epsilon_decay: Factor applied to ``agent.epsilon`` after every
            training episode.
        n_episodes: Number of training episodes to run.

    Returns:
        A list with the value ``run_episode`` returned for each training
        episode, in order. The final evaluation episode is not included.

    Note:
        ``agent.epsilon`` is left at 0 when the function returns.
    """
    episode_returns = []
    for _ in tqdm(range(n_episodes)):
        episode_returns.append(run_episode(env, agent, learn=True))
        # Shrink epsilon a little after every training episode.
        agent.epsilon *= epsilon_decay

    # Final episode with epsilon forced to zero; it is only reported, not recorded.
    agent.epsilon = 0
    final_return = run_episode(env, agent)
    print('Trained for ', n_episodes, ' episodes. Last episode achieved a reward of ', final_return)
    return episode_returns


# Train the agent for 1000 episodes, passing 0.99 as the epsilon decay factor,
# then plot the per-episode results that run_experiment returns.
rewards = run_experiment(env, agent, 0.99, 1000)
# NOTE(review): sns.tsplot is only available in older seaborn releases (newer
# ones provide lineplot instead) - confirm the pinned seaborn version.
sns.tsplot(rewards)
sns.despine()

Compare the histogram of the random agent's rewards with the histogram of the rewards collected during training: do they look different? If so, why?

In [None]:
# Histogram of the per-episode training results; kde=False turns the density curve off.
sns.distplot(rewards, kde=False)

In [None]:
# Repeat the experiment with a NeuralQAgent: epsilon starts at 1.0 and 0.995 is
# passed to run_experiment as the per-episode decay factor.
# NOTE(review): the literal 4 is presumably the size of CartPole's observation
# vector - confirm against NeuralQAgent's constructor.
agent = NeuralQAgent(4, env.action_space.n, alpha=0.001, gamma=0.95, epsilon=1.0)
rewards = run_experiment(env, agent, 0.995, 1000)
# NOTE(review): sns.tsplot is only available in older seaborn releases - confirm
# the pinned seaborn version.
sns.tsplot(rewards)
sns.despine()

In [None]:
# Run 200 more episodes with the current agent (learn is not passed here) and
# collect each run_episode result. run_experiment has already set agent.epsilon
# to 0 by the time it returns.
dist = [run_episode(env, agent) for _ in tqdm(range(200))]
# Histogram of those 200 results, followed by their mean.
sns.distplot(dist, kde=False)
print("Mean neural agent reward: ", np.mean(dist))

In [None]:
# Render one episode of the current agent on a fresh CartPole environment.
env = gym.make('CartPole-v1')
try:
    episode_reward = run_episode(env, agent, render=True)
finally:
    # Release the renderer even if the episode fails. The first CartPole cell
    # closes its environment after rendering in the same way.
    env.close()
# Keep the episode result as the cell's displayed value, as before.
episode_reward