In [2]:
from model import build_model
from rl_lib.agents import DQNAgent
from rl_lib.memory import ReplayMemory
from rl_lib.policy import EpsilonGreedyQPolicy, GreedyQPolicy
from rl_lib.epsilon_decay import EpisodicDecay

import gym

In [3]:
def build_agent(model, target_model, nb_actions, memory_size=10000, eps=0.1):
    """Assemble a DQNAgent from two networks and default exploration/replay parts.

    Args:
        model: Network passed to DQNAgent as its second argument (the model
            being trained).
        target_model: Network passed to DQNAgent as its last argument.
        nb_actions: Number of actions, passed to DQNAgent as its first argument.
        memory_size: Value handed to ReplayMemory (presumably its capacity in
            transitions -- confirm against rl_lib.memory). Defaults to 10000.
        eps: Exploration rate given to EpsilonGreedyQPolicy. Defaults to 0.1.

    Returns:
        The DQNAgent built from the arguments above, using EpsilonGreedyQPolicy
        as its policy and GreedyQPolicy as its test policy.
    """
    # NOTE(review): no epsilon decay is wired in here. An EpisodicDecay object
    # was previously created in this function but never handed to the policy
    # or the agent, so eps stays at its initial value for the whole run.
    # If decay is intended, attach it through whatever hook rl_lib exposes
    # (TODO: confirm the API) instead of re-adding an unused local.
    memory = ReplayMemory(memory_size)
    policy = EpsilonGreedyQPolicy(eps=eps)
    test_policy = GreedyQPolicy()
    return DQNAgent(nb_actions, model, memory, policy, test_policy, target_model)

In [4]:
# Create the CartPole environment and read the sizes the networks need from it.
env = gym.make('CartPole-v0')

# Length of the first (index 0) axis of the observation space.
observation_dim = env.observation_space.shape[0]

# Shape handed to build_model in the next cell; the leading 1 is presumably a
# window/batch axis expected by model.py -- TODO confirm.
state_shape = (1, observation_dim)

# Number of discrete actions exposed by the environment.
nb_actions = env.action_space.n

In [5]:
def _new_compiled_network():
    """Build one network via build_model and compile it with Adam and MSE."""
    network = build_model(state_shape, nb_actions)
    network.compile(optimizer='adam', loss='mse', metrics=['mse'])
    return network


# Two separately built networks with identical settings: one passed to the
# agent as its model, the other as its target model.
model = _new_compiled_network()
target_model = _new_compiled_network()

agent = build_agent(model, target_model, nb_actions)

In [6]:
# Warm-up phase before training. Presumably this pre-fills the replay memory
# with 100 units of experience (steps or episodes -- confirm against rl_lib).
agent.warmup(env, 100)



In [7]:
# Train the agent with a budget of 1000 (the progress line reports
# "1000/1000 Completed"; presumably counted in environment steps -- confirm
# against rl_lib).
# The returned history (a dict with 'episode_steps' and 'episode_rewards'
# lists, judging by the displayed output) is kept in a variable so later cells
# can plot or summarise it without re-training. The bare name on the last line
# keeps the cell's displayed output the same as before.
history = agent.fit(env, 1000)
history

1000/1000 Completed - 100.00%

{'episode_steps': [10,
  11,
  12,
  11,
  11,
  11,
  12,
  10,
  13,
  10,
  28,
  11,
  13,
  11,
  10,
  104,
  37,
  28,
  33,
  48,
  20,
  47,
  24,
  45,
  28,
  40,
  24,
  35,
  20,
  27,
  29,
  19,
  30,
  35,
  20,
  39,
  17,
  30,
  37,
  32],
 'episode_rewards': [9.0,
  10.0,
  11.0,
  10.0,
  10.0,
  10.0,
  11.0,
  9.0,
  12.0,
  9.0,
  27.0,
  10.0,
  12.0,
  10.0,
  9.0,
  103.0,
  36.0,
  27.0,
  32.0,
  47.0,
  19.0,
  46.0,
  23.0,
  44.0,
  27.0,
  39.0,
  23.0,
  34.0,
  19.0,
  26.0,
  28.0,
  18.0,
  29.0,
  34.0,
  19.0,
  38.0,
  16.0,
  29.0,
  36.0,
  31.0]}

In [None]:
# Evaluate the trained agent without rendering (visualize=False). The 100 is
# presumably the number of evaluation episodes or steps -- confirm against rl_lib.
# NOTE(review): this cell has no execution count, so it was not run in the
# saved session.
agent.test(env, 100, visualize=False)

In [8]:
# Inspect the exploration rate after training. The displayed value (0.1) is
# the same eps the policy was constructed with in build_agent.
agent.policy.eps

0.1