## Watch a Deep Q-Network Agent! 

### 1. Start the Environment

In [1]:
import gym
import torch
from agent import Agent, FloatTensor
import time

# Build the LunarLander environment the agent will be watched in.
env = gym.make('LunarLander-v2')

# Third argument handed to Agent below.
# NOTE(review): presumably the hidden-layer width, which would have to match the
# checkpoint that is loaded later — confirm against agent.py.
hidden_dim = 16
# Length of one observation vector (first entry of the observation-space shape).
state_dim = env.observation_space.shape[0]
# Number of discrete actions exposed by the environment.
action_dim = env.action_space.n

agent = Agent(state_dim, action_dim, hidden_dim)
print(
    'input_dim: ', state_dim,
    ', output_dim: ', action_dim,
    ', hidden_dim: ', hidden_dim,
)


input_dim:  8 , output_dim:  4 , hidden_dim:  16


### 2. Define the Checkpoint Loader

In [2]:
def load(agent, directory, filename, map_location=None):
    """Restore the agent's local and target Q-network weights from disk.

    Reads ``<directory>/<filename>_local.pth`` into ``agent.q_local`` and
    ``<directory>/<filename>_target.pth`` into ``agent.q_target``.

    Args:
        agent: Object exposing ``q_local`` and ``q_target`` attributes, each
            providing a ``load_state_dict`` method.
        directory: Folder that contains the two checkpoint files.
        filename: Common checkpoint prefix, without the ``_local`` /
            ``_target`` suffix or the ``.pth`` extension.
        map_location: Forwarded unchanged to ``torch.load``. The default
            ``None`` keeps torch's standard behaviour; pass e.g. ``'cpu'`` to
            open a checkpoint that was saved on a GPU on a machine without one.

    Raises:
        Whatever ``torch.load`` / ``load_state_dict`` raise, e.g. when a file
        is missing or the stored tensors do not fit the network.
    """
    local_path = '%s/%s_local.pth' % (directory,  filename)
    agent.q_local.load_state_dict(torch.load(local_path, map_location=map_location))

    target_path = '%s/%s_target.pth' % (directory,  filename)
    agent.q_target.load_state_dict(torch.load(target_path, map_location=map_location))


### 3. Define the Player

In [3]:
from collections import deque
import os
import numpy as np

def play(env, agent, n_episodes, render=True):
    """Run the agent in the environment and print a one-line report per episode.

    Each episode starts with ``env.reset()`` and continues until ``env.step``
    reports ``done``. The report shows the episode number, the mean total
    reward over the most recent episodes (at most 100), the number of steps
    taken and the wall-clock duration as HH:MM:SS.

    Args:
        env: Environment whose ``reset()`` returns an observation and whose
            ``step(action)`` returns a 4-tuple ``(observation, reward, done, info)``.
        agent: Object with ``get_action(state, check_eps=..., eps=...)`` whose
            result provides ``.item()``.
        n_episodes: Number of episodes to run.
        render: When true (the default) ``env.render()`` is called before
            every step; pass ``False`` to run without a display.

    Returns:
        None. Results are only printed.
    """
    # Rolling window: the printed average covers at most the last 100 episodes.
    scores_deque = deque(maxlen=100)

    for i_episode in range(1, n_episodes+1):
        s = env.reset()

        total_reward = 0
        timesteps = 0
        time_start = time.time()

        while True:
            # NOTE(review): check_eps=False with eps=0.01 presumably selects a
            # (near-)greedy action — confirm against Agent.get_action.
            a = agent.get_action(FloatTensor([s]), check_eps=False, eps=0.01)
            if render:
                env.render()
            s, r, done, _ = env.step(a.item())
            total_reward += r
            timesteps += 1

            if done:
                break

        # Whole seconds spent in this episode.
        delta = int(time.time() - time_start)

        scores_deque.append(total_reward)

        print('Episode {}\tAverage Score: {:.2f}, \t Timesteps: {} \tTime: {:02}:{:02}:{:02}'\
                  .format(i_episode, np.mean(scores_deque), timesteps,\
                          delta//3600, delta%3600//60, delta%60))

### 4. Load and Play

In [4]:
# Restore the saved weights: reads dir_chk/LunarLander-v2_local.pth and
# dir_chk/LunarLander-v2_target.pth into the agent's two networks.
load(agent, directory='dir_chk', filename='LunarLander-v2')

# Watch the restored agent for ten episodes; one summary line is printed per episode.
play(env, agent, n_episodes=10)

Episode 1	Average Score: 282.86, 	 Timesteps: 232 	Time: 00:00:08
Episode 2	Average Score: 294.44, 	 Timesteps: 242 	Time: 00:00:04
Episode 3	Average Score: 288.48, 	 Timesteps: 211 	Time: 00:00:03
Episode 4	Average Score: 277.02, 	 Timesteps: 188 	Time: 00:00:03
Episode 5	Average Score: 273.24, 	 Timesteps: 243 	Time: 00:00:04
Episode 6	Average Score: 277.14, 	 Timesteps: 299 	Time: 00:00:04
Episode 7	Average Score: 251.95, 	 Timesteps: 1000 	Time: 00:00:16
Episode 8	Average Score: 240.05, 	 Timesteps: 1000 	Time: 00:00:16
Episode 9	Average Score: 225.79, 	 Timesteps: 1000 	Time: 00:00:16
Episode 10	Average Score: 231.14, 	 Timesteps: 175 	Time: 00:00:02


In [5]:
# Close the environment once playback is finished (this should also shut the
# render window opened by env.render() — confirm for the installed gym version).
env.close()