## Watch a Deep Q-Network MountainCar Agent! 

### 1. Start the Environment

In [1]:
import gym
import torch
from agent import Agent, FloatTensor
import time
import torch

# Create the MountainCar-v0 environment the agent will be replayed in.
env = gym.make('MountainCar-v0')

# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Sizes handed to the agent: first dimension of the observation space,
# number of discrete actions, and hidden_dim (presumably the hidden-layer
# width of the Q-network -- confirm in agent.py).
hidden_dim = 16
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n

agent = Agent(
    state_dim,
    action_dim,
    hidden_dim,
    lr=0.01,
    device=device,
)
print('input_dim: ', state_dim, ', output_dim: ', action_dim, ', hidden_dim: ', hidden_dim)


input_dim:  2 , output_dim:  3 , hidden_dim:  16


### 2. Prepare the Checkpoint Loader

In [2]:
def load(agent, directory, filename, map_location='cpu'):
    """Restore the agent's local and target Q-networks from saved checkpoints.

    Reads ``<directory>/<filename>_local.pth`` into ``agent.q_local`` and
    ``<directory>/<filename>_target.pth`` into ``agent.q_target``.

    Args:
        agent: Object exposing ``q_local`` and ``q_target``, each providing a
            ``load_state_dict`` method.
        directory: Folder that contains the two checkpoint files.
        filename: Common prefix of the two checkpoint files.
        map_location: Forwarded to ``torch.load``. Defaults to ``'cpu'`` so a
            checkpoint written on a CUDA machine can still be opened on a
            CPU-only one; ``load_state_dict`` then copies the values into the
            networks' existing parameters, whichever device those are on.
    """
    for suffix, network in (('local', agent.q_local), ('target', agent.q_target)):
        path = '%s/%s_%s.pth' % (directory, filename, suffix)
        network.load_state_dict(torch.load(path, map_location=map_location))


### 3. Prepare Player

In [3]:
from collections import deque
import os
import numpy as np

def play(env, agent, n_episodes):
    """Run the agent in the environment for ``n_episodes`` rendered episodes.

    After every episode one line is printed with the episode number, the mean
    total reward over the most recent (at most 100) episodes, the number of
    steps taken and the wall-clock duration of the episode.

    Args:
        env: Environment whose ``reset()`` returns the first observation and
            whose ``step(action)`` returns ``(observation, reward, done, info)``.
        agent: Object whose ``get_action(state, check_eps=..., eps=...)``
            returns a value with an ``.item()`` method yielding the action.
        n_episodes: Number of episodes to play.
    """
    # Rolling window: the printed average covers at most the last 100 episodes.
    scores_deque = deque(maxlen=100)

    for i_episode in range(1, n_episodes + 1):
        s = env.reset()
        total_reward = 0
        timesteps = 0
        time_start = time.time()

        done = False
        while not done:
            # NOTE(review): check_eps=False presumably disables epsilon-greedy
            # exploration during playback -- confirm in agent.py.
            a = agent.get_action(FloatTensor([s]), check_eps=False, eps=0.01)
            env.render()
            s, r, done, _ = env.step(a.item())
            total_reward += r
            timesteps += 1

        # Whole seconds spent on this episode, rendering included.
        delta = int(time.time() - time_start)

        scores_deque.append(total_reward)

        print('Episode {}\tAverage Score: {:.2f}, \t Timesteps: {} \tTime: {:02}:{:02}:{:02}'\
                  .format(i_episode, np.mean(scores_deque), timesteps,\
                          delta//3600, delta%3600//60, delta%60))

### 4. Load and Play

In [4]:
# Restore the weights from dir_chk/MountainCar_local.pth and
# dir_chk/MountainCar_target.pth, then replay ten rendered episodes.
load(agent=agent, directory='dir_chk', filename='MountainCar')
play(env, agent, 10)

Episode 1	Average Score: -87.00, 	 Timesteps: 87 	Time: 00:00:02
Episode 2	Average Score: -90.00, 	 Timesteps: 93 	Time: 00:00:01
Episode 3	Average Score: -95.67, 	 Timesteps: 107 	Time: 00:00:01
Episode 4	Average Score: -98.50, 	 Timesteps: 107 	Time: 00:00:01
Episode 5	Average Score: -101.40, 	 Timesteps: 113 	Time: 00:00:01
Episode 6	Average Score: -102.33, 	 Timesteps: 107 	Time: 00:00:01
Episode 7	Average Score: -99.86, 	 Timesteps: 85 	Time: 00:00:01
Episode 8	Average Score: -100.75, 	 Timesteps: 107 	Time: 00:00:01
Episode 9	Average Score: -100.89, 	 Timesteps: 102 	Time: 00:00:01
Episode 10	Average Score: -101.60, 	 Timesteps: 108 	Time: 00:00:01


In [None]:
# Release the environment once playback is finished (presumably this also
# closes the window opened by env.render() -- confirm for the gym version in use).
env.close()