## Watch a Trained DDPG Agent!

### 1. Start the Environment

In [1]:
import gym
import numpy as np
import torch

import ddpg_agent_1
import ddpg_agent_2
import time

# Create the continuous-action Lunar Lander environment shared by the cells below.
env = gym.make('LunarLanderContinuous-v2')

print('torch version: ', torch.__version__)

# One shared seed for the environment, NumPy and PyTorch so runs are repeatable.
seed = 0
env.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Reset only after seeding; the first observation is kept in `state`.
state = env.reset()

# Lengths of the observation and action vectors, read from the space shapes.
state_dim, action_dim = env.observation_space.shape[0], env.action_space.shape[0]

print('input_dim: ', state_dim, ', output_dim: ', action_dim)

# One agent per checkpoint set; each comes from its own agent module.
agent1 = ddpg_agent_1.Agent(state_dim, action_dim, random_seed=8)
agent2 = ddpg_agent_2.Agent(state_dim, action_dim, random_seed=8)


torch version:  1.2.0
input_dim:  8 , output_dim:  2


### 2. Define the Checkpoint Loader

In [2]:
def load(agent, directory, filename, map_location='cpu'):
    """Restore the four networks of ``agent`` from saved checkpoint files.

    For each of ``actor_local``, ``actor_target``, ``critic_local`` and
    ``critic_target`` the file ``<directory>/<filename>_<network>.pth`` is
    read with ``torch.load`` and handed to that network's
    ``load_state_dict``.

    Args:
        agent: Object exposing the four network attributes listed above,
            each providing ``load_state_dict``.
        directory: Folder that contains the checkpoint files.
        filename: Common prefix of the checkpoint file names.
        map_location: Forwarded to ``torch.load``. Defaults to ``'cpu'`` so
            that checkpoints written from a GPU can still be opened on a
            machine without CUDA.

    Raises:
        Whatever ``torch.load`` / ``load_state_dict`` raise, e.g. when a
        checkpoint file is missing or does not match the network.
    """
    # NOTE: torch.load unpickles the file, so only open trusted checkpoints.
    for network_name in ('actor_local', 'actor_target',
                         'critic_local', 'critic_target'):
        checkpoint_path = '%s/%s_%s.pth' % (directory, filename, network_name)
        state_dict = torch.load(checkpoint_path, map_location=map_location)
        getattr(agent, network_name).load_state_dict(state_dict)

### 3. Define the Player

In [3]:
from collections import deque
import os
import numpy as np

def play(env, agent, n_episodes, render=True):
    """Run ``agent`` in ``env`` for ``n_episodes`` episodes and print statistics.

    Each episode resets the environment and the agent, then repeatedly asks
    the agent for an action (with ``add_noise=False``) and steps the
    environment until it reports ``done``. After every episode one line is
    printed with the episode number, the mean total reward over the most
    recent 100 episodes, the number of timesteps, and the elapsed wall-clock
    time as HH:MM:SS.

    Args:
        env: Environment providing ``reset()``, ``step(action)`` (returning a
            4-tuple whose second and third items are the reward and the done
            flag) and ``render()``.
        agent: Object providing ``reset()`` and ``act(state, add_noise=...)``.
        n_episodes: Number of episodes to run; values below 1 run nothing.
        render: When true (the default) ``env.render()`` is called before
            every step. Pass ``False`` to run without a display.

    Returns:
        None. Results are reported through ``print`` only.
    """
    # NOTE(review): this initial reset's observation is never used, because
    # every episode resets again below. It is kept since a reset may advance
    # the environment's internal state -- confirm before removing.
    env.reset()

    # Rolling window: the printed average covers at most the last 100 episodes.
    scores_deque = deque(maxlen=100)

    for i_episode in range(1, n_episodes + 1):
        state = env.reset()
        agent.reset()

        total_reward = 0
        timesteps = 0
        time_start = time.time()

        done = False
        while not done:
            action = agent.act(state, add_noise=False)
            if render:
                env.render()
            state, reward, done, _ = env.step(action)
            total_reward += reward
            timesteps += 1

        # Whole seconds spent in this episode, split into h/m/s for display.
        delta = int(time.time() - time_start)
        hours, remainder = divmod(delta, 3600)
        minutes, seconds = divmod(remainder, 60)

        scores_deque.append(total_reward)

        print('Episode {}\tAverage Score: {:.2f}, \t Timesteps: {} \tTime: {:02}:{:02}:{:02}'
              .format(i_episode, np.mean(scores_deque), timesteps,
                      hours, minutes, seconds))

### 4. Load and Play (LunarLanderContinuous-v2-DDPG_746 epis.ipynb)

In [4]:
# Restore agent1's actor/critic networks from the 'dir_chk_1' checkpoints
# (file prefix 'LLC-v2'), then watch it play 7 episodes.
load(agent1, 'dir_chk_1', 'LLC-v2')
play(env=env, agent=agent1, n_episodes=7)

Episode 1	Average Score: 261.48, 	 Timesteps: 363 	Time: 00:00:12
Episode 2	Average Score: 267.51, 	 Timesteps: 336 	Time: 00:00:05
Episode 3	Average Score: 252.92, 	 Timesteps: 287 	Time: 00:00:04
Episode 4	Average Score: 250.10, 	 Timesteps: 321 	Time: 00:00:05
Episode 5	Average Score: 251.13, 	 Timesteps: 275 	Time: 00:00:04
Episode 6	Average Score: 253.06, 	 Timesteps: 283 	Time: 00:00:04
Episode 7	Average Score: 251.09, 	 Timesteps: 314 	Time: 00:00:05


### 5. Load and Play (LunarLanderContinuous-v2-DDPG_2560.ipynb)

In [6]:
# Restore agent2's actor/critic networks from the 'dir_chk_2' checkpoints
# (file prefix 'LunarLanderContinuous-v2'), then watch it play 7 episodes.
load(agent2, 'dir_chk_2', 'LunarLanderContinuous-v2')
play(env=env, agent=agent2, n_episodes=7)

Episode 1	Average Score: 252.94, 	 Timesteps: 347 	Time: 00:00:05
Episode 2	Average Score: 250.06, 	 Timesteps: 202 	Time: 00:00:03
Episode 3	Average Score: 253.39, 	 Timesteps: 261 	Time: 00:00:04
Episode 4	Average Score: 257.17, 	 Timesteps: 261 	Time: 00:00:04
Episode 5	Average Score: 259.15, 	 Timesteps: 219 	Time: 00:00:03
Episode 6	Average Score: 245.38, 	 Timesteps: 382 	Time: 00:00:06
Episode 7	Average Score: 239.42, 	 Timesteps: 246 	Time: 00:00:04


In [None]:
# Close the environment now that both agents have been played.
env.close()