## Watch a Smart Agent!

### 1. Start the Environment for the Trained Agent

In [None]:
import numpy as np
import torch
import gym
import os
import time
import pybullet_envs

from gym import wrappers as w
from TwinDelayed import TD3

# Create the rendered HalfCheetah environment and wrap it in gym's Monitor,
# which writes its output under ./videos_hc1/.
env = w.monitor.Monitor(
    gym.make('HalfCheetahBulletEnv-v0', render=True),
    directory='./videos_hc1/',
)

# Seed the environment, PyTorch and NumPy with the same value.
seed = 12345
for seed_fn in (env.seed, torch.manual_seed, np.random.seed):
    seed_fn(seed)

# Read the problem dimensions off the environment's spaces.
state_size = env.observation_space.shape[0]
action_size = env.action_space.shape[0]
action_high = float(env.action_space.high[0])
print(
    'state_size: ', state_size,
    ', action_size: ', action_size,
    ', action_high: ', action_high,
)

# Build the TD3 agent sized to this environment.
agent = TD3(
    state_dim=state_size,
    action_dim=action_size,
    max_action=action_high,
)


### 2. Prepare the Checkpoint Loader

In [None]:
def load(agent, dir, prefix):
    """Restore the four networks of ``agent`` from checkpoint files.

    Reads ``<prefix>_actor.pth``, ``<prefix>_critic.pth``,
    ``<prefix>_actor_t.pth`` and ``<prefix>_critic_t.pth`` from ``dir`` and
    loads each one into the matching network with ``load_state_dict``.

    Args:
        agent: Object exposing ``actor``, ``critic``, ``actor_target`` and
            ``critic_target`` modules.
        dir: Directory that holds the checkpoint files. The name shadows the
            builtin but is kept so existing callers keep working.
        prefix: Common file-name prefix of the checkpoint set.
    """
    # (agent attribute, file-name suffix) pairs, in the original load order.
    checkpoints = (
        ('actor', 'actor'),
        ('critic', 'critic'),
        ('actor_target', 'actor_t'),
        ('critic_target', 'critic_t'),
    )
    for attr, suffix in checkpoints:
        path = os.path.join(dir, '%s_%s.pth' % (prefix, suffix))
        # Deserialize onto the CPU so a checkpoint saved on a GPU still opens
        # on a CPU-only machine; load_state_dict then copies the values into
        # the network's existing parameters.
        state = torch.load(path, map_location='cpu')
        getattr(agent, attr).load_state_dict(state)


### 3. Prepare Player

In [None]:
from collections import deque
import os

def play(env, agent, n_episodes, step_delay=0.01):
    """Run ``agent`` in ``env`` for ``n_episodes`` rendered episodes.

    After every episode a summary line is printed with the episode number,
    the mean score over the most recent (up to 100) episodes, the episode
    score and the wall-clock duration.

    Args:
        env: Environment providing ``reset()``, ``render()`` and
            ``step(action)`` returning ``(next_state, reward, done, info)``.
        agent: Object providing ``select_action(state)``.
        n_episodes: Number of episodes to play.
        step_delay: Seconds to sleep after each render call so playback is
            watchable; values <= 0 disable the pause.

    Returns:
        list: Total reward of each episode, in play order.
    """
    scores_deque = deque(maxlen=100)  # window used for the running average
    scores = []

    for i_episode in range(1, n_episodes + 1):
        # The environment is reset exactly once per episode; there is no
        # extra reset before the loop.
        state = env.reset()
        score = 0
        time_start = time.time()

        done = False
        while not done:
            action = agent.select_action(np.array(state))
            env.render()
            if step_delay > 0:
                time.sleep(step_delay)
            state, reward, done, _ = env.step(action)
            score += reward

        # Whole seconds elapsed, split into hh:mm:ss for the report below.
        s = int(time.time() - time_start)

        scores_deque.append(score)
        scores.append(score)

        print(
            'Episode {}\tAverage Score: {:.2f},\tScore: {:.2f} \tTime: {:02}:{:02}:{:02}'
            .format(i_episode, np.mean(scores_deque), score,
                    s // 3600, s % 3600 // 60, s % 60)
        )

    return scores


### 4. Load and Play

In [None]:
# Load checkpoint set 'chpnt_2' from 'dir_chk_005' into the agent, then let it
# play seven episodes in the environment.
load(agent, dir='dir_chk_005', prefix='chpnt_2')
play(env, agent, n_episodes=7)

In [None]:
# Close the environment once playback has finished.
env.close()