## Watch a Soft-Actor-Critic Agent!

### 1.Start the Environment for Trained Agent

In [1]:
import numpy as np
import torch
import gym
import time
from  collections  import deque

from sac_agent import soft_actor_critic_agent

env = gym.make('BipedalWalker-v3')

# Set seeds
seed = 0 ## 12345
env.seed(seed)
torch.manual_seed(seed)
np.random.seed(seed)

eval=True 
LEARNING_RATE=0.0001
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('device: ', device)

state_size = env.observation_space.shape[0]
action_size=env.action_space.shape[0]
action_high= float(env.action_space.high[0])
print('state_size: ', state_size, ', action_size: ', action_size, ', action_high: ', action_high)
    
agent = soft_actor_critic_agent(env.observation_space.shape[0], env.action_space, \
        device=device, hidden_size=256, lr=LEARNING_RATE, gamma=0.99, tau=0.005, alpha=0.2)



device:  cuda:0
state_size:  24 , action_size:  4 , action_high:  1.0


### 2. Prepare Load

In [2]:
def load(agent, actor, critic):
    print('Load model from {} and {}'.format(actor, critic))
    agent.policy.load_state_dict(torch.load(actor))
    agent.critic.load_state_dict(torch.load(critic))


### 3. Prepare Player

In [3]:
def play(env, agent, num_episodes):
    
    state = env.reset()
    scores_deque = deque(maxlen=100)
    scores = []
    
    for i_episode in range(num_episodes + 1):
        
        state = env.reset()
        score = 0                    
        time_start = time.time()
        
        while True:
            
            ## action = agent.select_action(np.array(state)) ## TD3
            action = agent.select_action(state, eval=True)
            env.render()
            next_state, reward, done, _ = env.step(action)
            score += reward 
            state = next_state
    
            if done:
                break
                
        s = (int)(time.time() - time_start)
        
        scores_deque.append(score)
        scores.append(score)    
        
        print('Episode {}\tAverage Score: {:.2f},\tScore: {:.2f} \tTime: {:02}:{:02}:{:02}'\
                  .format(i_episode, np.mean(scores_deque), score, s//3600, s%3600//60, s%60))
    env.close()
    

### 4. Load and Play,  lr = 0.0001, 756 episodes

In [4]:
import os
base = os.getcwd() + '/'
base

'/home/dan/licenta/main/rl-ticket/BipedalWalker-Soft-Actor-Critic/'

In [5]:
actor = base+'dir_chk/weights_actor_lr0001.pth'    
critic = base+'dir_chk/weights_critic_lr0001.pth'

load(agent, actor, critic)
play(env, agent, num_episodes=1)

Load model from /home/dan/licenta/main/rl-ticket/BipedalWalker-Soft-Actor-Critic/dir_chk/weights_actor_lr0001.pth and /home/dan/licenta/main/rl-ticket/BipedalWalker-Soft-Actor-Critic/dir_chk/weights_critic_lr0001.pth
Episode 0	Average Score: 303.84,	Score: 303.84 	Time: 00:00:35
Episode 1	Average Score: 304.34,	Score: 304.84 	Time: 00:00:13


### 5. Load and Play,  lr = 0.0005, 540 episodes

In [7]:
actor = 'dir_chk\weights_actor_lr0005.pth'    
critic = 'dir_chk\weights_critic_lr0005.pth'

load(agent, actor, critic)
play(env, agent, num_episodes=3)

Load model from dir_chk\weights_actor_lr0005.pth and dir_chk\weights_critic_lr0005.pth


FileNotFoundError: [Errno 2] No such file or directory: 'dir_chk\\weights_actor_lr0005.pth'

### 6. Load and Play,  lr = 0.00008,  408 episodes

In [None]:
actor = 'dir_chk\weights_actor_lr00008.pth'    
critic = 'dir_chk\weights_critic_lr00008.pth'

load(agent, actor, critic)
play(env, agent, num_episodes=3)

Load model from dir_chk\weights_actor_lr00008.pth and dir_chk\weights_critic_lr00008.pth
Episode 0	Average Score: 303.68,	Score: 303.68 	Time: 00:00:18
Episode 1	Average Score: 305.97,	Score: 308.26 	Time: 00:00:18
Episode 2	Average Score: 306.69,	Score: 308.13 	Time: 00:00:18


In [None]:
env.close()