In [1]:
import os
import sys
import time
import subprocess
from PIL import Image
import numpy as np
import torch
import yaml
from torchvision import transforms
from experiment import VAEXperiment
from models import *

import gymnasium as gym
from gymnasium import ObservationWrapper
from gymnasium.wrappers import PixelObservationWrapper, FrameStack
from gymnasium.spaces import Box, Discrete

from stable_baselines3 import SAC, PPO, A2C
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import CallbackList, CheckpointCallback, EvalCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy


In [46]:
-26.3832
-24.6370
-26.1828
-28.7215
-27.0238
-25.9470
-25.6194
-0.0259
-0.0199
-26.8950
-25.4861
-27.1048
-0.0252
-26.1438
-25.2564
-26.2054
-26.6818
-24.7952
-24.4125
-0.0241
-30.6486
-27.1290
-25.1966
-21.5085
-0.0279
-25.6476
-24.1161
-26.1040
-23.2622
-8.0783
-0.0291
-0.0200
-0.0287
-27.9851
-23.4676
-0.0314
-25.9330
-24.8221
-0.0239
-27.2952
-27.2766
-0.0359
-28.5562
-24.7911
-0.0197
-0.1621
-28.1487
-25.1702
-24.3624
-25.9836
-25.3753
-24.8576
-28.1595
-26.1673
-0.0271
-0.3942
-24.5561
-24.7331
-25.8240
-0.0300
-0.9674
-25.4803
-26.9636
-23.9666

-23.9666

In [47]:
def get_vae(version='version_0',log_directory='logs/BCE_test_VAE_1/MSSIMVAE/',
            hparam_path = "configs/bces_vae.yaml"):
    #model_path= log_directory+'/'+version+'/hparams.yaml'
    ckpt_path=log_directory+'/'+version+'/checkpoints/last.ckpt'

    config = yaml.safe_load(open(hparam_path))
    model = vae_models[config['model_params']['name']](**config['model_params'])
    # Check if CUDA is available
    if torch.cuda.is_available():
        # Load the serialized object using CUDA if available
        ckpt = torch.load(ckpt_path)
    else:
        # Load the serialized object onto the CPU if CUDA is not available
        ckpt = torch.load(ckpt_path, map_location=torch.device('cpu'))
    experiment = VAEXperiment(model, config['exp_params'])
    experiment.load_state_dict(ckpt['state_dict'])      
    vae = experiment.model
    return vae

#Make a funciton to create environment, this allows to vectorize it
def make_env(env_id: str = "MountainCarContinuous-v0", rank: int = 0, seed: int = 42, 
            data_dir: str = "Data/MountainCar/test2", collect_frames: bool = True, env_iterator: int = 0,
            vae_version: int = 0, latent_dim: int = 1,
            vae_directory: str = 'logs/MountainCar/BCE_test_VAE_1/MSSIMVAE/',
            hparam_path: str = "configs/bces_no_pretrained.yaml"):
    def _init():
        save_path= data_dir+'/train_env_id_'+str(env_iterator)+'_nenv_'+str(rank)+'_'
        vae = get_vae(version='version_'+str(vae_version),
                      log_directory = vae_directory,
                      hparam_path = hparam_path)
        
        env = gym.make(env_id,
                    render_mode ='rgb_array')
        env = Monitor(env)
        seed = 42
        env.reset(seed=seed + rank)
        env = PixelObservationWrapper(env)
        if collect_frames:
            env = frame_saver(env, save_path)
        env = VAE_ENC(env, vae, latent_dim)
        env = FrameStack(env, num_stack=2)
        
        return env
    set_random_seed(seed)
    return _init

In [48]:
#save frame wrapper class for env
class frame_saver(ObservationWrapper):
    def __init__(self, env,
                 collect_frames_dir = None,
                 start_index = 0):
        super().__init__(env)
        
        self.collect_frames_dir = collect_frames_dir
        self.frame_idx = start_index
                
        
    def observation(self, obs):
        frame = obs['pixels']#.to('cuda')
        if self.collect_frames_dir != None:
            im = Image.fromarray(np.array(frame))
            im.save(self.collect_frames_dir+'_'+str(self.frame_idx)+'.jpeg')
            self.frame_idx += 1
        return obs

# to add Gaussian noise to the observations
class AddGaussianNoise(object):
    def __init__(self, mean=0., std=0.1):
        self.std = std
        self.mean = mean
        
    def __call__(self, tensor):
        return tensor + torch.randn(tensor.size()) * self.std + self.mean
    
    def __repr__(self):
        return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)

#create VAE wrapper
class VAE_ENC(ObservationWrapper):
    def __init__(self, env, vae, latent_dim,
                 mean=0,std=0.1,
                 size=(64,64),
                 start_index = 0):
        super().__init__(env)
        #new obs space with std
        #self.observation_space = Box(shape=(2, latent_dim), low=-np.inf, high=np.inf)
        #just mean
        self.observation_space = Box(shape=(latent_dim,), low=-np.inf, high=np.inf)
        
        self.vae = vae
        #transforms
        self.mean = mean
        self.std = std
        self.size = size
        
        self.frame_idx = start_index
        
        
        
    def observation(self, obs):
        #get frame
        #print(obs)
        frame = obs['pixels']#.to('cuda')
        #transform for VAE
        val_transforms = transforms.Compose([transforms.ToTensor(),
        #transforms.RandomHorizontalFlip(),
        AddGaussianNoise(self.mean, self.std),
        transforms.Resize(self.size),
        #transforms.Grayscale(),
        #transforms.Normalize(self.mean, self.std),
        ])
        frame = val_transforms(frame) #(c,h,w)
        frame = torch.unsqueeze(frame, 0)#.to(self.device) #make it (1,c,h,w)
        enc = self.vae.encode(frame)    
        enc = np.array([tensor.detach().cpu().numpy() for tensor in enc])
        #with std
        #enc = np.array([enc[0][0], enc[1][0]]) ## mu, std #  give only mu?
        #just mean
        enc = np.array(enc[0][0])

        return enc

In [49]:
data_name='test1'
save_path='Data/MountainCar/'+data_name+'/'

In [5]:
data_name='test1'
save_path='Data/MountainCar/'+data_name+'/'
num_of_episodes = 1

env = gym.make("MountainCarContinuous-v0",
                render_mode ='rgb_array')

i=0
for episode in range(num_of_episodes):
    observation, info = env.reset()
    done = False
    while not done: 
        action= env.env.action_space.sample()
        observation, reward, terminated, truncated, info = env.step(action)
        #state, reward, terminated, truncated, info = env.step(action)
        #print(terminated, truncated)
        if terminated:
            done = True
        if truncated:
            done = True
        current_frame = env.render()

        i+=1
        im = Image.fromarray(np.array(current_frame))
        im.save(save_path+data_name+'_'+str(i)+'.jpeg')

print('collected frames from', num_of_episodes,"episodes with random action in dir:", save_path)

collected frames from 1 episodes with random action in dir: Data/MountainCar/test1/


In [34]:
#to save the encodings of all pictures used during training of the vae
def save_known_universe(vae, observation_dir, save_dir, save_name):
    os.makedirs(save_dir, exist_ok=True)
    
    # Collect filenames of all JPEG images in the observation directory
    image_filenames = [filename for filename in os.listdir(observation_dir) if filename.lower().endswith('.jpeg')]

    latent_vectors = []

    for filename in image_filenames:
        image_path = os.path.join(observation_dir, filename)
        image = Image.open(image_path)
        image = np.array(image)

        # Process image through the VAE
        latent_vector = encode_image(vae, image)
        latent_vectors.append(latent_vector)

    # Save latent vectors as a CSV file
    file_name = save_name + '_latent_vectors.csv'
    csv_path = os.path.join(save_dir, file_name)
    print(latent_vectors)
    np.savetxt(csv_path, np.array(latent_vectors), delimiter=',')

    
    
 #vae.encode produces [[[mu1,mu2,...]],[[std1,std2...]]] --> enc[0]=[[mu1,mu2,..]]
    
    
def encode_image(vae, image):
    val_transforms = transforms.Compose([transforms.ToTensor(),
        #transforms.RandomHorizontalFlip(),
        AddGaussianNoise(0, 0.1),
        transforms.Resize((64,64)),
        #transforms.Grayscale(),
        #transforms.Normalize(self.mean, self.std),
        ])
    image = val_transforms(image)
    image = torch.unsqueeze(image, 0)
    enc = vae.encode(image)
    enc = np.array([tensor.detach().cpu().numpy() for tensor in enc])
    print(enc)
    print('ENC', enc[0][0])
    return enc[0][0] #vae.encode produces [[[mu1,mu2,...]],[[std1,std2...]]] --> enc[0]=[[mu1,mu2,..]]

In [37]:
def save_known_universe(vae, observation_dir, save_dir, save_name, batch_size):
    os.makedirs(save_dir, exist_ok=True)
    
    # Collect filenames of all JPEG images in the observation directory
    image_filenames = [filename for filename in os.listdir(observation_dir) if filename.lower().endswith('.jpeg')]

    latent_vectors = []
      
    for i in range(0, len(image_filenames), batch_size):
        batch_filenames = image_filenames[i:i + batch_size]
        batch_images = [Image.open(os.path.join(observation_dir, filename)) for filename in batch_filenames]
        batch_encodings = encode_images_batch(vae, batch_images)
        latent_vectors.extend(batch_encodings)

    # Save latent vectors as a CSV file
    file_name = save_name + '_latent_vectors2.csv'
    csv_path = os.path.join(save_dir, file_name)
    print(latent_vectors)
    np.savetxt(csv_path, np.array(latent_vectors), delimiter=',')


    
def encode_images_batch(vae, images):
    val_transforms = transforms.Compose([transforms.ToTensor(),
        #transforms.RandomHorizontalFlip(),
        AddGaussianNoise(0, 0.1),
        transforms.Resize((64,64)),
        #transforms.Grayscale(),
        #transforms.Normalize(self.mean, self.std),
        ])
    processed_images = torch.stack([val_transforms(img) for img in images])
    enc = vae.encode(processed_images)
    enc = np.array([tensor.detach().cpu().numpy() for tensor in enc])
    return enc[0]

In [23]:
[ 11.603084 -23.715708 -25.953197]

[-38.065821]

In [39]:
agent_model_dir = "RLmodels/MountainCarContinuous-v0/Double_loop"#where to save the RL agents
agent_log_dir = agent_model_dir+"/logs" #where to log RL progress
vae_name = 'BCE_VAE_3_test1'#"BCE_test1_VAE_2"
vae_directory = 'logs/MountainCar/BCE_VAE_3_test1/MSSIMVAE/' # directory for versions of the vae
latent_dir = agent_model_dir + "/latent_space_encodings/"+vae_name
config_path = "configs/bces_no_pretrained.yaml"
vae_version = 0
latent_save_dir = agent_model_dir + "/latent_space_encodings/"+vae_name

latent_save_name = vae_name+ '_v'+str(vae_version)
vae = get_vae(version='version_'+str(vae_version),
              log_directory = vae_directory,
              hparam_path = config_path)    
save_known_universe(vae, observation_dir = save_path, save_dir = latent_save_dir, save_name=latent_save_name, batch_size=1)

[[[ 11.151729   -14.9741125  -27.892572  ]]

 [[ -0.7281062  -12.011102    -0.12703975]]]
ENC [[ 11.151729  -14.9741125 -27.892572 ]]
[[[ 11.0791645  -15.025645   -27.792711  ]]

 [[ -0.66849893 -12.0893955   -0.29304186]]]
ENC [[ 11.0791645 -15.025645  -27.792711 ]]
[[[ 11.068259   -15.154696   -27.878351  ]]

 [[ -0.6535852  -12.000162    -0.08041926]]]
ENC [[ 11.068259 -15.154696 -27.878351]]
[[[ 11.125714  -15.212889  -27.870562 ]]

 [[ -0.6634645 -12.020565   -0.2022248]]]
ENC [[ 11.125714 -15.212889 -27.870562]]
[[[ 11.100503   -15.119889   -27.823944  ]]

 [[ -0.8709987  -12.065943    -0.38806677]]]
ENC [[ 11.100503 -15.119889 -27.823944]]
[[[ 11.0519905  -15.273967   -27.896212  ]]

 [[ -0.84732443 -12.097748    -0.4061372 ]]]
ENC [[ 11.0519905 -15.273967  -27.896212 ]]
[[[ 11.122836   -14.974782   -27.860735  ]]

 [[ -0.9430581  -12.183885    -0.19026677]]]
ENC [[ 11.122836 -14.974782 -27.860735]]
[array([ 11.151729 , -14.9741125, -27.892572 ], dtype=float32), array([ 11.07916

NameError: name 'latent_vectors' is not defined

In [48]:
n_envs = 4
vae_version = 8
vae_name = "BCE_VAE_l1_test1_A2C"
vae_directory = 'logs/MountainCar/BCE_VAE_1_test1/MSSIMVAE/' # directory for versions of the vae
config_path = "configs/bces_no_pretrained.yaml"
data_name='A2C_l1_envTest'
save_path='Data/MountainCar/'+data_name+'/'

###
### try with make vec env kwargs for different envs
####
env = make_vec_env([make_env(env_id = "MountainCarContinuous-v0", rank=i, 
data_dir = save_path, collect_frames = False, env_iterator = 0,
vae_version = vae_version,
vae_directory = vae_directory,
hparam_path = config_path) for i in range(n_envs)])

TypeError: 'list' object is not callable

In [35]:
agent_model_dir = "RLmodels/MountainCarContinuous-v0/Double_loop"#where to save the RL agents
agent_log_dir = agent_model_dir+"/logs"
n_steps = 100 
agent = A2C(
    device = 'cpu',
    env = env,
    n_steps= n_steps,           
    policy='MlpPolicy',
    ent_coef= 0.0,
    use_sde=True,
    sde_sample_freq = 16,
    policy_kwargs= dict(log_std_init=0.0, ortho_init=False),
    tensorboard_log=agent_log_dir,
    verbose=2)

Using cpu device


In [36]:
agent.learn(total_timesteps=100, tb_log_name='eval_test')

Logging to RLmodels/MountainCarContinuous-v0/Double_loop/logs/eval_test_6


<stable_baselines3.a2c.a2c.A2C at 0x7f49ac5a07f0>

In [106]:
agent.learn(total_timesteps=30000, tb_log_name='eval_test')

Logging to RLmodels/MountainCarContinuous-v0/Double_loop/logs/eval_test_4
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 638      |
|    ep_rew_mean        | 20       |
| time/                 |          |
|    fps                | 66       |
|    iterations         | 100      |
|    time_elapsed       | 59       |
|    total_timesteps    | 4000     |
| train/                |          |
|    entropy_loss       | -2.68    |
|    explained_variance | -1.78    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -0.561   |
|    std                | 0.986    |
|    value_loss         | 0.453    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 724      |
|    ep_rew_mean        | 12.8     |
| time/                 |          |
|    fps                | 67       |
|    iterations         | 200      |
|

<stable_baselines3.a2c.a2c.A2C at 0x7fe98aab5d00>

In [37]:
def eval_agent(agent, env, n_eval_episodes,deterministic = True):
    total_rewards = []
    #device = 'cuda'
    #policy = agent.policy.to(device)  # Move observation to the same device as the model

    observation_space = env.observation_space
    print('observation space:', observation_space)
    
    for episode in range(n_eval_episodes):

        result = env.reset()
        
        if len(result) == 2:
            observation, info = result
        else:
            observation = result
        
        episode_reward = 0
        done = False
        terminated = False
        truncated = False
        while not done: 
            action = agent.predict(observation, deterministic=deterministic)
            result = env.step(action)
            if len(result) == 5:
                observation, reward, terminated, truncated, info = result
            else:
                observation, reward, done, info = result
                
            print(info)
            if not observation_space.contains(observation):
                #print("Observation is not valid:", observation)
                observation = observation[0]
                if not observation_space.contains(observation):
                    print("Observation is not valid:", observation)
            episode_reward += reward                  
            
            if terminated or truncated:
                done = True
            
        total_rewards.append(episode_reward)

    total_rewards = np.array(total_rewards)
    
    #agent.policy.to('cuda')
    return np.mean(total_rewards), np.std(total_rewards), total_rewards

In [38]:
n_envs = 1
vae_version = 7
vae_name = "BCE_VAE_l1_test1_A2C"
vae_directory = 'logs/MountainCar/BCE_VAE_1_test1/MSSIMVAE/' # directory for versions of the vae
config_path = "configs/bces_no_pretrained.yaml"
data_name='A2C_l1_envTest'
save_path='Data/MountainCar/'+data_name+'/'

eval_env = Dummy_vec_env([make_env(env_id = "MountainCarContinuous-v0", rank=i, 
data_dir = save_path, collect_frames = False, env_iterator = 0,
vae_version = vae_version,
vae_directory = vae_directory,
hparam_path = config_path) for i in range(n_envs)])


eval_env2 = make_env(env_id = "MountainCarContinuous-v0", rank=0, 
data_dir = save_path, collect_frames = False, env_iterator = 0,
vae_version = vae_version,
vae_directory = vae_directory,
hparam_path = config_path)()

In [39]:
agent_env=agent.env

In [40]:
agent_env.action_space

Box(-1.0, 1.0, (1,), float32)

In [41]:
n = agent_env.num_envs
# Sample n actions for each environment
actions = [agent_env.action_space.sample() for _ in range(n)]
observation, reward, done, info = agent_env.step(actions)

In [42]:
info

[{'TimeLimit.truncated': False},
 {'TimeLimit.truncated': False},
 {'TimeLimit.truncated': False},
 {'TimeLimit.truncated': False}]

In [15]:
eval_agent(agent, eval_env, 1)

observation space: Box(-inf, inf, (2, 1), float32)
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': 

[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLim

[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLim

[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLimit.truncated': False}]
[{'TimeLim

(-99.89843, 0.0, array([[-99.89843]], dtype=float32))

In [109]:
evaluate_policy(agent, agent.env, 100)

(-34.37815818, 0.793758065206614)

In [112]:
eval_agent(agent, eval_env2, 100)

observation space: Box(-inf, inf, (2, 1), float32)


(-64.9869457706169,
 0.6332611851154137,
 array([-65.54779633, -64.15821832, -66.27939134, -65.60961608,
        -65.62113337, -64.18530253, -64.7955757 , -64.81577643,
        -66.00738035, -65.34294653, -65.76723012, -64.75899366,
        -64.29844947, -65.00765884, -64.0627092 , -65.72242984,
        -65.29213533, -65.61524046, -64.66106259, -66.16945472,
        -66.03372352, -65.72191019, -64.24243311, -64.91141584,
        -64.05720713, -64.24350616, -65.44193594, -65.58221013,
        -66.2708616 , -64.5371661 , -64.55955265, -64.95620541,
        -64.26612986, -64.17391884, -64.89183577, -64.39653351,
        -65.34910968, -64.82153159, -65.74422308, -65.54735783,
        -64.57169562, -65.81814991, -65.75047059, -64.63125122,
        -64.48773314, -65.46028272, -64.1997985 , -64.36772931,
        -64.04008981, -65.63868015, -65.38552815, -65.45057354,
        -65.64500567, -64.86161595, -65.2245569 , -64.20104131,
        -64.14788023, -65.34886571, -64.98105704, -65.10390271,

In [113]:
eval_agent(agent, eval_env, 100)

observation space: Box(-inf, inf, (2, 1), float32)


(-64.95953,
 0.6376761,
 array([[-65.872154],
        [-64.10578 ],
        [-65.66641 ],
        [-64.1279  ],
        [-64.65069 ],
        [-65.22105 ],
        [-64.87064 ],
        [-65.19069 ],
        [-65.77256 ],
        [-65.56284 ],
        [-66.3046  ],
        [-65.674095],
        [-65.06393 ],
        [-64.163925],
        [-65.53662 ],
        [-64.67561 ],
        [-64.87065 ],
        [-64.13064 ],
        [-64.41521 ],
        [-64.823654],
        [-65.52386 ],
        [-65.81609 ],
        [-64.68034 ],
        [-65.47413 ],
        [-64.4766  ],
        [-65.658066],
        [-65.529785],
        [-64.804115],
        [-64.168884],
        [-65.395355],
        [-65.17203 ],
        [-65.27459 ],
        [-65.16169 ],
        [-64.09696 ],
        [-64.36147 ],
        [-65.86018 ],
        [-64.08109 ],
        [-64.46386 ],
        [-65.060295],
        [-65.41145 ],
        [-65.674324],
        [-64.022415],
        [-65.57453 ],
        [-64.23982 ],
        

In [100]:
evaluate_policy(agent, agent.env, 100)

(-48.033374, 2.974568889760158)

In [102]:
evaluate_policy(agent, eval_env, 1)

(-88.764527, 0.0)

In [81]:
eval_agent(agent, eval_env2, 100) #determenistic TRUE

observation space: Box(-inf, inf, (2, 1), float32)


(18.19666718762395,
 57.211613506047726,
 array([-74.13086341,  46.61047441,  68.99497337, -79.689522  ,
         32.22826715, -75.76612705,  60.69805386,  50.25380659,
         55.62586184, -74.65099244,  45.43512275, -78.97720712,
         46.58800899,  60.00608876,  42.84013394,  68.43502433,
         34.63521894,  31.61845688, -71.61239745,  52.00564203,
        -82.09105642, -75.31023015,  39.59720439,  60.05953786,
         47.21847079,  64.93506172,  69.65559502, -75.22523841,
        -69.04763241, -80.37794727,  47.71136671,  56.08512363,
         44.88438658,  59.66337752,  68.88819833,  55.40425989,
         53.33746741, -71.26274004,  55.42624926, -78.68579963,
         39.79994321,  56.50712975, -74.12125301,  31.83864824,
         56.25681695,  55.12845546,  33.6463891 ,  60.4923198 ,
         63.95205082,  64.6637173 , -72.79917031, -69.43841077,
        -70.78432101, -75.2363962 ,  47.20230832, -71.64574196,
         62.90654553,  66.71274324,  36.14213414,  55.29058097,

In [92]:
eval_agent(agent, eval_env, 100) #determenistic TRUE

observation space: Box(-inf, inf, (2, 1), float32)


(20.510653,
 57.253517,
 array([[ 69.03984 ],
        [ 32.9879  ],
        [ 69.0011  ],
        [ 60.010036],
        [-71.97538 ],
        [ 46.19172 ],
        [ 41.35769 ],
        [-74.99716 ],
        [-71.63879 ],
        [-74.40619 ],
        [-73.180626],
        [-67.446144],
        [ 64.93894 ],
        [-71.16245 ],
        [ 55.742256],
        [ 41.644413],
        [ 67.87607 ],
        [ 59.08738 ],
        [ 43.032585],
        [-69.99064 ],
        [-78.06071 ],
        [ 51.852776],
        [ 42.553757],
        [ 59.382328],
        [-70.98329 ],
        [ 31.890465],
        [ 42.44451 ],
        [ 68.64244 ],
        [-76.222176],
        [ 41.296265],
        [ 31.743332],
        [ 41.013027],
        [ 69.49202 ],
        [ 63.870594],
        [-73.523705],
        [ 65.16844 ],
        [ 59.99257 ],
        [ 45.683575],
        [ 34.212196],
        [ 36.55285 ],
        [ 73.06198 ],
        [-69.373505],
        [ 65.45111 ],
        [ 69.63164 ],
        

In [93]:
eval_agent(agent, eval_env, 100, deterministic=False)

observation space: Box(-inf, inf, (2, 1), float32)


(-97.54545,
 0.3951888,
 array([[-96.704956],
        [-97.39471 ],
        [-97.77497 ],
        [-97.96563 ],
        [-97.653305],
        [-97.71136 ],
        [-97.42056 ],
        [-97.46863 ],
        [-98.09219 ],
        [-98.500885],
        [-97.2296  ],
        [-96.70008 ],
        [-97.906494],
        [-96.88454 ],
        [-97.7625  ],
        [-97.61216 ],
        [-97.295494],
        [-97.255775],
        [-97.8869  ],
        [-97.1741  ],
        [-97.41812 ],
        [-97.37732 ],
        [-97.401794],
        [-97.554276],
        [-97.73752 ],
        [-96.53244 ],
        [-97.644264],
        [-97.85418 ],
        [-97.59632 ],
        [-97.69297 ],
        [-98.45748 ],
        [-96.77757 ],
        [-97.46465 ],
        [-96.90003 ],
        [-97.431786],
        [-98.104675],
        [-97.715614],
        [-97.61924 ],
        [-97.459305],
        [-96.88639 ],
        [-97.38704 ],
        [-97.10182 ],
        [-97.38044 ],
        [-97.32304 ],
        

In [83]:
eval_agent(agent, eval_env2, 100, deterministic=False) #determenistic False

observation space: Box(-inf, inf, (2, 1), float32)


(-97.49539815458566,
 0.3944803442482428,
 array([-96.85236466, -97.56164347, -96.99394145, -97.61136478,
        -97.66042538, -96.90210404, -97.90576911, -97.56931211,
        -97.7740074 , -97.41650237, -97.03757497, -97.47012034,
        -97.5759842 , -97.21493622, -97.01103755, -97.63889487,
        -97.82222689, -97.74554024, -96.68862462, -97.60121528,
        -97.27993397, -96.85948496, -97.7580608 , -97.6078203 ,
        -97.83730635, -97.70256513, -97.73256868, -97.61603216,
        -97.07685386, -97.38630385, -97.73667439, -97.21137036,
        -98.10424578, -97.55198632, -96.75409409, -97.17446246,
        -98.08297848, -97.38171999, -97.99318251, -97.99493739,
        -97.28431583, -97.19468026, -97.45136121, -98.03961261,
        -97.55684607, -97.98114065, -97.68332776, -96.97079627,
        -97.01530107, -97.46135007, -96.78030142, -96.95532342,
        -97.56910454, -97.67121048, -97.68905473, -97.84071526,
        -98.28745041, -97.19540417, -97.92586882, -97.44072682