In [11]:
# Mode 2 re-imports edited project modules before every cell execution, so
# changes under `rl.*` are picked up without re-running this cell by hand.
%load_ext autoreload
%autoreload 2
import gym
from rl.algo.drep import DREP_SAC
import os
import torch as th
import time
import numpy as np
from gym.spaces import Box
from rl.module.drep_networks import DREPNet
from rl.module.general import MLPExtractor
from rl.vecenv import vecenv
from gym.wrappers.record_video import RecordVideo
from pyvirtualdisplay import Display

import torch.utils.tensorboard

#, critic_coef=1, ent_coef=0.01, pred_coef=0.0, gamma=0.99, epsilon=0.2, lamda=0.95, policy_epochs=4

#virtual_display = Display(visible=0, size=(1400, 900))
#virtual_display.start()


# Hyper-parameters for the run; unpacked as keyword arguments into
# `DREP_SAC(device, network, **alg_args)`. `nenvs` is also read when the
# vectorised environment is built.
# NOTE(review): `capacity` (1000) is smaller than `train_start` (10000); if
# `capacity` is the replay-buffer size, confirm this is intended.
alg_args = dict(
    name='nomix',
    nenvs=1,
    n_steps=256,
    gradient_steps=256,
    gamma=0.98,
    capacity=1000,
    train_steps=1e6,
    batch_size=128,
    train_start=10000,
    lr=3e-4,
    target_polyak=0.005,
)


def train(model, env, log_interval=0.01, save_interval=0.1):
    """Run ``model.train_epoch(env)`` until ``model.progress`` reaches 1.

    ``model.progress`` is treated as a fraction of the whole run (the loop
    stops once it is >= 1). Each time it crosses another ``log_interval`` the
    mean of ``env.recent_scores`` is printed with timing information and sent
    to ``model.write_log``. Each time it crosses another ``save_interval`` a
    numbered checkpoint is written to ``saves/<model.name>/<n>.pth`` through
    ``model.save_model``.

    Both thresholds start just above the current ``model.progress``, so a
    partially trained model can be passed in and training continues without
    an immediate log or save.

    Args:
        model: Object exposing ``name``, ``progress``, ``train_epoch(env)``,
            ``write_log(tag, value)`` and ``save_model(path)``.
        env: Environment handed to ``train_epoch``; must expose
            ``recent_scores``.
        log_interval: Progress fraction between two log reports.
        save_interval: Progress fraction between two checkpoints.
    """
    prevtime = time.time()

    # Skip the thresholds that the model has already passed.
    next_log = log_interval
    next_save = save_interval
    while next_log <= model.progress:
        next_log += log_interval
    while next_save <= model.progress:
        next_save += save_interval

    # Continue numbering after any checkpoint files that already exist so an
    # earlier run's checkpoints are not overwritten.
    save_dir = f'saves/{model.name}'
    saves = 1
    while os.path.isfile(f'{save_dir}/{saves}.pth'):
        saves += 1

    while model.progress < 1:
        print(model.progress)
        model.train_epoch(env)

        if model.progress >= next_log:
            next_log += log_interval
            avg = np.mean(env.recent_scores)
            print(f"Average score:\t{round(avg,3)}")
            print(f"progress:\t{round(model.progress * 100, 2)}%")
            currtime = time.time()
            time_passed = currtime - prevtime
            print(f"elapsed time:\t{round(time_passed, 3)} second")
            # Extrapolate: seconds spent on the last log interval times the
            # number of log intervals still to go, converted to hours.
            print(f"time left:\t{round(time_passed*(1-model.progress)/log_interval/3600, 3)} hour")
            prevtime = currtime
            model.write_log('Average_score', avg)
            print('-----------------------------------------------------------')

        if model.progress >= next_save:
            # One epoch may cross several thresholds; write a single
            # checkpoint and move the threshold past the current progress.
            while next_save <= model.progress:
                next_save += save_interval
            os.makedirs(save_dir, exist_ok=True)
            model.save_model(f'{save_dir}/{saves}.pth')
            saves += 1


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:

# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = th.device('cuda') if th.cuda.is_available() else th.device('cpu')
istrain = True

env_name = 'LunarLanderContinuous-v2'

# `env` is a single instance used here to read the space shapes; `envs` is
# the vectorised environment that is later passed to `train`.
env = gym.make(env_name)
envs = vecenv(alg_args['nenvs'], env_name, {})

obs_dim = env.observation_space.shape[0]
act_dim = env.action_space.shape[0]

# NOTE(review): the meaning of 64 and 256 is defined by MLPExtractor / DREPNet,
# which are not visible here; the rollout cell builds a (1, 64) goal tensor,
# so 64 presumably is the goal/feature width — confirm.
extractor = MLPExtractor(obs_dim, 64, 256)
network = DREPNet(extractor, act_dim, 256)

model = DREP_SAC(device, network, **alg_args)

  return np.array(self.states)


In [77]:
# Train to completion, then persist the final weights.
train(model, envs, save_interval=0.1)

# Ensure the target directory exists first so a missing folder cannot lose
# the trained weights at the very end of the run.
last_checkpoint = f'saves/{model.name}/last.pth'
os.makedirs(os.path.dirname(last_checkpoint), exist_ok=True)
model.save_model(last_checkpoint)

In [13]:
# Load the last saved weights and record one episode to video.
model.load_model(f'saves/{model.name}/last.pth')
video_dir = f'./logs/{model.name}/video'
os.makedirs(video_dir, exist_ok=True)

env = gym.make(env_name, render_mode="rgb_array")
env = RecordVideo(env, video_dir)
try:
    state, _ = env.reset()
    done = False
    # NOTE(review): an all-zero goal of width 64 is fed to the policy; 64
    # presumably has to match the network's goal size — confirm.
    goal = th.zeros(size=(1, 64), device=device)
    # Inference only: no gradients are needed for the rollout.
    with th.no_grad():
        while not done:
            # state[None] adds a leading batch axis of size 1.
            tstate = th.as_tensor(state[None], dtype=th.float32).to(device)
            action = model.model.get_action(tstate, goal)
            state, reward, terminated, truncated, info = env.step(action[0])
            # The episode is over when the env terminates OR hits its time
            # limit; checking only `terminated` can keep stepping forever.
            done = terminated or truncated
finally:
    # Always close so the recorder gets a chance to finalise the video,
    # even if the rollout raises.
    env.close()

Model loaded


  logger.warn(


Moviepy - Building video /home/depthfirst/RLModel/logs/nomix/video/rl-video-episode-0.mp4.
Moviepy - Writing video /home/depthfirst/RLModel/logs/nomix/video/rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready /home/depthfirst/RLModel/logs/nomix/video/rl-video-episode-0.mp4


In [4]:
# NOTE(review): ad-hoc inspection of the global `state`. The saved output below
# is an (observation, info) tuple, which does not match the plain array that the
# rollout cell leaves in `state` — the output looks stale; re-run or drop this cell.
print(state)

(array([-1.2124062e-03,  1.4221095e+00, -1.2282413e-01,  4.9730101e-01,
        1.4117146e-03,  2.7821491e-02,  0.0000000e+00,  0.0000000e+00],
      dtype=float32), {})
