## Setup

You will need to make a copy of this notebook in your Google Drive before you can edit the homework files. You can do so with **File &rarr; Save a copy in Drive**.

## Run DQN and Double DQN

In [1]:
#@title imports
import os
import time

from cs285.infrastructure.rl_trainer import RL_Trainer
from cs285.agents.dqn_agent import DQNAgent
from cs285.infrastructure.dqn_utils import get_env_kwargs

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#@title runtime arguments

class Args:
  """Runtime settings for this notebook, readable and writable like a dict.

  Every setting is a class attribute. The three dunder methods below map
  subscripting and the `in` operator onto getattr / setattr / hasattr, so
  `args['batch_size']` and `args.batch_size` refer to the same value.

  The `#@param` and `#@markdown` markers are Colab form annotations; keep
  each `#@param` on the same line as the assignment it describes.
  """

  def __getitem__(self, key):
    """Return the attribute named `key`.

    A missing name raises AttributeError (from getattr), not KeyError.
    """
    return getattr(self, key)

  def __setitem__(self, key, val):
    """Store `val` as attribute `key` on this instance.

    setattr targets the instance, so the class-level default is shadowed
    rather than overwritten.
    """
    setattr(self, key, val)

  def __contains__(self, key):
    """Return True if an attribute named `key` exists.

    hasattr is also true for methods and inherited attributes, not only for
    the settings declared below.
    """
    return hasattr(self, key)

  env_name = 'LunarLander-v3' #@param ['MsPacman-v0', 'LunarLander-v3', 'PongNoFrameSkip-v4']
  exp_name = 'q2_dqn_1' #@param

  ## The assignment PDF explains how to set ep_len
  ## and the discount for each environment
  ep_len = 200 #@param {type: "integer"}

  #@markdown batches and steps
  batch_size = 32 #@param {type: "integer"}
  eval_batch_size = 1000 #@param {type: "integer"}

  num_agent_train_steps_per_iter = 1 #@param {type: "integer"}

  num_critic_updates_per_agent_update = 1 #@param {type: "integer"}
  
  #@markdown Q-learning parameters
  # NOTE(review): exp_name above reads 'q2_dqn_1' while double_q is True here;
  # confirm that the run name matches the algorithm actually being run.
  double_q = True #@param {type: "boolean"}

  #@markdown system
  save_params = False #@param {type: "boolean"}
  no_gpu = False #@param {type: "boolean"}
  which_gpu = 0 #@param {type: "integer"}
  seed = 1 #@param {type: "integer"}

  #@markdown logging
  ## The default is to not log video so
  ## that logs are small enough to be
  ## uploaded to Gradescope
  video_log_freq =  -1 #@param {type: "integer"}
  scalar_log_freq =  10000#@param {type: "integer"}


# Single settings object shared by every later cell.
args = Args()

# Code carried over from hw1 looks up 'train_batch_size', so mirror
# batch_size under that name as well.
args.train_batch_size = args.batch_size

# Video logging is opt-in; warn whenever it has been switched on.
if args.video_log_freq > 0:
  import warnings
  warnings.warn(
      '\nLogging videos will make eventfiles too'
      '\nlarge for the autograder. Set video_log_freq = -1'
      '\nfor the runs you intend to submit.')

In [3]:
#@title create directories for logging

# Root folder (relative to the current working directory) for all run logs.
data_path = 'data'

# Run folder name: <exp_name>_<env_name>_<dd-mm-YYYY_HH-MM-SS>. The timestamp
# has one-second resolution, so runs started in different seconds get
# distinct folders.
logdir = args.exp_name + '_' + args.env_name + '_' + time.strftime("%d-%m-%Y_%H-%M-%S")
logdir = os.path.join(data_path, logdir)
args['logdir'] = logdir

# exist_ok=True replaces the separate os.path.exists() checks, which could
# race with another process creating the same folder. makedirs also creates
# missing parents, so data_path itself is created here when needed.
os.makedirs(logdir, exist_ok=True)

print("LOGGING TO: ", logdir)


LOGGING TO:  data/q2_dqn_1_LunarLander-v3_24-10-2022_17-26-53


In [4]:
#@title Define Q-function trainer

class Q_Trainer:
    """Configure an ``RL_Trainer`` for a DQN agent and run its training loop."""

    def __init__(self, params):
        """Fill in the remaining settings on ``params`` and build the trainer.

        Args:
            params: settings object supporting ``params[key]`` reads and
                writes (this notebook passes its ``Args`` instance). It is
                modified in place: every entry returned by
                ``get_env_kwargs(params['env_name'])`` is copied onto it,
                followed by the 'agent_class', 'agent_params',
                'train_batch_size' and 'env_wrappers' entries.

        Raises:
            KeyError: if the environment settings contain no 'env_wrappers'
                entry (assuming ``get_env_kwargs`` returns a plain dict).
        """
        self.params = params

        # Environment-specific settings are merged into params and override
        # any entry that already uses the same name.
        env_args = get_env_kwargs(params['env_name'])
        for key, value in env_args.items():
            params[key] = value

        self.params['agent_class'] = DQNAgent
        # The agent reads its settings from the very same object, so
        # 'agent_params' intentionally refers back to params itself.
        self.params['agent_params'] = params
        self.params['train_batch_size'] = params['batch_size']
        # Already copied by the loop above; the explicit lookup is kept so a
        # missing 'env_wrappers' entry fails here instead of further on.
        self.params['env_wrappers'] = env_args['env_wrappers']

        self.rl_trainer = RL_Trainer(self.params)

    def run_training_loop(self):
        """Train for ``params['num_timesteps']`` steps.

        The agent's actor is used both to collect data and to evaluate.
        'num_timesteps' is not declared in this notebook's ``Args`` class;
        presumably it is supplied by ``get_env_kwargs`` (verify).
        """
        self.rl_trainer.run_training_loop(
            self.params['num_timesteps'],
            collect_policy = self.rl_trainer.agent.actor,
            eval_policy = self.rl_trainer.agent.actor,
            )

In [5]:
#@title run training

# Build the trainer from the runtime arguments defined above, then start
# training. `trainer` stays in the notebook namespace for later inspection.
trainer = Q_Trainer(args)
trainer.run_training_loop()

########################
logging outputs to  data/q2_dqn_1_LunarLander-v3_24-10-2022_17-26-53
########################
Using GPU id 0


This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

The backend was *originally* set to 'module://matplotlib_inline.backend_inline' by the following code:
  File "/home/suyan/Software/anaconda3/envs/rlenv/lib/python3.8/runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/suyan/Software/anaconda3/envs/rlenv/lib/python3.8/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/home/suyan/Software/anaconda3/envs/rlenv/lib/python3.8/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/home/suyan/Software/anaconda3/envs/rlenv/lib/python3.8/site-packages/traitlets/config/application.py", line 846, in launch_instance
    app.start()
  File "/home/suyan/Software/anaconda3/envs/rlenv/lib/python3.8/site-packages/ipykernel/kernela



********** Iteration 0 ************

Training agent...

Training agent using sampled data from replay buffer...

Beginning logging procedure...
Timestep 1
mean reward (100 episodes) nan
best mean reward -inf
running time 120.920631
Train_EnvstepsSoFar : 1
TimeSinceStart : 120.92063117027283
Done logging...



Training agent using sampled data from replay buffer...

Training agent using sampled data from replay buffer...

Training agent using sampled data from replay buffer...

Training agent using sampled data from replay buffer...

Training agent using sampled data from replay buffer...

Training agent using sampled data from replay buffer...

Training agent using sampled data from replay buffer...

Training agent using sampled data from replay buffer...

Training agent using sampled data from replay buffer...

Training agent using sampled data from replay buffer...

Training agent using sampled data from replay buffer...

Training agent using sampled data from replay buffer...

Tra

KeyboardInterrupt: 

In [None]:
#@markdown You can visualize your runs with tensorboard from within the notebook

## requires tensorflow==2.3.0
# %load_ext tensorboard
# %tensorboard --logdir /content/cs285_f2021/homework_fall2021/hw3/data/