In [None]:
# default_exp car

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#hide
# ensures that the core library is reloaded automatically whenever it is modified
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Car RL

In [None]:
from bfh_mt_hs2020_rl_basics.env import CarEnv
from bfh_mt_hs2020_rl_basics.agent import Agent
from bfh_mt_hs2020_rl_basics.bridge import Bridge
from bfh_mt_hs2020_rl_basics.loop import LoopControl

import gym
from gym.spaces import Tuple, Discrete, Box
import numpy as np
import warnings

from types import SimpleNamespace

import torch
from torch.optim import Optimizer, Adam

In [None]:
# Baseline hyperparameters. Every named experiment starts from these values
# and overrides only the entries that differ.
_BASE_SETTINGS = dict(
    # agent
    agent_device              = "cpu",    # cpu or cuda
    agent_gamma_exp           = 0.9,      # discount factor for experience_first_last; shouldn't matter since step_size is only 1
    agent_buffer_size         = 1000,     # size of replay buffer
    agent_eps_start           = 1.0,      # epsilon start
    agent_eps_final           = 0.02,     # epsilon end
    agent_eps_frames          = 10**5,    # number of frames until eps_final is reached (decay is linear)
    agent_target_net_sync     = 1000,     # sync TargetNet with weights of DNN every .. iterations

    # bridge
    bridge_optimizer          = None,     # optimizer -> default is Adam
    bridge_learning_rate      = 0.0001,   # learning rate
    bridge_gamma              = 0.9,      # discount factor for reward
    bridge_initial_population = 1000,     # initial number of experiences in buffer
    bridge_batch_size         = 32,       # batch size for training

    # loop control
    loop_run_name             = "base",   # run name for logfile
    loop_bound_avg_reward     = 0.0,      # target avg reward
    loop_logtb                = True,     # log to Tensorboard logfile
)


def _experiment(**overrides) -> SimpleNamespace:
    """Return the baseline settings with ``overrides`` applied on top."""
    settings = dict(_BASE_SETTINGS)
    settings.update(overrides)
    return SimpleNamespace(**settings)


# Named hyperparameter sets, looked up by name when a run is started.
HYPERPARAMS = {
    'base_setup': _experiment(),
    'buffer_size_eps': _experiment(
        agent_buffer_size         = 50000,         # larger replay buffer
        agent_eps_frames          = 10**6,         # more frames until eps_final is reached
        bridge_initial_population = 5000,          # more experiences collected before training
        loop_run_name             = "buffer_eps",  # run name for logfile
    ),
}
    

In [None]:
def create_control(params: SimpleNamespace) -> LoopControl:
    """Wire up environment, agent, bridge and loop control for one run.

    A fresh ``CarEnv`` is created and handed to an ``Agent``; the agent is
    wrapped in a ``Bridge``, which in turn is passed to the ``LoopControl``
    that is returned.

    Args:
        params: Hyperparameter namespace. The ``agent_*`` attributes are
            forwarded to ``Agent``, the ``bridge_*`` attributes to ``Bridge``
            and the ``loop_*`` attributes to ``LoopControl``.

    Returns:
        The configured ``LoopControl``; it is not started here.
    """
    car_agent = Agent(
        CarEnv(),
        devicestr=params.agent_device,
        gamma=params.agent_gamma_exp,
        buffer_size=params.agent_buffer_size,
        eps_start=params.agent_eps_start,
        eps_final=params.agent_eps_final,
        eps_frames=params.agent_eps_frames,
        target_net_sync=params.agent_target_net_sync,
    )

    training_bridge = Bridge(
        agent=car_agent,
        optimizer=params.bridge_optimizer,
        learning_rate=params.bridge_learning_rate,
        gamma=params.bridge_gamma,
        initial_population=params.bridge_initial_population,
        batch_size=params.bridge_batch_size,
    )

    return LoopControl(
        bridge=training_bridge,
        run_name=params.loop_run_name,
        bound_avg_reward=params.loop_bound_avg_reward,
        logtb=params.loop_logtb,
    )

In [None]:
def run_example(config_name: str) -> None:
    """Build the training loop for a named configuration and run it.

    Args:
        config_name: Key into ``HYPERPARAMS`` selecting the hyperparameter set.

    Raises:
        KeyError: If ``config_name`` is not a key of ``HYPERPARAMS``; the
            message lists the available configuration names.
    """
    try:
        params = HYPERPARAMS[config_name]
    except KeyError:
        available = ", ".join(sorted(HYPERPARAMS))
        raise KeyError(
            f"unknown config {config_name!r}; available configs: {available}"
        ) from None

    # Get rid of the missing-metrics warning, but only for the duration of this
    # run: catch_warnings() restores the previous filters on exit, so later
    # cells in the same kernel still see their own UserWarnings.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)

        control = create_control(params)
        control.run()

In [None]:
# Train with the 'buffer_size_eps' configuration (larger replay buffer, more epsilon frames).
# For the baseline run, call run_example('base_setup') instead.
run_example('buffer_size_eps')

Episode 1: reward=-1051, steps=383, elapsed=0:00:01
Episode 2: reward=-1068, steps=414, elapsed=0:00:01
Episode 3: reward=-1066, steps=313, elapsed=0:00:01
Episode 4: reward=-1046, steps=178, elapsed=0:00:01
Episode 5: reward=-1062, steps=437, elapsed=0:00:01
Episode 6: reward=-1063, steps=384, elapsed=0:00:01
Episode 7: reward=-1014, steps=260, elapsed=0:00:01
Episode 8: reward=-1035, steps=248, elapsed=0:00:01
Episode 9: reward=-1005, steps=136, elapsed=0:00:01
Episode 10: reward=-1082, steps=412, elapsed=0:00:01
Episode 11: reward=-1028, steps=248, elapsed=0:00:01
Episode 12: reward=-1073, steps=307, elapsed=0:00:01
Episode 13: reward=-1057, steps=195, elapsed=0:00:01
Episode 14: reward=-1060, steps=324, elapsed=0:00:01
Episode 15: reward=-1015, steps=182, elapsed=0:00:01
Episode 16: reward=-1060, steps=325, elapsed=0:00:01
Episode 17: reward=-1088, steps=296, elapsed=0:00:01
Episode 18: reward=-1042, steps=332, elapsed=0:00:02
Episode 19: reward=-1068, steps=301, elapsed=0:00:02
Ep

Engine run is terminating due to exception: .


Episode 5312: reward=69, steps=225, elapsed=1:00:43


KeyboardInterrupt: 