# DQN Implementation

### Installations

In [1]:
# # Install environment and agent
# !pip install highway-env
# !pip install --upgrade sympy torch


### Learning using existing model

The following pseudocode will be followed when creating the DQN:

Useful: https://www.youtube.com/watch?v=RVMpm86equc&list=PL58zEckBH8fCMIVzQCRSZVPUp3ZAVagWi&index=2

https://github.com/saashanair/rl-series/tree/master/dqn

https://github.com/johnnycode8/gym_solutions/blob/main/frozen_lake_dql.py

<img src="DQN.png" style="width: 900px;" align="left"/>


Potential Problems: https://www.reddit.com/r/reinforcementlearning/comments/1555wgi/dqn_loss_increasing_and_rewards_decreasing/


For CNN:

https://www.reddit.com/r/MachineLearning/comments/3l5qu7/rules_of_thumb_for_cnn_architectures/


In [31]:
from torch.utils.tensorboard import SummaryWriter
import os
import datetime

class Metrics:
    """Optional TensorBoard scalar logger for one run.

    When ``use_metrics`` is false the instance is inert: no directory is
    touched, no writer is created, and ``add`` / ``close`` do nothing.

    Args:
        policy: Label placed at the start of the run directory name.
        result_file_name: Directory holding one entry per run. It is created
            (including missing parents) if it does not exist yet.
        use_metrics: Whether anything should be logged at all.
    """

    def __init__(self, policy, result_file_name, use_metrics):
        self.use_metrics = use_metrics
        if not self.use_metrics:
            return
        log_dir = self._next_log_dir(policy, result_file_name)
        self.writer = SummaryWriter(log_dir=log_dir, flush_secs=60)

    @staticmethod
    def _next_log_dir(policy, result_file_name):
        """Build the path of the next run directory under ``result_file_name``.

        The name has the form ``<policy>_<n>_<YYYYmmddHHMMSS>``, where ``n`` is
        one more than the number of entries currently in the directory.

        Returns:
            The run directory path as a string (the run directory itself is
            not created here).
        """
        # Make sure the parent exists so the listing below cannot fail on a
        # first run; list the same path that is written to.
        os.makedirs(result_file_name, exist_ok=True)
        timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
        run_number = len(os.listdir(result_file_name)) + 1
        return f'{result_file_name}/{policy}_{run_number}_{timestamp}'

    def add(self, type, y, x):
        """Record scalar ``y`` under tag ``type`` at step ``x`` (no-op when disabled)."""
        if not self.use_metrics:
            return
        self.writer.add_scalar(type, y, x)

    def close(self):
        """Close the underlying writer (no-op when disabled)."""
        if not self.use_metrics:
            return
        self.writer.close()

In [34]:
import gymnasium as gym
import highway_env
import os
from tqdm import tqdm

class HumanAgent:
    """Runs baseline episodes in an environment and logs per-episode metrics.

    Args:
        params: Dict of settings. Recognised keys:
            ``"episode_num"`` (default 10) - number of episodes to run;
            ``"use_metrics"`` (default False) - whether to log to TensorBoard
            under the ``training_results`` directory.
    """

    def __init__(self, params):
        self.episode_num = params.get("episode_num", 10)
        use_metrics = params.get("use_metrics", False)

        if use_metrics:
            self.create_folder("training_results")

        self.metrics = Metrics("human", "training_results", use_metrics)

    def act(self, env):
        """Run ``episode_num`` episodes in ``env``, rendering and logging each.

        Every step submits ``env.action_space.sample()``.
        NOTE(review): with manual control enabled the environment presumably
        overrides this with keyboard input - confirm against highway-env.

        Per episode, the mean step reward is logged as ``rollout/rewards`` and
        the step count as ``rollout/episode-length``. The metrics writer is
        closed when the run ends, including when it ends with an exception.
        """
        try:
            for episode in tqdm(range(self.episode_num), desc="Training Model"):
                env.reset()

                done = False
                truncated = False
                episode_rewards = []
                # The loop body always runs at least once, so the mean below
                # never divides by zero.
                while not (done or truncated):
                    _, reward, done, truncated, _ = env.step(env.action_space.sample())
                    episode_rewards.append(reward)
                    env.render()

                episode_len = len(episode_rewards)
                self.metrics.add("rollout/rewards", sum(episode_rewards) / episode_len, episode)
                self.metrics.add("rollout/episode-length", episode_len, episode)
        finally:
            # Close the writer even if the run is interrupted.
            self.metrics.close()

    def create_folder(self, directory_name):
        """Create ``directory_name`` on a best-effort basis.

        An already-existing directory is silently accepted. Any other failure
        is reported with ``print`` and not re-raised.
        """
        try:
            os.mkdir(directory_name)
            print(f"Directory '{directory_name}' created successfully.")
        except FileExistsError:
            return
        except PermissionError:
            print(f"Permission denied: Unable to create '{directory_name}'.")
        except Exception as e:
            print(f"An error occurred: {e}")


In [36]:
# Settings for the baseline run; metrics are logged to TensorBoard.
params = {
    'use_metrics' : True,
}

human_agent = HumanAgent(params)
# NOTE(review): "manual_control" presumably lets keyboard input drive the
# vehicle - confirm against the highway-env configuration docs.
env = gym.make('highway-v0', render_mode='rgb_array', config={
        "lanes_count": 3,
        "manual_control": True,
})
try:
    human_agent.act(env)
finally:
    # Release the environment's resources even if the run is interrupted.
    env.close()

Training Model: 100%|██████████| 10/10 [01:11<00:00,  7.14s/it]


### Run the Tensorboard

In [5]:
# Reload the TensorBoard notebook extension.
%reload_ext tensorboard

# Start TensorBoard on localhost:6010, reading runs from the training_results directory.
%tensorboard --logdir training_results --host localhost --port 6010