# DQN Implementation

### Installations

In [4]:
# # Install environment and agent
# !pip install highway-env
# !pip install --upgrade sympy torch


### Learning using existing model

The following pseudocode will be followed when creating the DQN:

Useful: https://www.youtube.com/watch?v=RVMpm86equc&list=PL58zEckBH8fCMIVzQCRSZVPUp3ZAVagWi&index=2

https://github.com/saashanair/rl-series/tree/master/dqn

https://github.com/johnnycode8/gym_solutions/blob/main/frozen_lake_dql.py

<img src="DQN.png" style="width: 900px;" align="left"/>


Potential Problems: https://www.reddit.com/r/reinforcementlearning/comments/1555wgi/dqn_loss_increasing_and_rewards_decreasing/


For CNN:

https://www.reddit.com/r/MachineLearning/comments/3l5qu7/rules_of_thumb_for_cnn_architectures/


In [2]:
import gymnasium as gym
import highway_env
import os
from tqdm import tqdm

# importing metrics
import sys
sys.path.append(os.path.abspath('..'))
from metrics import Metrics

class BaseAgent:
    """Baseline agent that steps an environment with randomly sampled actions.

    Per-episode statistics are recorded through a ``Metrics`` instance under
    the tags ``rollout/rewards`` and ``rollout/episode-length``.
    """

    def __init__(self, params):
        """Read the run configuration and create the metrics recorder.

        Args:
            params: Mapping of options. Recognised keys:
                ``"episode_num"`` (default ``10``) - number of episodes to run,
                ``"use_metrics"`` (default ``False``) - forwarded to ``Metrics``,
                ``"type"`` (default ``"random"``) - agent label forwarded to
                ``Metrics``.
        """
        self.episode_num = params.get("episode_num", 10)
        use_metrics = params.get("use_metrics", False)
        self.type = params.get("type", "random")
        self.metrics = Metrics(self.type, "training_results", use_metrics)

    def act(self, env):
        """Run ``episode_num`` episodes on ``env`` and record rollout metrics.

        Every step submits ``env.action_space.sample()`` and renders the
        environment. An episode ends when the environment reports either
        ``done`` or ``truncated``. After each episode the mean reward per step
        and the number of steps are added to the metrics.

        The metrics recorder is closed when this method exits, including when
        the run is cut short by an exception or a ``KeyboardInterrupt``.

        Args:
            env: Environment exposing ``reset()``, ``step(action)`` returning a
                5-tuple ``(obs, reward, done, truncated, info)``, ``render()``
                and ``action_space.sample()``.
        """
        try:
            for episode in tqdm(range(self.episode_num), desc="Training Model"):
                env.reset()

                done = False
                truncated = False
                episode_rewards = []
                episode_len = 0
                while not done and not truncated:
                    _, reward, done, truncated, _ = env.step(env.action_space.sample())

                    episode_rewards.append(reward)
                    episode_len += 1
                    env.render()

                # The loop above always runs at least once, so the list is non-empty.
                mean_reward = sum(episode_rewards) / len(episode_rewards)
                self.metrics.add("rollout/rewards", mean_reward, episode)
                self.metrics.add("rollout/episode-length", episode_len, episode)
        finally:
            # Always release the recorder, even if the rollout was interrupted.
            self.metrics.close()

In [4]:
# Run the baseline agent on a 3-lane highway with highway-env's
# "manual_control" option enabled, logging metrics under the 'human' label.
params = {
    'use_metrics': True,
    'type': 'human',
    'episode_num': 10,
}
human_agent = BaseAgent(params=params)
env = gym.make('highway-v0', render_mode='rgb_array', config={
    "lanes_count": 3,
    "manual_control": True,
})
try:
    human_agent.act(env)
finally:
    # Release the environment's resources even if the run is interrupted.
    env.close()

Training Model:   0%|          | 0/10 [00:00<?, ?it/s]2024-12-30 13:52:19.075 python[32664:4203241] NSEventModifierFlagFunction specified to -setKeyEquivalentModifierMask: for item <NSMenuItem: 0x171fe4260 No Brightness, ke='Command-F1'>, but is only supported for system-provided menu items; will not be used
2024-12-30 13:52:19.075 python[32664:4203241] NSEventModifierFlagFunction specified to -setKeyEquivalentModifierMask: for item <NSMenuItem: 0x171fe4690 Work Time>, but is only supported for system-provided menu items; will not be used
Training Model:   0%|          | 0/10 [00:08<?, ?it/s]


KeyboardInterrupt: 

In [5]:
# Run the baseline agent with randomly sampled actions on a 3-lane highway,
# logging metrics under the 'random' label.
params = {
    'use_metrics': True,
    'type': 'random',
    'episode_num': 10,
}

random_agent = BaseAgent(params=params)
env = gym.make('highway-v0', render_mode='rgb_array', config={
    "lanes_count": 3,
})
try:
    random_agent.act(env)
finally:
    # Release the environment's resources even if the run is interrupted.
    env.close()

Training Model:  30%|███       | 3/10 [00:08<00:19,  2.80s/it]


KeyboardInterrupt: 

### Run the Tensorboard

In [5]:
# (Re)load the TensorBoard notebook extension so the %tensorboard magic is available.
%reload_ext tensorboard

# Start TensorBoard on localhost:6011, reading logs from the "training_results"
# directory (the same name BaseAgent passes to Metrics; presumably where the
# metrics are written - confirm against the Metrics implementation).
%tensorboard --logdir training_results --host localhost --port 6011