# Baselines

## Baseline Agent

In [2]:
import gymnasium as gym
import highway_env
import os
from tqdm import tqdm

import sys
sys.path.append(os.path.abspath('..'))
from metrics import Metrics

class BaseAgent:
    """Baseline agent that steps an environment with randomly sampled actions.

    The class always passes ``env.action_space.sample()`` to ``env.step``.
    Any other source of control (e.g. a human driver) has to come from the
    environment's own configuration -- presumably the environment then
    overrides the sampled action; confirm against the environment's docs.
    """

    def __init__(self, params):
        """Configure the agent from a parameter mapping.

        Args:
            params: Mapping with the optional keys ``"episode_num"`` (number
                of episodes to run, default 10), ``"use_metrics"`` (flag
                forwarded to ``Metrics``, default False) and ``"type"``
                (label of the agent, default ``"random"``).
        """
        self.episode_num = params.get("episode_num", 10)
        use_metrics = params.get("use_metrics", False)
        self.type = params.get("type", "random")
        # Metrics is a project-local helper; it receives the agent label, the
        # "training_results" name and the use_metrics flag.
        self.metrics = Metrics(self.type, "training_results", use_metrics)

    def act(self, env):
        """Run ``episode_num`` episodes on ``env`` and record per-episode metrics.

        For every episode the mean per-step reward is added under
        ``"rollout/rewards"`` and the number of steps under
        ``"rollout/episode-length"``, both indexed by the episode number.

        Args:
            env: Environment exposing ``reset()``, ``step(action)`` returning
                a 5-tuple ``(obs, reward, done, truncated, info)``,
                ``render()`` and ``action_space.sample()``.

        The metrics object is closed when the run ends, including when an
        episode is aborted by an exception or a keyboard interrupt.
        """
        try:
            for epoch in tqdm(range(self.episode_num), desc="Training Agent"):
                env.reset()

                done = False
                truncated = False
                episode_rewards = []
                # Both flags start False, so at least one step is always
                # taken and the mean below never divides by zero.
                while not (done or truncated):
                    _, reward, done, truncated, _ = env.step(env.action_space.sample())
                    episode_rewards.append(reward)
                    env.render()

                # One reward is stored per step, so the list length is the
                # episode length.
                episode_len = len(episode_rewards)
                self.metrics.add("rollout/rewards", sum(episode_rewards) / episode_len, epoch)
                self.metrics.add("rollout/episode-length", episode_len, epoch)
        finally:
            # Always release the metrics backend, even on an interrupted run.
            self.metrics.close()

## Human Agent

In [None]:
# Human baseline: the agent is labelled 'human' and the environment is created
# with "manual_control" enabled.
params = {
    'use_metrics': True,
    'type': 'human',
    'episode_num': 10,
}
human_agent = BaseAgent(params=params)
env = gym.make('highway-v0', render_mode='rgb_array', config={
    "lanes_count": 3,
    "manual_control": True,
})
try:
    # Control with the arrow keys
    human_agent.act(env)
finally:
    # Release the environment's rendering resources even if the run is
    # interrupted; the next cell creates its own environment.
    env.close()

## Random Agent

In [None]:
# Random baseline: the agent is labelled 'random' and every action is sampled
# from the environment's action space by BaseAgent.act.
params = {
    'use_metrics': True,
    'type': 'random',
    'episode_num': 10,
}

random_agent = BaseAgent(params=params)
env = gym.make('highway-v0', render_mode='rgb_array', config={
    "lanes_count": 3,
})
try:
    random_agent.act(env)
finally:
    # Release the environment's rendering resources even if the run is
    # interrupted.
    env.close()

## Run Tensorboard

In [5]:
# Load the TensorBoard notebook extension (reloading it if already loaded).
%reload_ext tensorboard

# Serve the logs found under "training_results" on localhost:6010. This is the
# same name BaseAgent passes to Metrics -- presumably the directory Metrics
# writes to; verify against the metrics module.
%tensorboard --logdir training_results --host localhost --port 6010