In [None]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import seaborn as sns

from glob import glob
from tqdm import tqdm
from traffic_tail.environment import create_env
from traffic_tail.trainer import SUMOTrainer


# Experiment-wide settings.
# USE_SUMO_GUI and TOTAL_TIME are copied into every scenario config class
# below as `use_gui` and `num_seconds` respectively.
USE_SUMO_GUI = False
TOTAL_TIME = 900
# NOTE(review): NUM_SEEDS and NUM_EPISODES are not used in this part of the
# file; presumably they set how many seeds / training episodes each scenario
# is run for — confirm against the code that consumes them.
NUM_SEEDS = 5
NUM_EPISODES = 40


class DefaultConfig:
    """Settings for the "default" scenario: tailgating off, ``default_mode`` 31."""

    # Label identifying this scenario.
    name = "default"

    # Scenario-specific switches.
    # NOTE(review): default_mode looks like a SUMO speed-mode bitmask
    # (31 would enable every safety check) — confirm where it is consumed.
    default_mode = 31
    tailgating = False

    # Values taken from the module-level experiment constants.
    num_seconds = TOTAL_TIME
    use_gui = USE_SUMO_GUI


class OverspeedConfig:
    """Settings for the "overspeed" scenario: tailgating off, ``default_mode`` 24."""

    # Label identifying this scenario.
    name = "overspeed"

    # Scenario-specific switches.
    # NOTE(review): default_mode looks like a SUMO speed-mode bitmask
    # (24 would leave only the right-of-way and red-light checks on, letting
    # vehicles ignore safe-speed limits) — confirm where it is consumed.
    default_mode = 24
    tailgating = False

    # Values taken from the module-level experiment constants.
    num_seconds = TOTAL_TIME
    use_gui = USE_SUMO_GUI
    

class TailgatingConfig:
    """Settings for the "tailgating" scenario: tailgating on, ``default_mode`` 31."""

    # Label identifying this scenario.
    name = "tailgating"

    # Scenario-specific switches.
    # NOTE(review): default_mode looks like a SUMO speed-mode bitmask
    # (31 would enable every safety check) — confirm where it is consumed.
    default_mode = 31
    tailgating = True

    # Values taken from the module-level experiment constants.
    num_seconds = TOTAL_TIME
    use_gui = USE_SUMO_GUI
    

class TailgatingOverspeedConfig:
    """Settings for the "tailgating_overspeed" scenario: tailgating on, ``default_mode`` 24."""

    # Label identifying this scenario.
    name = "tailgating_overspeed"

    # Scenario-specific switches.
    # NOTE(review): default_mode looks like a SUMO speed-mode bitmask
    # (24 would leave only the right-of-way and red-light checks on, letting
    # vehicles ignore safe-speed limits) — confirm where it is consumed.
    default_mode = 24
    tailgating = True

    # Values taken from the module-level experiment constants.
    num_seconds = TOTAL_TIME
    use_gui = USE_SUMO_GUI


def run_episode(env, agent=None):
    """Run a single episode on ``env`` and return the total reward.

    The environment is stepped until ``done["__all__"]`` becomes true. At
    every step the per-signal rewards returned by ``env.step`` are summed
    and added to the running total.

    Args:
        env: Multi-agent environment exposing ``reset()``, ``step(actions)``,
            ``close()``, ``ts_ids`` and ``action_spaces(ts_id)``. ``reset``
            and ``step`` must return observations as a dict keyed by signal
            id; ``step`` returns ``(state, reward, done, info)`` where
            ``reward`` is a dict and ``done`` contains the key ``"__all__"``.
        agent: Optional mapping from signal id to an object with an
            ``act(observation)`` method. When ``None``, a random action is
            sampled from each signal's action space instead.

    Returns:
        The sum of all rewards collected over the episode.

    Note:
        ``env.close()`` is always called, even when resetting, acting or
        stepping raises, so a failed episode does not leave the simulation
        open. The original exception still propagates to the caller.
    """
    total_reward = 0
    try:
        state = env.reset()
        done = {"__all__": False}
        while not done["__all__"]:
            if agent is None:
                # No agent supplied (e.g. an untrained baseline): pick a
                # random action for every signal the environment knows.
                actions = {
                    ts_id: env.action_spaces(ts_id).sample()
                    for ts_id in env.ts_ids
                }
            else:
                # Let each signal's agent choose based on its own
                # observation; only signals present in `state` get an action.
                actions = {
                    ts_id: agent[ts_id].act(observation)
                    for ts_id, observation in state.items()
                }
            state, reward, done, _ = env.step(actions)
            total_reward += sum(reward.values())
    finally:
        # Release the environment regardless of how the episode ended.
        env.close()
    return total_reward


