# Libs

In [None]:
import gym
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from evaluate import *

# Simulation Setup

In [None]:
# Locations used by the cells below (all relative to the working directory).
input_dir = './agent'             # agent submission directory
output_dir = './out'              # output directory
sim_cfg = './cfg/simulator.cfg'   # simulator configuration file

In [None]:
simulator_cfg_file = sim_cfg

# os.path.dirname of a bare file name is the empty string, so the root handed
# to resolve_dirs is ''.
# NOTE(review): presumably resolve_dirs then resolves input_dir/output_dir
# relative to the working directory - confirm against evaluate.py.
submission_dir, scores_dir = resolve_dirs(os.path.dirname("evaluate.py"), input_dir, output_dir)

# Load the submitted agent spec together with its gym configuration object.
agent_spec, gym_cfg = load_agent_submission(submission_dir)

# Simulation Init

In [None]:
# Settings dict carried by the submission's gym config object.
gym_configs = gym_cfg.cfg

# Parsed simulator config; not referenced again by the cells visible here.
simulator_configs = read_config(simulator_cfg_file)

# Build the environment with a single simulator thread.
env = gym.make('CBEngine-v0', simulator_cfg_file=simulator_cfg_file, thread_num=1, gym_dict=gym_configs)

In [None]:
observations, infos = env.reset()

# Observation keys start with the agent id followed by '_'; collect the
# distinct ids once.
agent_id_list = list({int(obs_key.split('_')[0]) for obs_key in observations})

# Hand the ids to the submitted 'test' agent.
agent = agent_spec['test']
agent.load_agent_list(agent_id_list)

# Run Simulation (Single Step)

In [None]:
# Run 100 simulation steps, recording one row per (step, agent):
# [sim loop index, agent id, chosen phase, per-phase pressure vector].
rec = []
for i in range(100):
    pressures, action = act(observations)
    # Step the environment with the dict returned by act(); the name
    # `actions` is never defined in this notebook.
    observations, rewards, dones, infos = env.step(action)

    # rec
    rec.extend([i, k, v, pressures[k]] for k, v in action.items())

In [None]:
# Tabulate the recorded steps and plot the phase chosen over time for the
# first agent that appears in the record.
record_columns = ['sim_step', 'agent_id', 'action', 'pressure']
rec_df = pd.DataFrame(rec, columns=record_columns)
agent_ids = rec_df['agent_id'].unique()
plt.plot(rec_df.loc[rec_df['agent_id'] == agent_ids[0], 'action'].values)

In [None]:
# Show every recorded row for the first agent.
rec_df.loc[rec_df['agent_id'] == agent_ids[0]]

# Process Observation

In [None]:
def obs_process(observations):
    """Flatten raw observations into a one-row-per-lane DataFrame.

    Args:
        observations: mapping whose keys have the form
            ``"<agent_id>_<feature>"``. Each agent must provide the
            ``lane_speed`` and ``lane_vehicle_num`` features; element 0 of
            ``lane_speed`` is taken as the simulation step, and element 0 of
            ``lane_vehicle_num`` is skipped.

    Returns:
        DataFrame with columns ``sim_step``, ``agent_id``, ``road_idx``,
        ``lane_idx``, ``speed`` and ``veh_num``.

    Raises:
        AssertionError: if the two features describe different lane counts.
    """
    # Regroup the flat key space as {agent_id: {feature: values}}.
    per_agent = {}
    for raw_key, payload in observations.items():
        owner = int(raw_key.split('_', 1)[0])
        feature = raw_key[raw_key.find('_') + 1:]
        per_agent.setdefault(owner, {})[feature] = payload

    # One output row per lane; lanes are consumed in groups of three
    # (presumably three lanes per road - confirm against the simulator docs).
    rows = []
    for agent_id, features in per_agent.items():
        speed_feature = features['lane_speed']
        sim_step, lane_speed = speed_feature[0], speed_feature[1:]
        lane_vehicle_num = features['lane_vehicle_num'][1:]
        assert len(lane_speed) == len(lane_vehicle_num)
        rows.extend(
            [sim_step, agent_id, *divmod(pos, 3), lane_speed[pos], lane_vehicle_num[pos]]
            for pos in range(len(lane_speed))
        )

    column_names = ['sim_step', 'agent_id', 'road_idx',
                    'lane_idx', 'speed', 'veh_num']
    return pd.DataFrame(rows, columns=column_names)

# Define Pressure

# Pressure 1:
The pressure of a movement is the number of vehicles on the left-turn and through lanes of each incoming approach.

In [None]:
def gen_pressure(obs_df):
    """Sum vehicle counts per agent and traffic movement.

    Only the first four roads (labelled Ni, Ei, Si, Wi) and the first two
    lanes of each road (labelled L and T) are kept; every remaining lane is
    tagged with a movement name such as ``"NiL"`` and its ``veh_num`` is
    summed per agent.

    Args:
        obs_df: DataFrame with at least the columns ``agent_id``,
            ``road_idx``, ``lane_idx`` and ``veh_num``. It is not modified.

    Returns:
        DataFrame indexed by ``agent_id`` with exactly eight columns in the
        fixed order EiL, EiT, NiL, NiT, SiL, SiT, WiL, WiT. A movement with
        no data for an agent is reported as 0, so the column layout never
        depends on which movements happen to be present in ``obs_df``.
    """
    # init params
    road_idx_to_arm = {0: "Ni", 1: "Ei", 2: "Si", 3: "Wi"}
    lane_idx_to_mv = {0: "L", 1: "T"}
    # Sorted so the layout equals the alphabetical order pivot_table yields
    # when every movement is present.
    all_mvs = sorted(arm + mv
                     for arm in road_idx_to_arm.values()
                     for mv in lane_idx_to_mv.values())

    # Work on an explicit copy: assigning a column onto a filtered slice
    # triggers pandas' SettingWithCopyWarning.
    incoming = obs_df[(obs_df.road_idx < 4) & (obs_df.lane_idx < 2)].copy()
    incoming['mv'] = (incoming['road_idx'].map(road_idx_to_arm)
                      + incoming['lane_idx'].map(lane_idx_to_mv))

    # define pressure: the number of vehicles on the approach
    pressure = incoming.pivot_table(index='agent_id',
                                    columns='mv',
                                    values='veh_num',
                                    aggfunc='sum',
                                    fill_value=0)

    # Guarantee all eight movement columns, in a stable order.
    return pressure.reindex(columns=all_mvs, fill_value=0)

In [None]:
def act(observations):
    """Pick a signal phase (1-8) for every agent from its movement pressures.

    Each phase serves two movements; a phase's pressure is the sum of the
    pressures of those two movements. The phase with the highest pressure is
    chosen. Ties are broken uniformly at random, and when all eight phase
    pressures are equal a phase is drawn uniformly from 1-8.

    Args:
        observations: raw observation mapping accepted by ``obs_process``.

    Returns:
        Tuple ``(pressures, action)`` of dicts keyed by agent id:
        ``pressures`` holds the length-8 array of phase pressures and
        ``action`` the chosen phase number.
    """
    obs_df = obs_process(observations)
    cur_step = obs_df['sim_step'].unique()[0]  # currently unused below

    # get pressure
    pressure = gen_pressure(obs_df)

    # Movement-column indices served by phases 1..8, in phase order.
    # NOTE(review): assumes gen_pressure's columns are ordered
    # EiL, EiT, NiL, NiT, SiL, SiT, WiL, WiT - confirm against gen_pressure.
    phase_to_mv_cols = ((2, 4), (3, 5), (0, 6), (1, 7),
                        (2, 3), (0, 1), (4, 5), (6, 7))
    phase_mv_incidence = np.zeros((8, 8))
    for phase_row, mv_cols in enumerate(phase_to_mv_cols):
        phase_mv_incidence[phase_row, list(mv_cols)] = 1

    action, pressures = {}, {}
    for row_pos, mv_pressure in enumerate(pressure.values):
        agent_id = pressure.index[row_pos]
        phase_pressures = phase_mv_incidence.dot(mv_pressure)
        pressures[agent_id] = phase_pressures
        max_locs = np.flatnonzero(phase_pressures == phase_pressures.max())

        if np.std(phase_pressures) == 0:
            # All phases equal: draw any phase.
            action[agent_id] = np.random.randint(1, 9, 1)[0]
            continue

        if len(max_locs) > 1:
            # Several phases share the maximum: draw one of them.
            pick = np.random.randint(0, len(max_locs), 1)
            action[agent_id] = max_locs[pick][0] + 1
            print(agent_id, ":", action[agent_id])
            continue

        action[agent_id] = np.argmax(phase_pressures) + 1

    return pressures, action