In [None]:
import pandas as pd
import numpy as np
import shap
import torch
from torch import nn
import scipy.stats
import matplotlib.pyplot as plt
import plotly.express as px

import json
import xmltodict

from agents.option_critic import OptionCriticFeatures

In [None]:
# Directory that load_data() reads the evaluation CSV exports from.
BASE_FOLDER = './outputs/evaluation'

In [None]:
def to_tensor(obs):
    """Return `obs` (any array-like) as a float32 torch tensor."""
    return torch.from_numpy(np.asarray(obs)).float()

def load_data(prefix, traffic):
    """Read the single-episode and 100-episode evaluation CSVs of one run.

    Both files are looked up under BASE_FOLDER using the `prefix` and
    `traffic` name fragments.

    Returns:
        (single, multiple): the single-episode frame indexed by `step` with
        the `cumulative_reward` column removed, and the multi-episode frame
        indexed by `episode` (all of its columns are kept).
    """
    single = (
        pd.read_csv(f'{BASE_FOLDER}/{prefix}_1_episode_{traffic}.csv')
        .set_index('step')
        .drop(columns=['cumulative_reward'])
    )
    multiple = pd.read_csv(
        f'{BASE_FOLDER}/{prefix}_100_episode_{traffic}.csv'
    ).set_index('episode')
    return single, multiple

def extract_obs_data(df, num_phases=4, num_lanes=8):
    """Expand the comma-separated `obs` strings into one column per feature.

    Each observation is read with this flat layout:
    `num_phases` green-light flags, one `min_green` value, `num_lanes`
    densities, then `num_lanes` queue values. The defaults (4 phases,
    8 lanes) reproduce the previously hard-coded 21-value layout.

    Args:
        df: frame with `obs` (comma-separated floats), `termination_prob`
            and `option` columns.
        num_phases: number of leading green-light entries per observation.
        num_lanes: number of lanes (density and queue entries each).

    Returns:
        DataFrame with one row per input row and the columns `option`,
        `prob`, `min_green`, `green_light_<i>`, `lane_<i>_density` and
        `lane_<i>_queue`.

    Raises:
        IndexError: if an observation has fewer values than the layout needs.
        ValueError: if an entry of an observation is not a float.
    """
    min_green_index = num_phases
    density_start = num_phases + 1
    queue_start = density_start + num_lanes

    records = []
    rows = zip(df.option.to_list(), df.termination_prob.to_list(), df.obs.tolist())
    for option, prob, raw_obs in rows:
        values = [float(s.strip()) for s in raw_obs.split(',')]
        record = {
            'option': option,
            'prob': prob,
            'min_green': values[min_green_index],
        }
        for i in range(num_phases):
            record[f'green_light_{i}'] = values[i]
        for i in range(num_lanes):
            # Density and queue are kept adjacent per lane in the output,
            # although they live in two separate runs of the observation.
            record[f'lane_{i}_density'] = values[density_start + i]
            record[f'lane_{i}_queue'] = values[queue_start + i]
        records.append(record)
    return pd.DataFrame(records)

In [None]:
# Load the single-episode trace of the 2-way run; the 100-episode frame that
# load_data also returns is not needed here and is discarded.
df_single_episode, _ = load_data('oc_2way_500k_steps', 'custom-2way-single-intersection')

In [None]:
# df_single_episode, _ = load_data('oc_hd_500k_steps', 'custom-2way-single-intersection')

In [None]:
# One row per logged step: option, termination probability and the
# observation split into individual feature columns.
df_probs_obs = extract_obs_data(df_single_episode)

In [None]:
# Visualisations

In [None]:
def extract_avg(df):
    """Average the lane density and lane queue columns of every row.

    A column counts as a density column when its name contains 'density',
    and as a queue column when its name contains 'queue' (and not
    'density').

    Args:
        df: frame with an `option` column plus the density / queue columns.

    Returns:
        DataFrame with a fresh 0..n-1 index and the columns `option`,
        `avg_density` and `avg_queue`. `option` keeps the dtype it has in
        `df` (the previous row-by-row version upcast it to float).
    """
    density_cols = [col for col in df.columns if 'density' in col]
    queue_cols = [
        col for col in df.columns if 'queue' in col and 'density' not in col
    ]
    # skipna=False: a NaN in any lane makes the row average NaN, exactly as
    # np.mean over the row's values did before.
    return pd.DataFrame({
        'option': df['option'].to_numpy(),
        'avg_density': df[density_cols].mean(axis=1, skipna=False).to_numpy(),
        'avg_queue': df[queue_cols].mean(axis=1, skipna=False).to_numpy(),
    })
# Scatter of mean queue against mean density per step, coloured by the
# option that was active.
df_avgs = extract_avg(df_probs_obs)
df_avgs.plot(
    kind='scatter',
    x='avg_density',
    y='avg_queue',
    c='option',
    figsize=(15, 15),
)

In [None]:
# Correlations

In [None]:
def calculate_correlation_df(df):
    """Correlate the `prob` column with every other column of `df`.

    Returns a DataFrame with one row per non-`prob` column, holding a
    human-readable `test` label and the `correlation` value produced by
    `Series.corr`.
    """
    rows = [
        {
            'test': f'Correlation between prob and {name}',
            'correlation': df['prob'].corr(df[name]),
        }
        for name in df.columns
        if name != 'prob'
    ]
    return pd.DataFrame(rows)
# Correlation of each column with the termination probability, strongest
# positive correlation first.
df_correlation = calculate_correlation_df(df_probs_obs)
df_correlation.sort_values(by='correlation', ascending=False)

In [None]:
# Covariance of every column with the termination probability, largest first.
df_probs_obs.cov()['prob'].sort_values(ascending=False)

In [None]:
# Shap
# Rebuild the termination head of the trained agent as a stand-alone network
# that SHAP can query: observation -> feature_conversion -> model -> logits.

# map_location keeps the load working on CPU-only machines even when the
# checkpoint was saved from a GPU run.
model_params = torch.load(
            "./models/option_critic_2_options_custom-2way-single-intersection_500000_steps",
            map_location="cpu",
        )['model_params']

# 21 observation values -> 64 features.
feature_conversion = nn.Sequential(
    nn.Linear(21, 32), nn.ReLU(), nn.Linear(32, 64), nn.ReLU()
)
# Without trained weights this extractor is randomly initialised and the SHAP
# values below would describe a random network.
# NOTE(review): assumes the checkpoint stores the agent's feature extractor
# under the `features.` prefix with this same layer layout -- confirm against
# agents.option_critic.
feature_weights = {
    key[len('features.'):]: value
    for key, value in model_params.items()
    if key.startswith('features.')
}
if feature_weights:
    feature_conversion.load_state_dict(feature_weights)
else:
    print(
        "WARNING: no 'features.*' weights found in the checkpoint; "
        "feature_conversion is randomly initialised."
    )

def convert_state(obs):
    """Map an observation tensor to the 64-dimensional feature state.

    A leading dimension is added when `obs` has fewer than 4 dimensions.
    """
    if obs.ndim < 4:
        obs = obs.unsqueeze(0)
    state = feature_conversion(obs)
    return state

class NeuralNetwork(nn.Module):
    """Single linear layer (64 -> 2) holding the agent's termination weights."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(64, 2)
        )

    def forward(self, x):
        """Flatten `x` past the first dimension and return the raw logits."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits


# Copy the trained termination layer into the stand-alone head.
termination_state_dict = {
    'linear_relu_stack.0.weight': model_params['terminations.weight'],
    'linear_relu_stack.0.bias': model_params['terminations.bias']
}
model = NeuralNetwork()
model.load_state_dict(termination_state_dict)

In [None]:
# The network consumes observations in their raw order: 4 green-light flags,
# min_green, 8 lane densities, then 8 lane queues (the same indices that
# extract_obs_data reads). The columns must be handed to it in that order;
# interleaving density/queue per lane would feed permuted inputs.
feature_columns = (
    [f'green_light_{i}' for i in range(4)]
    + ['min_green']
    + [f'lane_{i}_density' for i in range(8)]
    + [f'lane_{i}_queue' for i in range(8)]
)
features = df_probs_obs[feature_columns]

def f(X):
    """Model function for SHAP: highest termination probability per record.

    Args:
        X: iterable of feature rows, each ordered like `feature_columns`.

    Returns:
        1-D numpy array with, for every row, the maximum over the sigmoid
        of the termination logits.
    """
    results = []
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for record in X:
            # One record at a time: `model` flattens everything past the
            # first dimension, so rows cannot share a batch after
            # convert_state adds its leading dimension.
            state = convert_state(to_tensor([record]))
            prob = model(state).sigmoid()
            results.append(np.max(prob.numpy()))
    return np.array(results)

# The first 50 steps serve as the background sample for the kernel explainer.
explainer = shap.KernelExplainer(f, features.iloc[:50, :])
shap_values = explainer.shap_values(features, nsamples=20)

In [None]:
# Interactive force plot of the SHAP values for every explained step.
shap.initjs()
shap.force_plot(
    explainer.expected_value,
    shap_values=shap_values,
    features=features,
)

In [None]:
# Decision plot of the same SHAP values, starting from the expected value.
shap.initjs()
shap.decision_plot(
    explainer.expected_value,
    shap_values,
    features=features,
)

In [None]:
def get_action_dist(model, state, option):
    """Return the action probabilities of `option` for `state` as a numpy array.

    The logits are `state @ options_W[option] + options_b[option]`, divided
    by `model.temperature` and passed through a softmax over the last axis.
    """
    weights = model.options_W[option]
    bias = model.options_b[option]
    scaled_logits = (state.data @ weights + bias) / model.temperature
    return torch.softmax(scaled_logits, dim=-1).detach().numpy()

# Rebuild the trained agent on CPU and restore its weights.
# map_location keeps the load working on CPU-only machines.
agent_params = torch.load(
    "./models/option_critic_2_options_custom-2way-single-intersection_500000_steps",
    map_location="cpu",
)["model_params"]

# No environment object exists in this notebook, so the network sizes are
# derived from what is available instead of `env`:
#   - in_features: number of values in a logged observation string;
#   - num_actions: last dimension of the per-option policy weights.
# NOTE(review): assumes `options_W` is stored in the checkpoint under that
# key with shape (..., num_actions) -- confirm against agents.option_critic.
agent = OptionCriticFeatures(
        in_features=len(df_single_episode.obs.iloc[0].split(',')),
        num_actions=agent_params['options_W'].shape[-1],
        num_options=2,
        temperature=0.1,
        eps_start=0.9,
        eps_min=0.1,
        eps_decay=0.999,
        eps_test=0.05,
        device="cpu",
)
agent.load_state_dict(agent_params)

In [None]:
# Draw one random step of the episode and pass its observation through
# agent.get_state.
random_row = df_single_episode.sample()
raw_obs = random_row.obs.iloc[0]
obs = [float(token.strip()) for token in raw_obs.split(',')]
state = agent.get_state(to_tensor(obs))

In [None]:
# Wide table: one row per action, one `prob_option_<k>` column per option.
option_distributions_dict = {}
for option in range(df_single_episode.option.max() + 1):
    dist = get_action_dist(agent, state, option)[0]
    for i, value in enumerate(dist):
        entry = option_distributions_dict.setdefault(i, {'action': i})
        entry[f'prob_option_{option}'] = value
df_option_distributions = pd.DataFrame(list(option_distributions_dict.values()))

In [None]:
# Show the tensor returned by agent.get_state for the sampled observation.
state

In [None]:
# Grouped bars: per action, the probability assigned by option 0 and option 1.
px.bar(
    df_option_distributions,
    x="action",
    y=["prob_option_0", "prob_option_1"],
    barmode='group',
    orientation="v",
    opacity=0.9,
)

In [None]:
# One-hot encode the chosen action, keeping the active option next to it.
action_option = df_single_episode[['action', 'option']]
df_action_encoded = pd.get_dummies(action_option, columns=['action'])

In [None]:
# Per-option totals of the one-hot action columns.
df_action_encoded.groupby('option').sum()

In [None]:
# Lane identifiers with their list positions.
# NOTE(review): presumably position i corresponds to the `lane_<i>_*`
# columns -- confirm against the environment's lane ordering.
lanes = ['n_t_0', 'n_t_1', 'e_t_0', 'e_t_1', 's_t_0', 's_t_1', 'w_t_0', 'w_t_1']
# index:    0        1        2        3        4        5        6        7

In [None]:
# backup
# Long-format alternative to the bar chart: one line per option showing the
# probability of each action.
# NOTE: this rebinds `df_option_distributions` to a long table (option,
# action, prob) that has no `prob_option_<k>` columns; rebuild the wide table
# before re-running the px.bar cell.
option_distributions = []
for option in range(0, df_single_episode.option.max() + 1):
    dist = get_action_dist(agent, state, option)[0]
    for i, value in enumerate(dist):
        option_distributions.append({
            'option': option,
            'action': i,
            'prob': value,
        })
df_option_distributions = pd.DataFrame(option_distributions)
fig, ax = plt.subplots()

# Group by the column name rather than a one-element list: the key is then a
# plain scalar on every pandas version (a list yields 1-tuples on pandas >= 2
# but scalars before that, where indexing the key with [0] fails).
for key, grp in df_option_distributions.groupby('option'):
    grp.plot(ax=ax, kind='line', x='action', y='prob', label=key)

ax.set_ylabel('prob')
plt.legend(loc='best')
plt.show()