In [187]:
import json
import pandas as pd
import numpy as np
import plotly.express as pe
import plotly.io as pio
import re

pio.renderers.default='iframe'


## Preliminary Hyperparameter Study

In [2]:
# Open the file in read mode
with open("results/results_GNN_LSTM.json", "r") as file:
    # Load the JSON data from the file
    json_data = json.load(file)

# Create a list to store the rows
rows = []

# Iterate over each JSON element in the array
for json_element in json_data:
    # Add the JSON element as a row to the list
    rows.append(json_element)

# Create a DataFrame from the list of rows
df = pd.DataFrame(rows)
df = df.sort_values('loss')

# Display the DataFrame
df
df.drop(columns=['run_name']).corr().round(2)


Unnamed: 0,hidden_dim,num_layers,dropout,loss,run_time,batch_size
hidden_dim,1.0,0.0,0.0,-0.4,0.95,-0.84
num_layers,0.0,1.0,0.0,0.04,0.15,-0.0
dropout,0.0,0.0,1.0,0.11,-0.0,0.0
loss,-0.4,0.04,0.11,1.0,-0.28,0.65
run_time,0.95,0.15,-0.0,-0.28,1.0,-0.7
batch_size,-0.84,-0.0,0.0,0.65,-0.7,1.0


In [3]:
df.sort_values(['hidden_dim', 'num_layers', 'dropout'])

Unnamed: 0,run_name,hidden_dim,num_layers,dropout,loss,run_time,batch_size
0,GNN_LSTM,16,2,0.2,0.171592,1410.136341,256
1,GNN_LSTM,16,2,0.3,0.183197,1414.065486,256
2,GNN_LSTM,16,2,0.4,0.173644,1409.190984,256
3,GNN_LSTM,16,3,0.2,0.171244,1642.270052,256
4,GNN_LSTM,16,3,0.3,0.175314,1647.590405,256
5,GNN_LSTM,16,3,0.4,0.198071,1643.358875,256
6,GNN_LSTM,32,2,0.2,0.172558,1967.753288,128
7,GNN_LSTM,32,2,0.3,0.166872,1980.382358,128
8,GNN_LSTM,32,2,0.4,0.168935,1978.563265,128
9,GNN_LSTM,32,3,0.2,0.166404,2350.847477,128


## Results analysis

In [183]:
graphs_settings = pd.read_csv('graphs_settings.csv')
graphs_settings
graphs_settings.to_latex('graphs_settings.tex', index=False, float_format='%.3f')

In [4]:
file_path = "baseline_results.json"

with open(file_path, "r") as file:
    baseline_results = json.load(file)

In [9]:
baseline_results

{'TLS_A23_1_60min': 0.5918209552764893,
 'TLS_A23_1_30min': 0.42569783329963684,
 'TLS_A23_1_5min': 1.3106695413589478,
 'TLS_A23_2_60min': 0.544157087802887,
 'TLS_A23_2_30min': 0.404752641916275,
 'TLS_A23_2_5min': 1.2575961351394653,
 'TLS_A23_1,2_60min': 0.5664417147636414,
 'TLS_A23_1,2_30min': 0.4132250249385834,
 'TLS_A23_1,2_5min': 1.2828391790390015,
 'TLS_A01_1_60min': 0.324165940284729,
 'TLS_A01_1_30min': 0.23715412616729736,
 'TLS_A01_1_5min': 0.752231240272522,
 'TLS_A01_2_60min': 0.33062905073165894,
 'TLS_A01_2_30min': 0.21580064296722412,
 'TLS_A01_2_5min': 0.7658590078353882,
 'TLS_A01_1,2_60min': 0.327648788690567,
 'TLS_A01_1,2_30min': 0.22672617435455322,
 'TLS_A01_1,2_5min': 0.7593659162521362,
 'TLS_A23,A01,A07_1_60min': 0.22697073221206665,
 'TLS_A23,A01,A07_1_30min': 0.15145939588546753,
 'TLS_A23,A01,A07_1_5min': 0.6069501638412476,
 'TLS_A23,A01,A07_2_60min': 0.24952025711536407,
 'TLS_A23,A01,A07_2_30min': 0.16799196600914001,
 'TLS_A23,A01,A07_2_5min': 0.66

In [37]:
baseline_df = pd.DataFrame(columns=['name', 'val_loss'])
for k,v in baseline_results.items():
    baseline_df = pd.concat([baseline_df, pd.Series({'name': k, 'val_loss': v}).to_frame().T], ignore_index=True)

baseline_df.insert(0, 'minutes', baseline_df.name.apply(get_minutes))
baseline_df.insert(1, 'direction', baseline_df.name.apply(lambda x: x.split('_')[2]))
baseline_df.insert(2, 'road', baseline_df.name.apply(lambda x: x.split('_')[1]))
baseline_df.insert(2, 'net_name', 'baseline')
baseline_df.insert(3, 'hidden_dim', 0)

baseline_df = baseline_df[baseline_df.road == "A23"]
baseline_df = baseline_df.drop(columns="name")
baseline_df

Unnamed: 0,minutes,direction,net_name,hidden_dim,road,val_loss
0,60,1,baseline,0,A23,0.591821
1,30,1,baseline,0,A23,0.425698
2,5,1,baseline,0,A23,1.31067
3,60,2,baseline,0,A23,0.544157
4,30,2,baseline,0,A23,0.404753
5,5,2,baseline,0,A23,1.257596
6,60,12,baseline,0,A23,0.566442
7,30,12,baseline,0,A23,0.413225
8,5,12,baseline,0,A23,1.282839


In [131]:
import re

results_df = pd.concat([graph_settings_results, baseline_df])
results_df = results_df.groupby(['minutes', 'direction', 'net_name', 'hidden_dim']).agg({'val_loss': min}).reset_index()
results_df = results_df.pivot(index=['minutes', 'direction', 'net_name'], columns='hidden_dim', values='val_loss').reset_index()
results_df = results_df.pivot(index=['minutes', 'direction'], columns='net_name', values=[0, 50, 100]).reset_index()
# results_df.columns = [''.join(str(col)) for col in results_df.columns.values]
# results_df = results_df.reset_index(inplace=True)

results_df.columns = zip([str(x) for x in results_df.columns.get_level_values(1)],[str(x) for x in results_df.columns.get_level_values(0)])

results_df.columns = [re.sub(r"['()\,']", "", str(c)).strip() for c in results_df.columns]

results_df.drop(columns=['A3TGCN 0', 'GCN_LSTM 0', 'baseline 50', 'baseline 100'])
column_order = ['minutes', 'direction', 'baseline 0', 'GCN_LSTM 50', 'GCN_LSTM 100', 'A3TGCN 50', 'A3TGCN 100']
results_df = results_df[column_order]
results_df[['baseline 0','GCN_LSTM 50','GCN_LSTM 100','A3TGCN 50','A3TGCN 100']] = results_df[['baseline 0','GCN_LSTM 50','GCN_LSTM 100','A3TGCN 50','A3TGCN 100']].astype(float)
results_df

Unnamed: 0,minutes,direction,baseline 0,GCN_LSTM 50,GCN_LSTM 100,A3TGCN 50,A3TGCN 100
0,5,1,1.31067,0.456955,1.053597,1.050835,1.046846
1,5,12,1.282839,1.015063,1.02451,1.035158,1.030573
2,5,2,1.257596,1.003403,0.590801,1.028975,1.025081
3,30,1,0.425698,1.020054,0.233556,1.005527,0.517924
4,30,12,0.413225,0.983687,0.267276,0.994697,0.992599
5,30,2,0.404753,1.013665,0.255877,0.523816,0.993287
6,60,1,0.591821,0.297151,0.290184,1.014908,1.008511
7,60,12,0.566442,0.317912,0.301587,0.991007,0.988065
8,60,2,0.544157,0.988103,0.285369,0.984486,0.979044


In [193]:
np.mean(results_df['A3TGCN 100']/results_df['baseline 0'])

1.5263970147850197

In [142]:
latex.to_latex('results_df.tex', index=False, float_format='%.3f')

TypeError: Styler.to_latex() got an unexpected keyword argument 'index'

In [167]:

# Find the index of the lowest value in each row
min_index = results_df.iloc[:, 2:].idxmin(axis=1)

# # Create a copy of the DataFrame with formatting
styled_df = results_df.style.apply(lambda x: ['background-color: blue' if i == min_index[x.name] else '' for i in range(2, len(x))])

# # Convert the styled DataFrame to LaTeX format

# # Print the LaTeX string
# styled_df

min_index

0     GCN_LSTM 50
1     GCN_LSTM 50
2    GCN_LSTM 100
3    GCN_LSTM 100
4    GCN_LSTM 100
5    GCN_LSTM 100
6    GCN_LSTM 100
7    GCN_LSTM 100
8    GCN_LSTM 100
dtype: object

In [None]:
results_df

Unnamed: 0,minutes,direction,baseline 0,GCN_LSTM 50,GCN_LSTM 100,A3TGCN 50,A3TGCN 100
0,5,1,1.31067,0.456955,1.053597,1.050835,1.046846
1,5,12,1.282839,1.015063,1.02451,1.035158,1.030573
2,5,2,1.257596,1.003403,0.590801,1.028975,1.025081
3,30,1,0.425698,1.020054,0.233556,1.005527,0.517924
4,30,12,0.413225,0.983687,0.267276,0.994697,0.992599
5,30,2,0.404753,1.013665,0.255877,0.523816,0.993287
6,60,1,0.591821,0.297151,0.290184,1.014908,1.008511
7,60,12,0.566442,0.317912,0.301587,0.991007,0.988065
8,60,2,0.544157,0.988103,0.285369,0.984486,0.979044


In [7]:
# Extract the minutes using regular expressionsbegin
def get_minutes(name):
    minutes = re.findall(r'\d+min', name)[0]
    minutes = minutes.replace('min', '')
    minutes = int(minutes)
    return minutes


In [32]:
graph_settings_results = pd.read_csv('results/graph_settings_results.csv')
graph_settings_results.insert(0, 'minutes', graph_settings_results.name.apply(get_minutes))
graph_settings_results = graph_settings_results.drop(columns=['road', 'name'])
graph_settings_results

Unnamed: 0,minutes,direction,nr_sensors,nr_edges,nr_timestamps,net_name,hidden_dim,val_loss,running_time,nr_train_samples,nr_test_samples,batch_size,nr_epochs,best_model_size
0,60,1,18,124,4681,GCN_LSTM,50,0.297151,217.345174,3409,853,512,57,722655
1,60,1,18,124,4681,A3TGCN,50,1.014908,120.650138,3409,853,4096,8,357755
2,60,1,18,124,4681,GCN_LSTM,100,0.290184,240.464388,3409,853,256,40,2780511
3,60,1,18,124,4681,A3TGCN,100,1.008511,125.302479,3409,853,1024,8,1164091
4,30,1,18,124,9361,GCN_LSTM,50,1.020054,85.841067,7032,1759,512,8,722591
5,30,1,18,124,9361,A3TGCN,50,1.005527,135.788946,7032,1759,2048,8,357755
6,30,1,18,124,9361,GCN_LSTM,100,0.233556,245.60512,7032,1759,256,26,2780511
7,30,1,18,124,9361,A3TGCN,100,0.517924,2217.418977,7032,1759,1024,279,1164091
8,5,1,18,124,56161,GCN_LSTM,50,0.456955,345.390381,43867,10967,512,24,722655
9,5,1,18,124,56161,A3TGCN,50,1.050835,252.302617,43867,10967,2048,8,357755


Hypotheses:
1. GCN_LSTM performs better than A3TGCN
1. time scales
1. direction does not matter

In [95]:
from scipy.stats import ttest_ind

In [170]:
ttest_ind(group_model[group_model.index.get_level_values(0) == 5].val_loss.values, group_model[group_model.index.get_level_values(0) == 60].val_loss.values)

Ttest_indResult(statistic=2.0516631443797237, pvalue=0.05229349476960383)

In [172]:
group_model = graph_settings_results.drop(columns=['nr_sensors','nr_edges','nr_timestamps','nr_train_samples','nr_test_samples', 'batch_size']).groupby(['minutes', 'net_name','direction', 'hidden_dim']).mean()
group_model_diff = group_model[group_model.index.get_level_values(0) == 5].droplevel(0) - group_model[group_model.index.get_level_values(0) == 60].droplevel(0)
fig = pe.histogram(group_model_diff, 'val_loss', color=group_model_diff.index.get_level_values(0), nbins=30, title='Differences in loss between direction 1 and both',barmode='overlay',
                  labels={'val_loss': 'difference in loss'})
# fig.update_layout(showlegend=False)
fig.show()
fig.write_image('hist_diff.png')
group_model_diff

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,val_loss,running_time,nr_epochs,best_model_size
net_name,direction,hidden_dim,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A3TGCN,1,50,0.035926,131.652479,0.0,0.0
A3TGCN,1,100,0.038335,227.178599,0.0,0.0
A3TGCN,12,50,0.044151,229.642481,0.0,0.0
A3TGCN,12,100,0.042508,439.278789,0.0,0.0
A3TGCN,2,50,0.04449,129.851617,0.0,0.0
A3TGCN,2,100,0.046037,224.919115,0.0,0.0
GCN_LSTM,1,50,0.159805,128.045208,-33.0,0.0
GCN_LSTM,1,100,0.763414,83.510161,-32.0,0.0
GCN_LSTM,12,50,0.697151,-182.799174,-80.0,0.0
GCN_LSTM,12,100,0.722923,227.607115,-36.0,0.0


In [None]:
pe.scatter

In [161]:
group_model

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,val_loss,running_time,nr_epochs,best_model_size
minutes,net_name,direction,hidden_dim,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
5,A3TGCN,1,50,1.050835,252.302617,8.0,357755.0
5,A3TGCN,1,100,1.046846,352.481077,8.0,1164091.0
5,A3TGCN,12,50,1.035158,366.57537,8.0,360187.0
5,A3TGCN,12,100,1.030573,591.764319,8.0,1166523.0
5,A3TGCN,2,50,1.028975,258.363084,8.0,357755.0
5,A3TGCN,2,100,1.025081,357.294838,8.0,1164091.0
5,GCN_LSTM,1,50,0.456955,345.390381,24.0,722655.0
5,GCN_LSTM,1,100,1.053597,323.974549,8.0,2780511.0
5,GCN_LSTM,12,50,1.015063,275.623303,8.0,725087.0
5,GCN_LSTM,12,100,1.02451,644.686952,8.0,2782943.0


In [74]:
group_model[group_model.index.get_level_values(0) == 'GCN_LSTM']


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,val_loss,running_time,nr_epochs,best_model_size
net_name,direction,minutes,hidden_dim,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
GCN_LSTM,1,5,50,0.456955,345.390381,24.0,722655.0
GCN_LSTM,1,5,100,1.053597,323.974549,8.0,2780511.0
GCN_LSTM,1,30,50,1.020054,85.841067,8.0,722591.0
GCN_LSTM,1,30,100,0.233556,245.60512,26.0,2780511.0
GCN_LSTM,1,60,50,0.297151,217.345174,57.0,722655.0
GCN_LSTM,1,60,100,0.290184,240.464388,40.0,2780511.0
GCN_LSTM,12,5,50,1.015063,275.623303,8.0,725087.0
GCN_LSTM,12,5,100,1.02451,644.686952,8.0,2782943.0
GCN_LSTM,12,30,50,0.983687,110.449092,8.0,725087.0
GCN_LSTM,12,30,100,0.267276,504.596698,31.0,2782943.0


In [26]:
graph_settings_results = graph_settings_results.drop(columns=['name', 'road'])
graph_settings_results['direction'] = graph_settings_results['direction'].apply(lambda x: 0 if len(x) == 1 else 1)
graph_settings_results['net_name'] = graph_settings_results['net_name'].apply(lambda x: 0 if x == "GCN_LSTM" else 1)

corrs = graph_settings_results .corr().round(2)
fig = pe.imshow(corrs, text_auto=True, color_continuous_scale='RdBu_r', width = 600, height= 600)
fig.show()

