
# Analysis Notebook

This notebook computes statistical metrics (mainly independent-samples t-tests) for the simulation results stored in the `results/` folder.
*** 

## 0. Setup <a class="anchor" id="0"></a>
This section imports the required packages, selects which result sets to analyse, and loads them into memory.

In [None]:
# Import all packages
import os
import pandas as pd
import numpy as np
from scipy import stats

In [None]:
# Choose which results to use.
# Each entry is the name of a sub-folder of 'results/'. Later cells compare
# results_to_use[0] against results_to_use[1], so the order of this list matters.
results_to_use = []  # If this is empty, all files in the folder will be used
WARMUP_EPOCHS = 0 # Number of epochs to ignore.
sim_colours = ['blue', 'green', 'red', 'orange', 'purple', 'brown', 'pink', 'gray', 'olive', 'cyan']


# os.listdir returns entries in arbitrary order; sort them so that the
# "first" and "second" result sets are the same on every run and machine.
all_results = sorted(os.listdir('results/'))

if results_to_use == [""] or results_to_use == []:
    results_to_use = all_results
# Remove the macOS '.DS_Store' metadata entry. A new list is built so that
# neither all_results nor a user-supplied list is mutated in place.
results_to_use = [name for name in results_to_use if name != '.DS_Store']

print("Using results: " + str(results_to_use))


In [None]:
# Build all_sim_results: one entry per selected result set, keyed by its folder name.
# Each entry maps 'configuration' to the parsed configuration.txt and every
# exported .npy file (keyed by its filename without the extension) to the loaded array.

all_sim_results = {}

for sim in results_to_use:
    sim_dir = 'results/' + sim
    export_dir = sim_dir + '/exported_data/'

    # configuration.txt is read as a header-less table split on ':'
    loaded = {
        'configuration': pd.read_csv(
            sim_dir + '/configuration.txt', header=None, delimiter=":"),
    }

    # Collect the names of the exported arrays (extension stripped) ...
    npy_names = [
        filename[:-4]
        for filename in os.listdir(export_dir)
        if filename.endswith(".npy")
    ]
    # ... and load each of them under that name.
    for npy_name in npy_names:
        loaded[npy_name] = np.load(export_dir + npy_name + '.npy')

    all_sim_results[sim] = loaded

In [None]:
# Print the name and the configuration table of every selected result set
for sim_name in results_to_use:
    print("Results: " + sim_name)
    sim_configuration = all_sim_results[sim_name]['configuration']
    print(sim_configuration)

## 1. Average Total Number of Trips  <a class="anchor" id="1"></a>

This t-test compares the average total number of trips between the two experiments, for all sims.

In [None]:
# Gather the 'stat_num_of_trips_per_simulation' array of every selected result set.
all_average_number_of_trips = [
    all_sim_results[sim]['stat_num_of_trips_per_simulation']
    for sim in results_to_use
]

# Only the first two result sets are compared.
first_trips = all_average_number_of_trips[0]
second_trips = all_average_number_of_trips[1]

# Independent two-sample t-test between the two result sets
results = stats.ttest_ind(first_trips, second_trips)
print(results)

# Human-readable summary (unrounded values)
print("The average number of trips for the first two simulations are: " + str(np.mean(first_trips)) + " and " + str(np.mean(second_trips)))
print("the standard deviation of number of trips for the first two simulations are: " + str(np.std(first_trips)) + " and " + str(np.std(second_trips)))

# Rounded values for the LaTeX snippets
mean_1, mean_2 = round(np.mean(first_trips), 3), round(np.mean(second_trips), 3)
std_1, std_2 = round(np.std(first_trips), 3), round(np.std(second_trips), 3)
t_stat, p_value = results[0], results[1]
print("in latex:","$(M=",mean_1,", SD=",std_1,")$","$(M=",mean_2,", SD=",std_2,")$")
print("in latex:", "t(", results.df, ")=", round(t_stat, 3), ", p=", round(p_value, 3))



## 2. Average Congestion of Central Intersection

This t-test compares the two experiments across all sims. It does not process individual epochs.

In [None]:
# Gather the 'stat_average_congestion_per_intersection' array of every selected result set.
average_congestion_per_intersection = []
for exp in results_to_use:
    average_congestion_per_intersection.append(
        all_sim_results[exp]['stat_average_congestion_per_intersection'])

# Select intersection [1, 1] along the first axis of each array.
# NOTE(review): [1, 1] is presumably the centre of a 3x3 grid - confirm for other grid sizes.
central_1 = average_congestion_per_intersection[0][:,1,1]
central_2 = average_congestion_per_intersection[1][:,1,1]

# Independent two-sample t-test between the first two result sets
results = stats.ttest_ind(central_1, central_2)
print(results)
print("The average congestion for the first two simulations are: " + str(np.mean(central_1)) + " and " + str(np.mean(central_2)))
print("The p-value is: " + str(results[1]))
if results[1] < 0.05:
    print("The difference is significant")
else:
    print("The difference is not significant")

mean_1 = round(np.mean(central_1),3)
mean_2 = round(np.mean(central_2),3)
std_1 = round(np.std(central_1),3)
std_2 = round(np.std(central_2),3)
# Raw strings keep the LaTeX "\ " spacing without relying on an invalid escape sequence.
print("in latex:","$(M=",mean_1,r",\ SD=",std_1,")$","$(M=",mean_2,r",\ SD=",std_2,")$")
print("in latex:", "$t(", results.df, ")=", round(results[0], 3), ", p=", round(results[1], 3), "$")

## 3. Average Time Waited <a id="3"></a>

Waiting time is compared at agent, intersection and grid level (the grid-based analysis is deprecated and commented out).

In [None]:
######Agent based######
# Two independent t-tests between the first two result sets:
# one on the average time waited, one on the max time waited.
# LaTeX fragments use raw strings so that "\ " is not an invalid escape sequence.

# Average time
average_time_agent = []
for exp in results_to_use:
    average_time_agent.append(
        all_sim_results[exp]['stat_average_time_waited_per_simulation_agent'])

results = stats.ttest_ind(average_time_agent[0], average_time_agent[1])
print(results)
print("The average time waited for the first two simulations are: " + str(np.mean(average_time_agent[0])) + " and " + str(np.mean(average_time_agent[1])))
print("The p-value is: " + str(results[1]))
mean_1 = round(np.mean(average_time_agent[0]),3)
mean_2 = round(np.mean(average_time_agent[1]),3)
std_1 = round(np.std(average_time_agent[0]),3)
std_2 = round(np.std(average_time_agent[1]),3)
print("in latex:","$(M=",mean_1,r",\ SD=",std_1,")$","$(M=",mean_2,r",\ SD=",std_2,")$")
print("in latex:", "$t(", results.df, ")=", round(results[0], 3), ", p=", round(results[1], 3), "$")

# Max time
max_time_agent = []
for exp in results_to_use:
    max_time_agent.append(
        all_sim_results[exp]['stat_max_time_waited_per_simulation_agent'])

results = stats.ttest_ind(max_time_agent[0], max_time_agent[1])
print(results)
print("The max time waited for the first two simulations are: " + str(np.mean(max_time_agent[0])) + " and " + str(np.mean(max_time_agent[1])))
print("The p-value is: " + str(results[1]))
mean_1 = round(np.mean(max_time_agent[0]),3)
mean_2 = round(np.mean(max_time_agent[1]),3)
std_1 = round(np.std(max_time_agent[0]),3)
std_2 = round(np.std(max_time_agent[1]),3)
print("in latex:","$(M=",mean_1,r",\ SD=",std_1,")$","$(M=",mean_2,r",\ SD=",std_2,")$")
print("in latex:", "$t(", results.df, ")=", round(results[0], 3), r",\ p=", round(results[1], 3), "$")

In [None]:
######Intersection based - Average Time######
# For every intersection [i, j], run an independent t-test between the first two
# result sets and print one LaTeX "\item" line with M, SD, t and p.
# NOTE(review): the labels "Adaptive" and "Zero" are hard-coded and assume that
# results_to_use[0] / results_to_use[1] are those experiments - confirm the order.

average_time_intersection = []
for exp in results_to_use:
    average_time_intersection.append(
        all_sim_results[exp]['stat_average_time_waited_per_intersection'])

# Take both grid dimensions from the array so non-square grids are covered too
# (the row count was previously reused for the columns).
n_rows, n_cols = average_time_intersection[0].shape[1:3]

for i in range(n_rows):
    for j in range(n_cols):
        sample_1 = average_time_intersection[0][:,i,j]
        sample_2 = average_time_intersection[1][:,i,j]
        results = stats.ttest_ind(sample_1, sample_2)
        mean_1 = round(np.mean(sample_1),3)
        mean_2 = round(np.mean(sample_2),3)
        std_1 = round(np.std(sample_1),3)
        std_2 = round(np.std(sample_2),3)
        # Raw strings keep the LaTeX backslashes literal ("\item", "\ ", "\newline").
        print(r"\item [{[", i, ",", j, "]}]: ",
              "Adaptive $(M=", mean_1, r",\ SD=", std_1, ")$",
              ", Zero $(M=", mean_2, r",\ SD=", std_2, r"),$\newline$\  t(",
              int(results.df), ")=", round(results[0], 2),
              r",\ p=", round(results[1], 3), "$")
        # Example of the intended output:
        # \item [{[0, 0]}]: Adaptive $(M= 0.027 ,\ SD= 0.005 )$ , Random $(M= 0.024 ,\ SD= 0.004 ),$\newline$\ t( 198 )= 5.81 ,\ p<.001 $


In [None]:
######Intersection based - Max Time######
# For every intersection [i, j], run an independent t-test on the max time waited
# between the first two result sets and print one LaTeX "\item" line.
# NOTE(review): the labels "Adaptive" and "Zero" are hard-coded and assume that
# results_to_use[0] / results_to_use[1] are those experiments - confirm the order.

max_time_intersection = []
for exp in results_to_use:
    max_time_intersection.append(
        all_sim_results[exp]['stat_max_time_waited_per_intersection'])

# Take both grid dimensions from the array so non-square grids are covered too
# (the row count was previously reused for the columns).
n_rows, n_cols = max_time_intersection[0].shape[1:3]

for i in range(n_rows):
    for j in range(n_cols):
        sample_1 = max_time_intersection[0][:,i,j]
        sample_2 = max_time_intersection[1][:,i,j]
        results = stats.ttest_ind(sample_1, sample_2)
        mean_1 = round(np.mean(sample_1),3)
        mean_2 = round(np.mean(sample_2),3)
        std_1 = round(np.std(sample_1),3)
        std_2 = round(np.std(sample_2),3)
        # Raw strings keep the LaTeX backslashes literal ("\item", "\ ", "\newline").
        print(r"\item [{[", i, ",", j, "]}]: ",
              "Adaptive $(M=", mean_1, r",\ SD=", std_1, ")$",
              ", Zero $(M=", mean_2, r",\ SD=", std_2, r"),$\newline$\  t(",
              int(results.df), ")=", round(results[0], 2),
              r",\ p=", round(results[1], 3), "$")


In [None]:
# ###### Grid based###### DEPRECATED/NOT USED

# # Average time
# print("AVERAGE TIME WAITED")
# average_time_grid = []
# for exp in results_to_use:
#     average_time_grid.append(
#         all_sim_results[exp]['stat_average_time_waited_grid'])

# means_1 = []
# for i in range(len(average_time_grid[0])):
#     means_1.append(np.mean(average_time_grid[0][i]))
# means_2 = []
# for i in range(len(average_time_grid[1])):
#     means_2.append(np.mean(average_time_grid[1][i]))

# print(np.mean(average_time_grid[0], axis=0))
# # print("mean = ", np.mean(np.mean(average_time_grid[0], axis=0)))
# # print("std = ", np.std(np.mean(average_time_grid[0], axis=0)))

# results = stats.ttest_ind(means_1, means_2)
# print(results)
# print("The average time waited for the first two simulations are: " +
#       str(np.mean(means_1)) + " and " + str(np.mean(means_2)))
# print("The p-value is: " + str(results[1]))
# mean_1 = round(np.mean(means_1), 3)
# mean_2 = round(np.mean(means_2), 3)
# std_1 = round(np.std(means_1), 3)
# std_2 = round(np.std(means_2), 3)
# print("in latex:", "$(M=", mean_1, ",\ SD=", std_1, ")$",
#       "$(M=", mean_2, ",\ SD=", std_2, ")$")
# print("in latex:", "$t(", results.df, ")=", round(
#     results[0], 3), ", p=", round(results[1], 3), "$")

# # Max time
# print("MAX TIME WAITED")
# max_time_grid = []
# for exp in results_to_use:
#     max_time_grid.append(
#         all_sim_results[exp]['stat_max_time_waited_grid'])

# means_1 = []
# for i in range(len(max_time_grid[0])):
#     means_1.append(np.mean(max_time_grid[0][i]))
# means_2 = []
# for i in range(len(max_time_grid[1])):
#     means_2.append(np.mean(max_time_grid[1][i]))


# results = stats.ttest_ind(means_1, means_2)
# print(results)
# print("The max time waited for the first two simulations are: " +
#       str(np.mean(means_1)) + " and " + str(np.mean(means_2)))
# print("The p-value is: " + str(results[1]))
# mean_1 = round(np.mean(means_1), 3)
# mean_2 = round(np.mean(means_2), 3)
# std_1 = round(np.std(means_1), 3)
# std_2 = round(np.std(means_2), 3)
# print("in latex:", "$(M=", mean_1, ",\ SD=", std_1, ")$",
#       "$(M=", mean_2, ",\ SD=", std_2, ")$")
# print("in latex:", "$t(", results.df, ")=", round(
#     results[0], 3), ", p=", round(results[1], 3), "$")

## 4. Gini coefficient <a id="4"></a>

In [None]:
# Average Time Waited.
#Averaged over epochs and intersections: the gini of each intersection, and average of all the ginis per epoch.
all_ginis_time_waited = []
for sim in results_to_use:
    all_ginis_time_waited.append(
        all_sim_results[sim]['stat_time_waited_gini'])

# Independent two-sample t-test between the first two result sets
results = stats.ttest_ind(all_ginis_time_waited[0], all_ginis_time_waited[1])
print(results)
# Present the results nicely
print("The average gini of time waited for the first two simulations are: " + str(np.mean(all_ginis_time_waited[0])) + " and " + str(np.mean(all_ginis_time_waited[1])))
print("The p-value is: " + str(results[1]))
mean_1 = round(np.mean(all_ginis_time_waited[0]),3)
mean_2 = round(np.mean(all_ginis_time_waited[1]),3)
std_1 = round(np.std(all_ginis_time_waited[0]),3)
std_2 = round(np.std(all_ginis_time_waited[1]),3)
# Raw strings keep the LaTeX "\ " spacing without relying on an invalid escape sequence.
print("in latex:","$(M=",mean_1,r",\ SD=",std_1,")$","$(M=",mean_2,r",\ SD=",std_2,")$")
print("in latex:", "$t(", results.df, ")=", round(results[0], 3), ", p=", round(results[1], 3), "$")



In [None]:
# Average Satisfaction.
#Averaged over epochs and intersections: the gini of each intersection, and average of all the ginis per epoch.
all_ginis_satisfaction = []
for sim in results_to_use:
    all_ginis_satisfaction.append(
        all_sim_results[sim]['stat_satisfaction_gini'])

# Independent two-sample t-test on the satisfaction ginis of the first two result sets.
# (This previously tested all_ginis_time_waited by mistake, so the reported t and p
# belonged to the time-waited ginis rather than to the satisfaction ginis.)
results = stats.ttest_ind(all_ginis_satisfaction[0], all_ginis_satisfaction[1])
print(results)
#present the results nicely
print("The average gini of satisfaction for the first two simulations are: " + str(np.mean(all_ginis_satisfaction[0])) + " and " + str(np.mean(all_ginis_satisfaction[1])))
print("The p-value is: " + str(results[1]))
mean_1 = round(np.mean(all_ginis_satisfaction[0]),3)
mean_2 = round(np.mean(all_ginis_satisfaction[1]),3)
std_1 = round(np.std(all_ginis_satisfaction[0]),3)
std_2 = round(np.std(all_ginis_satisfaction[1]),3)
# Raw strings keep the LaTeX "\ " spacing without relying on an invalid escape sequence.
print("in latex:","$(M=",mean_1,r",\ SD=",std_1,")$","$(M=",mean_2,r",\ SD=",std_2,")$")
print("in latex:", "$t(", results.df, ")=", round(results[0], 3), ", p=", round(results[1], 3), "$")



## 5. Trip Satisfaction <a id="5"></a>

In [None]:
# Gather the 'stat_satisfaction_mean' array of every selected result set.
all_satisfactions = []
for sim in results_to_use:
    all_satisfactions.append(
        all_sim_results[sim]['stat_satisfaction_mean'])

# Independent two-sample t-test between the first two result sets
results = stats.ttest_ind(all_satisfactions[0], all_satisfactions[1])
print(results)
#present the results nicely
print("The average satisfaction for the first two simulations are: " + str(np.mean(all_satisfactions[0])) + " and " + str(np.mean(all_satisfactions[1])))
print("The p-value is: " + str(results[1]))
mean_1 = round(np.mean(all_satisfactions[0]),3)
mean_2 = round(np.mean(all_satisfactions[1]),3)
std_1 = round(np.std(all_satisfactions[0]),3)
std_2 = round(np.std(all_satisfactions[1]),3)
# Raw strings keep the LaTeX "\ " spacing without relying on an invalid escape sequence.
print("in latex:","$(M=",mean_1,r",\ SD=",std_1,")$","$(M=",mean_2,r",\ SD=",std_2,")$")
print("in latex:", "$t(", results.df, ")=", round(results[0], 3), ", p=", round(results[1], 3), "$")


## 6. Auction Reward <a id="6"></a>

In [None]:
######Intersection based - Average Auction Reward######
# For every intersection [i, j], run an independent t-test on the average auction
# reward between the first two result sets and print the outcome.

average_reward_intersection = []
for exp in results_to_use:
    average_reward_intersection.append(
        all_sim_results[exp]['stat_average_auction_reward_per_intersection'])

# Take both grid dimensions from the array so non-square grids are covered too
# (the row count was previously reused for the columns).
n_rows, n_cols = average_reward_intersection[0].shape[1:3]

for i in range(n_rows):
    for j in range(n_cols):
        sample_1 = average_reward_intersection[0][:,i,j]
        sample_2 = average_reward_intersection[1][:,i,j]
        print("Intersection [", i, "][", j, "]")
        results = stats.ttest_ind(sample_1, sample_2)
        print(results)
        print("The average auction reward for the first two simulations are: " + str(np.mean(sample_1)) + " and " + str(np.mean(sample_2)))
        print("The p-value is: " + str(results[1]))