In [88]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Folder holding the benchmark result files; it is resolved against the
# directory the notebook kernel is currently running in.
RESULTS_PATH = os.path.join(os.getcwd(), 'results')


def excel_path(file: str) -> str:
    """Return the location of ``file`` inside the results folder.

    The name is historical: nothing Excel-specific happens here, the file
    name is simply joined onto ``RESULTS_PATH``.
    """
    full_path = os.path.join(RESULTS_PATH, file)
    return full_path

# os.listdir returns entries in arbitrary order, so the listing is sorted to
# keep the positional indices (used further down to pick a file) stable
# across machines and runs. Entries that are not CSV files are skipped since
# they cannot be loaded as result tables.
results_files = [
    excel_path(file)
    for file in sorted(os.listdir(RESULTS_PATH))
    if file.lower().endswith('.csv')
]

def read_result_file(file):
    """Load one benchmark result CSV into a DataFrame.

    Parameters
    ----------
    file : str, path object or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table with the ``'Unnamed: 0'`` column removed when it
        is present.
    """
    # 'Unnamed: 0' is the label pandas assigns to a header-less first column
    # (presumably an index written out when the results were saved).
    # errors='ignore' keeps files that lack this column loadable instead of
    # raising KeyError.
    return pd.read_csv(file).drop(columns=['Unnamed: 0'], errors='ignore')

def select_rows(df, discount=None, precision=None, min_state_dim=None, max_state_dim=None):
    """Narrow ``df`` down to the rows matching the given criteria.

    Every criterion left at ``None`` is skipped; the remaining ones are
    applied one after another:

    * ``discount``      -- keep rows whose ``'discount'`` equals it,
    * ``precision``     -- keep rows whose ``'final_precision'`` equals it,
    * ``min_state_dim`` -- keep rows whose ``'state_dim'`` is at least it,
    * ``max_state_dim`` -- keep rows whose ``'state_dim'`` is at most it.

    When no criterion is supplied the input frame itself is returned.
    """
    # Each entry pairs a bound with the function that builds its boolean mask.
    criteria = (
        (discount, lambda frame: frame['discount'] == discount),
        (precision, lambda frame: frame['final_precision'] == precision),
        (min_state_dim, lambda frame: frame['state_dim'] >= min_state_dim),
        (max_state_dim, lambda frame: frame['state_dim'] <= max_state_dim),
    )
    selected = df
    for bound, build_mask in criteria:
        if bound is None:
            continue
        selected = selected[build_mask(selected)]
    return selected

# Show every result file next to the position used to select it later on.
for position, path in enumerate(results_files):
    print(position, path)

0 c:\Users\orsof\mdp_benchmark_discounted\results\0_experience_maze_prims.py.csv
1 c:\Users\orsof\mdp_benchmark_discounted\results\1_experience_barto.py.csv
2 c:\Users\orsof\mdp_benchmark_discounted\results\1_experience_block.py.csv
3 c:\Users\orsof\mdp_benchmark_discounted\results\1_experience_garnet.py.csv
4 c:\Users\orsof\mdp_benchmark_discounted\results\1_experience_impatience.py.csv
5 c:\Users\orsof\mdp_benchmark_discounted\results\1_experience_maze_backtrack.py.csv
6 c:\Users\orsof\mdp_benchmark_discounted\results\1_experience_maze_wilson.py.csv
7 c:\Users\orsof\mdp_benchmark_discounted\results\1_experience_mountain.py.csv
8 c:\Users\orsof\mdp_benchmark_discounted\results\1_experience_rooms.py.csv
9 c:\Users\orsof\mdp_benchmark_discounted\results\1_experience_sutton.py.csv
10 c:\Users\orsof\mdp_benchmark_discounted\results\1_experience_tandem.py.csv
11 c:\Users\orsof\mdp_benchmark_discounted\results\1_experience_toy.py.csv


In [96]:
# Position (in the listing printed above) of the result file to analyse.
model = 11

# Load that file and keep only the configuration being compared.
df = select_rows(
    df=read_result_file(results_files[model]),
    discount=0.99,
    precision=1e-2,
    min_state_dim=100,
    max_state_dim=10000,
)
df

Unnamed: 0,instance_name,solver_name,discount,runtime,distance_to_optimal,state_dim,action_dim,instance_parameters,final_precision,transition_density,reward_density
1,500_6_gym_toy_text_Taxi,gurobi_dual,0.99,164.598588,3.519977e-07,500,6,_name_Taxi-v3,0.01,0.002,0.677333
3,500_6_gym_toy_text_Taxi,marmote_vi,0.99,0.255373,1.748526e-05,500,6,_name_Taxi-v3,0.01,0.002,0.677333
5,500_6_gym_toy_text_Taxi,bertsekas_pi,0.99,202.044696,0.003454897,500,6,_name_Taxi-v3,0.01,0.002,0.677333
7,500_6_gym_toy_text_Taxi,marmote_vigs,0.99,0.069939,0.0002051156,500,6,_name_Taxi-v3,0.01,0.002,0.677333
9,500_6_gym_toy_text_Taxi,mdptoolbox_pim,0.99,4.441997,2.775919e-05,500,6,_name_Taxi-v3,0.01,0.002,0.677333
11,500_6_gym_toy_text_Taxi,gurobi_primal,0.99,165.676788,5.042999e-09,500,6,_name_Taxi-v3,0.01,0.002,0.677333
13,500_6_gym_toy_text_Taxi,aggregated_pim,0.99,2.75212,5.042949e-09,500,6,_name_Taxi-v3,0.01,0.002,0.677333
15,500_6_gym_toy_text_Taxi,personal_pim,0.99,42.505891,3.496679e-05,500,6,_name_Taxi-v3,0.01,0.002,0.677333
17,500_6_gym_toy_text_Taxi,aggregated_vi,0.99,1.464405,2.292456e-05,500,6,_name_Taxi-v3,0.01,0.002,0.677333
19,500_6_gym_toy_text_Taxi,personal_vi,0.99,65.058749,3.462716e-05,500,6,_name_Taxi-v3,0.01,0.002,0.677333


### Aggregation comparison

In [97]:
# Baseline solvers: any solver whose name mentions one of these tags.
adversarial_results = df[
    df['solver_name'].apply(
        lambda solver: any(tag in solver for tag in ('personal', 'bertsekas', 'chen'))
    )
]
# Sanity check: exactly four baseline runs are expected for this selection.
assert len(adversarial_results) == 4
adversarial_results

Unnamed: 0,instance_name,solver_name,discount,runtime,distance_to_optimal,state_dim,action_dim,instance_parameters,final_precision,transition_density,reward_density
5,500_6_gym_toy_text_Taxi,bertsekas_pi,0.99,202.044696,0.003454897,500,6,_name_Taxi-v3,0.01,0.002,0.677333
15,500_6_gym_toy_text_Taxi,personal_pim,0.99,42.505891,3.496679e-05,500,6,_name_Taxi-v3,0.01,0.002,0.677333
19,500_6_gym_toy_text_Taxi,personal_vi,0.99,65.058749,3.462716e-05,500,6,_name_Taxi-v3,0.01,0.002,0.677333
25,500_6_gym_toy_text_Taxi,chen_td,0.99,26.184056,2.566936e-07,500,6,_name_Taxi-v3,0.01,0.002,0.677333


In [99]:
# Runs produced by the aggregation-based solvers (name contains 'aggregated').
aggregated_results = df[
    df['solver_name'].apply(lambda solver: 'aggregated' in solver)
]
# Sanity check: exactly three aggregated runs are expected for this selection.
assert len(aggregated_results) == 3
aggregated_results

Unnamed: 0,instance_name,solver_name,discount,runtime,distance_to_optimal,state_dim,action_dim,instance_parameters,final_precision,transition_density,reward_density
13,500_6_gym_toy_text_Taxi,aggregated_pim,0.99,2.75212,5.042949e-09,500,6,_name_Taxi-v3,0.01,0.002,0.677333
17,500_6_gym_toy_text_Taxi,aggregated_vi,0.99,1.464405,2.292456e-05,500,6,_name_Taxi-v3,0.01,0.002,0.677333


In [100]:
# Aggregation "wins" only when its fastest run is strictly faster than the
# fastest baseline run.
print(
    'Success of aggregation.'
    if aggregated_results['runtime'].min() < adversarial_results['runtime'].min()
    else "Failed"
)

Success of aggregation.
