In [9]:
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import numpy as np
import pandas as pd
import json
import os

In [10]:
# Build one summary table comparing every solver against the brute-force
# reference, with one column group per experiment folder.
#
# Expected layout: <dir_path_main>/<experiment>/*.json, where
#   * brute*.json holds the reference result ("best", "time", optionally "size")
#   * gen*.json   holds repeated runs ("solver", "times", "best_final")
#   * every other *.json holds "solver", "time", "best"

# Directory containing one sub-folder of JSON result files per experiment
dir_path_main = "../results/experiments/"

# Row order and column order of the final table
desired_order = ['brute', 'heuristic', 'plain', 'elitist']
metric_columns = ["time", "best", "delta", "optimum"]


def _summarise_runs(results, is_genetic, reference_best):
    """Reduce the raw results of one JSON file to one row per solver.

    Parameters
    ----------
    results : dict
        Parsed JSON content of a result file.
    is_genetic : bool
        True for ``gen*`` files, which store their runs under the keys
        ``times`` / ``best_final`` instead of ``time`` / ``best``.
    reference_best : number
        Best value found by the brute-force solver for the same experiment.

    Returns
    -------
    pandas.DataFrame
        Columns ``solver``, ``time`` (mean, rounded to 2 decimals), ``best``
        (mean), ``delta`` (mean distance to ``reference_best``) and
        ``optimum`` (fraction of runs that hit ``reference_best`` exactly).
    """
    wanted = {"solver", "times", "best_final"} if is_genetic else {"solver", "time", "best"}
    runs = pd.DataFrame(
        {key: value for key, value in results.items() if key in wanted}
    ).rename(columns={"times": "time", "best_final": "best"})

    # Per-run metrics first, so that the aggregation below is per solver
    # rather than over every run stored in the file.
    runs["delta"] = runs["best"] - reference_best
    runs["optimum"] = (runs["best"] == reference_best).astype(int)

    summary = runs.groupby("solver", as_index=False).agg(
        {"time": "mean", "best": "mean", "delta": "mean", "optimum": "mean"}
    )
    summary["time"] = summary["time"].round(2)
    return summary


# Every sub-directory of dir_path_main is treated as one experiment
folders = [
    folder_name
    for folder_name in os.listdir(dir_path_main)
    if os.path.isdir(os.path.join(dir_path_main, folder_name))
]

folder_combined_dfs = []
for folder in folders:
    dir_path = os.path.join(dir_path_main, folder)
    json_files = [name for name in os.listdir(dir_path) if name.endswith('.json')]

    # Reset the reference for every folder so that a missing brute file can
    # never silently reuse the values loaded for the previous experiment.
    brute_df = None
    size = np.nan
    for file_name in json_files:
        if not file_name.startswith("brute"):
            continue
        with open(os.path.join(dir_path, file_name)) as f:
            brute_data = json.load(f)
        try:
            size = brute_data["size"][0]
        except (KeyError, IndexError, TypeError):
            # "size" is optional; the column header then reads "size: nan"
            size = np.nan
        brute_df = pd.DataFrame(
            {key: value for key, value in brute_data.items() if key in {"solver", "time", "best"}}
        )

    if brute_df is None or brute_df.empty or "best" not in brute_df.columns:
        raise ValueError(
            "No usable brute*.json reference result found in " + dir_path
            + "; delta and optimum cannot be computed for this experiment."
        )
    brute_best = brute_df.loc[0, "best"]

    # One summary DataFrame per JSON file (the brute file itself included)
    dfs = []
    for file_name in json_files:
        with open(os.path.join(dir_path, file_name)) as f:
            data = json.load(f)
        df = _summarise_runs(data, file_name.startswith("gen"), brute_best)
        dfs.append(df)

    # Concatenate all per-file summaries of this experiment into one DataFrame
    folder_df = pd.concat(dfs, ignore_index=True).fillna(0.0)
    folder_df["optimum"] = (folder_df["optimum"] * 100).round(0).astype(str) + "%"

    # Index by solver and fix both row and column order, so the layout does
    # not depend on the order in which os.listdir returned the files.
    folder_df = folder_df.set_index('solver').reindex(
        index=desired_order, columns=metric_columns
    )
    folder_df.columns = pd.MultiIndex.from_product(
        [[folder + " size: " + str(size)], folder_df.columns]
    )

    folder_combined_dfs.append(folder_df)

# Put the experiments side by side and give the metrics readable headers
combined_df = pd.concat(folder_combined_dfs, axis=1).rename(
    columns={"time": "time in sec", "optimum": "optimum perc."}
)

# Display the combined DataFrame
combined_df



Unnamed: 0_level_0,fully_synthetic size: 7779240,fully_synthetic size: 7779240,fully_synthetic size: 7779240,fully_synthetic size: 7779240,no_deletes size: 691200,no_deletes size: 691200,no_deletes size: 691200,no_deletes size: 691200,heterogen size: 537600,heterogen size: 537600,heterogen size: 537600,heterogen size: 537600,close_maxima size: 4147200,close_maxima size: 4147200,close_maxima size: 4147200,close_maxima size: 4147200,invalid_branches size: 1658880,invalid_branches size: 1658880,invalid_branches size: 1658880,invalid_branches size: 1658880
Unnamed: 0_level_1,time in sec,best,delta,optimum perc.,time in sec,best,delta,optimum perc.,time in sec,best,delta,optimum perc.,time in sec,best,delta,optimum perc.,time in sec,best,delta,optimum perc.
solver,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
brute,62618.45,357.0,0.0,100.0%,1301.12,492.0,0.0,100.0%,1661.0,491.0,0.0,100.0%,13904.79,172.0,0.0,100.0%,6198.89,187.0,0.0,100.0%
heuristic,9.75,370.0,13.0,0.0%,2.17,492.0,0.0,100.0%,3.24,491.0,0.0,100.0%,5.52,183.0,11.0,0.0%,2.21,253.0,66.0,0.0%
plain,35.23,446.1,89.1,0.0%,11.36,580.2,88.2,0.0%,21.46,533.3,42.3,0.0%,5.69,172.1,0.1,90.0%,24.56,187.0,0.0,100.0%
elitist,10.37,364.3,7.3,0.0%,2.99,496.3,4.3,60.0%,5.44,499.8,8.8,50.0%,3.69,172.0,0.0,100.0%,5.41,187.0,0.0,100.0%
