In [1]:
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import numpy as np
import pandas as pd
import json
import os

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Directory containing one sub-folder of JSON result files per experiment
dir_path_main = "../results/experiments/"

# os.listdir returns entries in arbitrary order; sorting keeps the order of
# `folders` (and of anything built by iterating over it) identical on every run.
# os.path.join is used so the check does not depend on the trailing slash above.
folders = sorted(
    folder_name
    for folder_name in os.listdir(dir_path_main)
    if os.path.isdir(os.path.join(dir_path_main, folder_name))
)

def _read_json(file_path):
    """Parse and return the JSON document stored at ``file_path``."""
    with open(file_path) as f:
        return json.load(f)


def _brute_reference(dir_path, json_files):
    """Return ``(best, size)`` read from the ``brute*.json`` file of one folder.

    Args:
        dir_path: Folder that holds the result files.
        json_files: Names of the ``.json`` files inside ``dir_path``.

    Returns:
        A tuple ``(best, size)``. ``best`` is the first-row value of the file's
        "best" field; it is the reference every solver is compared against.
        ``size`` is the first entry of the "size" field, or NaN when that field
        is missing or cannot be indexed. If several brute files exist, the last
        one in ``json_files`` wins.

    Raises:
        FileNotFoundError: If ``json_files`` contains no name starting with
            "brute". Without this check a previous folder's reference would
            silently be reused for the comparison.
    """
    reference = None
    for file_name in json_files:
        if not file_name.startswith("brute"):
            continue
        data = _read_json(os.path.join(dir_path, file_name))
        try:
            size = data["size"][0]
        except (KeyError, IndexError, TypeError):
            # "size" absent, empty, or not a sequence: label the folder with NaN
            size = np.nan
        fields = {key: value for key, value in data.items() if key in {"solver", "time", "best"}}
        reference = (pd.DataFrame(fields).loc[0, "best"], size)

    if reference is None:
        raise FileNotFoundError(f"no brute*.json reference result found in {dir_path}")
    return reference


def _solver_frame(data, is_genetic, brute_best):
    """Turn the content of one result file into summary rows.

    Args:
        data: Parsed JSON content of the result file.
        is_genetic: True for ``gen*`` files, which store their results under
            "times" / "best_final" and are averaged per solver.
        brute_best: Reference value from the brute-force result.

    Returns:
        A DataFrame with the columns "solver", "time", "best", "delta"
        (difference to ``brute_best``) and "optimum" (fraction of rows whose
        "best" equals ``brute_best``); "time" is rounded to two decimals.
    """
    if is_genetic:
        wanted = {"solver", "times", "best_final"}
    else:
        wanted = {"solver", "time", "best"}
    fields = {key: value for key, value in data.items() if key in wanted}

    # Genetic files use different field names; bring them in line with the others.
    runs = pd.DataFrame(fields).rename(columns={"times": "time", "best_final": "best"})
    runs["delta"] = runs["best"] - brute_best
    runs["optimum"] = (runs["best"] == brute_best).astype(float)

    if is_genetic:
        # Average within each solver so that a file holding several solvers
        # yields per-solver hit rates and deltas rather than file-wide ones.
        runs = (
            runs.groupby("solver")
            .agg({"time": "mean", "best": "mean", "delta": "mean", "optimum": "mean"})
            .reset_index()
        )
    else:
        runs["optimum"] = runs["optimum"] / len(runs)

    runs["time"] = runs["time"].round(2)
    return runs


# Row order and column order of every per-folder table
desired_order = ['brute', 'heuristic', 'plain', 'elitist']
column_order = ["time", "best", "delta", "optimum"]

folder_combined_dfs = []
for folder in folders:
    dir_path = os.path.join(dir_path_main, folder)
    # Sorted so the result does not depend on the arbitrary os.listdir order
    json_files = sorted(name for name in os.listdir(dir_path) if name.endswith('.json'))

    # The brute-force optimum must be known before any solver can be scored
    brute_best, size = _brute_reference(dir_path, json_files)

    # One small summary frame per result file (the brute file itself included)
    dfs = [
        _solver_frame(
            _read_json(os.path.join(dir_path, file_name)),
            file_name.startswith("gen"),
            brute_best,
        )
        for file_name in json_files
    ]

    # Concatenate all DataFrames in the list into one DataFrame
    combined_df = pd.concat(dfs, ignore_index=True).fillna(0.0)
    combined_df["optimum"] = (combined_df["optimum"] * 100).round(0).astype(str) + "%"

    # Index by solver, fix the column order, then put the rows in the desired order
    combined_df = combined_df.set_index('solver')[column_order]
    combined_df = combined_df.reindex(desired_order)

    # Top column level identifies the experiment and its search-space size
    combined_df.columns = pd.MultiIndex.from_product(
        [[folder + " size: " + str(size)], combined_df.columns]
    )

    folder_combined_dfs.append(combined_df)


 
# Put the per-experiment tables side by side and give the metric columns
# their display names (the mapping is applied to the column labels).
display_names = {"time": "time in sec", "optimum": "optimum perc."}
combined_df = pd.concat(folder_combined_dfs, axis=1).rename(columns=display_names)

# Last expression of the cell: renders the combined table
combined_df



Unnamed: 0_level_0,close_maxima size: 4147200,close_maxima size: 4147200,close_maxima size: 4147200,close_maxima size: 4147200,heterogen size: 537600,heterogen size: 537600,heterogen size: 537600,heterogen size: 537600,fully_synthetic size: 7779240,fully_synthetic size: 7779240,fully_synthetic size: 7779240,fully_synthetic size: 7779240,no_deletes size: 691200,no_deletes size: 691200,no_deletes size: 691200,no_deletes size: 691200,invalid_branches size: 1658880,invalid_branches size: 1658880,invalid_branches size: 1658880,invalid_branches size: 1658880
Unnamed: 0_level_1,time in sec,best,delta,optimum perc.,time in sec,best,delta,optimum perc.,time in sec,best,delta,optimum perc.,time in sec,best,delta,optimum perc.,time in sec,best,delta,optimum perc.
solver,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
brute,23697.23,172.0,0.0,100.0%,2436.09,491.0,0.0,100.0%,49691.16,357.0,0.0,100.0%,2399.55,492.0,0.0,100.0%,14142.22,187.0,0.0,100.0%
heuristic,2.75,183.0,11.0,0.0%,0.58,491.0,0.0,100.0%,4.28,370.0,13.0,0.0%,1.5,492.0,0.0,100.0%,3.09,253.0,66.0,0.0%
plain,7.05,172.0,0.0,100.0%,31.06,528.1,37.1,0.0%,37.93,417.5,60.5,0.0%,20.88,560.9,68.9,0.0%,27.17,187.0,0.0,100.0%
elitist,3.82,172.0,0.0,100.0%,4.96,504.9,13.9,20.0%,11.48,363.4,6.4,0.0%,7.92,504.3,12.3,40.0%,7.71,187.0,0.0,100.0%
