In [None]:
import io
import ast
import numpy as np
import pandas as pd
import os

In [None]:
#utils

def data_string_to_df(data_string):
    """Parse one text section of a results file into a DataFrame.

    The first line of ``data_string`` is discarded; everything after it is
    read as tab-separated data, with the first column used as the index.

    Parameters
    ----------
    data_string : str
        Text block whose first line is to be skipped.

    Returns
    -------
    pandas.DataFrame
        The parsed table.
    """
    # Keep only what follows the first newline (empty if there is none).
    _, _, table_text = data_string.partition('\n')
    return pd.read_csv(io.StringIO(table_text), sep='\t', index_col=0)

def _parse_interval_cell(cell):
    """Split a cell formatted like 'a (b,c,d)' into ``(a, [b, c, d])`` as ints."""
    parts = cell.split('(')
    main_num = int(parts[0].split()[0])
    # Drop surrounding whitespace before removing the closing parenthesis so
    # that trailing blanks after ')' do not break the integer conversion.
    other_nums = [int(x) for x in parts[1].strip().strip(')').split(',')]
    return main_num, other_nums


def _interval_miss(value, bounds):
    """Return 0 if ``value`` lies within [min(bounds), max(bounds)], else the distance to the nearest bound."""
    # min/max instead of first/last element: the result no longer depends on
    # the bracketed numbers being listed in ascending order.
    if min(bounds) <= value <= max(bounds):
        return 0
    return np.min(np.abs(np.array(bounds) - value))


def _total_interval_miss(retr):
    """Sum the interval misses over every cell of ``retr``."""
    return np.sum([_interval_miss(*_parse_interval_cell(v)) for v in retr.values.ravel()])


def _total_point_miss(retr):
    """Sum the absolute successive differences of the literal sequence stored in every cell of ``retr``."""
    return np.sum([np.abs(np.diff(ast.literal_eval(v))) for v in retr.values.ravel()])


def compute_retrodicted_fit(absolute_retr,frontier_retr,prediction_type='point'):
    """Score how far the retrodictions are from their reference values.

    Parameters
    ----------
    absolute_retr, frontier_retr : pandas.DataFrame
        Tables whose cells are strings. Every cell of each table is scored.
        * ``'point'``: each cell must be a Python literal sequence of numbers
          (parsed with ``ast.literal_eval``); its score is the sum of the
          absolute differences between consecutive elements.
        * ``'interval'``: each cell must look like ``'a (b,c,d)'`` with integer
          values; its score is 0 when ``a`` lies between the smallest and
          largest bracketed number (inclusive), otherwise the distance from
          ``a`` to the nearest bracketed number.
    prediction_type : {'point', 'interval'}
        Selects the cell format and scoring rule described above.

    Returns
    -------
    tuple
        ``(total, absolute_sum, frontier_sum)`` where ``total`` is the sum of
        the two per-table scores.

    Raises
    ------
    AssertionError
        If ``prediction_type`` is not ``'point'`` or ``'interval'``.
    """
    assert prediction_type in ['point','interval']

    # Both tables are scored with the same rule; only the rule depends on the mode.
    score_table = _total_point_miss if prediction_type=='point' else _total_interval_miss
    absolute_retr_sum = score_table(absolute_retr)
    frontier_retr_sum = score_table(frontier_retr)

    return absolute_retr_sum+frontier_retr_sum,absolute_retr_sum, frontier_retr_sum


In [None]:
path="results/test_folder"

# Maps each file's position in the (sorted) directory listing to
# (mini_config, total retrodicted fit).
RETR_FITS={}

# os.listdir returns entries in arbitrary order; sorting keeps file_idx reproducible.
for file_idx, filename in enumerate(sorted(os.listdir(path))):
    if not filename.endswith(".csv"): continue
    file_path = os.path.join(path,filename)
    with open(file_path, 'r') as file:
        # Sections are separated by blank lines: the config block first, then
        # the data tables in the order handled by the idx branches below.
        content = file.read().strip().split('\n\n')

    # Both retrodiction tables are needed for the fit. Failing here prevents a
    # truncated file from silently reusing frames parsed from a previous file.
    if len(content) < 3:
        raise ValueError(
            "{}: expected a config section followed by at least two data sections, found {} section(s)".format(
                file_path, len(content)
            )
        )

    for idx, data_string in enumerate(content):
        if idx==0:
            config_dict = {}
            for line in data_string.split('\n'):
                if ':' in line:
                    key, value = line.split(':', 1)
                    config_dict[key.strip()] = value.strip()
            # Build the summary only after every "key: value" line has been read.
            config = config_dict
            growth_parameters = ast.literal_eval(config.get('growth parameters'))
            mini_config = {
                'filename':file_path,
                'allocation': config.get('fixed allocation'),
                'g_global_AI_compute_mean': growth_parameters.get('g_global_AI_compute_mean'),
                'g_stdev': growth_parameters.get('g_stdev')
            }
        elif idx==1:
            absolute_retr=data_string_to_df(data_string.lstrip())
        elif idx==2:
            frontier_retr=data_string_to_df(data_string.lstrip())
        elif idx==3:
            absolute_pred=data_string_to_df(data_string.lstrip())
        elif idx==4:
            frontier_pred=data_string_to_df(data_string.lstrip())

    total_retr_fit,_,_=compute_retrodicted_fit(absolute_retr,frontier_retr,prediction_type='interval')
    RETR_FITS[file_idx]=(mini_config,total_retr_fit)


if 0:  # disabled; set to 1 to inspect the configurations with the lowest retrodicted fit
    n_lowest_configs=100
    # Order the entries by ascending total retrodicted fit (second element of each value).
    sorted_retr_fits = dict(sorted(RETR_FITS.items(), key=lambda x: x[1][1]))
    lowest_configs = [v[0] for v in sorted_retr_fits.values()][:n_lowest_configs]
    print(lowest_configs)
    #get actual dfs



    if 1:
        # Data sections, in the order they appear after the config block.
        section_names = ('absolute_retr', 'frontier_retr', 'absolute_pred', 'frontier_pred')
        results_data = []

        for selected_config in lowest_configs:
            file_path = selected_config['filename']
            with open(file_path, 'r') as file:
                content = file.read().strip().split('\n\n')

            # Record the file actually being read, not a name left over from
            # an earlier loop.
            config_results = {'filename': file_path}

            config_dict = {}
            for line in content[0].split('\n'):
                if ':' in line:
                    key, value = line.split(':', 1)
                    config_dict[key.strip()] = value.strip()
            config_results['config'] = config_dict

            # zip stops at the shorter input, so sections beyond the fourth are
            # ignored and missing ones are simply left out.
            for section_name, data_string in zip(section_names, content[1:]):
                config_results[section_name] = data_string_to_df(data_string.lstrip())
            results_data.append(config_results)


        for results in results_data:
            absolute_retr,frontier_retr,absolute_pred,frontier_pred=results['absolute_retr'],results['frontier_retr'],results['absolute_pred'],results['frontier_pred']
            fits=compute_retrodicted_fit(absolute_retr,frontier_retr,prediction_type='interval')
            print(fits)