In [11]:
import glob
import pickle
from pathlib import Path

import pandas as pd

In [12]:
dir="./../data"

In [13]:
experiments=glob.glob(dir+"/*")
experiments

['./../data/DIRECT_MEANS_AVERAGE_AGGREGATION_4_NODES',
 './../data/NO_META_LEARNING_DYNAMIC_CLIPPED_AVERAGE_AGGREGATION_2_NODES',
 './../data/NO_META_LEARNING_MEDIAN_AGGREGATION_4_NODES',
 './../data/NO_META_LEARNING_WEIGHTED_AVERAGE_RECENCY_2_NODES',
 './../data/GRADUAL_GENETIC_ALGORITHM_MATING_AVERAGE_AGGREGATION_4_NODES',
 './../data/NO_META_LEARNING_DYNAMIC_CLIPPED_AVERAGE_AGGREGATION_4_NODES',
 './../data/NO_META_LEARNING_MEDIAN_AGGREGATION_2_NODES',
 './../data/NO_META_LEARNING_WEIGHTED_AVERAGE_RECENCY_4_NODES',
 './../data/NO_META_LEARNING_WEIGHTED_AVERAGE_PERFORMANCE_SCORES_2_NODES',
 './../data/DIRECT_GENETIC_ALGORITHM_MATING_AVERAGE_AGGREGATION_4_NODES',
 './../data/GRADUAL_SCORE_WEIGHTED_MEANS_AVERAGE_AGGREGATION_4_NODES',
 './../data/NO_META_LEARNING_AVERAGE_AGGREGATION_2_NODES',
 './../data/GRADUAL_SCORE_WEIGHTED_MEANS_AVERAGE_AGGREGATION_2_NODES',
 './../data/DIRECT_MEANS_AVERAGE_AGGREGATION_2_NODES',
 './../data/NO_META_LEARNING_WEIGHTED_AVERAGE_PUNISHING_UPDATES_2_NODES

In [14]:
def read_parameter_server_data(folder_path: str):
    files = glob.glob(folder_path + "/parameter_server/*.pkl")
    init_data = None
    update_data = None
    for file in files:
        if "initialization" in file:
            init_data = pickle.load(open(file, "rb"))
        else:
            loaded_data = pickle.load(open(file, "rb"))
            if update_data is None:
                update_data = dict()
                for key, value in loaded_data.items():
                    if len(key)>30: # ingnore node specific data at this point
                        continue
                    update_data[key] = [value]
            else:
                for key, value in loaded_data.items():
                    if len(key)>30: # ingnore node specific data at this point
                        continue
                    update_data[key].append(value)
            # update_data.append(pickle.load(open(file, "rb")))
    return init_data, update_data

def read_node_data(folder_path:str):
    node_folders=[folder for folder in glob.glob(folder_path + "/*/") if not "parameter" in folder]
    iter_data = dict()
    training_data = dict()
    init_data = dict()
    for idx, folder in enumerate(node_folders):
        files = glob.glob(f"{folder}/*.pkl")
        for file in files:
            if "initialization" in file:
                init_data[f"node_{idx}"] = pickle.load(open(file, "rb"))
            elif "iter_" in file:
                loaded_data = pickle.load(open(file, "rb"))
                for key, value in loaded_data.items():
                    if f"{key}_{idx}" in iter_data:
                        iter_data[f"{key}_{idx}"].append(value)        
                    else:
                        iter_data[f"{key}_{idx}"]=[value]
            else:
                loaded_data = pickle.load(open(file, "rb"))
                for key, value in loaded_data.items():
                    if f"{key}_{idx}" in training_data:
                        training_data[f"{key}_{idx}"].append(value)        
                    else:
                        training_data[f"{key}_{idx}"]=[value]
    return init_data, iter_data, training_data

In [15]:
for experiment in experiments:
    experiment_name = experiment.split("/")[-1]
    Path(f"./../results/{experiment_name}/").mkdir(parents=True, exist_ok=True)
    server_init_data, update_data = read_parameter_server_data(experiment)
    if not "NO_FEDERATED_LEARNING" in experiment:
        server_hp_config = (
            pd.DataFrame()
            .from_dict(update_data)
            .sort_values(by="timestamp", inplace=False, ignore_index=True)
        )
        server_hp_config.to_pickle(f"./../results/{experiment_name}/server_hp_config.pkl")
    node_init_data, iter_data, training_data = read_node_data(
        experiment
    )  # node_init_data, iter_data, training_data
    max_len = max([len(val) for val in iter_data.values()])
    for key, value in iter_data.items():
        iter_data[key] = value + [None] * (max_len - len(value))
    node_idxs = sorted(set([val[-1] for val in iter_data.keys()]))
    iter_df = None
    for node_idx in node_idxs:
        rel_cols = [col for col in iter_data.keys() if node_idx in col]
        iter_df_fraction = (
            pd.DataFrame()
            .from_dict(iter_data)[rel_cols]
            .sort_values(by=f"timestamp_{node_idx}", inplace=False, ignore_index=True)
        )
        if iter_df is None:
            iter_df = iter_df_fraction
        else:
            iter_df=iter_df.merge(right=iter_df_fraction,
                left_on=f"iter_counter_0",
                right_on=f"iter_counter_{node_idx}",
                how="inner",
            )
    iter_df.to_pickle(f"./../results/{experiment_name}/iter_df.pkl")
    max_len = max([len(val) for val in training_data.values()])
    for key, value in training_data.items():
        training_data[key] = value + [None] * (max_len - len(value))
    training_df = pd.DataFrame().from_dict(
        training_data
    )  # .sort_values(by="timestamp",inplace=False,ignore_index=True)
    training_df.to_pickle(f"./../results/{experiment_name}/training_df.pkl")

In [16]:
iter_data.keys()

dict_keys(['curr_score_0', 'iter_counter_0', 'DG_rotation_0', 'timestamp_0', 'curr_score_1', 'iter_counter_1', 'DG_rotation_1', 'timestamp_1'])

In [17]:
training_df.keys()

Index(['learning_rate_mean_0', 'learning_rate_std_0', 'n_epochs_mean_0',
       'n_epochs_std_0', 'momentum_mean_0', 'momentum_std_0',
       'batch_size_mean_0', 'batch_size_std_0', 'nesterov_mean_0',
       'nesterov_std_0', 'n_runs_mean_0', 'n_runs_std_0', 'n_runs_0',
       'n_epochs_0', 'weights_0', 'iter_count_0', 'pre_training_score_0',
       'scores_0', 'best_score_0', 'best_hyper_params_0',
       'best_model_weights_0', 'timestamp_0', 'batch_size_mean_1',
       'batch_size_std_1', 'learning_rate_mean_1', 'learning_rate_std_1',
       'nesterov_mean_1', 'nesterov_std_1', 'momentum_mean_1',
       'momentum_std_1', 'n_runs_mean_1', 'n_runs_std_1', 'n_epochs_mean_1',
       'n_epochs_std_1', 'n_runs_1', 'n_epochs_1', 'weights_1', 'iter_count_1',
       'pre_training_score_1', 'scores_1', 'best_score_1',
       'best_hyper_params_1', 'best_model_weights_1', 'timestamp_1'],
      dtype='object')

In [18]:
pd.DataFrame().from_dict(iter_data)

Unnamed: 0,curr_score_0,iter_counter_0,DG_rotation_0,timestamp_0,curr_score_1,iter_counter_1,DG_rotation_1,timestamp_1
0,0.790000,857,184,2024-02-20 21:19:22.403457,0.785000,857,540,2024-02-20 21:21:01.538925
1,0.789000,809,169,2024-02-20 21:18:49.970652,0.809000,809,502,2024-02-20 21:20:24.383793
2,0.808000,898,193,2024-02-20 21:19:49.151214,0.796333,898,567,2024-02-20 21:21:33.722337
3,0.674667,122,23,2024-02-20 21:11:05.151803,0.669333,122,76,2024-02-20 21:11:40.174777
4,0.780333,7,2,2024-02-20 21:09:48.676418,0.775000,7,5,2024-02-20 21:09:52.415897
...,...,...,...,...,...,...,...,...
995,0.773667,493,100,2024-02-20 21:15:20.951387,0.735000,493,310,2024-02-20 21:16:39.774837
996,0.738333,41,9,2024-02-20 21:10:09.828775,0.741667,41,29,2024-02-20 21:10:31.498652
997,0.691333,597,123,2024-02-20 21:16:28.050185,0.664333,597,371,2024-02-20 21:17:48.631850
998,0.788667,870,187,2024-02-20 21:19:30.451493,0.745667,870,549,2024-02-20 21:21:10.887136


In [19]:
update_data.keys()

dict_keys(['learning_rate_mean', 'learning_rate_std', 'n_epochs_mean', 'n_epochs_std', 'momentum_mean', 'momentum_std', 'batch_size_mean', 'batch_size_std', 'nesterov_mean', 'nesterov_std', 'n_runs_mean', 'n_runs_std', 'weights', 'timestamp'])

In [20]:
server_hp_config=pd.DataFrame().from_dict(update_data).sort_values(by="timestamp",inplace=False,ignore_index=True)
server_hp_config

Unnamed: 0,learning_rate_mean,learning_rate_std,n_epochs_mean,n_epochs_std,momentum_mean,momentum_std,batch_size_mean,batch_size_std,nesterov_mean,nesterov_std,n_runs_mean,n_runs_std,weights,timestamp
0,-3.000000,1.000000,10.000000,3.000000,0.500000,0.300000,64.000000,20.000000,0.500000,0.300000,10.000000,3.000000,"[[[0.836649477481842, -0.7435741424560547, 0.9...",2024-02-20 21:09:44.920428
1,-3.000000,1.000000,10.000000,3.000000,0.500000,0.300000,64.000000,20.000000,0.500000,0.300000,10.000000,3.000000,"[[[0.9439019858837128, -0.7845838963985443, 0....",2024-02-20 21:09:48.664870
2,-3.000000,1.000000,10.000000,3.000000,0.500000,0.300000,64.000000,20.000000,0.500000,0.300000,10.000000,3.000000,"[[[0.9332194725672404, -0.867597758769989, 0.1...",2024-02-20 21:10:02.250012
3,-3.000000,1.000000,10.000000,3.000000,0.500000,0.300000,64.000000,20.000000,0.500000,0.300000,10.000000,3.000000,"[[[0.9180756509304047, -0.9520684629678726, 0....",2024-02-20 21:10:12.804219
4,-2.394166,0.027453,9.417601,0.027453,0.658789,0.027453,71.545298,0.027453,0.411993,0.027453,7.232097,0.027453,"[[[0.9404425621032715, -0.9901475071907043, 0....",2024-02-20 21:10:27.788616
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,-2.932799,0.019447,3.392991,0.019447,0.695620,0.019447,61.483938,0.019447,0.000000,0.019447,2.199207,0.019447,"[[[1.3441488862037658, -1.0814330577850342, 0....",2024-02-20 21:22:04.621815
78,-2.968433,0.019989,3.394969,0.019989,0.719320,0.019989,61.793920,0.019989,0.000000,0.019989,2.200210,0.019989,"[[[1.356984543800354, -1.0957716822624206, 0.9...",2024-02-20 21:22:16.395656
79,-2.967455,0.023915,3.206533,0.023915,0.717806,0.023915,62.099684,0.023915,0.000000,0.023915,2.100738,0.023915,"[[[1.369737195968628, -1.1309879660606383, 0.9...",2024-02-20 21:22:25.484473
80,-2.917993,0.019993,3.208024,0.019993,0.718323,0.019993,62.523243,0.019993,0.000000,0.019993,2.101465,0.019993,"[[[1.3896974205970765, -1.1593047857284546, 0....",2024-02-20 21:22:32.921185


In [21]:
max_len = max([len(val) for val in iter_data.values()])
for key, value in iter_data.items():
    iter_data[key] = value + [None] * (max_len-len(value))
iter_df = pd.DataFrame().from_dict(iter_data)
iter_df

Unnamed: 0,curr_score_0,iter_counter_0,DG_rotation_0,timestamp_0,curr_score_1,iter_counter_1,DG_rotation_1,timestamp_1
0,0.790000,857,184,2024-02-20 21:19:22.403457,0.785000,857,540,2024-02-20 21:21:01.538925
1,0.789000,809,169,2024-02-20 21:18:49.970652,0.809000,809,502,2024-02-20 21:20:24.383793
2,0.808000,898,193,2024-02-20 21:19:49.151214,0.796333,898,567,2024-02-20 21:21:33.722337
3,0.674667,122,23,2024-02-20 21:11:05.151803,0.669333,122,76,2024-02-20 21:11:40.174777
4,0.780333,7,2,2024-02-20 21:09:48.676418,0.775000,7,5,2024-02-20 21:09:52.415897
...,...,...,...,...,...,...,...,...
995,0.773667,493,100,2024-02-20 21:15:20.951387,0.735000,493,310,2024-02-20 21:16:39.774837
996,0.738333,41,9,2024-02-20 21:10:09.828775,0.741667,41,29,2024-02-20 21:10:31.498652
997,0.691333,597,123,2024-02-20 21:16:28.050185,0.664333,597,371,2024-02-20 21:17:48.631850
998,0.788667,870,187,2024-02-20 21:19:30.451493,0.745667,870,549,2024-02-20 21:21:10.887136


In [22]:
max_len = max([len(val) for val in training_data.values()])
for key, value in training_data.items():
    training_data[key] = value + [None] * (max_len-len(value))
training_df = pd.DataFrame().from_dict(training_data)
training_df

Unnamed: 0,learning_rate_mean_0,learning_rate_std_0,n_epochs_mean_0,n_epochs_std_0,momentum_mean_0,momentum_std_0,batch_size_mean_0,batch_size_std_0,nesterov_mean_0,nesterov_std_0,...,n_runs_1,n_epochs_1,weights_1,iter_count_1,pre_training_score_1,scores_1,best_score_1,best_hyper_params_1,best_model_weights_1,timestamp_1
0,-2.741101,0.041547,7.170265,0.041547,0.654545,0.041547,71.294681,0.041547,0.204095,0.041547,...,9,10,,1,0.636667,"[0.6616666666666666, 0.7933333333333333, 0.8, ...",0.818333,"{'learning_rate': 0.0016362689205570182, 'batc...","[[[1.0511544942855835, -0.8255936503410339, -0...",2024-02-20 21:09:48.663341
1,-2.635812,1.000000,7.538958,3.000000,0.618419,0.300000,69.180397,20.000000,0.227543,0.042485,...,3,5,"[[[0.9845368444919587, -1.3138595223426819, 0....",557,0.528333,"[0.715, 0.715, 0.715]",0.715000,"{'learning_rate': 0.0010179244043330253, 'batc...","[[[0.8139707446098328, -1.1830787658691406, 0....",2024-02-20 21:17:23.948663
2,-2.977129,0.026908,5.204511,0.026908,0.663157,0.300000,58.305452,0.026908,0.000000,0.026908,...,2,4,"[[[1.3011824131011962, -1.1989259779453278, 0....",850,0.775000,"[0.8133333333333334, 0.795]",0.813333,"{'learning_rate': 0.0007235029340695445, 'batc...","[[[1.120685338973999, -1.2771055698394775, 0.7...",2024-02-20 21:20:57.159404
3,-2.917802,0.011532,4.400000,0.011532,0.755955,0.011532,61.782049,0.011532,0.000000,0.011532,...,4,6,"[[[0.9468892872333526, -1.374571645259857, 0.2...",527,0.513333,"[0.7533333333333333, 0.7533333333333333, 0.753...",0.753333,"{'learning_rate': 0.0010302568138036558, 'batc...","[[[0.9030010104179382, -0.9936693906784058, 0....",2024-02-20 21:17:03.343208
4,-2.984326,1.000000,3.696803,0.019642,0.670385,0.019642,59.976921,0.019642,0.000000,0.019642,...,2,3,"[[[1.369737195968628, -1.1309879660606383, 0.9...",982,0.501667,"[0.7733333333333333, 0.7733333333333333]",0.773333,"{'learning_rate': 0.003985941745648338, 'batch...","[[[1.3992538452148438, -1.2900665998458862, 1....",2024-02-20 21:22:32.919139
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56,,,,,,,,,,,...,6,6,"[[[1.0560413658618928, -1.3792971849441529, 0....",254,0.511667,"[0.7716666666666666, 0.7716666666666666, 0.771...",0.771667,"{'learning_rate': 0.00025151170466294534, 'bat...","[[[1.3704115152359009, -1.3646135330200195, 0....",2024-02-20 21:13:15.542438
57,,,,,,,,,,,...,2,4,"[[[1.0506595075130463, -1.2736420631408691, 0....",671,0.490000,"[0.7866666666666666, 0.7866666666666666]",0.786667,"{'learning_rate': 0.0010990187224176385, 'batc...","[[[0.9609156250953674, -1.411204218864441, 0.8...",2024-02-20 21:18:43.536475
58,,,,,,,,,,,...,2,3,"[[[1.3056369304656983, -1.246260905265808, 0.5...",807,0.471667,"[0.8183333333333334, 0.8183333333333334]",0.818333,"{'learning_rate': 3.380149633031917e-05, 'batc...","[[[1.2305772304534912, -1.269688367843628, 0.3...",2024-02-20 21:20:23.141023
59,,,,,,,,,,,...,5,6,"[[[0.9513649821281434, -1.4154964208602905, 0....",484,0.515000,"[0.7416666666666667, 0.7416666666666667, 0.741...",0.741667,"{'learning_rate': 0.0011750381265603835, 'batc...","[[[0.8834633827209473, -1.1628843545913696, 0....",2024-02-20 21:16:34.199199
