In [1]:
import glob
import pickle
from pathlib import Path

import pandas as pd

In [2]:
dir="./../data"

In [3]:
experiments=glob.glob(dir+"/*")
experiments

['./../data/DIRECT_MEANS_AVERAGE_AGGREGATION_4_NODES',
 './../data/NO_META_LEARNING_DYNAMIC_CLIPPED_AVERAGE_AGGREGATION_2_NODES',
 './../data/NO_META_LEARNING_MEDIAN_AGGREGATION_4_NODES',
 './../data/NO_META_LEARNING_WEIGHTED_AVERAGE_RECENCY_2_NODES',
 './../data/GRADUAL_GENETIC_ALGORITHM_MATING_AVERAGE_AGGREGATION_4_NODES',
 './../data/NO_META_LEARNING_DYNAMIC_CLIPPED_AVERAGE_AGGREGATION_4_NODES',
 './../data/NO_META_LEARNING_MEDIAN_AGGREGATION_2_NODES',
 './../data/NO_META_LEARNING_WEIGHTED_AVERAGE_RECENCY_4_NODES',
 './../data/NO_META_LEARNING_WEIGHTED_AVERAGE_PERFORMANCE_SCORES_2_NODES',
 './../data/DIRECT_GENETIC_ALGORITHM_MATING_AVERAGE_AGGREGATION_4_NODES',
 './../data/GRADUAL_SCORE_WEIGHTED_MEANS_AVERAGE_AGGREGATION_4_NODES',
 './../data/NO_META_LEARNING_AVERAGE_AGGREGATION_2_NODES',
 './../data/GRADUAL_SCORE_WEIGHTED_MEANS_AVERAGE_AGGREGATION_2_NODES',
 './../data/DIRECT_MEANS_AVERAGE_AGGREGATION_2_NODES',
 './../data/NO_META_LEARNING_WEIGHTED_AVERAGE_PUNISHING_UPDATES_2_NODES

In [4]:
def read_parameter_server_data(folder_path: str):
    files = glob.glob(folder_path + "/parameter_server/*.pkl")
    init_data = None
    update_data = None
    for file in files:
        if "initialization" in file:
            init_data = pickle.load(open(file, "rb"))
        else:
            loaded_data = pickle.load(open(file, "rb"))
            if update_data is None:
                update_data = dict()
                for key, value in loaded_data.items():
                    if len(key)>30: # ingnore node specific data at this point
                        continue
                    update_data[key] = [value]
            else:
                for key, value in loaded_data.items():
                    if len(key)>30: # ingnore node specific data at this point
                        continue
                    update_data[key].append(value)
            # update_data.append(pickle.load(open(file, "rb")))
    return init_data, update_data

def read_node_data(folder_path:str):
    node_folders=[folder for folder in glob.glob(folder_path + "/*/") if not "parameter" in folder]
    iter_data = dict()
    training_data = dict()
    init_data = dict()
    for idx, folder in enumerate(node_folders):
        files = glob.glob(f"{folder}/*.pkl")
        for file in files:
            if "initialization" in file:
                init_data[f"node_{idx}"] = pickle.load(open(file, "rb"))
            elif "iter_" in file:
                loaded_data = pickle.load(open(file, "rb"))
                for key, value in loaded_data.items():
                    if f"{key}_{idx}" in iter_data:
                        iter_data[f"{key}_{idx}"].append(value)        
                    else:
                        iter_data[f"{key}_{idx}"]=[value]
            else:
                loaded_data = pickle.load(open(file, "rb"))
                for key, value in loaded_data.items():
                    if f"{key}_{idx}" in training_data:
                        training_data[f"{key}_{idx}"].append(value)        
                    else:
                        training_data[f"{key}_{idx}"]=[value]
    return init_data, iter_data, training_data

In [5]:
for experiment in experiments:
    experiment_name = experiment.split("/")[-1]
    Path(f"./../results/{experiment_name}/").mkdir(parents=True, exist_ok=True)
    server_init_data, update_data = read_parameter_server_data(experiment)
    if not "NO_FEDERATED_LEARNING" in experiment:
        server_hp_config = (
            pd.DataFrame()
            .from_dict(update_data)
            .sort_values(by="timestamp", inplace=False, ignore_index=True)
        )
        server_hp_config.to_pickle(f"./../results/{experiment_name}/server_hp_config.pkl")
    node_init_data, iter_data, training_data = read_node_data(
        experiment
    )  # node_init_data, iter_data, training_data
    max_len = max([len(val) for val in iter_data.values()])
    for key, value in iter_data.items():
        iter_data[key] = value + [None] * (max_len - len(value))
    node_idxs = sorted(set([val[-1] for val in iter_data.keys()]))
    iter_df = None
    for node_idx in node_idxs:
        rel_cols = [col for col in iter_data.keys() if node_idx in col]
        iter_df_fraction = (
            pd.DataFrame()
            .from_dict(iter_data)[rel_cols]
            .sort_values(by=f"timestamp_{node_idx}", inplace=False, ignore_index=True)
        )
        if iter_df is None:
            iter_df = iter_df_fraction
        else:
            iter_df=iter_df.merge(right=iter_df_fraction,
                left_on=f"iter_counter_0",
                right_on=f"iter_counter_{node_idx}",
                how="inner",
            )
    iter_df.to_pickle(f"./../results/{experiment_name}/iter_df.pkl")
    max_len = max([len(val) for val in training_data.values()])
    for key, value in training_data.items():
        training_data[key] = value + [None] * (max_len - len(value))
    training_df = pd.DataFrame().from_dict(
        training_data
    )  # .sort_values(by="timestamp",inplace=False,ignore_index=True)
    training_df.to_pickle(f"./../results/{experiment_name}/training_df.pkl")

In [6]:
iter_data.keys()

dict_keys(['curr_score_0', 'iter_counter_0', 'DG_rotation_0', 'timestamp_0', 'curr_score_1', 'iter_counter_1', 'DG_rotation_1', 'timestamp_1'])

In [7]:
training_df.keys()

Index(['momentum_mean_0', 'momentum_std_0', 'nesterov_mean_0',
       'nesterov_std_0', 'n_epochs_mean_0', 'n_epochs_std_0',
       'learning_rate_mean_0', 'learning_rate_std_0', 'n_runs_mean_0',
       'n_runs_std_0', 'batch_size_mean_0', 'batch_size_std_0', 'n_runs_0',
       'n_epochs_0', 'weights_0', 'iter_count_0', 'pre_training_score_0',
       'scores_0', 'best_score_0', 'best_hyper_params_0',
       'best_model_weights_0', 'timestamp_0', 'momentum_mean_1',
       'momentum_std_1', 'n_runs_mean_1', 'n_runs_std_1', 'nesterov_mean_1',
       'nesterov_std_1', 'n_epochs_mean_1', 'n_epochs_std_1',
       'learning_rate_mean_1', 'learning_rate_std_1', 'batch_size_mean_1',
       'batch_size_std_1', 'n_runs_1', 'n_epochs_1', 'weights_1',
       'iter_count_1', 'pre_training_score_1', 'scores_1', 'best_score_1',
       'best_hyper_params_1', 'best_model_weights_1', 'timestamp_1'],
      dtype='object')

In [8]:
pd.DataFrame().from_dict(iter_data)

Unnamed: 0,curr_score_0,iter_counter_0,DG_rotation_0,timestamp_0,curr_score_1,iter_counter_1,DG_rotation_1,timestamp_1
0,0.793667,857,540,2024-02-27 22:09:12.820982,0.725000,857,184,2024-02-27 22:06:39.554054
1,0.699333,809,502,2024-02-27 22:08:27.507439,0.575000,809,169,2024-02-27 22:06:03.158119
2,0.732333,898,567,2024-02-27 22:09:46.645081,0.827000,898,193,2024-02-27 22:07:12.861774
3,0.802333,122,76,2024-02-27 21:58:07.712297,0.768000,122,23,2024-02-27 21:57:44.598067
4,0.770667,7,5,2024-02-27 21:56:11.252935,0.778000,7,2,2024-02-27 21:56:12.501478
...,...,...,...,...,...,...,...,...
995,0.749333,493,310,2024-02-27 22:04:09.116765,0.813333,493,100,2024-02-27 22:02:20.668979
996,0.764333,41,29,2024-02-27 21:56:43.654452,0.780667,41,9,2024-02-27 21:56:42.900514
997,0.796333,597,371,2024-02-27 22:05:37.828033,0.743000,597,123,2024-02-27 22:03:32.518443
998,0.803333,870,549,2024-02-27 22:09:23.573978,0.828333,870,187,2024-02-27 22:06:52.235594


In [9]:
update_data.keys()

dict_keys(['momentum_mean', 'momentum_std', 'nesterov_mean', 'nesterov_std', 'n_epochs_mean', 'n_epochs_std', 'learning_rate_mean', 'learning_rate_std', 'n_runs_mean', 'n_runs_std', 'batch_size_mean', 'batch_size_std', 'weights', 'timestamp'])

In [10]:
server_hp_config=pd.DataFrame().from_dict(update_data).sort_values(by="timestamp",inplace=False,ignore_index=True)
server_hp_config

Unnamed: 0,momentum_mean,momentum_std,nesterov_mean,nesterov_std,n_epochs_mean,n_epochs_std,learning_rate_mean,learning_rate_std,n_runs_mean,n_runs_std,batch_size_mean,batch_size_std,weights,timestamp
0,0.500000,0.300000,0.500000,0.300000,10.000000,3.000000,-3.000000,1.000000,10.000000,3.000000,64.000000,20.000000,"[[[-0.26170825958251953, -0.19085796177387238,...",2024-02-27 21:56:07.482781
1,0.500000,0.300000,0.500000,0.300000,10.000000,3.000000,-3.000000,1.000000,10.000000,3.000000,64.000000,20.000000,"[[[-0.2573569267988205, -0.315193273127079, 0....",2024-02-27 21:56:08.736245
2,0.500000,0.300000,0.500000,0.300000,10.000000,3.000000,-3.000000,1.000000,10.000000,3.000000,64.000000,20.000000,"[[[-0.2565191288789113, -0.3032847096522649, 0...",2024-02-27 21:56:26.633971
3,0.500000,0.300000,0.500000,0.300000,10.000000,3.000000,-3.000000,1.000000,10.000000,3.000000,64.000000,20.000000,"[[[-0.5110945180058479, -0.28083062171936035, ...",2024-02-27 21:56:30.404953
4,0.545812,0.022363,0.390785,0.022363,7.810154,0.022363,-2.116207,0.022363,8.367321,0.022363,63.272611,0.022363,"[[[-0.5514616191387176, -0.2808268189430237, 0...",2024-02-27 21:56:39.856267
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,0.299227,0.022455,0.000000,0.022455,6.707657,0.022455,-1.351457,0.022455,3.408032,0.022455,46.301290,0.022455,"[[[0.29047320410609245, -1.0482680901885033, 1...",2024-02-27 22:10:35.346877
101,0.297780,0.022047,0.000000,0.022047,6.705309,0.022047,-1.330145,0.022047,3.406678,0.022047,46.104521,0.022047,"[[[0.27163584157824516, -1.0561298593878745, 1...",2024-02-27 22:10:40.549724
102,0.292486,0.025683,0.000000,0.025683,6.699219,0.025683,-1.315699,0.025683,3.403166,0.025683,46.586760,0.025683,"[[[0.26257255896925924, -1.0590273961424828, 1...",2024-02-27 22:10:51.466688
103,0.311376,0.026599,0.000000,0.026599,6.697641,0.026599,-1.353176,0.026599,3.912821,0.026599,48.420308,0.026599,"[[[0.2743171684443951, -1.028350804746151, 1.9...",2024-02-27 22:11:01.641741


In [11]:
max_len = max([len(val) for val in iter_data.values()])
for key, value in iter_data.items():
    iter_data[key] = value + [None] * (max_len-len(value))
iter_df = pd.DataFrame().from_dict(iter_data)
iter_df

Unnamed: 0,curr_score_0,iter_counter_0,DG_rotation_0,timestamp_0,curr_score_1,iter_counter_1,DG_rotation_1,timestamp_1
0,0.793667,857,540,2024-02-27 22:09:12.820982,0.725000,857,184,2024-02-27 22:06:39.554054
1,0.699333,809,502,2024-02-27 22:08:27.507439,0.575000,809,169,2024-02-27 22:06:03.158119
2,0.732333,898,567,2024-02-27 22:09:46.645081,0.827000,898,193,2024-02-27 22:07:12.861774
3,0.802333,122,76,2024-02-27 21:58:07.712297,0.768000,122,23,2024-02-27 21:57:44.598067
4,0.770667,7,5,2024-02-27 21:56:11.252935,0.778000,7,2,2024-02-27 21:56:12.501478
...,...,...,...,...,...,...,...,...
995,0.749333,493,310,2024-02-27 22:04:09.116765,0.813333,493,100,2024-02-27 22:02:20.668979
996,0.764333,41,29,2024-02-27 21:56:43.654452,0.780667,41,9,2024-02-27 21:56:42.900514
997,0.796333,597,371,2024-02-27 22:05:37.828033,0.743000,597,123,2024-02-27 22:03:32.518443
998,0.803333,870,549,2024-02-27 22:09:23.573978,0.828333,870,187,2024-02-27 22:06:52.235594


In [12]:
max_len = max([len(val) for val in training_data.values()])
for key, value in training_data.items():
    training_data[key] = value + [None] * (max_len-len(value))
training_df = pd.DataFrame().from_dict(training_data)
training_df

Unnamed: 0,momentum_mean_0,momentum_std_0,nesterov_mean_0,nesterov_std_0,n_epochs_mean_0,n_epochs_std_0,learning_rate_mean_0,learning_rate_std_0,n_runs_mean_0,n_runs_std_0,...,n_runs_1,n_epochs_1,weights_1,iter_count_1,pre_training_score_1,scores_1,best_score_1,best_hyper_params_1,best_model_weights_1,timestamp_1
0,0.375210,0.033521,0.088451,0.033521,6.812618,0.033521,-1.689800,0.033521,3.414379,0.033521,...,8.0,0.875,"[[[-0.5514616191387176, -0.2808268189430237, 0...",55.0,0.653333,"[0.805, 0.805, 0.805, 0.805, 0.805, 0.80333333...",0.805000,"{'learning_rate': 0.008231901598430411, 'batch...","[[[-0.7696338891983032, -0.26974985003471375, ...",2024-02-27 21:56:57.835545
1,0.493976,0.026734,0.099012,0.300000,6.495480,3.000000,-1.379751,0.026734,6.621610,0.026734,...,4.0,1.250,"[[[-0.9828324019908905, -0.42250716388225557, ...",514.0,0.690000,"[0.7866666666666666, 0.7816666666666666, 0.783...",0.786667,"{'learning_rate': 0.049212369129960264, 'batch...","[[[-0.5862321853637695, -0.423790842294693, 1....",2024-02-27 22:02:36.818232
2,0.435181,0.025719,0.197462,0.025719,6.390762,0.025719,-1.316835,0.025719,5.698710,0.025719,...,8.0,0.500,"[[[-0.590293838456273, -0.8625234007835388, 2....",640.0,0.490000,"[0.7916666666666666, 0.785, 0.78, 0.7783333333...",0.791667,"{'learning_rate': 0.04173706455356028, 'batch_...","[[[0.27608492970466614, -0.8696699142456055, 1...",2024-02-27 22:04:07.116119
3,0.335962,0.017067,0.000000,0.017067,6.703122,0.017067,-1.549103,0.017067,3.609262,0.017067,...,6.0,1.000,"[[[-1.4423826694488526, -0.31680739372968675, ...",359.0,0.486667,"[0.8033333333333333, 0.8, 0.7833333333333333, ...",0.803333,"{'learning_rate': 0.037528810830446215, 'batch...","[[[-1.4869590997695923, -0.3071986138820648, 1...",2024-02-27 22:00:42.831710
4,0.361463,0.061897,0.000000,0.061897,5.942188,3.000000,-1.627203,0.061897,4.221792,0.061897,...,4.0,1.250,"[[[0.24520641192793846, -1.1229375362396241, 1...",848.0,0.506667,"[0.7783333333333333, 0.78, 0.785, 0.78]",0.785000,"{'learning_rate': 0.03222442819517501, 'batch_...","[[[0.22738540172576904, -1.0721635818481445, 1...",2024-02-27 22:06:33.855470
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72,0.544881,0.027614,0.098309,0.300000,6.491964,0.027614,-1.416531,0.027614,6.823419,0.027614,...,,,,,,,,,,NaT
73,0.386660,0.300000,0.107504,0.062708,5.523758,3.000000,-1.567093,1.000000,4.107504,3.000000,...,,,,,,,,,,NaT
74,0.353389,0.020905,0.000000,0.020905,6.109015,3.000000,-1.527806,0.020905,3.819706,3.000000,...,,,,,,,,,,NaT
75,0.423571,0.025349,0.103348,0.025349,5.217239,0.025349,-1.345218,1.000000,4.596321,0.025349,...,,,,,,,,,,NaT
