In [7]:
import glob
import pickle
from pathlib import Path

import pandas as pd

In [8]:
dir="./../data"

In [9]:
experiments=glob.glob(dir+"/*")
experiments

['./../data/NO_META_LEARNING_DYNAMIC_CLIPPED_AVERAGE_AGGREGATION_2_NODES',
 './../data/NO_META_LEARNING_MEDIAN_AGGREGATION_4_NODES',
 './../data/NO_META_LEARNING_WEIGHTED_AVERAGE_RECENCY_2_NODES',
 './../data/NO_META_LEARNING_DYNAMIC_CLIPPED_AVERAGE_AGGREGATION_4_NODES',
 './../data/NO_META_LEARNING_MEDIAN_AGGREGATION_2_NODES',
 './../data/NO_META_LEARNING_WEIGHTED_AVERAGE_RECENCY_4_NODES',
 './../data/NO_META_LEARNING_WEIGHTED_AVERAGE_PERFORMANCE_SCORES_2_NODES',
 './../data/NO_META_LEARNING_AVERAGE_AGGREGATION_2_NODES',
 './../data/NO_META_LEARNING_WEIGHTED_AVERAGE_PUNISHING_UPDATES_2_NODES',
 './../data/NO_META_LEARNING_AVERAGE_AGGREGATION_4_NODES',
 './../data/NO_META_LEARNING_NO_FEDERATED_LEARNING_4_NODES',
 './../data/NO_META_LEARNING_FIXED_CLIPPED_AVERAGE_AGGREGATION_4_NODES',
 './../data/NO_META_LEARNING_FIXED_CLIPPED_AVERAGE_AGGREGATION_2_NODES',
 './../data/NO_META_LEARNING_NO_FEDERATED_LEARNING_2_NODES',
 './../data/NO_META_LEARNING_WEIGHTED_AVERAGE_PUNISHING_UPDATES_4_NODES

In [10]:
def read_parameter_server_data(folder_path: str):
    files = glob.glob(folder_path + "/parameter_server/*.pkl")
    init_data = None
    update_data = None
    for file in files:
        if "initialization" in file:
            init_data = pickle.load(open(file, "rb"))
        else:
            loaded_data = pickle.load(open(file, "rb"))
            if update_data is None:
                update_data = dict()
                for key, value in loaded_data.items():
                    if len(key)>30: # ingnore node specific data at this point
                        continue
                    update_data[key] = [value]
            else:
                for key, value in loaded_data.items():
                    if len(key)>30: # ingnore node specific data at this point
                        continue
                    update_data[key].append(value)
            # update_data.append(pickle.load(open(file, "rb")))
    return init_data, update_data

def read_node_data(folder_path:str):
    node_folders=[folder for folder in glob.glob(folder_path + "/*/") if not "parameter" in folder]
    iter_data = dict()
    training_data = dict()
    init_data = dict()
    for idx, folder in enumerate(node_folders):
        files = glob.glob(f"{folder}/*.pkl")
        for file in files:
            if "initialization" in file:
                init_data[f"node_{idx}"] = pickle.load(open(file, "rb"))
            elif "iter_" in file:
                loaded_data = pickle.load(open(file, "rb"))
                for key, value in loaded_data.items():
                    if f"{key}_{idx}" in iter_data:
                        iter_data[f"{key}_{idx}"].append(value)        
                    else:
                        iter_data[f"{key}_{idx}"]=[value]
            else:
                loaded_data = pickle.load(open(file, "rb"))
                for key, value in loaded_data.items():
                    if f"{key}_{idx}" in training_data:
                        training_data[f"{key}_{idx}"].append(value)        
                    else:
                        training_data[f"{key}_{idx}"]=[value]
    return init_data, iter_data, training_data

In [11]:
for experiment in experiments:
    experiment_name = experiment.split("/")[-1]
    Path(f"./../results/{experiment_name}/").mkdir(parents=True, exist_ok=True)
    server_init_data, update_data = read_parameter_server_data(experiment)
    if not "NO_FEDERATED_LEARNING" in experiment:
        server_hp_config = (
            pd.DataFrame()
            .from_dict(update_data)
            .sort_values(by="timestamp", inplace=False, ignore_index=True)
        )
        server_hp_config.to_pickle(f"./../results/{experiment_name}/server_hp_config.pkl")
    node_init_data, iter_data, training_data = read_node_data(
        experiment
    )  # node_init_data, iter_data, training_data
    max_len = max([len(val) for val in iter_data.values()])
    for key, value in iter_data.items():
        iter_data[key] = value + [None] * (max_len - len(value))
    node_idxs = sorted(set([val[-1] for val in iter_data.keys()]))
    iter_df = None
    for node_idx in node_idxs:
        rel_cols = [col for col in iter_data.keys() if node_idx in col]
        iter_df_fraction = (
            pd.DataFrame()
            .from_dict(iter_data)[rel_cols]
            .sort_values(by=f"timestamp_{node_idx}", inplace=False, ignore_index=True)
        )
        if iter_df is None:
            iter_df = iter_df_fraction
        else:
            iter_df=iter_df.merge(right=iter_df_fraction,
                left_on=f"iter_counter_0",
                right_on=f"iter_counter_{node_idx}",
                how="inner",
            )
    iter_df.to_pickle(f"./../results/{experiment_name}/iter_df.pkl")
    max_len = max([len(val) for val in training_data.values()])
    for key, value in training_data.items():
        training_data[key] = value + [None] * (max_len - len(value))
    training_df = pd.DataFrame().from_dict(
        training_data
    )  # .sort_values(by="timestamp",inplace=False,ignore_index=True)
    training_df.to_pickle(f"./../results/{experiment_name}/training_df.pkl")

In [30]:
iter_df

Unnamed: 0,curr_score_0,iter_counter_0,DG_rotation_0,timestamp_0,curr_score_1,iter_counter_1,DG_rotation_1,timestamp_1,curr_score_2,iter_counter_2,DG_rotation_2,timestamp_2,curr_score_3,iter_counter_3,DG_rotation_3,timestamp_3
0,,1,0,2024-02-19 19:28:23.908164,,1,0,2024-02-19 19:28:23.795988,,1,1,2024-02-19 19:28:23.167590,,1,1,2024-02-19 19:28:23.751663
1,0.786000,2,0,2024-02-19 19:28:29.114035,0.808000,2,0,2024-02-19 19:28:31.302050,0.809333,2,2,2024-02-19 19:28:32.934835,0.810333,2,1,2024-02-19 19:28:34.089434
2,0.788000,3,0,2024-02-19 19:28:29.781780,0.805667,3,0,2024-02-19 19:28:31.939781,0.806333,3,3,2024-02-19 19:28:33.563379,0.805667,3,1,2024-02-19 19:28:34.714444
3,0.780667,4,1,2024-02-19 19:28:30.426179,0.808333,4,0,2024-02-19 19:28:32.576398,0.792667,4,4,2024-02-19 19:28:34.210928,0.807667,4,2,2024-02-19 19:28:35.343020
4,0.778667,5,1,2024-02-19 19:28:31.066766,0.810333,5,0,2024-02-19 19:28:33.209050,0.775667,5,5,2024-02-19 19:28:34.851264,0.810333,5,2,2024-02-19 19:28:35.982566
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0.750667,996,420,2024-02-19 19:44:59.566584,0.764333,996,197,2024-02-19 19:42:25.096608,0.818000,996,590,2024-02-19 19:46:51.459008,0.755000,996,280,2024-02-19 19:43:56.213667
996,0.748000,997,420,2024-02-19 19:45:00.206109,0.764000,997,197,2024-02-19 19:42:25.734874,0.818333,997,590,2024-02-19 19:46:52.091703,0.756000,997,280,2024-02-19 19:43:56.853501
997,0.752000,998,420,2024-02-19 19:45:00.844641,0.754667,998,198,2024-02-19 19:42:26.388853,0.820667,998,590,2024-02-19 19:46:52.715677,0.735000,998,281,2024-02-19 19:43:57.499396
998,0.749333,999,420,2024-02-19 19:45:01.494840,0.752333,999,198,2024-02-19 19:42:27.033738,0.801333,999,591,2024-02-19 19:46:53.345936,0.711333,999,282,2024-02-19 19:43:58.132758


In [None]:
iter_data.keys()

dict_keys(['curr_score_0', 'iter_counter_0', 'DG_rotation_0', 'timestamp_0', 'curr_score_1', 'iter_counter_1', 'DG_rotation_1', 'timestamp_1'])

In [None]:
training_df.keys()

Index(['batch_size_mean_0', 'batch_size_std_0', 'learning_rate_mean_0',
       'learning_rate_std_0', 'nesterov_mean_0', 'nesterov_std_0',
       'momentum_mean_0', 'momentum_std_0', 'n_runs_mean_0', 'n_runs_std_0',
       'n_epochs_mean_0', 'n_epochs_std_0', 'n_runs_0', 'n_epochs_0',
       'weights_0', 'pre_training_score_0', 'scores_0', 'best_score_0',
       'best_hyper_params_0', 'best_model_weights_0', 'timestamp_0',
       'batch_size_mean_1', 'batch_size_std_1', 'learning_rate_mean_1',
       'learning_rate_std_1', 'nesterov_mean_1', 'nesterov_std_1',
       'momentum_mean_1', 'momentum_std_1', 'n_runs_mean_1', 'n_runs_std_1',
       'n_epochs_mean_1', 'n_epochs_std_1', 'n_runs_1', 'n_epochs_1',
       'weights_1', 'pre_training_score_1', 'scores_1', 'best_score_1',
       'best_hyper_params_1', 'best_model_weights_1', 'timestamp_1'],
      dtype='object')

In [None]:
pd.DataFrame().from_dict(iter_data)

Unnamed: 0,curr_score_0,iter_counter_0,DG_rotation_0,timestamp_0,curr_score_1,iter_counter_1,DG_rotation_1,timestamp_1
0,0.806000,857,280,2024-02-19 11:56:22.218931,0.796000,857,396,2024-02-19 11:56:43.074605
1,0.764667,809,267,2024-02-19 11:55:39.407542,0.792000,809,377,2024-02-19 11:55:58.617362
2,0.771000,898,288,2024-02-19 11:56:54.511453,0.785667,898,408,2024-02-19 11:57:16.433316
3,0.790333,122,40,2024-02-19 11:44:37.342404,0.796667,122,59,2024-02-19 11:44:51.864959
4,0.762000,7,3,2024-02-19 11:42:48.515523,0.802000,7,3,2024-02-19 11:42:52.352437
...,...,...,...,...,...,...,...,...
995,0.820000,493,160,2024-02-19 11:50:28.675758,0.760333,493,235,2024-02-19 11:51:00.800810
996,0.781333,41,12,2024-02-19 11:43:16.545210,0.801000,41,19,2024-02-19 11:43:25.842570
997,0.749000,597,191,2024-02-19 11:52:04.327286,0.769667,597,282,2024-02-19 11:52:41.167451
998,0.777000,870,282,2024-02-19 11:56:30.283094,0.796333,870,399,2024-02-19 11:56:51.134512


In [None]:
update_data.keys()

dict_keys(['batch_size_mean', 'batch_size_std', 'learning_rate_mean', 'learning_rate_std', 'nesterov_mean', 'nesterov_std', 'momentum_mean', 'momentum_std', 'n_runs_mean', 'n_runs_std', 'n_epochs_mean', 'n_epochs_std', 'weights', 'timestamp'])

In [None]:
server_hp_config=pd.DataFrame().from_dict(update_data).sort_values(by="timestamp",inplace=False,ignore_index=True)
server_hp_config

Unnamed: 0,batch_size_mean,batch_size_std,learning_rate_mean,learning_rate_std,nesterov_mean,nesterov_std,momentum_mean,momentum_std,n_runs_mean,n_runs_std,n_epochs_mean,n_epochs_std,weights,timestamp
0,64,20,-3,1,0.5,0.3,0.5,0.3,10,3,10,3,"[[[0.0011845575645565987, -0.7017977237701416,...",2024-02-19 11:42:44.795036
1,64,20,-3,1,0.5,0.3,0.5,0.3,10,3,10,3,"[[[-0.5834018117748201, -0.5702593624591827, -...",2024-02-19 11:42:48.647225
2,64,20,-3,1,0.5,0.3,0.5,0.3,10,3,10,3,"[[[-0.6387321238095561, -0.6355371077855428, -...",2024-02-19 11:43:04.145033
3,64,20,-3,1,0.5,0.3,0.5,0.3,10,3,10,3,"[[[-0.6783459845464677, -0.6505565047264099, -...",2024-02-19 11:43:05.530829
4,64,20,-3,1,0.5,0.3,0.5,0.3,10,3,10,3,"[[[-0.7145196726545692, -0.6817489743232727, -...",2024-02-19 11:43:22.719387
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,64,20,-3,1,0.5,0.3,0.5,0.3,10,3,10,3,"[[[-0.5530268054455518, 0.7981683403253556, -0...",2024-02-19 11:58:01.786922
99,64,20,-3,1,0.5,0.3,0.5,0.3,10,3,10,3,"[[[-0.6451109569519758, 0.8650975286960602, -0...",2024-02-19 11:58:17.200350
100,64,20,-3,1,0.5,0.3,0.5,0.3,10,3,10,3,"[[[-0.6155302632600069, 0.9270588397979737, -0...",2024-02-19 11:58:18.472948
101,64,20,-3,1,0.5,0.3,0.5,0.3,10,3,10,3,"[[[-0.5917328629642725, 0.9863682806491851, -0...",2024-02-19 11:58:31.036570


In [None]:
max_len = max([len(val) for val in iter_data.values()])
for key, value in iter_data.items():
    iter_data[key] = value + [None] * (max_len-len(value))
iter_df = pd.DataFrame().from_dict(iter_data)
iter_df

Unnamed: 0,curr_score_0,iter_counter_0,DG_rotation_0,timestamp_0,curr_score_1,iter_counter_1,DG_rotation_1,timestamp_1
0,0.806000,857,280,2024-02-19 11:56:22.218931,0.796000,857,396,2024-02-19 11:56:43.074605
1,0.764667,809,267,2024-02-19 11:55:39.407542,0.792000,809,377,2024-02-19 11:55:58.617362
2,0.771000,898,288,2024-02-19 11:56:54.511453,0.785667,898,408,2024-02-19 11:57:16.433316
3,0.790333,122,40,2024-02-19 11:44:37.342404,0.796667,122,59,2024-02-19 11:44:51.864959
4,0.762000,7,3,2024-02-19 11:42:48.515523,0.802000,7,3,2024-02-19 11:42:52.352437
...,...,...,...,...,...,...,...,...
995,0.820000,493,160,2024-02-19 11:50:28.675758,0.760333,493,235,2024-02-19 11:51:00.800810
996,0.781333,41,12,2024-02-19 11:43:16.545210,0.801000,41,19,2024-02-19 11:43:25.842570
997,0.749000,597,191,2024-02-19 11:52:04.327286,0.769667,597,282,2024-02-19 11:52:41.167451
998,0.777000,870,282,2024-02-19 11:56:30.283094,0.796333,870,399,2024-02-19 11:56:51.134512


In [None]:
max_len = max([len(val) for val in training_data.values()])
for key, value in training_data.items():
    training_data[key] = value + [None] * (max_len-len(value))
training_df = pd.DataFrame().from_dict(training_data)
training_df

Unnamed: 0,batch_size_mean_0,batch_size_std_0,learning_rate_mean_0,learning_rate_std_0,nesterov_mean_0,nesterov_std_0,momentum_mean_0,momentum_std_0,n_runs_mean_0,n_runs_std_0,...,n_epochs_std_1,n_runs_1,n_epochs_1,weights_1,pre_training_score_1,scores_1,best_score_1,best_hyper_params_1,best_model_weights_1,timestamp_1
0,64.0,20.0,-3.0,1.0,0.5,0.3,0.5,0.3,10.0,3.0,...,3,10,8,"[[[-0.4725919276475906, -0.801616233587265, -1...",0.498333,"[0.7983333333333333, 0.7933333333333333, 0.788...",0.798333,"{'learning_rate': 0.0016338410772480778, 'batc...","[[[-0.36300209164619446, -0.05381699651479721,...",2024-02-19 11:50:53.904075
1,64.0,20.0,-3.0,1.0,0.5,0.3,0.5,0.3,10.0,3.0,...,3,5,10,"[[[-0.5917328629642725, 0.9863682806491851, -0...",0.478333,"[0.8183333333333334, 0.8216666666666667, 0.816...",0.825,"{'learning_rate': 0.006194648801565914, 'batch...","[[[-0.7555824518203735, 1.2923808097839355, -0...",2024-02-19 11:58:38.761013
2,64.0,20.0,-3.0,1.0,0.5,0.3,0.5,0.3,10.0,3.0,...,3,9,10,"[[[-0.7683615379833749, -0.7343702571732658, -...",0.661667,"[0.8083333333333333, 0.8016666666666666, 0.808...",0.813333,"{'learning_rate': 3.859894854311302e-05, 'batc...","[[[-0.8070716857910156, -0.8328192234039307, -...",2024-02-19 11:43:59.554360
3,64.0,20.0,-3.0,1.0,0.5,0.3,0.5,0.3,10.0,3.0,...,3,5,10,"[[[-0.4508458971977234, -0.837892934679985, -1...",0.498333,"[0.7933333333333333, 0.7816666666666666, 0.783...",0.798333,"{'learning_rate': 0.0004538157558918391, 'batc...","[[[-0.46794775128364563, -0.33674827218055725,...",2024-02-19 11:50:33.209335
4,64.0,20.0,-3.0,1.0,0.5,0.3,0.5,0.3,10.0,3.0,...,3,7,10,"[[[-0.44055536985397337, -0.9130759924650192, ...",0.651667,"[0.79, 0.7933333333333333, 0.78, 0.78833333333...",0.806667,"{'learning_rate': 0.00010849479109098753, 'bat...","[[[-0.27567970752716064, -0.745586633682251, -...",2024-02-19 11:48:41.799224
5,64.0,20.0,-3.0,1.0,0.5,0.3,0.5,0.3,10.0,3.0,...,3,10,4,"[[[-0.4434230104088783, -0.4850049205124378, -...",0.505,"[0.8183333333333334, 0.8183333333333334, 0.823...",0.833333,"{'learning_rate': 0.0012652362536794512, 'batc...","[[[-0.5072449445724487, -0.13282284140586853, ...",2024-02-19 11:52:17.169894
6,64.0,20.0,-3.0,1.0,0.5,0.3,0.5,0.3,10.0,3.0,...,3,6,10,"[[[-0.4076225906610489, -0.8080056339502335, -...",0.503333,"[0.8116666666666666, 0.815, 0.7933333333333333...",0.815,"{'learning_rate': 0.001953796302579794, 'batch...","[[[-0.4712076783180237, -0.7314751744270325, -...",2024-02-19 11:50:00.874339
7,64.0,20.0,-3.0,1.0,0.5,0.3,0.5,0.3,10.0,3.0,...,3,10,9,"[[[-0.7323968495242298, -0.7604510545730591, -...",0.531667,"[0.8133333333333334, 0.8, 0.8033333333333333, ...",0.813333,"{'learning_rate': 0.002235193193670438, 'batch...","[[[-0.6041584014892578, -0.6093866229057312, -...",2024-02-19 11:44:33.184167
8,64.0,20.0,-3.0,1.0,0.5,0.3,0.5,0.3,10.0,3.0,...,3,10,9,"[[[-0.730083680152893, -0.7720148265361786, -1...",0.523333,"[0.815, 0.8, 0.785, 0.815, 0.78, 0.815, 0.8116...",0.816667,"{'learning_rate': 0.016819491418937314, 'batch...","[[[-0.595647394657135, -1.0963531732559204, -1...",2024-02-19 11:45:11.636202
9,64.0,20.0,-3.0,1.0,0.5,0.3,0.5,0.3,10.0,3.0,...,3,10,10,"[[[-0.5218090683221817, -1.0238750755786896, -...",0.445,"[0.8166666666666667, 0.8016666666666666, 0.805...",0.816667,"{'learning_rate': 0.024774319459408674, 'batch...","[[[-0.5581839680671692, -0.7086818814277649, -...",2024-02-19 11:46:51.882126


In [None]:
training_df["timestamp"]

KeyError: 'timestamp'

In [None]:
iter_data.keys()

dict_keys(['curr_score_0', 'iter_counter_0', 'DG_rotation_0', 'curr_score_1', 'iter_counter_1', 'DG_rotation_1', 'curr_score_2', 'iter_counter_2', 'DG_rotation_2', 'curr_score_3', 'iter_counter_3', 'DG_rotation_3', 'curr_score_4', 'iter_counter_4', 'DG_rotation_4', 'curr_score_5', 'iter_counter_5', 'DG_rotation_5', 'curr_score_6', 'iter_counter_6', 'DG_rotation_6', 'curr_score_7', 'iter_counter_7', 'DG_rotation_7'])