# Research FFD Experiment MinIO Object Parsing

In this notebook we show how to parse the metrics, resource and times objects generated during experiments by the central node and the workers. If you are interested in the data, please check the /data/minio path before running this notebook. The necessary packages are:
- NumPy
- Pandas
- Matplotlib
- MinIO

In [8]:
import io
import pickle
import requests
import json

import numpy as np
import pandas as pd

from minio import Minio
from collections import OrderedDict

## Client

In [2]:
# Module-level MinIO client used by the cells below (format_central_object
# reads this global directly instead of receiving it as an argument).
# NOTE(review): endpoint and credentials are hard-coded and secure=False
# disables TLS; this looks like a local development setup - do not reuse
# these values outside of it.
minio_client = Minio(
    endpoint = "127.0.0.1:9000", 
    access_key = '23034opsdjhksd', 
    secret_key = 'sdkl3slömdm',
    secure = False
)

## Functions

In [3]:
def create_bucket(
    minio_client: any,
    bucket_name: str
) -> bool:
    """Create ``bucket_name`` through the given MinIO client.

    Returns True when ``make_bucket`` completes. Any exception is printed
    and reported as False instead of being propagated.
    """
    try:
        minio_client.make_bucket(bucket_name = bucket_name)
    except Exception as error:
        print(error)
        return False
    return True
    
def check_bucket(
    minio_client: any,
    bucket_name:str
) -> bool:
    """Report whether ``bucket_name`` exists.

    Returns whatever ``bucket_exists`` returns. Any exception is printed
    and reported as False.
    """
    try:
        return minio_client.bucket_exists(bucket_name = bucket_name)
    except Exception as error:
        print(error)
        return False
       
def delete_bucket(
    minio_client: any,
    bucket_name:str
) -> bool:
    """Remove ``bucket_name`` through the given MinIO client.

    Returns True when ``remove_bucket`` completes. Any exception is printed
    and reported as False.
    """
    try:
        minio_client.remove_bucket(bucket_name = bucket_name)
    except Exception as error:
        print(error)
        return False
    return True
# Works
def create_object(
    minio_client: any,
    bucket_name: str,
    object_path: str,
    data: any,
    metadata: dict
) -> bool:
    """Pickle ``data`` and upload it as ``object_path + '.pkl'``.

    ``metadata`` is passed through to ``put_object`` unchanged. Returns True
    when the upload call completes; an exception raised by ``put_object`` is
    printed and reported as False. Pickling itself happens outside the
    try block, so a pickling error propagates to the caller.
    """
    # Original author's note: very large payloads (around 1GB) may be
    # rejected with a "large header" style error - not verified here.
    payload = pickle.dumps(data)
    stream = io.BytesIO(payload)  # positioned at offset 0, ready to be read
    try:
        minio_client.put_object(
            bucket_name = bucket_name,
            object_name = object_path + '.pkl',
            data = stream,
            length = len(payload),
            metadata = metadata
        )
    except Exception as error:
        print(error)
        return False
    return True
# Works
def check_object(
    minio_client: any,
    bucket_name: str,
    object_path: str
) -> bool:
    """Report whether ``object_path + '.pkl'`` can be stat'ed in the bucket.

    Any exception from ``stat_object`` is treated as "does not exist" and
    is not printed.
    """
    try:
        minio_client.stat_object(
            bucket_name = bucket_name,
            object_name = object_path + '.pkl'
        )
    except Exception:
        return False
    return True
# Works
def delete_object(
    minio_client: any,
    bucket_name: str,
    object_path: str
) -> bool:
    """Remove ``object_path + '.pkl'`` from the bucket.

    Returns True when ``remove_object`` completes. Any exception is printed
    and reported as False.
    """
    try:
        minio_client.remove_object(
            bucket_name = bucket_name,
            object_name = object_path + '.pkl'
        )
    except Exception as error:
        print(error)
        return False
    return True
# Works
def update_object(
    minio_client: any,
    bucket_name: str,
    object_path: str,
    data: any,
    metadata: dict
) -> bool:
    """Replace a stored object by deleting it and creating it again.

    Returns True only when both the delete and the create succeed. When
    the delete fails, nothing is created.
    """
    if not delete_object(minio_client, bucket_name, object_path):
        return False
    recreated = create_object(minio_client, bucket_name, object_path, data, metadata)
    return True if recreated else False
# works
def create_or_update_object(
    minio_client: any,
    bucket_name: str,
    object_path: str,
    data: any,
    metadata: dict
) -> any:
    """Store ``data`` under ``object_path``, creating the bucket when needed.

    Returns None when the bucket is missing and cannot be created;
    otherwise returns the result of ``create_object`` (object not yet
    stored) or ``update_object`` (object already stored).
    """
    # Only try to create the bucket when the existence check fails.
    if not check_bucket(minio_client, bucket_name):
        if not create_bucket(minio_client, bucket_name):
            return None

    if check_object(minio_client, bucket_name, object_path):
        store = update_object
    else:
        store = create_object
    return store(minio_client, bucket_name, object_path, data, metadata)

def get_object_data_and_metadata(
    minio_client: any,
    bucket_name: str,
    object_path: str
) -> dict:
    """Fetch and unpickle ``object_path + '.pkl'`` together with its user metadata.

    Returns ``{'data': <unpickled object>, 'metadata': <dict>}`` where the
    metadata dict holds only the ``x-amz-meta-`` entries with that prefix
    stripped from the key. The prefix is matched case-insensitively so the
    result does not depend on how the header names are cased.

    Returns None (after printing a short diagnostic and the exception) when
    the object cannot be fetched or its content cannot be decoded.

    NOTE: ``pickle.loads`` can execute arbitrary code; only use this on
    buckets whose content is trusted.
    """
    meta_prefix = 'x-amz-meta-'
    object_name = object_path + '.pkl'

    try:
        # The stat call is made before the download, as in the original
        # code (the author noted the client seemed to need time between
        # requests).
        given_object_info = minio_client.stat_object(
            bucket_name = bucket_name,
            object_name = object_name
        )
        given_metadata = given_object_info.metadata

        response = minio_client.get_object(
            bucket_name = bucket_name,
            object_name = object_name
        )
        try:
            given_pickled_data = response.data
        finally:
            # Give the connection back to the pool; otherwise every fetched
            # object keeps an HTTP connection checked out.
            response.close()
            response.release_conn()
    except Exception as e:
        print('MinIO object fetching error')
        print(e)
        return None

    try:
        given_data = pickle.loads(given_pickled_data)
        relevant_metadata = {
            key[len(meta_prefix):]: value
            for key, value in given_metadata.items()
            if key.lower().startswith(meta_prefix)
        }
    except Exception as e:
        print('MinIO object pickle decoding error')
        print(e)
        return None
    return {'data': given_data, 'metadata': relevant_metadata}
# Works
def get_object_list(
    minio_client: any,
    bucket_name: str,
    path_prefix: str
) -> dict:
    """List objects under ``path_prefix`` with their user metadata.

    Returns a dict mapping each object name (as listed, including any
    extension) to a dict of its ``x-amz-meta-`` entries with that prefix
    stripped. The prefix is matched case-insensitively. Every listed object
    costs one additional ``stat_object`` request.

    Returns None when listing or stat'ing fails; the exception is printed
    so the failure is visible to the caller.
    """
    meta_prefix = 'x-amz-meta-'
    try:
        listed_objects = minio_client.list_objects(
            bucket_name = bucket_name,
            prefix = path_prefix,
            recursive = True
        )
        object_dict = {}
        for obj in listed_objects:
            object_info = minio_client.stat_object(
                bucket_name = bucket_name,
                object_name = obj.object_name
            )
            object_dict[obj.object_name] = {
                key[len(meta_prefix):]: value
                for key, value in object_info.metadata.items()
                if key.lower().startswith(meta_prefix)
            }
        return object_dict
    except Exception as e:
        print('MinIO object listing error')
        print(e)
        return None
    
def format_metadata_dict(
    given_metadata: dict
) -> dict:
    """Normalise metadata read back from MinIO.

    Keys are lower-cased. String values that look numeric are converted:
    plain digit strings become ``int`` and digit strings with a single
    ``.`` become ``float`` (the original code called ``int`` on those and
    raised ValueError). Values that still cannot be parsed stay strings.
    Finally, ``list=...`` strings are expanded by
    ``decode_metadata_strings_to_lists``.
    """
    fixed_dict = {}
    for key, value in given_metadata.items():
        parsed = value
        if value.replace('.', '', 1).isdigit():
            try:
                parsed = int(value) if value.isdigit() else float(value)
            except ValueError:
                # e.g. unicode digit characters that isdigit() accepts but
                # int()/float() reject; keep the raw string.
                parsed = value
        fixed_dict[key.lower()] = parsed
    return decode_metadata_strings_to_lists(fixed_dict)
# Created and works
def encode_metadata_lists_to_strings(
    given_metadata: dict
) -> dict:
    """Return a copy of the metadata with list values flattened to strings.

    A list becomes ``'list=' + ','.join(str(item) ...)``; every other value
    is copied unchanged. Per the original author's note, this exists
    because MinIO metadata values cannot be lists.
    """
    return {
        key: ('list=' + ','.join(map(str, value))) if isinstance(value, list) else value
        for key, value in given_metadata.items()
    }
# Created and works
def decode_metadata_strings_to_lists(
    given_metadata: dict
) -> dict:
    """Return a copy of the metadata with ``'list=...'`` strings expanded.

    Inverse of ``encode_metadata_lists_to_strings``: a string that starts
    with ``list=`` is split on commas. The result is a list of ints when
    every item parses as an int, otherwise a list of strings; an empty
    payload gives ``[]``. All other values are copied unchanged.

    Only the leading ``list=`` marker is stripped, so items that themselves
    contain ``=`` are preserved.
    """
    list_marker = 'list='
    modified_dict = {}
    for key, value in given_metadata.items():
        if not (isinstance(value, str) and value.startswith(list_marker)):
            modified_dict[key] = value
            continue

        payload = value[len(list_marker):]
        if payload == '':
            modified_dict[key] = []
            continue

        items = payload.split(',')
        try:
            modified_dict[key] = [int(item) for item in items]
        except ValueError:
            modified_dict[key] = items
    return modified_dict

def get_experiments_objects(
    minio_client: any,
    object_bucket: str,
    object_path: str
) -> any:
    """Load an experiment object and its formatted metadata.

    Returns a ``(data, metadata)`` tuple. Both are None when the object
    does not exist, or when it exists but could not be fetched or decoded
    (``get_object_data_and_metadata`` returns None in that case, which the
    original code subscripted and crashed on).
    """
    object_exists = check_object(
        minio_client = minio_client,
        bucket_name = object_bucket,
        object_path = object_path
    )
    if not object_exists:
        return None, None

    fetched_object = get_object_data_and_metadata(
        minio_client = minio_client,
        bucket_name = object_bucket,
        object_path = object_path
    )
    if fetched_object is None:
        return None, None

    object_data = fetched_object['data']
    object_metadata = format_metadata_dict(fetched_object['metadata'])
    return object_data, object_metadata
# Created and works 
def set_experiments_objects(
    minio_client: any,
    object_bucket: str,
    object_path: str,
    overwrite: bool,
    object_data: any,
    object_metadata: any
):
    """Store an experiment object unless it exists and ``overwrite`` is False.

    List values in ``object_metadata`` are encoded to strings before the
    object is written. Nothing is returned; the status of the underlying
    write is discarded.
    """
    already_stored = check_object(
        minio_client = minio_client,
        bucket_name = object_bucket,
        object_path = object_path
    )
    # Guard clause: keep the stored object when overwriting is not allowed.
    if already_stored and not overwrite:
        return

    create_or_update_object(
        minio_client = minio_client,
        bucket_name = object_bucket,
        object_path = object_path,
        data = object_data,
        metadata = encode_metadata_lists_to_strings(object_metadata)
    )

## Central Parsing

In [43]:
def set_central_objects_and_paths(
    experiments_folder: str,
    experiment_name: str,
    experiment: str
) -> any:
    """Build the empty result containers and the storage path templates.

    Returns ``(central_objects, object_paths)``. Both dicts share the same
    keys in the same order. ``specifications`` and ``times`` start as empty
    dicts, every other entry as an empty DataFrame. Per-cycle paths carry
    the literal segment ``c`` as a placeholder for the cycle number.
    """
    time_tables = ('task', 'function', 'network', 'training', 'inference')
    resource_tables = ('system', 'server')

    central_objects = {'specifications': {}, 'times': {}}
    for table in time_tables + ('metrics',) + resource_tables:
        central_objects[table] = pd.DataFrame()

    experiment_root = experiments_folder + '/' + str(experiment_name) + '/' + str(experiment)
    cycle_root = experiment_root + '/c'

    object_paths = {
        'specifications': experiments_folder + '/specifications',
        'times': experiment_root + '/times'
    }
    for table in time_tables:
        object_paths[table] = cycle_root + '/times/' + table
    object_paths['metrics'] = cycle_root + '/metrics'
    for table in resource_tables:
        object_paths[table] = cycle_root + '/resources/' + table

    return central_objects, object_paths

def PyTorch_model_into_data_and_columns(
    parameters: any
) -> any:
    """Flatten a model parameter mapping into table rows and column names.

    ``parameters`` is iterated with ``.items()``; every value must offer
    ``.numpy()`` and ``.shape``. For each key containing ``'weight'`` one
    ``w-<i>`` column is added per entry of ``shape[1]`` and each weight row
    becomes a row of values. For each key containing ``'bias'`` a ``b-1``
    column is added and the i-th bias is appended to the i-th row.

    NOTE(review): bias values are always appended starting at row 0, so
    this presumably targets single-layer models - confirm before using it
    with several weight/bias pairs.

    Returns ``(values, columns)``.
    """
    columns = []
    values = []
    # The original looped over an undefined name `data`; the mapping to
    # read is the `parameters` argument.
    for key, value in parameters.items():
        numpy_format = value.numpy().tolist()
        shape = value.shape

        if 'weight' in key:
            columns.extend('w-' + str(index) for index in range(1, shape[1] + 1))
            values.extend(numpy_format)

        if 'bias' in key:
            columns.append('b-1')
            for row_index, bias in enumerate(numpy_format):
                values[row_index].append(bias)
    return values, columns

def format_central_object(
    collected_objects: any,
    bucket: str,
    path: str,
    key: str,
    cycle: str
) -> bool:
    """Load one stored object and merge it into ``collected_objects[key]``.

    The object is read with ``get_experiments_objects`` using the
    module-level ``minio_client`` (it is not a parameter of this function).
    How it is merged depends on what was loaded and on the container that
    is already stored under ``key``:

    * loaded data is an ``OrderedDict``: it is treated as model parameters,
      converted to a DataFrame and appended; a ``name`` column (last path
      segment) is added when ``key`` occurs in the second-to-last segment;
    * container is a dict: the data replaces it (``cycle`` is None) or is
      stored under ``cycle``;
    * container is a DataFrame: rows are built with the ``header`` metadata
      as columns when present, otherwise with ``DataFrame.from_dict``
      (``orient='index'``), and appended. Nothing is appended when the
      object is missing.

    A ``cycle`` column is only added when ``cycle`` is not None; the
    original called ``int(None)`` in two branches and raised TypeError.

    Returns True when one of the cases above applied, False otherwise.
    """
    formatted_data = collected_objects[key]
    data, metadata = get_experiments_objects(
        minio_client = minio_client,
        object_bucket = bucket,
        object_path = path
    )
    path_split = path.split('/')

    if isinstance(data, OrderedDict):
        model_values, model_columns = PyTorch_model_into_data_and_columns(
            parameters = data
        )
        model_df = pd.DataFrame(model_values, columns = model_columns)
        if key in path_split[-2]:
            model_df['name'] = path_split[-1]
        if cycle is not None:
            model_df['cycle'] = int(cycle)
        result = pd.concat([formatted_data, model_df])
        collected_objects[key] = result.reset_index(drop=True)
        return True

    if isinstance(formatted_data, dict):
        if cycle is None:
            collected_objects[key] = data
        else:
            collected_objects[key][cycle] = data
        return True

    if isinstance(formatted_data, pd.DataFrame):
        if metadata is not None and 'header' in metadata:
            object_df = pd.DataFrame(data, columns = metadata['header'])
            if cycle is None:
                # Index is intentionally left as-is here, as in the original.
                collected_objects[key] = pd.concat([formatted_data, object_df])
            else:
                object_df['name'] = path_split[-1]
                object_df['cycle'] = int(cycle)
                result = pd.concat([formatted_data, object_df])
                collected_objects[key] = result.reset_index(drop=True)
            return True

        if data is not None:
            object_df = pd.DataFrame.from_dict(data, orient='index')
            if cycle is not None:
                object_df['cycle'] = int(cycle)
            result = pd.concat([formatted_data, object_df])
            collected_objects[key] = result.reset_index(drop=True)
        return True

    return False
        
def format_central_experiment_objects(
    minio_client: any,
    experiments_folder: str,
    experiment_name: str,
    experiment: str,
    cycles: int
):
    """Collect every central object of one experiment from the ``central`` bucket.

    Paths come from ``set_central_objects_and_paths``. A path with more
    than four segments whose fourth segment is the placeholder ``c`` is
    expanded once per cycle, for cycles ``1 .. cycles + 1`` inclusive; all
    other paths are loaded once with ``cycle=None``.

    NOTE(review): the fixed index 3 for the placeholder assumes
    ``experiments_folder`` is a single path segment - confirm if nested
    folders are ever used.

    A last path segment containing ``|`` is treated as a folder: every
    ``.pkl`` object listed under the part before ``|`` is loaded. A failed
    listing (``get_object_list`` returns None) skips that cycle instead of
    raising, and only the ``.pkl`` suffix is removed from listed names, so
    dots elsewhere in a path are preserved.

    ``minio_client`` is used for listing only; ``format_central_object``
    reads the module-level client itself.

    Returns the filled ``collected_objects`` dict.
    """
    target_bucket = 'central'
    pickle_suffix = '.pkl'
    collected_objects, storage_paths = set_central_objects_and_paths(
        experiments_folder = experiments_folder,
        experiment_name = experiment_name,
        experiment = experiment
    )

    max_cycles = cycles + 1
    for name in list(collected_objects):
        whole_path = storage_paths[name]
        path_split = whole_path.split('/')
        used_key = path_split[-1]

        is_cycle_path = 4 < len(path_split) and path_split[3] == 'c'
        if not is_cycle_path:
            format_central_object(
                collected_objects = collected_objects,
                bucket = target_bucket,
                path = whole_path,
                key = used_key,
                cycle = None
            )
            continue

        for cycle in range(1, max_cycles + 1):
            path_split[3] = str(cycle)
            cycle_path = '/'.join(path_split)

            if '|' not in path_split[-1]:
                format_central_object(
                    collected_objects = collected_objects,
                    bucket = target_bucket,
                    path = cycle_path,
                    key = used_key,
                    cycle = str(cycle)
                )
                continue

            folder_path = cycle_path.split('|')[0]
            folder_objects = get_object_list(
                minio_client = minio_client,
                bucket_name = target_bucket,
                path_prefix = folder_path
            )
            if folder_objects is None:
                # Listing failed; there is nothing to load for this cycle.
                continue

            formatted_key = used_key.split('|')[0]
            for object_name in folder_objects:
                # The object helpers always append '.pkl', so names without
                # that suffix cannot be loaded through them.
                if not object_name.endswith(pickle_suffix):
                    continue
                format_central_object(
                    collected_objects = collected_objects,
                    bucket = target_bucket,
                    path = object_name[:-len(pickle_suffix)],
                    key = formatted_key,
                    cycle = str(cycle)
                )
    return collected_objects

## Central Experiment 1

In [44]:
# Collect all central objects stored under
# experiments/ffd-experiment-1/2 in the 'central' bucket.
# With cycles = 5 the per-cycle paths are read for cycles 1 through 6,
# because the function iterates up to cycles + 1.
formatted_objects = format_central_experiment_objects(
    minio_client = minio_client,
    experiments_folder = 'experiments',
    experiment_name = 'ffd-experiment-1',
    experiment = '2',
    cycles = 5
)

In [15]:
formatted_objects['specifications']

{'activation-date': '2024-04-15-12:26:58.442775',
 'host-kernel-version': '6.5.0-27-generic',
 'host-system-name': 'Linux',
 'host-node-name': '37d0384a5b3b',
 'host-machine': 'x86_64',
 'physical-cpu-amount': 4,
 'total-cpu-amount': 8,
 'min-cpu-frequency-mhz': 400.0,
 'max-cpu-frequency-mhz': 3400.0,
 'total-ram-amount-bytes': 16650469376,
 'available-ram-amount-bytes': 3984777216,
 'total-disk-amount-bytes': 370490241024,
 'available-disk-amount-bytes': 96718618624}

In [23]:
with open('../data/minio/experiment_1/central/specifications.json', 'w') as f:
    json.dump(formatted_objects['specifications'],f, indent=4)

In [16]:
formatted_objects['times']

{'experiment-date': '2024-04-15-14:06:21.844794',
 'experiment-time-start': 1713189981.844839,
 'experiment-time-end': 1713191611.9666667,
 'experiment-total-seconds': 1630.1218276023865,
 '1': {'cycle-time-start': 1713190000.6045034,
  'cycle-time-end': 1713190355.2821698,
  'cycle-total-seconds': 354.67766642570496},
 '2': {'cycle-time-start': 1713190355.2821767,
  'cycle-time-end': 1713190655.056086,
  'cycle-total-seconds': 299.77390933036804},
 '3': {'cycle-time-start': 1713190655.056091,
  'cycle-time-end': 1713190954.9803233,
  'cycle-total-seconds': 299.9242322444916},
 '4': {'cycle-time-start': 1713190954.9803278,
  'cycle-time-end': 1713191306.0382032,
  'cycle-total-seconds': 351.05787539482117},
 '5': {'cycle-time-start': 1713191306.0382087,
  'cycle-time-end': 1713191604.9112403,
  'cycle-total-seconds': 298.87303161621094}}

In [24]:
with open('../data/minio/experiment_1/central/times.json', 'w') as f:
    json.dump(formatted_objects['times'],f, indent=4)

In [17]:
formatted_objects['task']

Unnamed: 0,name,action-time-start,action-time-end,action-total-seconds,cycle
0,check-workers,1.713190e+09,1.713190e+09,0.03677,1
1,server-monitoring,1.713190e+09,1.713190e+09,0.39376,1
2,check-workers,1.713190e+09,1.713190e+09,0.11157,1
3,server-monitoring,1.713190e+09,1.713190e+09,0.54921,1
4,system-monitoring,1.713190e+09,1.713190e+09,0.58195,1
...,...,...,...,...,...
1317,check-workers,1.713194e+09,1.713194e+09,0.09315,6
1318,server-monitoring,1.713194e+09,1.713194e+09,0.22480,6
1319,check-workers,1.713194e+09,1.713194e+09,0.11400,6
1320,check-workers,1.713194e+09,1.713194e+09,0.10118,6


In [20]:
formatted_objects['task'].groupby(['name','cycle'])['action-total-seconds'].mean()

name                  cycle
aggregation-pipeline  1         1.951418
                      2         1.180195
                      3         1.333520
                      4         1.322665
                      5         1.285965
                      6         0.552683
check-workers         1         0.266580
                      2         0.263307
                      3         0.206628
                      4         0.177215
                      5         0.197442
                      6         0.140348
data-pipeline         1         1.853719
                      2         0.451693
                      3         0.728547
                      4         0.479218
                      5         0.524537
                      6         0.332079
model-pipeline        1        15.347420
                      2         0.167510
                      3         0.165030
                      4         0.174783
                      5         0.249933
                      6      

In [21]:
formatted_objects['task'].groupby(['name'])['action-total-seconds'].mean()

name
aggregation-pipeline    1.135968
check-workers           0.196276
data-pipeline           0.653544
model-pipeline          2.342926
server-monitoring       0.477170
system-monitoring       0.684200
update-pipeline         3.318979
Name: action-total-seconds, dtype: float64

In [19]:
formatted_objects['task'].to_csv('../data/minio/experiment_1/central/task.csv', index = False)

In [25]:
formatted_objects['function']

Unnamed: 0,name,action-time-start,action-time-end,action-total-seconds,cycle
0,store-worker-a53e6dc2-a4dd-4514-b9f8-0f337fbc6816,1.713190e+09,1.713190e+09,0.06407,1
1,store-worker-d23a4790-2964-4b15-99ec-eee56d5c60bd,1.713190e+09,1.713190e+09,0.10915,1
2,start-pipeline,1.713190e+09,1.713190e+09,1.41430,1
3,store-worker-af5b7dae-cb38-4c82-994d-4e8b8c198a27,1.713190e+09,1.713190e+09,0.25267,1
4,store-worker-0f6c6657-b591-4523-935d-1abd09835f5e,1.713190e+09,1.713190e+09,0.29617,1
...,...,...,...,...,...
906,store-worker-a53e6dc2-a4dd-4514-b9f8-0f337fbc6816,1.713194e+09,1.713194e+09,0.19893,6
907,store-worker-af5b7dae-cb38-4c82-994d-4e8b8c198a27,1.713194e+09,1.713194e+09,0.12218,6
908,store-worker-0f6c6657-b591-4523-935d-1abd09835f5e,1.713194e+09,1.713194e+09,0.25394,6
909,store-worker-a53e6dc2-a4dd-4514-b9f8-0f337fbc6816,1.713194e+09,1.713194e+09,0.23103,6


In [26]:
formatted_objects['function'].groupby(['name','cycle'])['action-total-seconds'].mean()

name                                               cycle
central-worker-data-split                          1         8.865320
evaluate-global-model                              1         3.779780
                                                   2         3.479230
                                                   3         3.464730
                                                   4         4.502110
                                                   5         3.399060
initial-model-training                             1        75.654920
preprocess-into-train-test-and-evalute-tensors     1         3.706740
send-context-to-workers                            2        17.545030
                                                   3        16.554680
                                                   4        20.666700
                                                   5        21.112900
                                                   6         1.094900
split-data-between-workers       

In [27]:
formatted_objects['function'].groupby(['name'])['action-total-seconds'].mean()

name
central-worker-data-split                             8.865320
evaluate-global-model                                 3.724982
initial-model-training                               75.654920
preprocess-into-train-test-and-evalute-tensors        3.706740
send-context-to-workers                              15.394842
split-data-between-workers                            8.176509
start-pipeline                                        1.414300
store-worker-0f6c6657-b591-4523-935d-1abd09835f5e     0.359296
store-worker-a53e6dc2-a4dd-4514-b9f8-0f337fbc6816     0.227455
store-worker-af5b7dae-cb38-4c82-994d-4e8b8c198a27     0.331105
store-worker-d23a4790-2964-4b15-99ec-eee56d5c60bd     0.361951
update-from-worker-<built-in function id>             0.852176
update-global-model                                   0.786930
Name: action-total-seconds, dtype: float64

In [28]:
formatted_objects['function'].to_csv('../data/minio/experiment_1/central/function.csv', index = False)

In [29]:
formatted_objects['network']

Unnamed: 0,name,status-code,payload-size-bytes,processing-time-seconds,elapsed-time-seconds,action-time-start,action-time-end,action-total-seconds,cycle
0,sending-context-to-worker-0f6c6657-b591-4523-9...,200,11748116,5.05063,2.437452,1713190000.0,1713190000.0,5.63221,1
1,sending-context-to-worker-af5b7dae-cb38-4c82-9...,200,11748209,4.271789,2.544996,1713190000.0,1713190000.0,9.93553,1
2,sending-context-to-worker-a53e6dc2-a4dd-4514-b...,200,11748875,4.739095,2.496518,1713190000.0,1713190000.0,15.16549,1
3,sending-context-to-worker-d23a4790-2964-4b15-9...,200,11748556,4.029041,2.295377,1713190000.0,1713190000.0,19.326,1
4,sending-context-to-worker-a53e6dc2-a4dd-4514-b...,200,11748996,4.662378,2.346255,1713190000.0,1713190000.0,5.35222,2
5,sending-context-to-worker-d23a4790-2964-4b15-9...,200,11748651,4.613393,2.318626,1713190000.0,1713190000.0,9.99441,2
6,sending-context-to-worker-0f6c6657-b591-4523-9...,200,11749519,3.625725,1.739627,1713190000.0,1713190000.0,13.83835,2
7,sending-context-to-worker-af5b7dae-cb38-4c82-9...,200,11748536,3.596836,1.801796,1713190000.0,1713190000.0,17.48147,2
8,sending-context-to-worker-a53e6dc2-a4dd-4514-b...,200,11748243,3.576961,1.955725,1713191000.0,1713191000.0,4.09109,3
9,sending-context-to-worker-d23a4790-2964-4b15-9...,200,11747185,3.82827,1.71091,1713191000.0,1713191000.0,7.9572,3


In [30]:
formatted_objects['network'].groupby(['name'])['action-total-seconds'].mean()

name
sending-context-to-worker-0f6c6657-b591-4523-935d-1abd09835f5e    12.775618
sending-context-to-worker-a53e6dc2-a4dd-4514-b9f8-0f337fbc6816     6.272382
sending-context-to-worker-af5b7dae-cb38-4c82-994d-4e8b8c198a27    12.823630
sending-context-to-worker-d23a4790-2964-4b15-99ec-eee56d5c60bd    13.895892
Name: action-total-seconds, dtype: float64

In [31]:
formatted_objects['network'].groupby(['name'])['processing-time-seconds'].mean()

name
sending-context-to-worker-0f6c6657-b591-4523-935d-1abd09835f5e    4.456167
sending-context-to-worker-a53e6dc2-a4dd-4514-b9f8-0f337fbc6816    3.905354
sending-context-to-worker-af5b7dae-cb38-4c82-994d-4e8b8c198a27    3.731573
sending-context-to-worker-d23a4790-2964-4b15-99ec-eee56d5c60bd    4.373741
Name: processing-time-seconds, dtype: float64

In [32]:
formatted_objects['network'].groupby(['name'])['elapsed-time-seconds'].mean()

name
sending-context-to-worker-0f6c6657-b591-4523-935d-1abd09835f5e    2.304123
sending-context-to-worker-a53e6dc2-a4dd-4514-b9f8-0f337fbc6816    1.972518
sending-context-to-worker-af5b7dae-cb38-4c82-994d-4e8b8c198a27    1.939231
sending-context-to-worker-d23a4790-2964-4b15-99ec-eee56d5c60bd    2.240845
Name: elapsed-time-seconds, dtype: float64

In [33]:
formatted_objects['network'].to_csv('../data/minio/experiment_1/central/network.csv', index = False)

In [34]:
formatted_objects['training']

Unnamed: 0,name,epochs,batches,average-batch-size,action-time-start,action-time-end,action-total-seconds,cycle
0,logistic-regression-training,10,5,360000.0,1713190000.0,1713190000.0,62.51575,1
1,logistic-regression-testing,0,782,63.938619,1713190000.0,1713190000.0,2.05058,1
2,logistic-regression-evaluation,0,782,63.938619,1713190000.0,1713190000.0,3.26171,1
3,logistic-regression-evaluation,0,782,63.938619,1713190000.0,1713190000.0,2.53685,1
4,logistic-regression-evaluation,0,782,63.938619,1713191000.0,1713191000.0,2.40603,2
5,logistic-regression-evaluation,0,782,63.938619,1713191000.0,1713191000.0,2.33583,3
6,logistic-regression-evaluation,0,782,63.938619,1713191000.0,1713191000.0,3.09273,4
7,logistic-regression-evaluation,0,782,63.938619,1713192000.0,1713192000.0,2.3886,5


In [35]:
formatted_objects['training'].groupby(['name'])['action-total-seconds'].mean()

name
logistic-regression-evaluation     2.670292
logistic-regression-testing        2.050580
logistic-regression-training      62.515750
Name: action-total-seconds, dtype: float64

In [36]:
formatted_objects['training'].to_csv('../data/minio/experiment_1/central/training.csv', index = False)

In [37]:
formatted_objects['metrics']

Unnamed: 0,name,true-positives,false-positives,true-negatives,false-negatives,recall,selectivity,precision,miss-rate,fall-out,balanced-accuracy,accuracy,train-amount,test-amount,eval-amount,cycle
0,logistic-regression-testing,1908,542,39342,8208,0.18861,0.98641,0.77878,0.81139,0.01359,0.58751,0.825,180000,50000,0,1
1,logistic-regression-evaluation,1908,542,39342,8208,0.18861,0.98641,0.77878,0.81139,0.01359,0.58751,0.825,180000,0,50000,1
2,logistic-regression-evaluation,2329,861,39023,7787,0.23023,0.97841,0.73009,0.76977,0.02159,0.60432,0.82704,720000,0,50000,1
3,logistic-regression-evaluation,2414,1194,38690,7702,0.23863,0.97006,0.66907,0.76137,0.02994,0.60435,0.82208,720000,0,50000,2
4,logistic-regression-evaluation,2491,1467,38417,7625,0.24624,0.96322,0.62936,0.75376,0.03678,0.60473,0.81816,720000,0,50000,3
5,logistic-regression-evaluation,2477,1705,38179,7639,0.24486,0.95725,0.5923,0.75514,0.04275,0.60106,0.81312,720000,0,50000,4
6,logistic-regression-evaluation,2566,1892,37992,7550,0.25366,0.95256,0.57559,0.74634,0.04744,0.60311,0.81116,720000,0,50000,5


In [38]:
formatted_objects['metrics'].to_csv('../data/minio/experiment_1/central/metrics.csv', index = False)

In [39]:
formatted_objects['system']

Unnamed: 0,name,date,time,cpu-percent,ram-percent,ram-total-bytes,ram-free-bytes,ram-used-bytes,disk-total-bytes,disk-free-bytes,disk-used-bytes,network-sent-bytes,network-received-bytes,network-packets-sent,network-packets-received,network-packets-sending-errors,network-packets-reciving-errors,network-packets-outgoing-dropped,network-packets-incoming-dropped,cycle
0,system,2024-04-15-14:06:30.602990,1.713190e+09,48.0,62.7,16650469376,2561605632,9162817536,370490241024,96270147584,255325175808,21095016,29534835,4776,4895,0,0,0,0,1
1,system,2024-04-15-14:06:40.604878,1.713190e+09,32.5,62.5,16650469376,2589261824,9133789184,370490241024,96269828096,255325495296,21237730,29663996,5015,5132,0,0,0,0,1
2,system,2024-04-15-14:06:50.790951,1.713190e+09,30.4,63.6,16650469376,2386833408,9320415232,370490241024,96253562880,255341760512,37371208,54722395,6197,6224,0,0,0,0,1
3,system,2024-04-15-14:07:00.603911,1.713190e+09,34.2,63.9,16650469376,2325241856,9390174208,370490241024,96246177792,255349145600,44635189,54956188,6732,6617,0,0,0,0,1
4,system,2024-04-15-14:07:10.603412,1.713190e+09,40.3,63.6,16650469376,2374590464,9340051456,370490241024,96245837824,255349485568,44806396,55113900,6980,6856,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
230,system,2024-04-15-15:13:06.357354,1.713194e+09,6.2,70.6,16650469376,1170808832,10161635328,370490241024,95846146048,255749177344,12000222,11872770,5537,5602,0,0,0,0,6
231,system,2024-04-15-15:13:16.354433,1.713194e+09,5.1,70.5,16650469376,1186160640,10159185920,370490241024,95844347904,255750975488,12837288,12702571,5872,5937,0,0,0,0,6
232,system,2024-04-15-15:13:26.354465,1.713194e+09,32.5,70.6,16650469376,1170624512,10163740672,370490241024,95842533376,255752790016,13565607,13422161,6169,6234,0,0,0,0,6
233,system,2024-04-15-15:13:36.354990,1.713194e+09,21.0,70.7,16650469376,1153392640,10170904576,370490241024,95840550912,255754772480,14424206,14273885,6509,6573,0,0,0,0,6


In [40]:
formatted_objects['system'].to_csv('../data/minio/experiment_1/central/system.csv', index = False)

In [41]:
formatted_objects['server']

Unnamed: 0,name,date,time,cpu-percent,ram-resident-set-size-bytes,ram-virtual-memory-size-bytes,ram-shared-memory-bytes,ram-code-segment-size-bytes,ram-data-segment-size-bytes,ram-library-size-bytes,ram-dirty-pages-bytes,ram-unique-set-size-bytes,disk-read-bytes,disk-write-bytes,disk-read-characters-bytes,disk-write-characters-bytes,cycle
0,server,2024-04-15-14:06:25.723474,1.713190e+09,29.8,597893120,2834661376,127664128,4096,1238863872,0,0,586170368,28508160,32768,55304560,19962,1
1,server,2024-04-15-14:06:30.727832,1.713190e+09,59.0,350715904,2640142336,127664128,4096,1044348928,0,0,338944000,28508160,36864,55326230,20560,1
2,server,2024-04-15-14:06:35.718348,1.713190e+09,39.9,350715904,2640109568,127664128,4096,1044320256,0,0,338939904,28508160,36864,55328019,20697,1
3,server,2024-04-15-14:06:40.731871,1.713190e+09,57.0,350846976,2640158720,127664128,4096,1044369408,0,0,338997248,28508160,40960,55345925,21052,1
4,server,2024-04-15-14:06:52.234281,1.713190e+09,578.0,530112512,2776064000,129236992,4096,1246453760,0,0,514224128,28532736,45056,55363835,21983,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
461,server,2024-04-15-15:13:31.477197,1.713194e+09,29.4,348839936,2437885952,126746624,4096,1027674112,0,0,336596992,64364544,53248,55430755,22496,6
462,server,2024-04-15-15:13:36.480191,1.713194e+09,49.6,348839936,2437885952,126746624,4096,1027682304,0,0,336605184,64364544,53248,55448670,22936,6
463,server,2024-04-15-15:13:41.476445,1.713194e+09,19.4,348839936,2429476864,126746624,4096,1019277312,0,0,336580608,64364544,53248,55450438,23004,6
464,server,2024-04-15-15:13:46.478526,1.713194e+09,39.6,348839936,2429476864,126746624,4096,1019277312,0,0,336580608,64364544,53248,55468353,23279,6


In [42]:
# Export the 'server' table to CSV; index = False leaves the row index out of the file.
formatted_objects['server'].to_csv('../data/minio/experiment_1/central/server.csv', index = False)

## Central experiment 2

In [45]:
# Rebind formatted_objects to the central objects of 'ffd-experiment-2'
# (experiment '3', cycles = 5); this replaces the previous experiment's result.
formatted_objects = format_central_experiment_objects(
    minio_client = minio_client,
    experiments_folder = 'experiments',
    experiment_name = 'ffd-experiment-2',
    experiment = '3',
    cycles = 5
)

In [47]:
# Persist the experiment specifications as indented JSON.
with open('../data/minio/experiment_2/central/specifications.json', 'w') as json_file:
    specifications_payload = formatted_objects['specifications']
    json.dump(specifications_payload, json_file, indent=4)

In [49]:
# Inspect the timing record: experiment-level timestamps plus one entry per cycle number.
formatted_objects['times']

{'experiment-date': '2024-04-15-15:13:58.379929',
 'experiment-time-start': 1713194038.3799868,
 'experiment-time-end': 1713196247.578954,
 'experiment-total-seconds': 2209.198967218399,
 '1': {'cycle-time-start': 1713194046.3542984,
  'cycle-time-end': 1713194562.9667897,
  'cycle-total-seconds': 516.6124913692474},
 '2': {'cycle-time-start': 1713194562.966797,
  'cycle-time-end': 1713194965.837049,
  'cycle-total-seconds': 402.8702518939972},
 '3': {'cycle-time-start': 1713194965.837056,
  'cycle-time-end': 1713195362.6009166,
  'cycle-total-seconds': 396.76386070251465},
 '4': {'cycle-time-start': 1713195362.600921,
  'cycle-time-end': 1713195813.9810982,
  'cycle-total-seconds': 451.3801772594452},
 '5': {'cycle-time-start': 1713195813.981102,
  'cycle-time-end': 1713196213.8650253,
  'cycle-total-seconds': 399.88392329216003}}

In [48]:
# Persist the experiment/cycle timing record as indented JSON.
with open('../data/minio/experiment_2/central/times.json', 'w') as json_file:
    times_payload = formatted_objects['times']
    json.dump(times_payload, json_file, indent=4)

In [54]:
# Tables of formatted_objects that are exported as one CSV file each.
metrics_keys = ['task', 'function', 'network', 'training', 'metrics', 'system', 'server']

# 'replace' is a placeholder that is swapped for the table name on every iteration.
csv_template = '../data/minio/experiment_2/central/replace.csv'
for table_name in metrics_keys:
    target_csv = csv_template.replace('replace', table_name)
    formatted_objects[table_name].to_csv(target_csv, index=False)

## Central experiment 3

In [55]:
# Rebind formatted_objects to the central objects of 'ffd-experiment-3'
# (experiment '4', cycles = 5); this replaces the previous experiment's result.
formatted_objects = format_central_experiment_objects(
    minio_client = minio_client,
    experiments_folder = 'experiments',
    experiment_name = 'ffd-experiment-3',
    experiment = '4',
    cycles = 5
)

In [56]:
# Persist the experiment specifications as indented JSON.
with open('../data/minio/experiment_3/central/specifications.json', 'w') as json_file:
    specifications_payload = formatted_objects['specifications']
    json.dump(specifications_payload, json_file, indent=4)

In [57]:
# Inspect the timing record: experiment-level timestamps plus one entry per cycle number.
formatted_objects['times']

{'experiment-date': '2024-04-15-16:23:09.288677',
 'experiment-time-start': 1713198189.288723,
 'experiment-time-end': 1713199528.6423333,
 'experiment-total-seconds': 1339.353610277176,
 '1': {'cycle-time-start': 1713198207.5864313,
  'cycle-time-end': 1713198572.9048412,
  'cycle-total-seconds': 365.31840991973877},
 '2': {'cycle-time-start': 1713198572.9048464,
  'cycle-time-end': 1713198771.7802806,
  'cycle-total-seconds': 198.87543416023254},
 '3': {'cycle-time-start': 1713198771.7802846,
  'cycle-time-end': 1713199021.5684373,
  'cycle-total-seconds': 249.78815269470215},
 '4': {'cycle-time-start': 1713199021.5684412,
  'cycle-time-end': 1713199321.8666508,
  'cycle-total-seconds': 300.2982096672058},
 '5': {'cycle-time-start': 1713199321.8666542,
  'cycle-time-end': 1713199522.2935383,
  'cycle-total-seconds': 200.42688417434692}}

In [58]:
# Persist the experiment/cycle timing record as indented JSON.
with open('../data/minio/experiment_3/central/times.json', 'w') as json_file:
    times_payload = formatted_objects['times']
    json.dump(times_payload, json_file, indent=4)

In [59]:
# Tables of formatted_objects that are exported as one CSV file each.
metrics_keys = ['task', 'function', 'network', 'training', 'metrics', 'system', 'server']

# 'replace' is a placeholder that is swapped for the table name on every iteration.
csv_template = '../data/minio/experiment_3/central/replace.csv'
for table_name in metrics_keys:
    target_csv = csv_template.replace('replace', table_name)
    formatted_objects[table_name].to_csv(target_csv, index=False)

## Central experiment 4

In [60]:
# Rebind formatted_objects to the central objects of 'ffd-experiment-4'
# (experiment '5', cycles = 5); this replaces the previous experiment's result.
formatted_objects = format_central_experiment_objects(
    minio_client = minio_client,
    experiments_folder = 'experiments',
    experiment_name = 'ffd-experiment-4',
    experiment = '5',
    cycles = 5
)

In [61]:
# Persist the experiment specifications as indented JSON.
with open('../data/minio/experiment_4/central/specifications.json', 'w') as json_file:
    specifications_payload = formatted_objects['specifications']
    json.dump(specifications_payload, json_file, indent=4)

In [62]:
# Inspect the timing record: experiment-level timestamps plus one entry per cycle number.
formatted_objects['times']

{'experiment-date': '2024-04-15-17:09:54.402082',
 'experiment-time-start': 1713200994.4021316,
 'experiment-time-end': 1713202872.7543774,
 'experiment-total-seconds': 1878.3522458076477,
 '1': {'cycle-time-start': 1713201021.8382921,
  'cycle-time-end': 1713201457.9604793,
  'cycle-total-seconds': 436.12218713760376},
 '2': {'cycle-time-start': 1713201457.9604857,
  'cycle-time-end': 1713201808.4665098,
  'cycle-total-seconds': 350.50602412223816},
 '3': {'cycle-time-start': 1713201808.4665136,
  'cycle-time-end': 1713202158.1005187,
  'cycle-total-seconds': 349.63400506973267},
 '4': {'cycle-time-start': 1713202158.1005223,
  'cycle-time-end': 1713202507.8137615,
  'cycle-total-seconds': 349.71323919296265},
 '5': {'cycle-time-start': 1713202507.8137648,
  'cycle-time-end': 1713202857.870541,
  'cycle-total-seconds': 350.0567762851715}}

In [63]:
# Persist the experiment/cycle timing record as indented JSON.
with open('../data/minio/experiment_4/central/times.json', 'w') as json_file:
    times_payload = formatted_objects['times']
    json.dump(times_payload, json_file, indent=4)

In [64]:
# Tables of formatted_objects that are exported as one CSV file each.
metrics_keys = ['task', 'function', 'network', 'training', 'metrics', 'system', 'server']

# 'replace' is a placeholder that is swapped for the table name on every iteration.
csv_template = '../data/minio/experiment_4/central/replace.csv'
for table_name in metrics_keys:
    target_csv = csv_template.replace('replace', table_name)
    formatted_objects[table_name].to_csv(target_csv, index=False)

## Workers Parsing

In [95]:
def set_workers_objects_and_paths(
    experiments_folder: str,
    experiment_name: str,
    experiment: str
) -> any:
    """Create the empty containers that worker objects are collected into.

    The three arguments are accepted but not used by this function; every
    call returns the same fresh structure.

    Returns:
        A dict with a 'times' entry holding an empty dict, followed by one
        empty DataFrame for each of 'task', 'function', 'network',
        'training', 'metrics' and 'server'.
    """
    table_keys = ('task', 'function', 'network', 'training', 'metrics', 'server')

    containers = {'times': {}}
    # A separate DataFrame instance per key, so the tables never alias each other.
    containers.update({table_key: pd.DataFrame() for table_key in table_keys})
    return containers

def format_worker_object(
    collected_objects: any,
    bucket: str,
    path: str,
    worker: str,
    key: str,
    cycle: str
) -> bool:
    """Fetch one worker object and merge it into collected_objects[key].

    The object is read with get_experiments_objects using the module-level
    minio_client (this function takes no client argument of its own).

    Args:
        collected_objects: Mapping of key -> dict or DataFrame container;
            it is updated in place.
        bucket: Bucket the object is read from.
        path: Object path inside the bucket.
        worker: Worker identifier; used as the dict key, or written to the
            'worker' column of the appended rows.
        key: Which container of collected_objects receives the data.
        cycle: Cycle label; converted with int() for the 'cycle' column.

    Returns:
        False when the fetch yields no data, True otherwise. True is also
        returned when the container is a DataFrame but cycle is None, in
        which case nothing is stored.
    """
    # Looked up before fetching, so an unknown key fails before any request is made.
    container = collected_objects[key]
    payload, _metadata = get_experiments_objects(
        minio_client = minio_client,
        object_bucket = bucket,
        object_path = path
    )
    if payload is None:
        return False

    # Dict containers keep one raw payload per worker.
    if isinstance(container, dict):
        container[worker] = payload
        return True

    if not isinstance(container, pd.DataFrame) or cycle is None:
        return True

    # orient='index' turns each top-level key of the payload into one row.
    worker_rows = pd.DataFrame.from_dict(payload, orient='index')
    worker_rows['worker'] = worker
    worker_rows['cycle'] = int(cycle)
    merged = pd.concat([container, worker_rows])
    collected_objects[key] = merged.reset_index(drop=True)
    return True

def format_workers_experiment_objects(
    minio_client: any,
    experiments_folder: str,
    experiment_name: str,
    experiment: str
):
    """Collect every worker object of one experiment from the 'workers' bucket.

    All object paths of the bucket are listed, their extension is stripped
    and the remainder is split on '/'. The components are read by position:
    [0] worker id, [2] experiment name, [3] experiment, [4] cycle and
    [-1] the object key. Component [1] is not inspected (presumably the
    experiments folder; TODO confirm against the bucket layout).

    Args:
        minio_client: Client used to list the bucket.
        experiments_folder: Forwarded to set_workers_objects_and_paths;
            it is not used to filter paths here.
        experiment_name: Value path component [2] must equal.
        experiment: Value path component [3] must equal.

    Returns:
        The containers created by set_workers_objects_and_paths, filled by
        format_worker_object for every matching path.
    """
    target_bucket = 'workers'
    collected_objects = set_workers_objects_and_paths(
        experiments_folder = experiments_folder,
        experiment_name = experiment_name,
        experiment = experiment
    )

    worker_objects = get_object_list(
        minio_client = minio_client,
        bucket_name = target_bucket,
        path_prefix = ''
    )

    for object_path in worker_objects:
        # Everything after the first '.' is dropped (the file extension).
        formatted_path = object_path.split('.')[0]
        path_parts = formatted_path.split('/')

        # Index 4 (the cycle) is read below, so at least five components are
        # required; the previous "< 4" guard let a four-component path reach
        # that lookup and raise IndexError.
        if len(path_parts) < 5:
            continue

        path_key = path_parts[-1]
        if path_key not in collected_objects:
            continue

        if path_parts[2] != experiment_name:
            continue

        if path_parts[3] != experiment:
            continue

        # NOTE(review): format_worker_object takes no client argument, so the
        # fetch does not necessarily use the minio_client passed to this function.
        format_worker_object(
            collected_objects = collected_objects,
            bucket = target_bucket,
            path = formatted_path,
            worker = path_parts[0],
            key = path_key,
            cycle = path_parts[4]
        )

    return collected_objects

## Workers experiment 1 

In [96]:
# Collect the worker objects of 'ffd-experiment-1' (experiment '2') into formatted_worker_objects.
formatted_worker_objects = format_workers_experiment_objects(
    minio_client = minio_client,
    experiments_folder = 'experiments',
    experiment_name = 'ffd-experiment-1',
    experiment = '2'
)

In [97]:
# Inspect the timing records; the dict holds one entry per worker id.
formatted_worker_objects['times']

{'0f6c6657-b591-4523-935d-1abd09835f5e': {'experiment-name': 'ffd-experiment-1',
  'experiment': '2',
  'cycle': '1',
  'experiment-date': '2024-04-15-14:08:55.807253',
  'experiment-time-start': 1713190135.8073025,
  'experiment-time-end': 0,
  'experiment-total-seconds': 0,
  '1': {'cycle-time-start': 1713190150.256231,
   'cycle-time-end': 1713190331.7233646,
   'cycle-total-seconds': 181.4671335220337},
  '2': {'cycle-time-start': 1713190480.2549942,
   'cycle-time-end': 1713190611.5849276,
   'cycle-total-seconds': 131.32993340492249},
  '3': {'cycle-time-start': 1713190750.2567058,
   'cycle-time-end': 1713190931.5365317,
   'cycle-total-seconds': 181.27982592582703},
  '4': {'cycle-time-start': 1713191080.2567997,
   'cycle-time-end': 1713191251.0091941,
   'cycle-total-seconds': 170.75239443778992},
  '5': {'cycle-time-start': 1713191440.2557259,
   'cycle-time-end': 1713191572.2852483,
   'cycle-total-seconds': 132.02952241897583}},
 'a53e6dc2-a4dd-4514-b9f8-0f337fbc6816': {'e

In [98]:
# Preview the combined 'task' table; the 'worker' and 'cycle' columns tag the origin of each row.
formatted_worker_objects['task']

Unnamed: 0,name,action-time-start,action-time-end,action-total-seconds,worker,cycle
0,server-monitoring,1.713190e+09,1.713190e+09,0.67821,0f6c6657-b591-4523-935d-1abd09835f5e,1
1,server-monitoring,1.713190e+09,1.713190e+09,0.20623,0f6c6657-b591-4523-935d-1abd09835f5e,1
2,status-pipeline,1.713190e+09,1.713190e+09,0.59417,0f6c6657-b591-4523-935d-1abd09835f5e,1
3,server-monitoring,1.713190e+09,1.713190e+09,0.24081,0f6c6657-b591-4523-935d-1abd09835f5e,1
4,model-pipeline,1.713190e+09,1.713190e+09,0.06034,0f6c6657-b591-4523-935d-1abd09835f5e,1
...,...,...,...,...,...,...
3721,server-monitoring,1.713194e+09,1.713194e+09,0.23439,d23a4790-2964-4b15-99ec-eee56d5c60bd,6
3722,server-monitoring,1.713194e+09,1.713194e+09,0.20829,d23a4790-2964-4b15-99ec-eee56d5c60bd,6
3723,status-pipeline,1.713194e+09,1.713194e+09,0.67785,d23a4790-2964-4b15-99ec-eee56d5c60bd,6
3724,server-monitoring,1.713194e+09,1.713194e+09,0.20477,d23a4790-2964-4b15-99ec-eee56d5c60bd,6


In [99]:
# Preview the combined 'server' table; the 'worker' and 'cycle' columns tag the origin of each row.
formatted_worker_objects['server']

Unnamed: 0,name,date,time,cpu-percent,ram-resident-set-size-bytes,ram-virtual-memory-size-bytes,ram-shared-memory-bytes,ram-code-segment-size-bytes,ram-data-segment-size-bytes,ram-library-size-bytes,ram-dirty-pages-bytes,ram-unique-set-size-bytes,disk-read-bytes,disk-write-bytes,disk-read-characters-bytes,disk-write-characters-bytes,worker,cycle
0,server,2024-04-15-14:08:55.911701,1.713190e+09,59.9,454635520,2156032000,128843776,4096,1031925760,0,0,327450624,26890240,73728,55108424,17203,0f6c6657-b591-4523-935d-1abd09835f5e,1
1,server,2024-04-15-14:09:00.380138,1.713190e+09,19.9,473772032,2382520320,130285568,4096,1076826112,0,0,346464256,26894336,73728,55113084,17413,0f6c6657-b591-4523-935d-1abd09835f5e,1
2,server,2024-04-15-14:09:05.368722,1.713190e+09,0.0,380145664,2287714304,130285568,4096,982020096,0,0,251146240,26894336,73728,55114852,17489,0f6c6657-b591-4523-935d-1abd09835f5e,1
3,server,2024-04-15-14:09:10.385634,1.713190e+09,78.2,380145664,2287714304,130285568,4096,982020096,0,0,251146240,26894336,73728,55116620,17703,0f6c6657-b591-4523-935d-1abd09835f5e,1
4,server,2024-04-15-14:09:15.383467,1.713190e+09,0.0,542568448,2417737728,133824512,4096,1133404160,0,0,410144768,26894336,77824,55130536,17779,0f6c6657-b591-4523-935d-1abd09835f5e,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016,server,2024-04-15-15:17:34.501433,1.713194e+09,49.2,350347264,2135822336,130154496,4096,953593856,0,0,221216768,6438912,102400,55153283,20473,d23a4790-2964-4b15-99ec-eee56d5c60bd,6
2017,server,2024-04-15-15:17:39.496526,1.713194e+09,0.0,350347264,2135822336,130154496,4096,953597952,0,0,221302784,6438912,102400,55155063,20549,d23a4790-2964-4b15-99ec-eee56d5c60bd,6
2018,server,2024-04-15-15:17:44.505092,1.713194e+09,10.0,350347264,2135822336,130154496,4096,953597952,0,0,221306880,6438912,102400,55156843,20617,d23a4790-2964-4b15-99ec-eee56d5c60bd,6
2019,server,2024-04-15-15:17:49.493382,1.713194e+09,0.0,350478336,2135822336,130154496,4096,953597952,0,0,221401088,6438912,106496,55158623,20693,d23a4790-2964-4b15-99ec-eee56d5c60bd,6


In [100]:
# Persist the per-worker timing records as indented JSON.
with open('../data/minio/experiment_1/workers/times.json', 'w') as json_file:
    times_payload = formatted_worker_objects['times']
    json.dump(times_payload, json_file, indent=4)

In [101]:
# Tables of formatted_worker_objects that are exported as one CSV file each.
metrics_keys = ['task', 'function', 'network', 'training', 'metrics', 'server']

# 'replace' is a placeholder that is swapped for the table name on every iteration.
csv_template = '../data/minio/experiment_1/workers/replace.csv'
for table_name in metrics_keys:
    target_csv = csv_template.replace('replace', table_name)
    formatted_worker_objects[table_name].to_csv(target_csv, index=False)

## Workers experiment 2

In [102]:
# Collect the worker objects of 'ffd-experiment-2' (experiment '3') into formatted_worker_objects.
formatted_worker_objects = format_workers_experiment_objects(
    minio_client = minio_client,
    experiments_folder = 'experiments',
    experiment_name = 'ffd-experiment-2',
    experiment = '3'
)

In [103]:
# Inspect the timing records; the dict holds one entry per worker id.
formatted_worker_objects['times']

{'0f6c6657-b591-4523-935d-1abd09835f5e': {'experiment-name': 'ffd-experiment-2',
  'experiment': '3',
  'cycle': '1',
  'experiment-date': '2024-04-15-15:17:52.425138',
  'experiment-time-start': 1713194272.4251738,
  'experiment-time-end': 1713196246.8989012,
  'experiment-total-seconds': 1974.473727464676,
  '1': {'cycle-time-start': 1713194284.4233296,
   'cycle-time-end': 1713194525.8391745,
   'cycle-total-seconds': 241.41584491729736},
  '2': {'cycle-time-start': 1713194704.4238102,
   'cycle-time-end': 1713194925.137289,
   'cycle-total-seconds': 220.71347880363464},
  '3': {'cycle-time-start': 1713195064.4240348,
   'cycle-time-end': 1713195325.3580098,
   'cycle-total-seconds': 260.933974981308},
  '4': {'cycle-time-start': 1713195514.4252613,
   'cycle-time-end': 1713195725.6225004,
   'cycle-total-seconds': 211.19723916053772},
  '5': {'cycle-time-start': 1713195904.4249315,
   'cycle-time-end': 1713196165.674195,
   'cycle-total-seconds': 261.24926352500916}},
 'a53e6dc2-a4

In [104]:
# Persist the per-worker timing records as indented JSON.
with open('../data/minio/experiment_2/workers/times.json', 'w') as json_file:
    times_payload = formatted_worker_objects['times']
    json.dump(times_payload, json_file, indent=4)

In [106]:
# Preview the combined 'task' table; the 'worker' and 'cycle' columns tag the origin of each row.
formatted_worker_objects['task']

Unnamed: 0,name,action-time-start,action-time-end,action-total-seconds,worker,cycle
0,server-monitoring,1.713194e+09,1.713194e+09,0.34245,0f6c6657-b591-4523-935d-1abd09835f5e,1
1,status-pipeline,1.713194e+09,1.713194e+09,4.13592,0f6c6657-b591-4523-935d-1abd09835f5e,1
2,server-monitoring,1.713194e+09,1.713194e+09,0.18902,0f6c6657-b591-4523-935d-1abd09835f5e,1
3,update-pipeline,1.713194e+09,1.713194e+09,0.07836,0f6c6657-b591-4523-935d-1abd09835f5e,1
4,model-pipeline,1.713194e+09,1.713194e+09,0.05664,0f6c6657-b591-4523-935d-1abd09835f5e,1
...,...,...,...,...,...,...
4065,server-monitoring,1.713198e+09,1.713198e+09,0.23816,e773520a-073e-4b2e-8f27-15275cfd8d4e,6
4066,server-monitoring,1.713198e+09,1.713198e+09,0.23841,e773520a-073e-4b2e-8f27-15275cfd8d4e,6
4067,status-pipeline,1.713198e+09,1.713198e+09,2.22327,e773520a-073e-4b2e-8f27-15275cfd8d4e,6
4068,server-monitoring,1.713198e+09,1.713198e+09,0.19118,e773520a-073e-4b2e-8f27-15275cfd8d4e,6


In [105]:
# Tables of formatted_worker_objects that are exported as one CSV file each.
metrics_keys = ['task', 'function', 'network', 'training', 'metrics', 'server']

# 'replace' is a placeholder that is swapped for the table name on every iteration.
csv_template = '../data/minio/experiment_2/workers/replace.csv'
for table_name in metrics_keys:
    target_csv = csv_template.replace('replace', table_name)
    formatted_worker_objects[table_name].to_csv(target_csv, index=False)

In [107]:
# Collect the worker objects of 'ffd-experiment-3' (experiment '4') into formatted_worker_objects.
formatted_worker_objects = format_workers_experiment_objects(
    minio_client = minio_client,
    experiments_folder = 'experiments',
    experiment_name = 'ffd-experiment-3',
    experiment = '4'
)

In [108]:
# Inspect the timing records; the dict holds one entry per worker id.
formatted_worker_objects['times']

{'66e8acb0-574e-4392-a8ab-cdcb9fca760a': {'experiment-name': 'ffd-experiment-3',
  'experiment': '4',
  'cycle': '1',
  'experiment-date': '2024-04-15-16:26:12.414063',
  'experiment-time-start': 1713198372.414114,
  'experiment-time-end': 1713199527.8847919,
  'experiment-total-seconds': 1155.4706778526306,
  '1': {'cycle-time-start': 1713198387.12735,
   'cycle-time-end': 1713198527.5686584,
   'cycle-total-seconds': 140.441308259964},
  '2': {'cycle-time-start': 1713198657.1264822,
   'cycle-time-end': 1713198767.6272583,
   'cycle-total-seconds': 110.50077605247498},
  '3': {'cycle-time-start': 1713198867.1273265,
   'cycle-time-end': 1713199008.1012614,
   'cycle-total-seconds': 140.97393488883972},
  '4': {'cycle-time-start': 1713199107.126826,
   'cycle-time-end': 1713199247.6147108,
   'cycle-total-seconds': 140.48788475990295},
  '5': {'cycle-time-start': 1713199377.126261,
   'cycle-time-end': 1713199487.781052,
   'cycle-total-seconds': 110.65479111671448}},
 'e773520a-073e-

In [109]:
# Persist the per-worker timing records as indented JSON.
with open('../data/minio/experiment_3/workers/times.json', 'w') as json_file:
    times_payload = formatted_worker_objects['times']
    json.dump(times_payload, json_file, indent=4)

In [110]:
# Tables of formatted_worker_objects that are exported as one CSV file each.
metrics_keys = ['task', 'function', 'network', 'training', 'metrics', 'server']

# 'replace' is a placeholder that is swapped for the table name on every iteration.
csv_template = '../data/minio/experiment_3/workers/replace.csv'
for table_name in metrics_keys:
    target_csv = csv_template.replace('replace', table_name)
    formatted_worker_objects[table_name].to_csv(target_csv, index=False)

In [111]:
# Collect the worker objects of 'ffd-experiment-4' (experiment '5') into formatted_worker_objects.
formatted_worker_objects = format_workers_experiment_objects(
    minio_client = minio_client,
    experiments_folder = 'experiments',
    experiment_name = 'ffd-experiment-4',
    experiment = '5'
)

In [112]:
# Inspect the timing records; the dict holds one entry per worker id.
formatted_worker_objects['times']

{'66e8acb0-574e-4392-a8ab-cdcb9fca760a': {'experiment-name': 'ffd-experiment-4',
  'experiment': '5',
  'cycle': '1',
  'experiment-date': '2024-04-15-17:13:19.693672',
  'experiment-time-start': 1713201199.693725,
  'experiment-time-end': 1713202872.1666222,
  'experiment-total-seconds': 1672.472897052765,
  '1': {'cycle-time-start': 1713201201.7554307,
   'cycle-time-end': 1713201352.702792,
   'cycle-total-seconds': 150.94736123085022},
  '2': {'cycle-time-start': 1713201561.7552931,
   'cycle-time-end': 1713201712.6993773,
   'cycle-total-seconds': 150.94408416748047},
  '3': {'cycle-time-start': 1713201891.757022,
   'cycle-time-end': 1713202112.495494,
   'cycle-total-seconds': 220.73847198486328},
  '4': {'cycle-time-start': 1713202251.7565765,
   'cycle-time-end': 1713202472.4931576,
   'cycle-total-seconds': 220.73658108711243},
  '5': {'cycle-time-start': 1713202611.7576482,
   'cycle-time-end': 1713202832.2263312,
   'cycle-total-seconds': 220.46868300437927}},
 'e773520a-07

In [113]:
# Persist the per-worker timing records as indented JSON.
with open('../data/minio/experiment_4/workers/times.json', 'w') as json_file:
    times_payload = formatted_worker_objects['times']
    json.dump(times_payload, json_file, indent=4)

In [114]:
# Tables of formatted_worker_objects that are exported as one CSV file each.
metrics_keys = ['task', 'function', 'network', 'training', 'metrics', 'server']

# 'replace' is a placeholder that is swapped for the table name on every iteration.
csv_template = '../data/minio/experiment_4/workers/replace.csv'
for table_name in metrics_keys:
    target_csv = csv_template.replace('replace', table_name)
    formatted_worker_objects[table_name].to_csv(target_csv, index=False)