In [1]:
import io
import json
import os
import pickle
from collections import OrderedDict

import numpy as np
import pandas as pd
import requests
from minio import Minio

In [2]:
# Connection settings are read from environment variables so that real
# credentials do not have to be committed with the notebook. The literals are
# kept only as local-development fallbacks, which preserves the previous
# behaviour when none of the variables are set.
minio_client = Minio(
    endpoint = os.environ.get('MINIO_ENDPOINT', '127.0.0.1:9000'),
    access_key = os.environ.get('MINIO_ACCESS_KEY', '23034opsdjhksd'),
    secret_key = os.environ.get('MINIO_SECRET_KEY', 'sdkl3slömdm'),
    # Any of "1", "true" or "yes" (case-insensitive) enables TLS; default is off.
    secure = os.environ.get('MINIO_SECURE', 'false').lower() in ('1', 'true', 'yes')
)

In [20]:
def create_bucket(
    minio_client: any,
    bucket_name: str
) -> bool:
    """Create the bucket `bucket_name` with the given client.

    Returns True when `make_bucket` completes. Any exception is printed and
    turned into a False return value instead of being raised.
    """
    try:
        minio_client.make_bucket(bucket_name = bucket_name)
    except Exception as error:
        print(error)
        return False
    return True
    
def check_bucket(
    minio_client: any,
    bucket_name:str
) -> bool:
    """Report whether `bucket_name` exists according to the given client.

    Returns whatever `bucket_exists` returns. If the call raises, the
    exception is printed and False is returned.
    """
    try:
        return minio_client.bucket_exists(bucket_name = bucket_name)
    except Exception as error:
        print(error)
        return False
       
def delete_bucket(
    minio_client: any,
    bucket_name:str
) -> bool:
    """Remove the bucket `bucket_name` with the given client.

    Returns True when `remove_bucket` completes. Any exception is printed and
    reported as False.
    """
    try:
        minio_client.remove_bucket(bucket_name = bucket_name)
    except Exception as error:
        print(error)
        return False
    return True
# Works
def create_object(
    minio_client: any,
    bucket_name: str, 
    object_path: str, 
    data: any,
    metadata: dict
) -> bool: 
    """Pickle `data` and upload it as `<object_path>.pkl` into `bucket_name`.

    `metadata` is forwarded unchanged to `put_object`. Returns True when the
    upload call completes; an exception raised by the upload is printed and
    reported as False. Pickling happens before the guarded upload, so a
    pickling error propagates to the caller.
    """
    # Original author's note: MinIO objects have a size limit of 1GB,
    # which might result in a "large header" error.
    payload = pickle.dumps(data)
    stream = io.BytesIO(payload)  # a fresh BytesIO starts at position 0
    try:
        minio_client.put_object(
            bucket_name = bucket_name,
            object_name = object_path + '.pkl',
            data = stream,
            length = len(payload),
            metadata = metadata
        )
    except Exception as error:
        print(error)
        return False
    return True
# Works
def check_object(
    minio_client: any,
    bucket_name: str, 
    object_path: str
) -> bool: 
    """Report whether `<object_path>.pkl` can be stat-ed in `bucket_name`.

    Returns True when `stat_object` succeeds and False when it raises for any
    reason. The exception is not printed.
    """
    try:
        minio_client.stat_object(
            bucket_name = bucket_name,
            object_name = object_path + '.pkl'
        )
    except Exception:
        return False
    return True
# Works
def delete_object(
    minio_client: any,
    bucket_name: str, 
    object_path: str
) -> bool: 
    """Remove `<object_path>.pkl` from `bucket_name`.

    Returns True when `remove_object` completes. Any exception is printed and
    reported as False.
    """
    try:
        minio_client.remove_object(
            bucket_name = bucket_name,
            object_name = object_path + '.pkl'
        )
    except Exception as error:
        print(error)
        return False
    return True
# Works
def update_object(
    minio_client: any,
    bucket_name: str, 
    object_path: str, 
    data: any,
    metadata: dict
) -> bool:  
    """Replace a stored object by deleting it and then creating it again.

    Returns True only when both `delete_object` and `create_object` report
    success. When the delete fails, no create is attempted.
    """
    removed = delete_object(minio_client,bucket_name, object_path)
    if not removed:
        return False
    created = create_object(minio_client, bucket_name, object_path, data, metadata)
    return bool(created)
# works
def create_or_update_object(
    minio_client: any,
    bucket_name: str, 
    object_path: str, 
    data: any, 
    metadata: dict
) -> any:
    """Store `data` under `object_path`, creating the bucket first if needed.

    Returns None when the bucket is missing and cannot be created. Otherwise
    returns the result of `create_object` (object not present yet) or
    `update_object` (object already present).
    """
    if not check_bucket(minio_client,bucket_name):
        if not create_bucket(minio_client,bucket_name):
            return None

    already_stored = check_object(minio_client,bucket_name, object_path)
    store = update_object if already_stored else create_object
    return store(minio_client,bucket_name, object_path, data, metadata)

def get_object_data_and_metadata(
    minio_client: any,
    bucket_name: str, 
    object_path: str
) -> dict:
    """Fetch `<object_path>.pkl` and return its unpickled data and user metadata.

    Parameters:
        minio_client: client exposing `stat_object` and `get_object`.
        bucket_name: bucket that holds the object.
        object_path: object name without the '.pkl' suffix.

    Returns:
        {'data': <unpickled object>, 'metadata': {<name>: <value>, ...}} where
        the metadata only contains headers that start with 'x-amz-meta-'
        (matched case-insensitively) with that prefix removed. Returns None,
        after printing the error, when fetching or decoding fails.
    """
    object_name = object_path + '.pkl'
    meta_prefix = 'x-amz-meta-'
    response = None

    try:
        given_object_info = minio_client.stat_object(
            bucket_name = bucket_name, 
            object_name = object_name
        )
        # The metadata is read from the stat result before the download is
        # started (kept from the original implementation, whose author
        # observed request-rate problems with the client).
        given_metadata = given_object_info.metadata
        
        response = minio_client.get_object(
            bucket_name = bucket_name, 
            object_name = object_name
        )
        given_pickled_data = response.data
        
        try:
            # SECURITY: pickle.loads executes arbitrary code embedded in the
            # payload; only use this with buckets whose writers are trusted.
            given_data = pickle.loads(given_pickled_data)
            # Header casing is not relied upon, so the prefix is compared in
            # lower case; the slice drops exactly the prefix.
            relevant_metadata = {
                key[len(meta_prefix):]: value
                for key, value in given_metadata.items()
                if key.lower().startswith(meta_prefix)
            }
            return {'data': given_data, 'metadata': relevant_metadata}
        except Exception as e:
            print('MinIO object pickle decoding error')
            print(e)
            return None 
    except Exception as e:
        print('MinIO object fetching error')
        print(e)
        return None
    finally:
        # Always hand the HTTP connection back, including on early returns.
        if response is not None:
            response.close()
            response.release_conn()
# Works
def get_object_list(
    minio_client: any,
    bucket_name: str,
    path_prefix: str
) -> dict:
    """List objects under `path_prefix` together with their user metadata.

    Every object returned by a recursive `list_objects` call is stat-ed, and
    the headers starting with 'x-amz-meta-' (matched case-insensitively) are
    kept with that prefix removed.

    Returns:
        {<object name>: {<metadata name>: <value>, ...}, ...}, or None (after
        printing the error) when listing or stat-ing fails.
    """
    meta_prefix = 'x-amz-meta-'
    try:
        objects = minio_client.list_objects(
            bucket_name = bucket_name,
            prefix = path_prefix,
            recursive = True
        )
        object_dict = {}
        for obj in objects:
            object_info = minio_client.stat_object(
                bucket_name = bucket_name,
                object_name = obj.object_name
            )
            # Header casing is not relied upon, so compare in lower case.
            object_dict[obj.object_name] = {
                key[len(meta_prefix):]: value
                for key, value in object_info.metadata.items()
                if key.lower().startswith(meta_prefix)
            }
        return object_dict
    except Exception as e:
        # Report the failure like the other storage helpers do instead of
        # returning None without any trace.
        print('MinIO object listing error')
        print(e)
        return None  

In [21]:
def format_metadata_dict(
    given_metadata: dict
) -> dict:
    """Normalise metadata read back from MinIO.

    Keys are lower-cased. String values made only of digits become integers,
    and digit strings with a single '.' become floats (these previously
    crashed in `int()`). Every other value is kept as it is. The result is
    then passed through `decode_metadata_strings_to_lists`, so 'list=...'
    strings come back as lists.
    """
    # Original author's note: MinIO metadata keys come back with their first
    # character capitalised and all values are strings (AMZ format), which is
    # why keys are lower-cased and numeric strings are converted here.
    fixed_dict = {}
    for key, value in given_metadata.items():
        converted = value
        if isinstance(value, str) and value.replace('.', '', 1).isdigit():
            try:
                converted = int(value)
            except ValueError:
                # The value contains a decimal point (e.g. '1.5').
                try:
                    converted = float(value)
                except ValueError:
                    converted = value
        fixed_dict[key.lower()] = converted
    return decode_metadata_strings_to_lists(fixed_dict)
# Created and works
def encode_metadata_lists_to_strings(
    given_metadata: dict
) -> dict:
    """Return a copy of `given_metadata` with list values flattened to strings.

    A list value becomes 'list=' followed by its items joined with commas;
    every other value is copied unchanged.
    """
    # Original author's note: MinIO metadata only accepts strings and
    # integers, with keys that contain no '_' characters, which is why lists
    # have to be stored as strings.
    return {
        key: ('list=' + ','.join(str(item) for item in value))
        if isinstance(value, list)
        else value
        for key, value in given_metadata.items()
    }
# Created and works
def decode_metadata_strings_to_lists(
    given_metadata: dict
) -> dict:
    """Reverse `encode_metadata_lists_to_strings` on a metadata dict.

    A string value that starts with 'list=' is split on commas into a list.
    The items are converted to integers when all of them parse as integers
    and are kept as strings otherwise; an empty payload ('list=') gives an
    empty list. All other values are copied unchanged.
    """
    list_marker = 'list='
    modified_dict = {}
    for key, value in given_metadata.items():
        if isinstance(value, str) and value.startswith(list_marker):
            # Slice off the marker instead of splitting on '=' so that items
            # which themselves contain '=' are not truncated.
            payload = value[len(list_marker):]
            if payload == '':
                modified_dict[key] = []
                continue
            items = payload.split(',')
            try:
                modified_dict[key] = [int(item) for item in items]
            except ValueError:
                modified_dict[key] = items
            continue
        modified_dict[key] = value
    return modified_dict

def get_experiments_objects(
    minio_client: any,
    object_bucket: str,
    object_path: str
) -> any:
    """Fetch an experiment object and its normalised metadata.

    Returns a `(data, metadata)` tuple. Both entries are None when the object
    does not exist or when fetching/decoding it failed; otherwise `metadata`
    is the stored metadata passed through `format_metadata_dict`.
    """
    object_data = None
    object_metadata = None

    object_exists = check_object(
        minio_client = minio_client,
        bucket_name = object_bucket,
        object_path = object_path
    )
    if object_exists:
        fetched_object = get_object_data_and_metadata(
            minio_client = minio_client,
            bucket_name = object_bucket,
            object_path = object_path
        )
        # The fetch helper returns None on errors; subscripting that result
        # used to raise a TypeError here.
        if fetched_object is not None:
            object_data = fetched_object['data']
            object_metadata = format_metadata_dict(fetched_object['metadata'])
    return object_data, object_metadata
# Created and works 
def set_experiments_objects(
    minio_client: any,
    object_bucket: str,
    object_path: str,
    overwrite: bool,
    object_data: any,
    object_metadata: any
):
    """Store an experiment object unless it exists and `overwrite` is false.

    List values in `object_metadata` are encoded to strings before the object
    is written with `create_or_update_object`. Nothing is returned.
    """
    already_stored = check_object(
        minio_client = minio_client,
        bucket_name = object_bucket,
        object_path = object_path
    )
    if already_stored and not overwrite:
        return

    create_or_update_object(
        minio_client = minio_client,
        bucket_name = object_bucket,
        object_path = object_path,
        data = object_data,
        metadata = encode_metadata_lists_to_strings(object_metadata)
    )

In [199]:
def set_central_objects_and_paths(
    experiments_folder: str,
    experiment_name: str,
    experiment: str
) -> any:
    """Build the empty collectors and the storage path templates for central.

    Returns `(central_objects, object_paths)`. `central_objects` maps every
    object key to an empty dict or an empty DataFrame. `object_paths` maps the
    same keys to their storage path; paths with a 'c' segment and paths ending
    in '|' are templates that the caller rewrites.
    """
    dict_keys = ('status', 'specifications', 'workers', 'times')
    frame_keys = (
        'central-pool', 'workers-pool', 'global-model', 'local-model',
        'data', 'task', 'function', 'network', 'training', 'inference',
        'metrics', 'system', 'server'
    )
    central_objects = {}
    for name in dict_keys:
        central_objects[name] = {}
    for name in frame_keys:
        central_objects[name] = pd.DataFrame()

    experiment_root = experiments_folder + '/' + str(experiment_name) + '/' + str(experiment)
    cycle_root = experiment_root + '/c'
    times_root = cycle_root + '/times'
    resources_root = cycle_root + '/resources'

    object_paths = {
        'status': experiments_folder + '/status',
        'specifications': experiments_folder + '/specifications',
        'workers': cycle_root + '/workers',
        'central-pool': experiment_root + '/data/central-pool',
        'workers-pool': experiment_root + '/data/workers-pool',
        'global-model': cycle_root + '/global-model',
        'local-model': cycle_root + '/local-model|',
        'data': cycle_root + '/data|', # worker-id
        'times': experiment_root + '/times',
        'task': times_root + '/task',
        'function': times_root + '/function',
        'network': times_root + '/network',
        'training': times_root + '/training',
        'inference': times_root + '/inference',
        'metrics': cycle_root + '/metrics',
        'system': resources_root + '/system',
        'server': resources_root + '/server'
    }

    return central_objects, object_paths

def PyTorch_model_into_data_and_columns(
    parameters: any
) -> any:
    """Flatten a model parameter mapping into table rows and column names.

    Parameters:
        parameters: mapping of parameter name to a tensor-like value that
            offers `.numpy()` and `.shape` (for example a PyTorch state dict).

    Returns:
        `(values, columns)`. For every key containing 'weight', one column
        'w-<i>' per entry of the second dimension is added and every weight
        row becomes a row of `values`. For every key containing 'bias', a
        'b-1' column is added and the i-th bias is appended to the i-th row.

    NOTE(review): bias values are always appended starting at row 0, so this
    looks designed for a single weight/bias pair - confirm before using it
    with multi-layer models.
    """
    columns = []
    values = []
    # Iterate the argument; the previous code read an unrelated name `data`,
    # which is either undefined or a leftover notebook global.
    for key, value in parameters.items():
        numpy_format = value.numpy().tolist()
        shape = value.shape

        if 'weight' in key:
            columns.extend('w-' + str(i) for i in range(1, shape[1] + 1))
            values.extend(numpy_format)

        if 'bias' in key:
            columns.append('b-1')
            for row_index, bias in enumerate(numpy_format):
                values[row_index].append(bias)
    return values, columns

def format_central_object(
    collected_objects: any,
    bucket: str,
    path: str,
    key: str,
    cycle: str
) -> bool:    
    """Fetch one stored central object and merge it into `collected_objects[key]`.

    Parameters:
        collected_objects: dict of collectors (dicts or DataFrames); updated
            in place.
        bucket: bucket the object is read from.
        path: object path without the '.pkl' suffix.
        key: collector key the object belongs to.
        cycle: cycle number as a string, or None for objects without a cycle.

    Returns:
        True when one of the known collector/object shapes was handled, False
        when the collector is neither a dict nor a DataFrame.

    NOTE: the storage client is the notebook-level `minio_client` variable,
    not an argument, so that variable must exist before this is called.
    """
    formatted_data = collected_objects[key]
    data, metadata = get_experiments_objects(
        minio_client = minio_client,
        object_bucket = bucket,
        object_path = path
    )
    path_split = path.split('/')
    
    # Model parameters (state dicts) are flattened into weight/bias rows.
    if isinstance(data, OrderedDict):
        # When the parent folder name contains the key, the object name is
        # recorded in a 'name' column.
        is_named_object = key in path_split[-2]
        model_values, model_columns = PyTorch_model_into_data_and_columns(
            parameters = data
        )
        model_df = pd.DataFrame(model_values, columns = model_columns)
        if is_named_object:
            model_df['name'] = path_split[-1]
        if cycle is not None:
            model_df['cycle'] = int(cycle)
        collected_objects[key] = pd.concat([formatted_data,model_df]).reset_index(drop=True)
        return True
        
    if isinstance(formatted_data, dict):
        if cycle is None:
            collected_objects[key] = data
        else:
            collected_objects[key][cycle] = data
        return True
            
    if isinstance(formatted_data, pd.DataFrame):
        # Tabular objects stored with a 'header' metadata entry carry their
        # column names there.
        if metadata is not None and 'header' in metadata:
            object_df = pd.DataFrame(data, columns = metadata['header'])
            if cycle is None:
                collected_objects[key] = pd.concat([formatted_data,object_df])
            else:
                object_df['name'] = path_split[-1]
                object_df['cycle'] = int(cycle)
                collected_objects[key] = pd.concat([formatted_data,object_df]).reset_index(drop=True)
            return True
        if data is not None:
            object_df = pd.DataFrame.from_dict(data,orient='index')
            # Objects outside a cycle folder arrive with cycle=None, and
            # int(None) used to raise a TypeError here.
            if cycle is not None:
                object_df['cycle'] = int(cycle)
            collected_objects[key] = pd.concat([formatted_data,object_df]).reset_index(drop=True)
        return True

    # Unknown collector type: report it instead of implicitly returning None.
    return False
        
def format_central_experiment_objects(
    minio_client: any,
    experiments_folder: str,
    experiment_name: str,
    experiment: str,
    cycles: int
):
    """Collect all central objects of one experiment from the 'central' bucket.

    Path templates come from `set_central_objects_and_paths`. A template whose
    fourth segment is 'c' is expanded once per cycle; a template ending in '|'
    names a folder whose objects are all collected. Every other path is read
    once without a cycle.

    Returns the dict of collectors filled by `format_central_object`.
    """
    target_bucket = 'central'
    object_suffix = '.pkl'
    collected_objects, storage_paths = set_central_objects_and_paths(
        experiments_folder = experiments_folder,
        experiment_name = experiment_name,
        experiment = experiment
    )
    
    # One cycle more than `cycles` is visited (1 .. cycles + 1).
    # NOTE(review): presumably a closing cycle is stored after the last
    # training cycle - confirm.
    max_cycles = cycles + 1
    for name in list(collected_objects.keys()):
        whole_path = storage_paths[name]
        path_split = whole_path.split('/')
        used_key = path_split[-1]
        if 4 < len(path_split) and path_split[3] == 'c':
            for cycle in range(1,max_cycles + 1):
                path_split[3] = str(cycle)
                cycle_path = '/'.join(path_split)
                if '|' in path_split[-1]:
                    folder_path = cycle_path.split('|')[0]
                    folder_objects = get_object_list(
                        minio_client = minio_client,
                        bucket_name = target_bucket,
                        path_prefix = folder_path
                    )
                    formatted_key = used_key.split('|')[0]
                    # The listing is None when it failed; treat that as an
                    # empty folder instead of crashing on `.keys()`.
                    for object_name in (folder_objects or {}):
                        # Strip only the storage suffix, so that dots elsewhere
                        # in the path do not truncate it.
                        if object_name.endswith(object_suffix):
                            object_path = object_name[:-len(object_suffix)]
                        else:
                            object_path = object_name
                        format_central_object(
                            collected_objects = collected_objects,
                            bucket = target_bucket,
                            path = object_path,
                            key = formatted_key,
                            cycle = str(cycle)
                        )
                    continue
                format_central_object(
                    collected_objects = collected_objects,
                    bucket = target_bucket,
                    path = cycle_path,
                    key = used_key,
                    cycle = str(cycle)
                )
            continue
        format_central_object(
            collected_objects = collected_objects,
            bucket = target_bucket,
            path = whole_path,
            key = used_key,
            cycle = None
        )
    return collected_objects

In [200]:
# Collect the central objects of experiment '1' of
# 'central-federated-learning'; `cycles = 5` makes the collector visit
# cycles 1 to 6.
formatted_objects = format_central_experiment_objects(
    minio_client = minio_client,
    experiments_folder = 'experiments',
    experiment_name = 'central-federated-learning',
    experiment = '1',
    cycles = 5
)

In [252]:
def set_workers_objects_and_paths(
    experiments_folder: str,
    experiment_name: str,
    experiment: str
) -> any:
    """Build the empty collectors used for worker objects.

    Returns a single dict that maps 'status' and 'specifications' to empty
    dicts and every other key to an empty DataFrame. The three arguments are
    currently not used and no paths are returned.
    """
    frame_keys = (
        'times', 'task', 'function', 'network', 'training',
        'inference', 'metrics', 'system', 'server'
    )
    workers_objects = {'status': {}, 'specifications': {}}
    workers_objects.update((name, pd.DataFrame()) for name in frame_keys)
    return workers_objects

def format_worker_object(
    collected_objects: any,
    bucket: str,
    path: str,
    worker: str,
    key: str,
    experiment: str = None,
    cycle: str = None
) -> bool:    
    """Fetch one stored worker object and register it in `collected_objects`.

    Parameters:
        collected_objects: dict of collectors (dicts or DataFrames); updated
            in place.
        bucket: bucket the object is read from.
        path: object path without the '.pkl' suffix.
        worker: worker identifier used as key inside dict collectors.
        key: collector key the object belongs to.
        experiment: experiment identifier, or None (default). Dict collectors
            are only filled when this is None.
        cycle: cycle identifier, or None (default).

    Returns:
        False when the object could not be fetched, True otherwise.

    `experiment` and `cycle` default to None so that callers which do not
    pass `experiment` keep working instead of raising a TypeError.

    NOTE: the storage client is the notebook-level `minio_client` variable,
    not an argument, so that variable must exist before this is called.
    """
    formatted_data = collected_objects[key]
    data, metadata = get_experiments_objects(
        minio_client = minio_client,
        object_bucket = bucket,
        object_path = path
    )
    
    if data is None:
        return False
     
    if isinstance(formatted_data, dict):
        if experiment is None:
            collected_objects[key][worker] = data
        return True
            
    if isinstance(formatted_data, pd.DataFrame):
        if not cycle is None:
            print(path)
            # TODO(review): the DataFrame accumulation below is disabled, so
            # DataFrame collectors are currently left empty - confirm whether
            # it should be re-enabled.
            #
            # object_df = pd.DataFrame.from_dict(data,orient='index')
            # object_df['worker'] = worker
            # object_df['experiment'] = int(experiment)
            # object_df['cycle'] = int(cycle)
            # result = pd.concat([formatted_data,object_df])
            # result = result.reset_index(drop=True)
            # collected_objects[key] = result
    return True

def format_workers_experiment_objects(
    minio_client: any,
    experiments_folder: str,
    experiment_name: str,
    experiment: str
):
    """Collect worker objects from the 'workers' bucket.

    Every object in the bucket whose last path segment is a known collector
    key is passed to `format_worker_object`. The first path segment is used
    as the worker identifier and, for paths with more than four segments, the
    fifth segment as the cycle.

    Returns the dict of collectors. It is returned unfilled when the bucket
    listing fails.
    """
    target_bucket = 'workers'
    object_suffix = '.pkl'
    # The helper returns one dict of collectors (no paths), so it must not be
    # unpacked into two names.
    collected_objects = set_workers_objects_and_paths(
        experiments_folder = experiments_folder,
        experiment_name = experiment_name,
        experiment = experiment
    )
    
    worker_objects = get_object_list(
        minio_client = minio_client,
        bucket_name = target_bucket,
        path_prefix = ''
    )
    # The listing is None when it failed; there is nothing to collect then.
    if worker_objects is None:
        return collected_objects
    
    relevant_paths = []
    for object_name in worker_objects:
        # Strip only the storage suffix, so that dots elsewhere in the path
        # do not truncate it.
        if object_name.endswith(object_suffix):
            formatted_path = object_name[:-len(object_suffix)]
        else:
            formatted_path = object_name
        path_key = formatted_path.split('/')[-1]
        
        if not path_key in collected_objects:
            continue
        # TODO(review): filtering by experiment name/number is disabled, so
        # objects of every experiment are collected - confirm whether the
        # filter below should be re-enabled.
        #
        # if 3 < len(formatted_path_split):
        #     path_experiment_name = str(formatted_path_split[2])
        #     path_experiment = str(formatted_path_split[3])
        #     if not path_experiment_name == experiment_name:
        #         continue
        #     if not path_experiment == experiment:
        #         continue
        relevant_paths.append(formatted_path)
    
    for path in relevant_paths:
        formatted_path_split = path.split('/')
        path_worker = formatted_path_split[0]
        path_key = formatted_path_split[-1]
        
        # Index 4 is only valid for paths with at least five segments.
        path_cycle = None
        if 4 < len(formatted_path_split):
            path_cycle = str(formatted_path_split[4])
        
        format_worker_object(
            collected_objects = collected_objects,
            bucket = target_bucket,
            path = path,
            worker = path_worker,
            key = path_key,
            # Passed explicitly because the parameter is required by
            # format_worker_object's signature.
            experiment = None,
            cycle = path_cycle
        )
       
    return collected_objects

In [248]:
# Collect the worker objects for experiment '3' of
# 'central-federated-learning'.
# NOTE(review): this cell's execution count (248) is lower than that of the
# cell defining the function (252), so it ran against an earlier definition -
# re-run after Restart & Run All.
formated_worker_objects = format_workers_experiment_objects(
    minio_client = minio_client,
    experiments_folder = 'experiments',
    experiment_name = 'central-federated-learning',
    experiment = '3'
)

In [251]:
# Display the 'task' collector of the collected worker objects.
formated_worker_objects['task']

Unnamed: 0,name,action-time-start,action-time-end,action-total-seconds,worker,cycle
0,status-pipeline,1.712934e+09,1.712934e+09,0.47923,03814f81-12c3-4367-9bd5-b187d0cfce95,1
1,server-monitoring,1.712934e+09,1.712934e+09,0.18417,03814f81-12c3-4367-9bd5-b187d0cfce95,1
2,data-pipeline,1.712934e+09,1.712934e+09,0.12346,03814f81-12c3-4367-9bd5-b187d0cfce95,1
3,server-monitoring,1.712934e+09,1.712934e+09,0.43101,03814f81-12c3-4367-9bd5-b187d0cfce95,1
4,server-monitoring,1.712934e+09,1.712934e+09,0.28498,03814f81-12c3-4367-9bd5-b187d0cfce95,1
...,...,...,...,...,...,...
913,status-pipeline,1.712934e+09,1.712934e+09,0.88698,c890a70e-5d92-4e32-937a-a657579dc8b1,2
914,server-monitoring,1.712934e+09,1.712934e+09,0.26137,c890a70e-5d92-4e32-937a-a657579dc8b1,2
915,data-pipeline,1.712934e+09,1.712934e+09,0.05146,c890a70e-5d92-4e32-937a-a657579dc8b1,2
916,server-monitoring,1.712934e+09,1.712934e+09,0.31104,c890a70e-5d92-4e32-937a-a657579dc8b1,2


In [209]:
# List every object in the 'workers' bucket (empty prefix) together with its
# user metadata.
worker_objects = get_object_list(
    minio_client = minio_client,
    bucket_name = 'workers',
    path_prefix = ''
)

03814f81-12c3-4367-9bd5-b187d0cfce95/experiments/central-federated-learning/1/6/resources/server.pkl
03814f81-12c3-4367-9bd5-b187d0cfce95/experiments/central-federated-learning/1/6/times/network.pkl
03814f81-12c3-4367-9bd5-b187d0cfce95/experiments/central-federated-learning/1/6/times/task.pkl
03814f81-12c3-4367-9bd5-b187d0cfce95/experiments/central-federated-learning/2/1/global-model.pkl
03814f81-12c3-4367-9bd5-b187d0cfce95/experiments/central-federated-learning/2/1/local-model.pkl
03814f81-12c3-4367-9bd5-b187d0cfce95/experiments/central-federated-learning/2/1/metrics.pkl
03814f81-12c3-4367-9bd5-b187d0cfce95/experiments/central-federated-learning/2/1/resources/server.pkl
03814f81-12c3-4367-9bd5-b187d0cfce95/experiments/central-federated-learning/2/1/times/function.pkl
03814f81-12c3-4367-9bd5-b187d0cfce95/experiments/central-federated-learning/2/1/times/network.pkl
03814f81-12c3-4367-9bd5-b187d0cfce95/experiments/central-federated-learning/2/1/times/task.pkl
03814f81-12c3-4367-9bd5-b187

In [210]:
# Print the name of every listed worker object, one per line.
for path in worker_objects.keys():
    print(path)

03814f81-12c3-4367-9bd5-b187d0cfce95/experiments/central-federated-learning/1/6/resources/server.pkl
03814f81-12c3-4367-9bd5-b187d0cfce95/experiments/central-federated-learning/1/6/times/network.pkl
03814f81-12c3-4367-9bd5-b187d0cfce95/experiments/central-federated-learning/1/6/times/task.pkl
03814f81-12c3-4367-9bd5-b187d0cfce95/experiments/central-federated-learning/2/1/global-model.pkl
03814f81-12c3-4367-9bd5-b187d0cfce95/experiments/central-federated-learning/2/1/local-model.pkl
03814f81-12c3-4367-9bd5-b187d0cfce95/experiments/central-federated-learning/2/1/metrics.pkl
03814f81-12c3-4367-9bd5-b187d0cfce95/experiments/central-federated-learning/2/1/resources/server.pkl
03814f81-12c3-4367-9bd5-b187d0cfce95/experiments/central-federated-learning/2/1/times/function.pkl
03814f81-12c3-4367-9bd5-b187d0cfce95/experiments/central-federated-learning/2/1/times/network.pkl
03814f81-12c3-4367-9bd5-b187d0cfce95/experiments/central-federated-learning/2/1/times/task.pkl
03814f81-12c3-4367-9bd5-b187