In [4]:
import pandas as pd
import pickle
from pathlib import Path
import yaml
from collections import OrderedDict


# Root of the simulation project; the inputs below are resolved against it so
# the location only has to be changed in one place.
PROJECT_DIR = Path('/home/wonseok/PPFL/4_ppfl_simulation/')
DATA_DIR = PROJECT_DIR.joinpath('data')

# ICU cohorts and their client aliases, matched by position:
# hicu -> client_1, micu -> client_2, sicu -> client_3
icus = ['hicu', 'micu', 'sicu']
clients = ['client_1', 'client_2', 'client_3']

# Feature book: indexed elsewhere in this notebook by the keys
# 'common_features' and '<icu>_specific'.
# NOTE(review): FullLoader accepts more than plain data types. If the feature
# book only holds plain lists/mappings, yaml.safe_load is the stricter choice
# -- confirm against the file before switching.
with open(PROJECT_DIR.joinpath('feature_book.yaml')) as f:
    feature_book = yaml.load(f, Loader=yaml.FullLoader)

In [7]:
def read_feature(key):
    """Return the entry stored under ``key`` in the module-level ``feature_book``.

    Raises whatever ``feature_book[key]`` raises when the key is absent.
    """
    entry = feature_book[key]
    return entry

def concatXandY(X, Y):
    """Join ``X`` and ``Y`` side by side (``X`` columns first, then ``Y``).

    Thin wrapper around ``pd.concat`` along the column axis with its default
    settings, so rows are matched by index label.
    """
    side_by_side = (X, Y)
    return pd.concat(objs=side_by_side, axis='columns')

def load_data(icu, base_dir=None):
    """Load the pickled dataset of one ICU.

    Parameters
    ----------
    icu : str
        Name of the ICU sub-directory that holds ``dataset.pkl``.
    base_dir : str or Path, optional
        Directory containing the per-ICU folders. Defaults to the notebook's
        ``PROJECT_DIR`` so existing ``load_data(icu)`` calls read the same
        file as before.

    Returns
    -------
    object
        Whatever object was pickled in ``<base_dir>/<icu>/dataset.pkl``.
    """
    root = PROJECT_DIR if base_dir is None else base_dir
    dataset_path = Path(root) / f'{icu}' / 'dataset.pkl'
    # NOTE: unpickling can execute arbitrary code -- only point this at
    # dataset files that were produced by a trusted pipeline.
    with open(dataset_path, 'rb') as f:
        return pickle.load(f)

def read_data(dataset, train_type, features):
    """Build one table holding the selected features and the labels of a split.

    ``dataset`` is indexed with ``<train_type>_X`` (then narrowed to
    ``features``) and with ``<train_type>_Y``; the two pieces are combined by
    ``concatXandY``.
    """
    x_key, y_key = (train_type + suffix for suffix in ('_X', '_Y'))
    selected_X = dataset[x_key][features]
    return concatXandY(selected_X, dataset[y_key])


In [23]:
def create_icu(feature_type, train_type, output_dir=None):
    '''
    Assemble one split of every ICU's data and pickle it as an OrderedDict.

    For each ICU in ``icus`` the dataset is loaded with ``load_data``, the
    requested columns plus labels are extracted with ``read_data``, and the
    result is stored under the ICU name (keys follow the order of ``icus``).
    The mapping is written to
    ``<output_dir>/icu_client_<tag>_<train_type>.pkl`` where ``<tag>`` is
    ``feature_type``, except that 'vertical' is saved under the tag 'specific'.

    feature_type : 'common', 'vertical' or 'full'
        'common'   -> feature_book['common_features'] for every ICU
        'vertical' -> feature_book['<icu>_specific'] for each ICU
        'full'     -> common features followed by the ICU-specific ones
    train_type : split prefix used to index the dataset; the notebook uses
        'train', 'valid' and 'test'
    output_dir : optional directory for the pickle; defaults to the project's
        Personalized-Progressive-Federated-Learning/data folder

    Raises ValueError when ``feature_type`` is not one of the three names.
    '''
    # Reject unknown names up front: they used to fall through to the 'full'
    # branch, so e.g. 'specific' silently wrote full-feature data over the
    # file produced by 'vertical'.
    if feature_type not in ('common', 'vertical', 'full'):
        raise ValueError(
            f"feature_type must be 'common', 'vertical' or 'full', got {feature_type!r}"
        )

    if output_dir is None:
        data_path = Path('/home/wonseok/PPFL/4_ppfl_simulation/Personalized-Progressive-Federated-Learning/data')
    else:
        data_path = Path(output_dir)

    # Column lists are keyed by ICU name rather than by list position, so they
    # cannot get out of step with the order of ``icus``.
    if feature_type == 'common':
        features_by_icu = {icu: feature_book['common_features'] for icu in icus}
    elif feature_type == 'vertical':
        features_by_icu = {icu: feature_book[f'{icu}_specific'] for icu in icus}
    else:
        features_by_icu = {
            icu: feature_book['common_features'] + feature_book[f'{icu}_specific']
            for icu in icus
        }

    # 'vertical' data is stored under the 'specific' tag in the file name.
    file_tag = 'specific' if feature_type == 'vertical' else feature_type

    client_featureType_type = OrderedDict()
    for icu in icus:
        dataset = load_data(icu)
        client_featureType_type[icu] = read_data(
            dataset=dataset, train_type=train_type, features=features_by_icu[icu]
        )

    # save OrderedDict
    with open(data_path.joinpath(f'icu_client_{file_tag}_{train_type}.pkl'), 'wb') as f:
        pickle.dump(client_featureType_type, f)
    

In [24]:
from itertools import product

# Splits to export for every ICU.
train_types = ['train', 'valid', 'test']
# Feature subsets accepted by create_icu.
feature_types = ['common', 'vertical', 'full']

# Every (train_type, feature_type) pair, split-major: all feature subsets for
# the first split come before any pair of the next split.
all_types = [*product(train_types, feature_types)]

In [27]:
# Write one pickle per (split, feature subset) combination listed in all_types.
for train_type, feature_type in all_types:
    create_icu(
        train_type=train_type,
        feature_type=feature_type,
    )