

In [1]:
import requests
import json
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from minio import Minio
import io
import pickle



In [12]:
minio_client = Minio(
    endpoint = "127.0.0.1:9000", 
    access_key = 'minio', 
    secret_key = 'minio123',
    secure = False
)

In [15]:
experiment = {
    'name': 'federated-learning-test',
    'tags': {}
}

In [16]:
parameters = {
    'model':{
        'seed': 42,
        'used-columns': [
            'amount',
            'type_CASH_IN',
            'type_CASH_OUT',
            'type_DEBIT',
            'type_PAYMENT',
            'type_TRANSFER',
            'isFraud'
        ],
        'input-size': 6,
        'target-column': 'isFraud',
        'scaled-columns': [
            'amount'
        ],
        'learning-rate': 0.05,
        'sample-rate': 0.10,
        'optimizer':'SGD',
        'epochs': 10
    },
    'central':{
        'sample-pool': 50000,
        'data-augmentation': {
            'active': True,
            'sample-pool': 100000,
            '1-0-ratio': 0.4
        },
        'eval-ratio': 0.5,
        'train-ratio': 0.8,
        'min-update-amount': 1,
        'max-cycles':3,
        'min-metric-success': 8,
        'metric-thresholds': {
            'true-positives': 50,
            'false-positives': 100,
            'true-negatives': 1000, 
            'false-negatives': 100,
            'recall': 0.40,
            'selectivity': 0.99,
            'precision': 0.80,
            'miss-rate': 0.05,
            'fall-out': 0.05,
            'balanced-accuracy': 0.85,
            'accuracy': 0.99
        },
        'metric-conditions': {
            'true-positives': '>=',
            'false-positives': '<=',
            'true-negatives': '>=', 
            'false-negatives': '<=',
            'recall': '>=',
            'selectivity': '>=',
            'precision': '>=',
            'miss-rate': '<=',
            'fall-out': '<=',
            'balanced-accuracy': '>=',
            'accuracy': '>='
        }
    },
    'worker':{
        'data-augemntation': True,
        'sample-pool': 50000,
        'data-augmentation': {
            'active': True,
            'sample-pool': 100000,
            '1-0-ratio': 0.4
        },
        'eval-ratio': 0.5,
        'train-ratio': 0.8
    }
}

In [5]:
formated_data_df = pd.read_csv('../data/Formated_Fraud_Detection_Data.csv')

In [16]:
formated_data_df

Unnamed: 0,step,amount,nameOrig,nameDest,type_CASH_IN,type_CASH_OUT,type_DEBIT,type_PAYMENT,type_TRANSFER,isFraud,isFlaggedFraud
0,1,9840,1,7233461,0,0,0,1,0,0,0
1,1,1864,2,7735206,0,0,0,1,0,0,0
2,1,181,3,8598945,0,0,0,0,1,1,0
3,1,181,4,7880837,0,1,0,0,0,1,0
4,1,11668,5,7670940,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
6362615,743,339682,6353303,6895525,0,1,0,0,0,1,0
6362616,743,6311409,6353304,7179249,0,0,0,0,1,1,0
6362617,743,6311409,6353305,7452283,0,1,0,0,0,1,0
6362618,743,850003,6353306,7284324,0,0,0,0,1,1,0


In [17]:
data = formated_data_df.iloc[:100000].values.tolist()

In [18]:
columns = formated_data_df.columns.tolist()

In [19]:
context = {
    'experiment': experiment,
    'parameters': parameters,
    'data': data,
    'columns': columns
}

payload = json.dumps(context)

In [20]:
response = requests.post(
    url = 'http://127.0.0.1:7600/start',
    json = payload
)

print(response.status_code)

200


In [40]:
response = requests.get(
    url = 'http://127.0.0.1:7600/demo',
    json = payload
)

print(response.status_code)

200


In [37]:
import os
print(os.uname().release)
print(os.uname().sysname)
print(os.uname().nodename)
print(os.uname().machine)

6.5.0-26-generic
Linux
lx9-500-11169.ad.helsinki.fi
x86_64


In [10]:
def create_bucket(
    minio_client: any,
    bucket_name: str
) -> bool:
    MINIO_CLIENT = minio_client 
    try:
        MINIO_CLIENT.make_bucket(
            bucket_name = bucket_name
        )
        return True
    except Exception as e:
        print(e)
        return False
    
def check_bucket(
    minio_client: any,
    bucket_name:str
) -> bool:
    MINIO_CLIENT = minio_client
    try:
        status = MINIO_CLIENT.bucket_exists(bucket_name = bucket_name)
        return status
    except Exception as e:
        print(e)
        return False 
       
def delete_bucket(
    minio_client: any,
    bucket_name:str
) -> bool:
    MINIO_CLIENT = minio_client
    try:
        MINIO_CLIENT.remove_bucket(
            bucket_name = bucket_name
        )
        return True
    except Exception as e:
        print(e)
        return False
# Works
def create_object(
    minio_client: any,
    bucket_name: str, 
    object_path: str, 
    data: any,
    metadata: dict
) -> bool: 
    # Be aware that MinIO objects have a size limit of 1GB, 
    # which might result to large header error
    MINIO_CLIENT = minio_client
    
    pickled_data = pickle.dumps(data)
    length = len(pickled_data)
    buffer = io.BytesIO()
    buffer.write(pickled_data)
    buffer.seek(0)
    try:
        MINIO_CLIENT.put_object(
            bucket_name = bucket_name,
            object_name = object_path + '.pkl',
            data = buffer,
            length = length,
            metadata = metadata
        )
        return True
    except Exception as e:
        print(e)
        return False
# Works
def check_object(
    minio_client: any,
    bucket_name: str, 
    object_path: str
) -> bool: 
    MINIO_CLIENT = minio_client
    try:
        object_info = MINIO_CLIENT.stat_object(
            bucket_name = bucket_name,
            object_name = object_path + '.pkl'
        )      
        return True
    except Exception as e:
        return False 
# Works
def delete_object(
    minio_client: any,
    bucket_name: str, 
    object_path: str
) -> bool: 
    MINIO_CLIENT = minio_client
    try:
        MINIO_CLIENT.remove_object(
            bucket_name = bucket_name, 
            object_name = object_path + '.pkl'
        )
        return True
    except Exception as e:
        print(e)
        return False
# Works
def update_object(
    minio_client: any,
    bucket_name: str, 
    object_path: str, 
    data: any,
    metadata: dict
) -> bool:  
    remove = delete_object(minio_client,bucket_name, object_path)
    if remove:
        create = create_object(minio_client, bucket_name, object_path, data, metadata)
        if create:
            return True
    return False
# works
def create_or_update_object(
    minio_client: any,
    bucket_name: str, 
    object_path: str, 
    data: any, 
    metadata: dict
) -> any:
    bucket_status = check_bucket(minio_client,bucket_name)
    if not bucket_status:
        creation_status = create_bucket(minio_client,bucket_name)
        if not creation_status:
            return None
    object_status = check_object(minio_client,bucket_name, object_path)
    if not object_status:
        return create_object(minio_client,bucket_name, object_path, data, metadata)
    else:
        return update_object(minio_client,bucket_name, object_path, data, metadata)

def get_object_data_and_metadata(
    minio_client: any,
    bucket_name: str, 
    object_path: str
) -> dict:
    MINIO_CLIENT = minio_client
    
    try:
        given_object_info = MINIO_CLIENT.stat_object(
            bucket_name = bucket_name, 
            object_name = object_path + '.pkl'
        )
        # There seems to be some kind of a limit
        # with the amount of request a client 
        # can make, which is why this variable
        # is set here to give more time got the client
        # to complete the request
        given_metadata = given_object_info.metadata
        
        given_object_data = MINIO_CLIENT.get_object(
            bucket_name = bucket_name, 
            object_name = object_path + '.pkl'
        )
        given_pickled_data = given_object_data.data
        
        try:
            given_data = pickle.loads(given_pickled_data)
            relevant_metadata = {} 
            for key, value in given_metadata.items():
                if 'x-amz-meta' in key:
                    key_name = key[11:]
                    relevant_metadata[key_name] = value
            return {'data': given_data, 'metadata': relevant_metadata}
        except Exception as e:
            print('MinIO object pickle decoding error')
            print(e)
            return None 
    except Exception as e:
        print('MinIO object fetching error')
        print(e)
        return None
# Works
def get_object_list(
    minio_client: any,
    bucket_name: str,
    path_prefix: str
) -> dict:
    MINIO_CLIENT = minio_client
    try:
        objects = MINIO_CLIENT.list_objects(bucket_name = bucket_name, prefix = path_prefix, recursive = True)
        object_dict = {}
        for obj in objects:
            object_name = obj.object_name
            object_info = MINIO_CLIENT.stat_object(
                bucket_name = bucket_name,
                object_name = object_name
            )
            given_metadata = {} 
            for key, value in object_info.metadata.items():
                if 'X-Amz-Meta' in key:
                    key_name = key[11:]
                    given_metadata[key_name] = value
            object_dict[obj.object_name] = given_metadata
        return object_dict
    except Exception as e:
        return None  

In [5]:
objects = get_object_list(
    minio_client = minio_client,
    bucket_name = 'workers', 
    path_prefix = 'experiments/1/1'
)
objects

{'experiments/1/1/global-model.pkl': {},
 'experiments/1/1/metrics.pkl': {},
 'experiments/1/1/resources/function.pkl': {},
 'experiments/1/1/resources/training.pkl': {},
 'experiments/1/1/workers.pkl': {}}

In [22]:
minio_object = get_object_data_and_metadata(
    minio_client = minio_client,
    bucket_name = 'workers', 
    object_path = 'c496210f-e253-4c1c-b3d7-765092ef5d89/experiments/status'
)
minio_object

{'data': {'worker-id': 'c496210f-e253-4c1c-b3d7-765092ef5d89',
  'network-id': '1',
  'central-address': '127.0.0.1',
  'central-port': '7600',
  'worker-address': '127.0.0.1',
  'worker-port': '7500',
  'status': 'waiting',
  'experiment-name': 'central-federated-learning-test',
  'experiment': 2,
  'experiment-id': '2',
  'stored': False,
  'preprocessed': False,
  'trained': False,
  'updated': False,
  'complete': False,
  'train-amount': 8000,
  'test-amount': 2000,
  'eval-amount': 10000,
  'cycle': 1},
 'metadata': {}}

In [21]:
minio_object = get_object_data_and_metadata(
    minio_client = minio_client,
    bucket_name = 'central', 
    object_path = 'experiments/status'
)
minio_object

{'data': {'experiment-name': 'central-federated-learning-test',
  'experiment': 2,
  'experiment-id': '1',
  'start': True,
  'data-split': False,
  'preprocessed': False,
  'trained': False,
  'worker-split': False,
  'sent': False,
  'updated': False,
  'evaluated': False,
  'complete': False,
  'train-amount': 20000,
  'test-amount': 5000,
  'eval-amount': 25000,
  'collective-amount': 8000,
  'worker-updates': 1,
  'cycle': 1,
  'run-id': '6e7cba3ae54a475a893960ee67145589'},
 'metadata': {}}

In [52]:
given_tensor = minio_object['data']
print(len(given_tensor))

20000


In [58]:
from torch.utils.data import DataLoader

data_loader = DataLoader(
    dataset = given_tensor, 
    batch_size = 64
)

for batch in data_loader:
    print(batch[0].shape)

torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])
torch.Size([64, 6])


In [47]:
given_df = pd.DataFrame(minio_object['data'])

In [48]:
given_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,57246,36138,8494015,0,1,0,0,0,0,0
1,984995,3351,7923184,0,0,0,0,1,0,0
2,2497,28182,7494243,0,0,0,1,0,0,0
3,89631,33474,8812962,0,1,0,0,0,1,0
4,934093,30273,6856557,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...
19995,5021,41144,8853985,0,0,0,1,0,0,0
19996,88635,43435,6842613,1,0,0,0,0,0,0
19997,16513,44690,6935376,0,1,0,0,0,0,0
19998,528486,18185,8706585,0,0,0,0,1,0,0


In [78]:
modified_data = minio_object['data']
modified_data['stored'] = True
print(modified_data)

{'worker-id': 'd6203af9-3c9d-401b-bd1b-ed745cf6e234', 'network-id': '1', 'central-address': '127.0.0.1', 'central-port': '7600', 'worker-address': '127.0.0.1', 'worker-port': '7500', 'status': 'waiting', 'experiment': 1, 'experiment-id': '8', 'stored': True, 'preprocessed': True, 'trained': True, 'updated': False, 'complete': False, 'train-amount': 8000, 'test-amount': 2000, 'eval-amount': 10000, 'cycle': 1}


In [79]:
create_or_update_object(   
    minio_client = minio_client,
    bucket_name = 'workers',
    object_path = 'd6203af9-3c9d-401b-bd1b-ed745cf6e234/experiments/status',
    data = minio_object['data'],
    metadata = {}
)

True

In [12]:
create

{'sample-pool': 10000,
 'data-augmentation': {'active': True, 'sample-pool': 50000, '1-0-ratio': 0.2},
 'eval-ratio': 0.5,
 'train-ratio': 0.8,
 'min-update-amount': 2,
 'max-cycles': 3,
 'min-metric-success': 8,
 'metric-thresholds': {'true-positives': 50,
  'false-positives': 100,
  'true-negatives': 1000,
  'false-negatives': 100,
  'recall': 0.4,
  'selectivity': 0.99,
  'precision': 0.8,
  'miss-rate': 0.05,
  'fall-out': 0.05,
  'balanced-accuracy': 0.85,
  'accuracy': 0.99},
 'metric-conditions': {'true-positives': '>=',
  'false-positives': '<=',
  'true-negatives': '>=',
  'false-negatives': '<=',
  'recall': '>=',
  'selectivity': '>=',
  'precision': '>=',
  'miss-rate': '<=',
  'fall-out': '<=',
  'balanced-accuracy': '>=',
  'accuracy': '>='}}

In [56]:
minio_object['metadata']

{}

In [20]:
import mlflow
from mlflow import MlflowClient
mlflow_client = MlflowClient(
    tracking_uri = "http://127.0.0.1:5000"
)

In [21]:
# Refactored
def start_experiment(
    mlflow_client: any,
    experiment_name: str,
    experiment_tags: dict
) -> int:
    MLFLOW_CLIENT = mlflow_client
    try:
        experiment_id = MLFLOW_CLIENT.create_experiment(
            name = experiment_name,
            tags = experiment_tags,
            artifact_location="s3://mlflow/mlruns"
        )
        return experiment_id
    except Exception as e:
        print(e)
        return None
# Refactored
def start_run(
    mlflow_client: any,
    experiment_id: str,
    tags: dict,
    name: str
) -> dict:
    MLFLOW_CLIENT = mlflow_client 
    try:
        run_object = MLFLOW_CLIENT.create_run(
            experiment_id = experiment_id,
            tags = tags,
            run_name = name
        )
        run_dict = {
            'e_id': run_object.info.experiment_id,
            'id': run_object.info.run_id,
            'name': run_object.info.run_name,
            'stage': run_object.info.lifecycle_stage,
            'status': run_object.info.status
        }
        return run_dict
    except Exception as e:
        print(e)
        return None
# Refactored
def update_run(
    mlflow_client: any,
    run_id: str,
    parameters: dict,
    metrics: dict,
    artifacts: list
) -> bool:
    MLFLOW_CLIENT = mlflow_client 
    try:
        print('Params')
        for param_key, param_value in parameters.items():
            MLFLOW_CLIENT.log_param(
                run_id = run_id,
                key = param_key,
                value = param_value
            )
        print('Met')
        for metric_key,metric_value in metrics.items():
            MLFLOW_CLIENT.log_metric(
                run_id = run_id,
                key = metric_key,
                value = metric_value
            )
        print('art')
        for path in artifacts:
            MLFLOW_CLIENT.log_artifact(
                run_id = run_id,
                local_path = path
            )
        return True
    except Exception as e:
        print(e)
        return False
# Recatored
def end_run(
    mlflow_client: any,
    run_id: str,
    status: str
) -> bool:
    # run status are FAILED = 4, FINISHED = 3, KILLED = 5, RUNNING = 1 and SCHEDULED = 2
    MLFLOW_CLIENT = mlflow_client
    try:
        MLFLOW_CLIENT.set_terminated(
            run_id = run_id,
            status = status
        )
        return True
    except Exception as e:
        print(e)
        return False
def check_experiment(
    mlflow_client: any,
    experiment_name: str
) -> dict:
    MLFLOW_CLIENT = mlflow_client
    try:
        experiment_object = MLFLOW_CLIENT.get_experiment_by_name(
            name = experiment_name
        )
        return experiment_object
    except Exception as e:
        logger.error('MLflow experiment checking error')
        logger.error(e)
        return None

In [28]:
test_object = check_experiment(
    mlflow_client = mlflow_client,
    experiment_name = 'central-federated-learning-test-1'
)
test_object.experiment_id

'1'

In [29]:
experiment_id = start_experiment(
    mlflow_client = mlflow_client,
    experiment_name = 'test-7',
    experiment_tags = {}
)

In [30]:
run_data = start_run(
    mlflow_client = mlflow_client,
    experiment_id = experiment_id,
    tags = {},
    name = 'test'
)

In [31]:
from collections import OrderedDict
from torch import tensor

mlflow_parameters = {
    'test-param': 2
}
mlflow_metrics = {
    'test-metric': 2
}
with open("test-artifact.txt", "w") as f:
    f.write("Hello world!")

torch
    
mlflow_artifacts = [
    'test-artifact.txt'
    'initial-model.pth'
]

mlflow_model = [
    OrderedDict([('linear.weight', tensor([[ 0.3506,  0.1832, -0.1272,  0.3614, -0.3695,  0.1168]])), ('linear.bias', tensor([-0.6452]))]), 
    'initial-model'
]

In [32]:
import os
os.environ['MLFLOW_S3_ENDPOINT_URL'] = "http://127.0.0.1:9000"
os.environ['AWS_ACCESS_KEY_ID'] = 'minio'
os.environ['AWS_SECRET_ACCESS_KEY'] = 'minio123'
#mlflow.set_env('AWS_ACCESS_KEY_ID', 'minio')
#mlflow.set_env('AWS_SECRET_ACCESS_KEY', 'minio123')

update_run(
    mlflow_client = mlflow_client,
    run_id = run_data['id'],
    parameters = mlflow_parameters,
    metrics = mlflow_metrics,
    artifacts = mlflow_artifacts,
    model = mlflow_model
)

Params
Met
art
Model
'MlflowClient' object has no attribute 'pytorch'


False

In [22]:
import os
import mlflow
import mlflow.sklearn

In [23]:
mlflow.set_tracking_uri('http://127.0.0.1:5000')

In [24]:
os.environ['MLFLOW_S3_ENDPOINT_URL'] = "http://127.0.0.1:9000"
os.environ['AWS_ACCESS_KEY_ID'] = 'minio'
os.environ['AWS_SECRET_ACCESS_KEY'] = 'minio123'

In [25]:
experiment_name = "demo_experiment"
try:
    mlflow.create_experiment(experiment_name, artifact_location="s3://mlflow/mlruns")
except MlflowException as e:
    print(e)
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location='s3://mlflow/mlruns', creation_time=1711293222724, experiment_id='7', last_update_time=1711293222724, lifecycle_stage='active', name='demo_experiment', tags={}>

In [26]:
mlflow.start_run()
# Log a parameter (key-value pair)
mlflow.log_param("param1", 5)
# Log a metric; metrics can be updated throughout the run
mlflow.log_metric("foo", 1)
mlflow.log_metric("foo", 2)
mlflow.log_metric("foo", 3)
# Log an artifact (output file)
with open("output.txt", "w") as f:
    f.write("Hello world!")
mlflow.log_artifact("output.txt")
mlflow.end_run()