# vMLP Demo

In this demo, we use the [homo_logistic_regression](https://github.com/FederatedAI/FATE/tree/master/examples/federatedml-1.x-examples/homo_logistic_regression) example. It is:
* A homogeneous federated machine learning example, in which both parties share the same attributes but hold different samples;
* Uses breast cancer data originally from Kaggle: https://www.kaggle.com/uciml/breast-cancer-wisconsin-data
* For ease of demonstration, both parties use the same FATE cluster: 10000. The underlying mechanism is unchanged, though: each side treats its collaborating party as another party reached over the network.

Step 0. Prepare the library used to manage federated machine learning.

In [1]:
import fml_manager
import json, time, requests
import os

# Shared FMLManager instance; every later cell calls its methods
# (load_data, query_job, submit_job, model_output, ...).
manager = fml_manager.FMLManager()

Step 1. Upload the guest, host and test data. Because we use the same cluster for this demo, we load all the data in the same NOTEBOOK. If another party were used as the host, the host data should be loaded in the NOTEBOOK of that party.

In [2]:
# Upload the guest, host and test datasets. Each load_data call returns a
# response whose JSON body carries a "jobId"; that id is kept in a query
# condition so the upload job can be polled below.
# NOTE(review): the trailing positional arguments (1, 1, 10, 1.3) are passed
# through unchanged; confirm their meaning against FMLManager.load_data.
response = manager.load_data("examples/data/breast_homo_guest.csv", "homo_breast_guest", "homo_breast_guest", 1, 1, 10, 1.3)
output = json.loads(response.content)
print(output)
guest_job_id = output["jobId"]
guest_query_condition = {
    "job_id":guest_job_id
}
response = manager.load_data("examples/data/breast_homo_host.csv", "homo_breast_host", "homo_breast_host", 1, 1, 10, 1.3)
output = json.loads(response.content)
host_job_id = output["jobId"]
host_query_condition = {
    "job_id":host_job_id
}
response = manager.load_data("examples/data/breast_homo_test.csv", "homo_breast_test", "homo_breast_test", 1, 1, 10, 1.3)
output = json.loads(response.content)
test_job_id = output["jobId"]
test_query_condition = {
    "job_id":test_job_id
}

# Poll once per second, at most 500 times, until all three upload jobs have
# succeeded; abort as soon as any of them reports failure.
for i in range(500):
    time.sleep(1)
    guest_status = manager.query_job(guest_query_condition).json()["data"][0]["f_status"]
    host_status = manager.query_job(host_query_condition).json()["data"][0]["f_status"]
    # The test upload must be queried with its own condition; reusing the
    # host condition would only mirror the host job and never check this one.
    test_status = manager.query_job(test_query_condition).json()["data"][0]["f_status"]

    print("{},{},{}".format(guest_status, host_status, test_status))

    if guest_status == "failed" or host_status == "failed" or test_status == "failed":
        print("Failed")
        raise Exception("Failed to run the jobs")
    if guest_status == "success" and host_status == "success" and test_status == "success":
        print("Success")
        break

{'data': {'board_url': 'http://fateboard:8080/index.html#/dashboard?job_id=202005070650375318082&role=local&party_id=0', 'job_dsl_path': '/data/projects/fate/python/jobs/202005070650375318082/job_dsl.json', 'job_runtime_conf_path': '/data/projects/fate/python/jobs/202005070650375318082/job_runtime_conf.json', 'logs_directory': '/data/projects/fate/python/logs/202005070650375318082', 'namespace': 'homo_breast_guest', 'table_name': 'homo_breast_guest'}, 'jobId': '202005070650375318082', 'retcode': 0, 'retmsg': 'success'}
running,running,running
running,running,running
running,running,running
running,running,running
running,running,running
success,running,running
success,running,running
success,success,success
Success


Step 2. Create the DSL that describes the training steps, and the configuration of each step.

In [3]:
# Job DSL as JSON text (parsed with json.loads when the job is submitted).
# It declares three chained components:
#   dataio_0     (module DataIO)     reads "args.train_data"
#   homo_lr_0    (module HomoLR)     trains on "dataio_0.train"
#   evaluation_0 (module Evaluation) evaluates "homo_lr_0.train"
dsl = '''
{
    "components" : {
        "dataio_0": {
            "module": "DataIO",
            "input": {
                "data": {
                    "data": [
                        "args.train_data"
                    ]
                }
            },
            "output": {
                "data": ["train"],
                "model": ["dataio"]
            }
         },
        "homo_lr_0": {
            "module": "HomoLR",
            "input": {
                "data": {
                    "train_data": [
                        "dataio_0.train"
                    ]
                }
            },
            "output": {
                "data": ["train"],
                "model": ["homolr"]
            }
        },
        "evaluation_0": {
            "module": "Evaluation",
            "input": {
                "data": {
                    "data": [
                        "homo_lr_0.train"
                    ]
                }
            },
            "output": {
                "data": ["evaluate"]
            }
        }
    }
}

'''

# Runtime configuration as JSON text (parsed with json.loads when the job is
# submitted).
#   initiator / role:     the guest (party 10000) initiates; guest, host and
#                         arbiter are all mapped to party 10000.
#   role_parameters:      the table (name + namespace) each side trains on;
#                         the host sets need_run to false for evaluation_0.
#   algorithm_parameters: settings for the dataio_0 and homo_lr_0 components
#                         declared in the DSL.
config = '''
{
    "initiator": {
        "role": "guest",
        "party_id": 10000
    },
    "job_parameters": {
        "work_mode": 1
    },
    "role": {
        "guest": [10000],
        "host": [10000],
        "arbiter": [10000]
    },
    "role_parameters": {
        "guest": {
            "args": {
                "data": {
                    "train_data": [{"name": "homo_breast_guest", "namespace": "homo_breast_guest"}]
                }
            }
        },
        "host": {
            "args": {
                "data": {
                    "train_data": [{"name": "homo_breast_host", "namespace": "homo_breast_host"}]
                }
            },
            "evaluation_0": {
                "need_run": [false]
            }
        }
    },
    "algorithm_parameters": {
        "dataio_0":{
            "with_label": true,
            "label_name": "y",
            "label_type": "int",
            "output_format": "dense"
        },
        "homo_lr_0": {
            "penalty": "L2",
            "optimizer": "sgd",
            "eps": 1e-5,
            "alpha": 0.01,
            "max_iter": 10,
            "converge_func": "diff",
            "batch_size": 500,
            "learning_rate": 0.15,
            "decay": 1,
            "decay_sqrt": true,
            "init_param": {
                "init_method": "zeros"
            },
            "encrypt_param": {
                "method": "Paillier"
            },
            "cv_param": {
                "n_splits": 4,
                "shuffle": true,
                "random_seed": 33,
                "need_cv": false
            }
        }
    }
}

'''

Step 3. Submit the training job to the GUEST cluster. It will notify and bring up the HOST cluster, and the two will train together.

In [4]:
# Submit the training job built from the DSL and configuration, then show
# the submission response.
response = manager.submit_job(json.loads(dsl), json.loads(config))
manager.prettify(response, True)
stdout = json.loads(response.content)
jobId = stdout["jobId"]
query_condition = {"job_id": jobId}

# Both stay empty unless the job is seen to finish successfully.
model_id, model_version = "", ""

# Poll every 3 seconds, at most 500 times.
for i in range(500):
    time.sleep(3)
    job_detail = manager.query_job(query_condition).json()
    final_status = job_detail["data"][0]["f_status"]
    print(final_status)

    if final_status == "success":
        print("Success")
        # The model identifiers are read from the submission response,
        # not from the polled job detail.
        manager.prettify(response, True)
        output = json.loads(response.content)
        model_info = output["data"]["model_info"]
        model_id, model_version = model_info["model_id"], model_info["model_version"]
        break

    if final_status == "failed":
        print("Failed")
        manager.prettify(job_detail, True)
        response = manager.fetch_job_log(jobId)
        raise Exception("Failed to run the job")

Success!
{
    "data": {
        "board_url": "http://fateboard:8080/index.html#/dashboard?job_id=202005070651022620445&role=guest&party_id=10000",
        "job_dsl_path": "/data/projects/fate/python/jobs/202005070651022620445/job_dsl.json",
        "job_runtime_conf_path": "/data/projects/fate/python/jobs/202005070651022620445/job_runtime_conf.json",
        "logs_directory": "/data/projects/fate/python/logs/202005070651022620445",
        "model_info": {
            "model_id": "arbiter-10000#guest-10000#host-10000#model",
            "model_version": "202005070651022620445"
        }
    },
    "jobId": "202005070651022620445",
    "retcode": 0,
    "retmsg": "success"
}
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
running
runni

Print the existing model versions. The API is: ```print_model_version(self, role, party_id, model_id)```

In [5]:
# model_id is still "" when the training cell did not reach "success";
# skip the lookup in that case.
if model_id != "":
    response = manager.print_model_version("guest","10000", model_id)

Success!
{
    "data": [
        {
            "commitId": "202005070651022620445",
            "log": "[AUTO] save model at 2020-05-07 06:53:06.274555.",
            "name": "202005070651022620445",
            "namespace": "guest#10000#arbiter-10000#guest-10000#host-10000#model",
            "parent": null,
            "repeatCommit": true,
            "tag": null
        }
    ],
    "retcode": 0,
    "retmsg": "success"
}


Output the model. The API is ```model_output(self, role, party_id, model_id, model_version, model_component)```. The ```model_component``` is what you defined in the training conf. The output is base64-encoded and needs to be decoded and parsed.

In [6]:
# Request the trained model as the guest of party 10000. The component string
# names the "homolr" model output of the homo_lr_0 component from the DSL.
# NOTE(review): the ":HomoLogisticRegression" suffix presumably selects the
# model type to export; confirm against FMLManager.model_output.
response = manager.model_output("guest","10000", model_id, model_version, "homo_lr_0.homolr:HomoLogisticRegression")

{'metadata': 'CgJMMhHxaOOItfjkPhl7FK5H4XqEPyIDc2dkMPQDOTMzMzMzM8M/QApKBGRpZmZQAlgB', 'parameters': 'CAoSUP4QBQQ+79o/Y+ScSvKa1j+RTreM/XfUPyk/T2KsNtM/ik0jT2po0j+lMOUXo9zRPwWjqz1MetE/3VWUfhkz0T8KB2etSv7QP5ymdO1A1tA/Ig0KAngwEbJDSoWA/sG/Ig0KAngxEcob/dudLr6/Ig0KAngyEWYZ+hcRpMG/Ig0KAngzEfWv/7Rc+r+/Ig0KAng0EbDD/W9RW7m/Ig0KAng1Eb6OMlppJri/Ig0KAng2EbIEbULVqru/Ig0KAng3ERQQVXbn+MG/Ig0KAng4EW6/Nt4yu7W/Ig0KAng5EQJZbnGtrKW/Ig4KA3gxMBEsGFvBSI3AvyIOCgN4MTERZZM+KKAGur8iDgoDeDEyEdGPglYzgMC/Ig4KA3gxMxHa0QRjolK+vyIOCgN4MTQR9tqVWW+Fr78iDgoDeDE1ETwimmbYbLO/Ig4KA3gyMBG7wwtUoXG0vyIOCgN4MTYRIGmQvHuRur8iDgoDeDIxEdqP/oYJfnY/Ig4KA3gxNxFSVtQreM/AvyIOCgN4MjIRzQmY7no3sr8iDgoDeDIzEcwFGjerVbK/Ig4KA3gxOBHDXhlmJQ2rvyIOCgN4MjQRoPtqyzc7iD8iDgoDeDE5EXOSYAuFHaM/Ig4KA3gyNRGNsnK5wSqBvyIOCgN4MjYRna/Rsrf0Gj8iDgoDeDI3EUcfkMZaT6a/Ig4KA3gyOBHXUPahnWidPyIOCgN4MjkRXdrDc3/ooD8pCAy8iYha4T8yAngwMgJ4MTICeDIyAngzMgJ4NDICeDUyAng2MgJ4NzICeDgyAng5MgN4MTAyA3gxMTIDeDEyMgN4MTMyA3gxNDIDeDE1MgN4MTYyA3gxNzIDeDE4MgN4MTkyA3gyMDIDeDIxMgN4MjIyA3gyMzID

We can also try the offline prediction feature. Prediction also requires both parties to participate.

In [7]:
# Job settings: a homogeneous (non-vertical) job initiated by the guest of
# party 10000, with every role mapped to that same party.
is_vertical = False
work_mode = 1
initiator_party_role = "guest"
initiator_party_id = "10000"
federated_roles = {
    "guest": [10000],
    "host": [10000],
    "arbiter": [10000],
}

# Guest and host both predict on the uploaded test table.
guest_data_name = "homo_breast_test"
guest_data_namespace = "homo_breast_test"
host_data_name = "homo_breast_test"
host_data_namespace = "homo_breast_test"

# Arguments are positional, so their order must stay exactly as listed.
response = manager.offline_predict_on_dataset(
    is_vertical,
    initiator_party_role,
    initiator_party_id,
    work_mode,
    model_id,
    model_version,
    federated_roles,
    guest_data_name,
    guest_data_namespace,
    host_data_name,
    host_data_namespace,
)
print(response.text)

{"data":{"board_url":"http://fateboard:8080/index.html#/dashboard?job_id=202005070653465729886&role=guest&party_id=10000","job_dsl_path":"/data/projects/fate/python/jobs/202005070653465729886/job_dsl.json","job_runtime_conf_path":"/data/projects/fate/python/jobs/202005070653465729886/job_runtime_conf.json","logs_directory":"/data/projects/fate/python/logs/202005070653465729886","model_info":{"model_id":"arbiter-10000#guest-10000#host-10000#model","model_version":"202005070651022620445"}},"jobId":"202005070653465729886","retcode":0,"retmsg":"success"}



The result can be checked in FATE-Board.