# Homo LR Demo

In this demo, we use the [homo_logistic_regression](https://github.com/FederatedAI/FATE/tree/master/examples/federatedml-1.x-examples/homo_logistic_regression) example. It is:
* A homogeneous federated machine learning example, in which both parties share the same attributes but hold different samples;
* Based on breast cancer data originally from Kaggle: https://www.kaggle.com/uciml/breast-cancer-wisconsin-data
* For ease of demonstration, both parties use the same FATE cluster: 10000. The underlying mechanism is unchanged: each side still treats the collaborating party as a remote party reached over the network.

Step 0. Prepare the library to manage federated machine learning.

In [None]:
import json
import time
import os
import requests

from fml_manager import *

# Shared manager instance; every later cell (data upload, job submission,
# status polling, model output, prediction) goes through this object.
manager = FMLManager()

## For more details about the FMLManager, please refer to this [document](https://kubefate.readthedocs.io/README.html)

Step 1. Upload the guest, host and test data. Because we use the same cluster for this demo, we load all data in the same notebook. If another party acted as host, the host data would have to be loaded in that party's notebook.

In [None]:
# Upload the guest, host and test datasets. Each upload response carries a
# 'jobId', which is wrapped in a QueryCondition so the job can be polled below.
response = manager.load_data(url='./data/breast_homo_guest.csv', namespace='homo_breast_guest', table_name='homo_breast_guest', work_mode=1, head=1, partition=10)
output = json.loads(response.content)
print(output)
guest_job_id = output['jobId']
guest_query_condition = QueryCondition(job_id=guest_job_id)

response = manager.load_data(url='./data/breast_homo_host.csv', namespace='homo_breast_host', table_name='homo_breast_host', work_mode=1, head=1, partition=10)
output = json.loads(response.content)
host_job_id = output['jobId']
host_query_condition = QueryCondition(job_id=host_job_id)

response = manager.load_data(url='./data/breast_homo_test.csv', namespace='homo_breast_test', table_name='homo_breast_test', work_mode=1, head=1, partition=10)
output = json.loads(response.content)
test_job_id = output['jobId']
test_query_condition = QueryCondition(job_id=test_job_id)


# Check all three upload jobs. The third call previously re-queried the host
# job, so the status of the test-data upload was never checked.
manager.query_job_status(guest_query_condition)
manager.query_job_status(host_query_condition)
manager.query_job_status(test_query_condition)


Step 2. Create the steps DSL and configuration of each step for training.

In [None]:
# DSL: three components wired by name -- dataio_0 feeds homo_lr_0, whose
# training output feeds evaluation_0.
data_io = (
    ComponentBuilder(name='dataio_0', module='DataIO')
    .add_input_data('args.train_data')
    .add_output_data('train')
    .add_output_model('dataio')
    .build()
)

homo_lr = (
    ComponentBuilder(name='homo_lr_0', module='HomoLR')
    .add_input_train_data('dataio_0.train')
    .add_output_data('train')
    .add_output_model('homolr')
    .build()
)

evaluation = (
    ComponentBuilder(name='evaluation_0', module='Evaluation', need_deploy=False)
    .add_input_data('homo_lr_0.train')
    .add_output_data('evaluate')
    .build()
)

# Components are passed to the pipeline in execution order.
pipeline = Pipeline(data_io, homo_lr, evaluation)

# Configuration: the guest party (id 10000) initiates the job.
initiator = Initiator(role='guest', party_id=10000)

job_parameters = JobParametersBuilder().with_work_mode(1).build()

# Guest, host and arbiter all use party 10000 in this single-cluster demo.
role = (
    RoleBuilder()
    .add_guest(party_id=10000)
    .add_host(party_id=10000)
    .add_arbiter(party_id=10000)
    .build()
)

# Host-side setting for the evaluation_0 component.
eval_config = {'need_run': [False]}

role_parameters = (
    RoleParametersBuilder()
    .add_guest_train_data(namespace='homo_breast_guest', name='homo_breast_guest')
    .add_host_train_data(namespace='homo_breast_host', name='homo_breast_host')
    .add_host_module_config(module='evaluation_0', config=eval_config)
    .build()
)

# Parameters applied to the homo_lr_0 component.
homo_lr_params = {
    'penalty': 'L2',
    'optimizer': 'sgd',
    'eps': 1e-5,
    'alpha': 0.01,
    'max_iter': 10,
    'converge_func': 'diff',
    'batch_size': 500,
    'learning_rate': 0.15,
    'decay': 1,
    'decay_sqrt': True,
    'init_param': {'init_method': 'zeros'},
    'encrypt_param': {'method': 'Paillier'},
    'cv_param': {
        'n_splits': 4,
        'shuffle': True,
        'random_seed': 33,
        'need_cv': False,
    },
}

# Parameters applied to the dataio_0 component.
dataio_config = {
    'with_label': True,
    'label_name': 'y',
    'label_type': 'int',
    'output_format': 'dense',
}

algorithm_parameters = (
    AlgorithmParametersBuilder()
    .add_module_config(module='homo_lr_0', config=homo_lr_params)
    .add_module_config(module='dataio_0', config=dataio_config)
    .build()
)

# Bundle every section into the job configuration submitted with the pipeline.
config = Config(
    initiator,
    job_parameters,
    role,
    role_parameters,
    algorithm_parameters,
)

Step 3. Submit the training job to the GUEST cluster. It will notify and bring up the HOST cluster, and the two will train together.

In [None]:
# Submit the training job and display the raw submission response.
response = manager.submit_job(pipeline.to_dict(), config.to_dict())
manager.prettify(response, verbose=True)

# Decode the submission response once; it holds both the job id and the
# model identifiers read below.
output = json.loads(response.content)
job_id = output['jobId']

# Poll the training job's status, passing max_tries=20.
query_condition = QueryCondition(job_id=job_id)
manager.query_job_status(query_condition, max_tries=20)

# Model identifiers used by the model-output and prediction cells.
model_info = output['data']['model_info']
model_id, model_version = model_info['model_id'], model_info['model_version']

In [None]:
# Request the output of the homo_lr_0 model component for the guest party,
# identified by the model id/version obtained from the training job.
response = manager.model_output(
    role='guest',
    party_id='10000',
    model_id=model_id,
    model_version=model_version,
    model_component='homo_lr_0.homolr:HomoLogisticRegression',
)

We can also try the offline prediction feature. Prediction also requires both parties to participate.

In [None]:
# Offline prediction settings: the guest party (10000) initiates, and both
# guest and host read the uploaded 'homo_breast_test' table.
is_vertical = False
initiator_party_role, initiator_party_id = 'guest', '10000'
work_mode = 1
federated_roles = dict(guest=[10000], host=[10000], arbiter=[10000])

guest_data_name = 'homo_breast_test'
guest_data_namespace = 'homo_breast_test'
host_data_name = 'homo_breast_test'
host_data_namespace = 'homo_breast_test'

# Arguments are passed positionally, in the same order as the settings above.
response = manager.offline_predict_on_dataset(
    is_vertical,
    initiator_party_role,
    initiator_party_id,
    work_mode,
    model_id,
    model_version,
    federated_roles,
    guest_data_name,
    guest_data_namespace,
    host_data_name,
    host_data_namespace,
)
print(response.text)

## Click [here](/fateboard-10000/) to view jobs in FATE Board