In [9]:
# !pipeline init --ip 192.168.56.22 --port 9380

Pipeline configuration succeeded.


In [10]:
import argparse
import os
import subprocess
from typing import List, Optional

import pandas as pd

from pipeline.backend.pipeline import PipeLine
from pipeline.component import DataTransform
from pipeline.component import Evaluation
from pipeline.component import HeteroLR
from pipeline.component import Intersection
from pipeline.component import Reader
from pipeline.interface import Data
from pipeline.utils.tools import load_job_config

# Local working directory handed to main(): downloaded predictions and the
# results file are written underneath it.
DATA_DIR = r"E:\data\WorkPlace\Pycharm\BackDoor_FateAI_Jupyter"

In [11]:
def _run_cli(command: List[str]) -> None:
    """Run a command-line client, echo its stdout, and fail loudly on error.

    Args:
        command: Executable name followed by its arguments, one list item each
            (no shell is involved, so arguments containing spaces are safe).

    Raises:
        RuntimeError: If the command exits with a non-zero status.
    """
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    # Echo the client's output so it still shows up in the notebook cell.
    if completed.stdout:
        print(completed.stdout, end="")
    if completed.returncode != 0:
        raise RuntimeError(
            f"Command {' '.join(command)!r} failed with exit code "
            f"{completed.returncode}: {completed.stderr.strip()}"
        )


def main(data_dir: str, config: str = "./config.yaml", namespace: str = "",
         poisoned_ids: Optional[List[int]] = None,
         fate_ip: str = "192.168.56.22", fate_port: int = 9380):
    """Train a hetero-LR job on the rogue guest table and log backdoor metrics.

    Steps performed:
      1. Train ``hetero_lr_0`` on ``breast_hetero_guest_rogue`` (guest) and
         ``breast_hetero_host`` (host) through reader -> data transform ->
         intersection -> hetero LR.
      2. Download the guest's ``hetero_lr_0`` training output with the FLOW
         CLI into ``data_dir`` and load ``data.csv`` from it.
      3. Compute the backdoor success rate and the poisoning fraction.
      4. Run the deployed model on the clean evaluation tables and read the
         AUC from the ``evaluation_clean`` component summary.
      5. Append ``poisoning_fraction,success_rate,clean_auc`` as one line to
         ``results_new.txt`` inside ``data_dir``.

    Args:
        data_dir: Directory that receives the downloaded predictions and the
            results file.
        config: Path to a job config file, or an already loaded config object.
        namespace: Suffix appended to ``"experiment"`` for the training
            tables only; the evaluation tables always use ``"experiment"``.
        poisoned_ids: Sample ids of the poisoned (backdoor) rows. ``None`` is
            treated as an empty list.
        fate_ip: IP address passed to ``pipeline init`` and ``flow init``.
        fate_port: Port passed to ``pipeline init`` and ``flow init``.

    Raises:
        RuntimeError: If one of the ``pipeline`` / ``flow`` CLI calls fails.
    """
    if poisoned_ids is None:
        poisoned_ids = []

    _run_cli(["pipeline", "init", "--ip", fate_ip, "--port", str(fate_port)])

    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    # Tables used for training
    guest_train_data = {"name": "breast_hetero_guest_rogue", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_hetero_host", "namespace": f"experiment{namespace}"}

    # Tables used for the clean evaluation
    guest_eval_data_clean = {"name": "breast_hetero_guest", "namespace": "experiment"}
    host_eval_data = {"name": "breast_hetero_host", "namespace": "experiment"}

    # ---- Training pipeline -------------------------------------------------
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Reader: each party reads its own training table
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # DataTransform: only the guest table is configured with labels
    data_transform_0 = DataTransform(name="data_transform_0", output_format='dense')
    data_transform_0.get_party_instance(role='guest', party_id=guest).component_param(with_label=True)
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    lr_param = {
        "name": "hetero_lr_0",
        "penalty": "L2",
        "optimizer": "nesterov_momentum_sgd",
        "tol": 0.0001,
        "alpha": 0.01,
        "max_iter": 10,
        "early_stop": "weight_diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "init_param": {
            "init_method": "zeros"
        },
        "sqn_param": {
            "update_interval_L": 3,
            "memory_M": 5,
            "sample_size": 5000,
            "random_seed": None
        },
        "cv_param": {
            "n_splits": 5,
            "shuffle": False,
            "random_seed": 103,
            "need_cv": False
        }
    }
    hetero_lr_0 = HeteroLR(**lr_param)

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data))

    pipeline.compile()
    pipeline.fit()

    # Deploy the trained components so they can be reused for prediction
    pipeline.deploy_component([data_transform_0, intersection_0, hetero_lr_0])

    # ---- Download the training predictions with the FLOW CLI --------------
    _run_cli(["flow", "init", "--ip", fate_ip, "--port", str(fate_port)])

    train_job_id = pipeline.get_train_job_id()
    # Use the guest id from the config rather than a hard-coded party id, and
    # pass arguments as a list so a data_dir containing spaces is handled.
    _run_cli([
        "flow", "component", "output-data",
        "-j", str(train_job_id),
        "-r", "guest",
        "-p", str(guest),
        "-cpn", "hetero_lr_0",
        "--output-path", data_dir,
    ])

    predictions_dir = os.path.join(data_dir, f"job_{train_job_id}_hetero_lr_0_guest_{guest}_output_data")
    df = pd.read_csv(os.path.join(predictions_dir, "data.csv"), index_col=False)

    # ---- Backdoor metrics --------------------------------------------------
    # Success rate: share of poisoned rows predicted as class 1.
    # This is NaN when no row id matches poisoned_ids (e.g. an empty list).
    poisoned_rows = df[df['id'].isin(poisoned_ids)]
    success_rate = (poisoned_rows['predict_result'] == 1).mean()

    # Fraction (0..1, not a percentage) of downloaded rows that are poisoned
    poisoning_percentage = len(poisoned_ids) / df.shape[0]

    # ---- Prediction pipeline on the clean evaluation data -----------------
    predict_pipeline = PipeLine()

    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(role="guest", party_id=guest).component_param(table=guest_eval_data_clean)
    reader_1.get_party_instance(role="host", party_id=host).component_param(table=host_eval_data)

    # Evaluation runs on the guest only; it is switched off for the host
    evaluation_clean = Evaluation(name="evaluation_clean")
    evaluation_clean.get_party_instance(role="guest", party_id=guest).component_param(need_run=True, eval_type="binary")
    evaluation_clean.get_party_instance(role="host", party_id=host).component_param(need_run=False)

    predict_pipeline.add_component(reader_1)
    # Feed the new reader's output into the deployed training pipeline
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.data_transform_0.input.data: reader_1.output.data}))
    predict_pipeline.add_component(evaluation_clean, data=Data(data=pipeline.hetero_lr_0.output.data))

    predict_pipeline.predict()

    # Read the AUC on the clean data from the evaluation summary
    clean_summary = predict_pipeline.get_component("evaluation_clean").get_summary()
    clean_auc = clean_summary['hetero_lr_0']['predict']['auc']

    # Append this run's results as one CSV-style line
    with open(os.path.join(data_dir, "results_new.txt"), "a") as f:
        f.write(f"{poisoning_percentage},{success_rate},{clean_auc}\n")

In [14]:
if __name__ == "__main__":
    # Run the experiment with the default config, writing results under DATA_DIR.
    main(DATA_DIR)

[32m2023-04-11 19:48:24.578[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m83[0m - [1mJob id is 202304111148237557010
[0m
[32m2023-04-11 19:48:24.589[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m98[0m - [1m[80D[1A[KJob is still waiting, time elapse: 0:00:00[0m
[32m2023-04-11 19:48:25.600[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m98[0m - [1m[80D[1A[KJob is still waiting, time elapse: 0:00:01[0m
[0mm2023-04-11 19:48:26.623[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m125[0m - [1m
[32m2023-04-11 19:48:26.624[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component reader_0, time elapse: 0:00:02[0m
[32m2023-04-11 19:48:27.643[0m | [1mINFO    

[32m2023-04-11 19:49:00.588[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component hetero_lr_0, time elapse: 0:00:36[0m
[32m2023-04-11 19:49:01.620[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component hetero_lr_0, time elapse: 0:00:37[0m
[32m2023-04-11 19:49:02.643[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component hetero_lr_0, time elapse: 0:00:38[0m
[32m2023-04-11 19:49:03.662[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component hetero_lr_0, time elapse: 0:00:39[0m
[32m2023-04-11 19:49:04.684[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m

[32m2023-04-11 19:49:40.673[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component evaluation_0, time elapse: 0:01:16[0m
[32m2023-04-11 19:49:41.695[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component evaluation_0, time elapse: 0:01:17[0m
[32m2023-04-11 19:49:42.717[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component evaluation_0, time elapse: 0:01:18[0m
[32m2023-04-11 19:49:43.743[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127[0m - [1m[80D[1A[KRunning component evaluation_0, time elapse: 0:01:19[0m
[32m2023-04-11 19:49:44.779[0m | [1mINFO    [0m | [36mpipeline.utils.invoker.job_submitter[0m:[36mmonitor_job_status[0m:[36m127