In [13]:
import secretflow as sf
import spu
import ray

# Tear down whatever SecretFlow session this kernel may still hold so the cell
# can be re-executed (presumably init cannot be called twice — confirm).
sf.shutdown()

# Bring up a local, single-machine runtime with two parties: alice and bob.
sf.init(
    parties=['alice', 'bob'],
    address='local',
)

2023-07-26 15:01:05,010	INFO worker.py:1529 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8266 [39m[22m


In [14]:
import numpy as np
from sklearn.datasets import load_iris

# Load the iris measurements as a DataFrame; the label vector is not needed.
data, _ = load_iris(return_X_y=True, as_frame=True)

# Give every row a string identifier (0, 1, 2, ...).
row_count = len(data)
data['uid'] = np.arange(row_count).astype('str')

# Tag the first half of the rows 'July' and the remainder 'August'.
july_rows = row_count // 2
data['month'] = ['July'] * july_rows + ['August'] * (row_count - july_rows)

data

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),uid,month
0,5.1,3.5,1.4,0.2,0,July
1,4.9,3.0,1.4,0.2,1,July
2,4.7,3.2,1.3,0.2,2,July
3,4.6,3.1,1.5,0.2,3,July
4,5.0,3.6,1.4,0.2,4,July
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,145,August
146,6.3,2.5,5.0,1.9,146,August
147,6.5,3.0,5.2,2.0,147,August
148,6.2,3.4,5.4,2.3,148,August


In [15]:
# Randomly split the data and hand one part to each party.
import os
import pandas

# Fixed seed so the split -- and every count derived from it downstream --
# is reproducible under Restart & Run All.
SPLIT_SEED = 42

# NOTE(review): `dir` shadows the builtin and is not used in the visible cells;
# kept in case a later cell relies on it.
dir = os.getcwd()
os.makedirs('.data', exist_ok=True)

# Alice receives 80% of the rows and Bob 70%, so the two samples overlap only
# partially. Distinct seeds keep the two draws from being correlated with
# each other.
data_alice = data.sample(frac=0.8, random_state=SPLIT_SEED)
data_bob = data.sample(frac=0.7, random_state=SPLIT_SEED + 1)

# Write each party's share without the pandas index column.
data_alice.to_csv('.data/alice.csv', index=False)
data_bob.to_csv('.data/bob.csv', index=False)

In [16]:
import spu
# Cluster configuration for an explicitly addressed SPU deployment.
# Both nodes live on the same host, so each party must use its own port:
# two runtimes cannot listen on the same host:port pair.
cluster_def = {
    'nodes': [
        {
            'party': 'alice',
            'address': '192.168.11.93:5000',
        },
        {
            'party': 'bob',
            'address': '192.168.11.93:5001',
        },
    ],
    'runtime_config': {
        'protocol': spu.spu_pb2.SEMI2K,
        'field': spu.spu_pb2.FM128,
        'sigmoid_mode': spu.spu_pb2.RuntimeConfig.SIGMOID_REAL,
    },
}

# One PYU device handle per party.
alice = sf.PYU('alice')
bob = sf.PYU('bob')

# The SPU below is built from SecretFlow's testing helper rather than from
# `cluster_def`. To use the explicit configuration instead, swap in:
# spu_psi = sf.SPU(cluster_def=cluster_def)
spu_psi = sf.SPU(sf.utils.testing.cluster_def(['alice', 'bob']))
print(spu_psi)

<secretflow.device.device.spu.SPU object at 0x2b3dc3730>


In [17]:
# Per-party CSV locations, keyed by each party's PYU device: one mapping is
# passed as the PSI input, the other as its output.
input_path = {
    alice: '.data/alice.csv',
    bob: '.data/bob.csv',
}
output_path = {
    alice: '.data/alice_psi.csv',
    bob: '.data/bob_psi.csv',
}

# Run PSI over the two files using the 'uid' column as the key. The final
# argument names 'alice' (presumably the party that receives the result —
# confirm against the psi_csv signature).
spu_psi.psi_csv('uid', input_path, output_path, 'alice')

[2m[36m(SPURuntime(device_id=None, party=alice) pid=17562)[0m 2023-07-26 15:01:09.759 [info] [bucket_psi.cc:Init:234] bucket size set to 1048576
[2m[36m(SPURuntime(device_id=None, party=alice) pid=17562)[0m 2023-07-26 15:01:09.759 [info] [bucket_psi.cc:CheckInput:149] Begin sanity check for input file: .data/alice.csv, precheck_switch:true
[2m[36m(SPURuntime(device_id=None, party=alice) pid=17562)[0m 2023-07-26 15:01:09.761 [info] [csv_checker.cc:CsvChecker:121] Executing duplicated scripts: LC_ALL=C sort --buffer-size=1G --temporary-directory=.data --stable selected-keys.1690354869760028000 | LC_ALL=C uniq -d > duplicate-keys.1690354869760028000
[2m[36m(SPURuntime(device_id=None, party=alice) pid=17562)[0m 2023-07-26 15:01:09.768 [info] [bucket_psi.cc:CheckInput:166] End sanity check for input file: .data/alice.csv, size=120
[2m[36m(SPURuntime(device_id=None, party=alice) pid=17562)[0m 2023-07-26 15:01:09.768 [info] [bucket_psi.cc:RunPsi:267] Run psi protocol=2, self_it

[{'party': 'alice', 'original_count': 120, 'intersection_count': 84},
 {'party': 'bob', 'original_count': 105, 'intersection_count': 84}]

[2m[36m(SPURuntime(device_id=None, party=alice) pid=17562)[0m 2023-07-26 15:01:09.806 [info] [bucket_psi.cc:RunBucketPsi:363] psi protocol=2, result_size=84
[2m[36m(SPURuntime(device_id=None, party=alice) pid=17562)[0m 2023-07-26 15:01:09.806 [info] [bucket_psi.cc:ProduceOutput:187] Begin post filtering, indices.size=84, should_sort=true
[2m[36m(SPURuntime(device_id=None, party=alice) pid=17562)[0m 2023-07-26 15:01:09.806 [info] [utils.cc:MultiKeySort:88] Executing sort scripts: tail -n +2 .data/tmp-sort-in-1690354869806628000 | LC_ALL=C sort --buffer-size=3G --parallel=8 --temporary-directory=./ --stable --field-separator=, --key=5,5 >>.data/tmp-sort-out-1690354869806628000
[2m[36m(SPURuntime(device_id=None, party=alice) pid=17562)[0m 2023-07-26 15:01:09.811 [info] [utils.cc:MultiKeySort:90] Finished sort scripts: tail -n +2 .data/tmp-sort-in-1690354869806628000 | LC_ALL=C sort --buffer-size=3G --parallel=8 --temporary-directory=./ --stable --field-separator=, --key=5,5 >>