# LR示例

以下示例仅用于说明LR的工作流程，目前仅支持单机测试。

PSI

In [2]:
import pandas as pd
import os
from PSI import PSICompany, PSIPartner

# The datasets live under the parent of the current working directory
# (os.path.abspath('') resolves to the cwd).
project_dir = os.path.dirname(os.path.abspath(''))
data_dir = os.path.join(project_dir, 'Datasets', 'data', 'data')
host_data = pd.read_csv(os.path.join(data_dir, 'breast_hetero_host.csv'))
guest_data = pd.read_csv(os.path.join(data_dir, 'breast_hetero_guest.csv'))

# Separate the 'id' join key from the remaining columns of each table.
company_key, company_features = host_data['id'], host_data.drop(columns=['id'])
partner_key, partner_features = guest_data['id'], guest_data.drop(columns=['id'])
# The keys are handed to the PSI classes as strings.
company_key = company_key.astype(str)
partner_key = partner_key.astype(str)

# Named psi_* on purpose: the SPU cell below binds the globals `company` and
# `partner` to device handles that share2spu() reads. Reusing those names here
# would silently break share2spu() whenever this cell is re-run afterwards.
psi_company = PSICompany(company_key, company_features)
psi_partner = PSIPartner(partner_key, partner_features)

# Run the PSI rounds; each call consumes the messages produced by the previous one.
U_c, company_pk = psi_company.exchange()
E_c, U_p, partner_pk = psi_partner.exchange(U_c, company_pk)
L, R_cI = psi_company.compute_intersection(E_c, U_p, partner_pk)
R_pI = psi_partner.output_shares(L)
# The first element of each result is used as that party's share downstream.
company_share = R_cI[0]
partner_share = R_pI[0]

Computing masked company cipher
Computing masked partner cipher
Computing company shares
Computing partner shares


此时share是`np.ndarray`类型。下面将其放入秘密共享设备spu中。
目前还没有找到由share直接构造`SPUObject`的方法，暂时使用下面的方法：

In [24]:
from common import MPCInitializer, sigmoid, softmax
import secretflow as sf
import numpy as np
import jax.numpy as jnp
# Build the MPC environment once, then expose each handle under its own name.
mpc_init = MPCInitializer()
company = mpc_init.company
partner = mpc_init.partner
coordinator = mpc_init.coordinator
spu = mpc_init.spu

# Assume the label y is held by company.
label_holder = company

def share2spu(X1: np.ndarray, X2: np.ndarray):
    """Upload two additive shares and sum them inside the SPU.

    Args:
        X1: The share held by Company.
        X2: The share held by Partner.

    Returns:
        The result of running the addition on ``spu`` over both uploaded
        shares, i.e. the secret-shared variable X.
    """
    def add(X1, X2):
        """Addition executed inside the SPU."""
        return X1 + X2

    # Both shares are converted to float32 JAX arrays before leaving the driver.
    company_part = jnp.array(X1, dtype=jnp.float32)
    partner_part = jnp.array(X2, dtype=jnp.float32)

    # Place each share on its owning party first, then move it to the SPU.
    company_part = sf.to(company, company_part).to(spu)
    partner_part = sf.to(partner, partner_part).to(spu)

    # Summing inside the SPU yields the secret-shared X.
    return spu(add)(company_part, partner_part)

划分训练集和测试集

In [None]:
from sklearn.model_selection import train_test_split


# Both share matrices are passed to a single train_test_split call so they are
# split consistently, row for row. The fixed random_state makes the split (and
# every accuracy number derived from it) reproducible across kernel restarts.
train_1, test_1, train_2, test_2 = train_test_split(
    company_share, partner_share, random_state=42)

# In every share matrix the last column is taken as the label share and the
# remaining columns as the feature share.
train_X1, train_y1 = train_1[:, :-1], train_1[:, -1]
train_X2, train_y2 = train_2[:, :-1], train_2[:, -1]
test_X1, test_y1 = test_1[:, :-1], test_1[:, -1]
test_X2, test_y2 = test_2[:, :-1], test_2[:, -1]

test_X = share2spu(test_X1, test_X2)
# In inference mode the current model treats predictions as public, so the
# test-set y is revealed as well by adding the two shares in the clear.
test_y = test_y1 + test_y2

2025-07-28 19:15:51,484	ERROR worker.py:422 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::SPURuntime.del_share()[39m (pid=6761, ip=192.168.251.137, actor_id=9db9b1eec314a615144a1a7e01000000, repr=SPURuntime(device_id=None, party=company))
  At least one of the input arguments for this task could not be computed:
ray.exceptions.RayTaskError: [36mray::SPURuntime.run()[39m (pid=6761, ip=192.168.251.137, actor_id=9db9b1eec314a615144a1a7e01000000, repr=SPURuntime(device_id=None, party=company))
  At least one of the input arguments for this task could not be computed:
ray.exceptions.RayTaskError: [36mray::compile_fn()[39m (pid=6759, ip=192.168.251.137)
  File "/home/lvx_vitae/AnonymVFL/.conda/lib/python3.10/site-packages/spu/utils/frontend.py", line 269, in compile
    name = fn.func.__name__ if isinstance(fn, functools.partial) else fn.__name__
AttributeError: 'jaxlib.xla_extension.Frame' object has no attribute '__name__'

During handling of the above 

训练集划分batch以实现批量训练

In [29]:
# train_X1 and train_X2 have the same number of samples, so either one can
# supply the sample and feature counts.
num_samples, num_features = train_X1.shape

batch_size = 1024
Xs, ys = [], []
for start in range(0, num_samples, batch_size):
    # The final batch may be shorter than batch_size.
    stop = start + min(batch_size, num_samples - start)
    rows = slice(start, stop)
    # Features first, then labels, for the same row window of both parties.
    Xs.append(share2spu(train_X1[rows], train_X2[rows]))
    ys.append(share2spu(train_y1[rows], train_y2[rows]))

训练指定的轮次

In [30]:
from LR import SSLR

# Build the model from the feature count computed in the batching cell,
# then fit it on the prepared batch lists for 10 iterations.
model = SSLR(num_features)
model.fit(Xs, ys, n_iter=10)

Epoch 1


5it [00:25,  5.07s/it]


KeyboardInterrupt: 

或手动训练，并绘制测试集准确率曲线

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from tqdm import tqdm

n_iter = 20
accs = []      # test accuracy recorded after every epoch
max_acc = 0    # best test accuracy seen so far
for t in range(1, n_iter + 1):
    print(f"Epoch {t}")
    # zip() has no length, so the batch count is passed explicitly; without it
    # tqdm can only show a running counter instead of a progress bar.
    for X, y in tqdm(zip(Xs, ys), total=len(Xs)):
        y_pred = model.forward(X)
        # The last argument shrinks as 0.1 / t — presumably a decaying step
        # size; confirm against SSLR.backward.
        model.backward(X, y, y_pred, 0.1 / t)

    # Evaluate on the held-out set once per epoch.
    y_pred = model.predict(test_X)
    accuracy = accuracy_score(test_y, y_pred)
    # Only report epochs that improve on the best accuracy so far.
    if accuracy > max_acc:
        max_acc = accuracy
        print(f"Iteration {t}, Accuracy: {accuracy:.4f}")
    accs.append(accuracy)

# Plot against the 1-based epoch number; the legend call is what makes the
# "SSLR" label visible.
plt.plot(range(1, n_iter + 1), accs, label="SSLR", color="blue")
plt.xlabel("Epoch")
plt.ylabel("Test accuracy")
plt.legend()
plt.show()