# 基于隐语Secretflow框架的水平联邦学习落地实践——学生在校表现预测的多方联合训练
## 本地模拟测试代码

    该文档包含一个本地模拟流程，从一个.csv文件开始，模拟了双方联合训练的全流程。

### 数据拆分及预处理

In [1]:
import tempfile
import pandas as pd
import secretflow as sf

alldata_df = pd.read_csv("./student-mat.csv")
h_alice_df = alldata_df.loc[:100]
h_bob_df = alldata_df.loc[100:200]
h_test_df=alldata_df.loc[200:]

_, h_alice_path = tempfile.mkstemp()
_, h_bob_path = tempfile.mkstemp()
_,h_test_path=tempfile.mkstemp()
h_alice_df.to_csv("./A.csv", index=False)
h_bob_df.to_csv("./B.csv", index=False)
h_test_df.to_csv("./test.csv", index=False)

### 基于pytorch后端的FLModel联邦学习模拟

    首先经过隐语框架将两份数据分别加载到Frame中（这个过程中双方的数据没有出域）。
    我们要预测的目标特征为G3，即学生的最终测试成绩。我们将数据分为G3与其他特征，用于在后续模型中进行训练。

In [2]:
# Check the version of your SecretFlow
print('The version of SecretFlow: {}'.format(sf.__version__))

# In case you have a running secretflow runtime already.
sf.shutdown()
sf.init(['alice', 'bob', 'charlie'], address="local", log_to_driver=True)
alice, bob, charlie = sf.PYU('alice'), sf.PYU('bob'), sf.PYU('charlie')

from secretflow.data.horizontal import read_csv
from secretflow.security.aggregation.plain_aggregator import PlainAggregator
from secretflow.security.compare.plain_comparator import PlainComparator
from secretflow.data.split import train_test_split

path_dict = {alice: "./A.csv", bob: "./B.csv"}

aggregator = PlainAggregator(charlie)
comparator = PlainComparator(charlie)

hdf = read_csv(filepath=path_dict, aggregator=aggregator, comparator=comparator)
train_label = hdf["G3"]
train_data = hdf.drop(columns="G3")

testframe = pd.read_csv("./test.csv")
test_label = testframe["G3"]
test_data = testframe.drop(columns="G3")

The version of SecretFlow: 1.9.0b1


  self.pid = _posixsubprocess.fork_exec(
2024-09-10 00:46:00,674	INFO worker.py:1724 -- Started a local Ray instance.
INFO:root:Create proxy actor <class 'secretflow.device.proxy.ActorPartitionAgent'> with party alice.
INFO:root:Create proxy actor <class 'secretflow.device.proxy.ActorPartitionAgent'> with party bob.


    接下来开始训练

In [3]:
from secretflow.ml.nn.core.torch import (
    metric_wrapper,
    optim_wrapper,
    BaseModule,
    TorchModel,
)
from secretflow.ml.nn import FLModel
from torchmetrics import Accuracy, Precision
from secretflow.security.aggregation import SecureAggregator
from torch import nn, optim
from torch.nn import functional as F

class ConvNet(BaseModule):
    """Small ConvNet for MNIST."""

    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 3, kernel_size=3)
        self.fc_in_dim = 192
        self.fc = nn.Linear(self.fc_in_dim, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 3))
        x = x.view(-1, self.fc_in_dim)
        x = self.fc(x)
        return F.softmax(x, dim=1)

class SimpleNN(BaseModule):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(31, 64)  # 输入特征数31，隐藏层神经元数64（可以调整）
        self.fc2 = nn.Linear(64, 32)  # 隐藏层神经元数64，下一层神经元数32（可以调整）
        self.fc3 = nn.Linear(32, 10)  # 最后一层，假设有10个类别

    def forward(self, x):
        x = F.relu(self.fc1(x))  # 第一个全连接层及激活函数
        x = F.relu(self.fc2(x))  # 第二个全连接层及激活函数
        x = self.fc3(x)          # 最后一层输出 logits
        return F.softmax(x, dim=1)  # 输出概率分布

loss_fn = nn.CrossEntropyLoss
optim_fn = optim_wrapper(optim.Adam, lr=1e-2)
model_def = TorchModel(
    model_fn=SimpleNN,
    loss_fn=loss_fn,
    optim_fn=optim_fn,
    metrics=[
        metric_wrapper(Accuracy, task="multiclass", num_classes=10, average='micro'),
        metric_wrapper(Precision, task="multiclass", num_classes=10, average='micro'),
    ],
)


In [4]:
device_list = [alice, bob]
server = charlie
aggregator = SecureAggregator(server, [alice, bob])

# spcify params
fl_model = FLModel(
    server=server,
    device_list=device_list,
    model=model_def,
    aggregator=aggregator,
    strategy='fed_avg_w',  # fl strategy
    backend="torch",  # backend support ['tensorflow', 'torch']
)

INFO:root:Create proxy actor <class 'secretflow.device.proxy.Actor_Masker'> with party alice.
INFO:root:Create proxy actor <class 'secretflow.device.proxy.Actor_Masker'> with party bob.
INFO:root:Create proxy actor <class 'abc.ActorPYUFedAvgW'> with party alice.
INFO:root:Create proxy actor <class 'abc.ActorPYUFedAvgW'> with party bob.


In [5]:
history = fl_model.fit(
    train_data,
    train_label,
    validation_data=(test_data, test_label),
    epochs=20,
    batch_size=16,
    aggregate_freq=1,
)

INFO:root:FL Train Params: {'x': HDataFrame(partitions={PYURuntime(alice): <secretflow.data.core.partition.Partition object at 0x7f161070b430>, PYURuntime(bob): <secretflow.data.core.partition.Partition object at 0x7f161070b610>}, aggregator=<secretflow.security.aggregation.plain_aggregator.PlainAggregator object at 0x7f156710f7f0>, comparator=PlainComparator(device=PYURuntime(charlie))), 'y': HDataFrame(partitions={PYURuntime(alice): <secretflow.data.core.partition.Partition object at 0x7f1566f11f00>, PYURuntime(bob): <secretflow.data.core.partition.Partition object at 0x7f161070b2b0>}, aggregator=<secretflow.security.aggregation.plain_aggregator.PlainAggregator object at 0x7f156710f7f0>, comparator=PlainComparator(device=PYURuntime(charlie))), 'batch_size': 16, 'batch_sampling_rate': None, 'epochs': 20, 'verbose': 1, 'callbacks': None, 'validation_data': (     otherschool  sex  age  address  fa1ize  Pstatus  1edu  0edu  1job  0job  \
0              0    0   16        0       1       

Epoch 1/20


RayTaskError(RuntimeError): [36mray::ActorPYUFedAvgW.train_step()[39m (pid=607105, ip=172.25.205.26, actor_id=1a6f3c4cad396403b6f03e2101000000, repr=<abc.ActorPYUFedAvgW object at 0x7f4b2ba95120>)
  File "/home/maoyu/anaconda3/envs/pytorch/lib/python3.10/site-packages/secretflow/device/proxy.py", line 77, in wrapper
    return method(*args, **kwargs)
  File "/home/maoyu/anaconda3/envs/pytorch/lib/python3.10/site-packages/secretflow/ml/nn/fl/backend/torch/strategy/fed_avg_w.py", line 68, in train_step
    loss = self.model.training_step((x, y), cur_steps + step, sample_weight=s_w)
  File "/home/maoyu/anaconda3/envs/pytorch/lib/python3.10/site-packages/secretflow/ml/nn/core/torch/module.py", line 174, in training_step
    _, loss = self.forward_step(batch, batch_idx, dataloader_idx)
  File "/home/maoyu/anaconda3/envs/pytorch/lib/python3.10/site-packages/secretflow/ml/nn/core/torch/module.py", line 205, in forward_step
    y_pred = self(x)
  File "/home/maoyu/anaconda3/envs/pytorch/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/maoyu/anaconda3/envs/pytorch/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/tmp/ipykernel_605466/605327549.py", line 37, in forward
  File "/home/maoyu/anaconda3/envs/pytorch/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/maoyu/anaconda3/envs/pytorch/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/maoyu/anaconda3/envs/pytorch/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 114, in forward
    return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (16x32 and 31x64)

2024-09-10 00:46:25,714	ERROR worker.py:405 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::Actor_Masker.mask()[39m (pid=606956, ip=172.25.205.26, actor_id=abc622cdae868e05f8ec7b2e01000000, repr=<secretflow.device.proxy.Actor_Masker object at 0x7fd7576df5b0>)
  At least one of the input arguments for this task could not be computed:
ray.exceptions.RayTaskError: [36mray::ActorPYUFedAvgW.train_step()[39m (pid=607105, ip=172.25.205.26, actor_id=1a6f3c4cad396403b6f03e2101000000, repr=<abc.ActorPYUFedAvgW object at 0x7f4b2ba95120>)
  File "/home/maoyu/anaconda3/envs/pytorch/lib/python3.10/site-packages/secretflow/device/proxy.py", line 77, in wrapper
    return method(*args, **kwargs)
  File "/home/maoyu/anaconda3/envs/pytorch/lib/python3.10/site-packages/secretflow/ml/nn/fl/backend/torch/strategy/fed_avg_w.py", line 68, in train_step
    loss = self.model.training_step((x, y), cur_steps + step, sample_weight=s_w)
  File "/home/maoyu/anaconda3/envs/pytorch/l

In [None]:
from matplotlib import pyplot as plt

# Draw accuracy values for training & validation
plt.plot(history["global_history"]['multiclassaccuracy'])
plt.plot(history["global_history"]['val_multiclassaccuracy'])
plt.title('FLModel accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Valid'], loc='upper left')
plt.show()