In [1]:
# This notebook shows how to launch an inverse-model attack against split learning.

In [2]:
# Imports
import sys
# NOTE(review): machine-specific absolute path, presumably added so that the
# `ppsplit` package imported below resolves; adjust it to the local checkout.
sys.path.append('/home/dengruijun/data/FinTech/PP-Split/')
from ppsplit.attacks.model_inversion.inverse_model import InverseModelAttack
from ppsplit.utils.utils import create_dir
import torch
import os


# cifar10 （图像多分类）

In [2]:
# 导包和超参数设置
from target_model.data_preprocessing.preprocess_cifar10 import get_cifar10_normalize,get_one_data,deprocess

from target_model.models.VGG import VGG,VGG5Decoder,model_cfg
from target_model.models.splitnn_utils import split_weights_client

# Experiment id; it is also the name of the results sub-folder.
test_num = 2
# Split point: layers up to and including this one form the client model,
# the layers after it form the server model.
split_layer = 2

# Key paths
# Checkpoint of the full (unsplit) model that is later cut into client weights.
unit_net_route = '/home/dengruijun/data/project/Inverse_efficacy/results/VGG5/BN+Tanh/2-20240101/VGG5-params-19ep.pth'
results_dir = '../results/VGG5/' + str(test_num) + '/'
inverse_dir = f'{results_dir}layer{split_layer}/'
# Where the attack's decoder network is stored.
decoder_net_route = f'{results_dir}Decoder-layer{split_layer}.pth'


In [3]:
# Prepare the client-side network of the target model.
# split_layer_list = list(range(len(model_cfg['VGG5'])))  # candidate split points

# Create the output folders.
create_dir(results_dir)
create_dir(inverse_dir)

# Build the client network and fill it from the unit-model checkpoint.
client_net = VGG('Client', 'VGG5', split_layer, model_cfg)
# map_location='cpu' lets a checkpoint that was saved from a CUDA device load on
# a host without CUDA; load_state_dict below copies the values into the model's
# own parameters, so the model's device is unaffected.
pweights = torch.load(unit_net_route, map_location='cpu')
if split_layer < len(model_cfg['VGG5']):
    # Presumably reduces the full checkpoint to the entries of the client
    # network's state dict — confirm against split_weights_client.
    pweights = split_weights_client(pweights, client_net.state_dict())
client_net.load_state_dict(pweights)


features.0.weight
features.0.bias
features.1.weight
features.1.bias
features.1.running_mean
features.1.running_var
features.1.num_batches_tracked
features.4.weight
features.4.bias
features.5.weight
features.5.bias
features.5.running_mean
features.5.running_var
features.5.num_batches_tracked


<All keys matched successfully>

In [4]:
# Set up everything the inverse-model attack needs.
# Attack object: receives the decoder checkpoint path, the data type and the output folder.
# NOTE(review): data_type=1 here while the tabular sections pass 0 — presumably 1 = image data; confirm.
im_attack = InverseModelAttack(
    decoder_route=decoder_net_route,
    data_type=1,
    inverse_dir=inverse_dir,
)

# Obtain the decoder: train one when no checkpoint exists yet, otherwise reuse the stored one.
if not os.path.isfile(decoder_net_route):
    print("train decoder model...")
    decoder_net = VGG5Decoder(split_layer=split_layer)
    # Loaders used for decoder training.
    trainloader, testloader = get_cifar10_normalize(batch_size=32)
    decoder_net = im_attack.train_decoder(
        client_net=client_net,
        decoder_net=decoder_net,
        train_loader=trainloader,
        test_loader=testloader,
        epochs=20,
    )
else:
    print("=> loading decoder model '{}'".format(decoder_net_route))
    # NOTE(review): the loaded object is used directly as the decoder, so the file
    # is expected to hold a whole pickled module. Only load trusted files; recent
    # PyTorch releases default to weights_only=True and may reject it — verify.
    decoder_net = torch.load(decoder_net_route)


=> loading decoder model '../results/VGG5/2/Decoder-layer2.pth'


In [5]:
# Run the attack: reconstruct every image in the test loader.
trainloader, testloader = get_cifar10_normalize(batch_size=1)

im_attack.inverse(
    client_net=client_net,
    decoder_net=decoder_net,
    train_loader=trainloader,
    test_loader=testloader,
    deprocess=deprocess,
    save_fake=True,
)

  0%|          | 0/10000 [00:00<?, ?it/s]

----train decoder----
client_net: 
VGG(
  (features): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Tanh()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Tanh()
  )
  (denses): Sequential()
)
decoder_net: 
VGG5Decoder(
  (features): Sequential(
    (0): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Tanh()
    (3): ConvTranspose2d(32, 3, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))
    (4): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): Tanh()
  )
  (denses): S

100%|██████████| 10000/10000 [00:46<00:00, 215.86it/s]

SSIM: 0.8241773731291294,              MSE:0.0903856434754096
average time: 0.0016465278148651123 avg infer time:0.0011764520883560182





# Bank数据集 （表格数据二分类）

In [6]:
# 导包和超参数设置
from target_model.data_preprocessing.preprocess_bank import preprocess_bank

from target_model.models.BankNet import BankNet1,BankNetDecoder1,bank_cfg
from target_model.models.splitnn_utils import split_weights_client

# Experiment id; it is also the name of the results sub-folder.
test_num = 6
# Split point: layers up to and including this one form the client model,
# the layers after it form the server model.
split_layer = 2

# Key paths
# Checkpoint of the full (unsplit) model that is later cut into client weights.
unit_net_route = '/home/dengruijun/data/FinTech/PP-Split/results/trained_models/Bank/bank-20ep_params.pth'
results_dir = '../results/Bank/' + str(test_num) + '/'
inverse_dir = f'{results_dir}layer{split_layer}/'
# Where the attack's decoder network is stored.
decoder_net_route = f'{results_dir}Decoder-layer{split_layer}.pth'


In [7]:
# Prepare the target model's client network (i.e. cut the model at split_layer).
create_dir(results_dir)
create_dir(inverse_dir)

client_net = BankNet1(layer=split_layer)
# map_location='cpu' lets a checkpoint that was saved from a CUDA device load on
# a host without CUDA; load_state_dict below copies the values into the model's
# own parameters, so the model's device is unaffected.
pweights = torch.load(unit_net_route, map_location='cpu')
if split_layer < len(bank_cfg):
    # Presumably reduces the full checkpoint to the entries of the client
    # network's state dict — confirm against split_weights_client.
    pweights = split_weights_client(pweights, client_net.state_dict())
client_net.load_state_dict(pweights)



linear1.weight
linear1.bias
linear2.weight
linear2.bias


<All keys matched successfully>

In [8]:
# Set up everything the inverse-model attack needs.
# Attack object: receives the decoder checkpoint path, the data type and the output folder.
# NOTE(review): data_type=0 here while the CIFAR-10 section passes 1 — presumably 0 = tabular data; confirm.
im_attack = InverseModelAttack(
    decoder_route=decoder_net_route,
    data_type=0,
    inverse_dir=inverse_dir,
)

# Obtain the decoder: train one when no checkpoint exists yet, otherwise reuse the stored one.
if not os.path.isfile(decoder_net_route):
    print("train decoder model...")
    decoder_net = BankNetDecoder1(layer=split_layer)
    # Loaders used for decoder training.
    trainloader, testloader = preprocess_bank(batch_size=32)
    decoder_net = im_attack.train_decoder(
        client_net=client_net,
        decoder_net=decoder_net,
        train_loader=trainloader,
        test_loader=testloader,
        epochs=20,
    )
else:
    print("=> loading decoder model '{}'".format(decoder_net_route))
    # NOTE(review): the loaded object is used directly as the decoder, so the file
    # is expected to hold a whole pickled module. Only load trusted files; recent
    # PyTorch releases default to weights_only=True and may reject it — verify.
    decoder_net = torch.load(decoder_net_route)

=> loading decoder model '../results/Bank/6/Decoder-layer2.pth'


In [9]:
# Run the attack: reconstruct every tabular row in the test loader.
trainloader, testloader = preprocess_bank(batch_size=1)

im_attack.inverse(
    client_net=client_net,
    decoder_net=decoder_net,
    train_loader=trainloader,
    test_loader=testloader,
    save_fake=True,
)



  0%|          | 0/8238 [00:00<?, ?it/s]

X_train.shape: (32950, 63)
X_test.shape: (8238, 63)
y_train.shape: (32950, 1)
y_test.shape: (8238, 1) <class 'numpy.ndarray'>
----train decoder----
client_net: 
BankNet1(
  (linear1): Linear(in_features=63, out_features=128, bias=True)
  (linear2): Linear(in_features=128, out_features=64, bias=True)
)
decoder_net: 
BankNetDecoder1(
  (delinear1): Linear(in_features=64, out_features=128, bias=True)
  (ReLU1): ReLU()
  (delinear2): Linear(in_features=128, out_features=63, bias=True)
)


100%|██████████| 8238/8238 [00:20<00:00, 397.55it/s]


cosine: 0.9167903578593969,               Euclidean: 1.294159001821137,              MSE:0.042359481014795926
average time: 0.0006233145525571819 avg infer time:0.0003061840388924788


# Credit 数据集 （表格数据二分类）

In [3]:
# 导包和超参数设置
from target_model.data_preprocessing.preprocess_credit import preprocess_credit

from target_model.models.CreditNet import CreditNet1,CreditNetDecoder1,credit_cfg
from target_model.models.splitnn_utils import split_weights_client

# Experiment id; it is also the name of the results sub-folder.
test_num = 10
# Split point: layers up to and including this one form the client model,
# the layers after it form the server model.
split_layer = 3

# Key paths
# Checkpoint of the full (unsplit) model that is later cut into client weights.
unit_net_route = '/home/dengruijun/data/FinTech/PP-Split/results/trained_models/credit/credit-20ep_params.pth'
results_dir = '../results/Credit/' + str(test_num) + '/'
# Storage folder for this split layer.
inverse_dir = f'{results_dir}layer{split_layer}/'
# Where the attack's decoder network is stored.
decoder_net_route = f'{results_dir}Decoder-layer{split_layer}.pth'

In [4]:
# Prepare the target model's client network (i.e. cut the model at split_layer).
create_dir(results_dir)
create_dir(inverse_dir)

client_net = CreditNet1(layer=split_layer)
# map_location='cpu' lets a checkpoint that was saved from a CUDA device load on
# a host without CUDA; load_state_dict below copies the values into the model's
# own parameters, so the model's device is unaffected.
pweights = torch.load(unit_net_route, map_location='cpu')
if split_layer < len(credit_cfg):
    # Presumably reduces the full checkpoint to the entries of the client
    # network's state dict — confirm against split_weights_client.
    pweights = split_weights_client(pweights, client_net.state_dict())
client_net.load_state_dict(pweights)


linear1.weight
linear1.bias
batch_norm1.weight
batch_norm1.bias
batch_norm1.running_mean
batch_norm1.running_var
batch_norm1.num_batches_tracked
linear2.weight
linear2.bias


<All keys matched successfully>

In [5]:
# Set up everything the inverse-model attack needs.
# Attack object: receives the decoder checkpoint path, the data type and the output folder.
# NOTE(review): data_type=0 here while the CIFAR-10 section passes 1 — presumably 0 = tabular data; confirm.
im_attack = InverseModelAttack(
    decoder_route=decoder_net_route,
    data_type=0,
    inverse_dir=inverse_dir,
)

# Obtain the decoder: train one when no checkpoint exists yet, otherwise reuse the stored one.
if not os.path.isfile(decoder_net_route):
    print("train decoder model...")
    decoder_net = CreditNetDecoder1(layer=split_layer)
    # optimizer = torch.optim.SGD(decoder_net.parameters(), 1e-3)

    # Loaders used for decoder training.
    trainloader, testloader = preprocess_credit(batch_size=32)
    decoder_net = im_attack.train_decoder(
        client_net=client_net,
        decoder_net=decoder_net,
        train_loader=trainloader,
        test_loader=testloader,
        epochs=20,
        # optimizer=optimizer
    )
else:
    print("=> loading decoder model '{}'".format(decoder_net_route))
    # NOTE(review): the loaded object is used directly as the decoder, so the file
    # is expected to hold a whole pickled module. Only load trusted files; recent
    # PyTorch releases default to weights_only=True and may reject it — verify.
    decoder_net = torch.load(decoder_net_route)

=> loading decoder model '../results/Credit/10/Decoder-layer3.pth'


In [6]:
# Run the attack: reconstruct every tabular row in the test loader.
trainloader, testloader = preprocess_credit(batch_size=1)

im_attack.inverse(
    client_net=client_net,
    decoder_net=decoder_net,
    train_loader=trainloader,
    test_loader=testloader,
    save_fake=True,
)



  0%|          | 0/61503 [00:00<?, ?it/s]

X_train.shape: (246008, 250)
X_test.shape: (61503, 250)
y_train.shape: (246008, 1)
y_test.shape: (61503, 1) <class 'numpy.ndarray'>
----train decoder----
client_net: 
CreditNet1(
  (linear1): Linear(in_features=250, out_features=512, bias=True)
  (batch_norm1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (ReLU1): LeakyReLU(negative_slope=0.01)
  (linear2): Linear(in_features=512, out_features=128, bias=True)
)
decoder_net: 
CreditNetDecoder1(
  (delinear1): Linear(in_features=128, out_features=512, bias=True)
  (ReLU1): ReLU()
  (delinear2): Linear(in_features=512, out_features=250, bias=True)
)


100%|██████████| 61503/61503 [01:49<00:00, 562.62it/s] 


cosine: 0.9869267164826434,               Euclidean: 1.4236435810680315,              MSE:0.009183392138517262
average time: 0.00035009122411578644 avg infer time:0.00031623337855465504


# Purchase100 数据集 （表格数据多分类）

In [2]:
# 导包和超参数设置
from target_model.data_preprocessing.preprocess_purchase import preprocess_purchase

from target_model.models.PurchaseNet import PurchaseClassifier1,PurchaseDecoder1,purchase_cfg
from target_model.models.splitnn_utils import split_weights_client

# Experiment id; it is also the name of the results sub-folder.
test_num = 1.1
# Split point: layers up to and including this one form the client model,
# the layers after it form the server model.
split_layer = 3

# Key paths
# Checkpoint of the full (unsplit) model that is later cut into client weights.
unit_net_route = '/home/dengruijun/data/FinTech/PP-Split/results/trained_models/Purchase100/Purchase_bestmodel_param.pth'
results_dir = '../results/Purchase/' + str(test_num) + '/'
inverse_dir = f'{results_dir}layer{split_layer}/'
# Where the attack's decoder network is stored.
decoder_net_route = f'{results_dir}Decoder-layer{split_layer}.pth'


In [3]:
# Prepare the target model's client network (i.e. cut the model at split_layer).
create_dir(results_dir)
create_dir(inverse_dir)

client_net = PurchaseClassifier1(layer=split_layer)
# map_location='cpu' lets a checkpoint that was saved from a CUDA device load on
# a host without CUDA; load_state_dict below copies the values into the model's
# own parameters, so the model's device is unaffected.
pweights = torch.load(unit_net_route, map_location='cpu')
if split_layer < len(purchase_cfg):
    # Presumably reduces the full checkpoint to the entries of the client
    # network's state dict — confirm against split_weights_client.
    pweights = split_weights_client(pweights, client_net.state_dict())
client_net.load_state_dict(pweights)



linear1.weight
linear1.bias
linear2.weight
linear2.bias


<All keys matched successfully>

In [4]:
# Set up everything the inverse-model attack needs.
# Attack object: receives the decoder checkpoint path, the data type and the output folder.
# NOTE(review): data_type=0 here while the CIFAR-10 section passes 1 — presumably 0 = tabular data; confirm.
im_attack = InverseModelAttack(
    decoder_route=decoder_net_route,
    data_type=0,
    inverse_dir=inverse_dir,
)

# Obtain the decoder: train one when no checkpoint exists yet, otherwise reuse the stored one.
if not os.path.isfile(decoder_net_route):
    print("train decoder model...")
    decoder_net = PurchaseDecoder1(layer=split_layer)
    # Loaders used for decoder training.
    trainloader, testloader = preprocess_purchase(batch_size=32)
    decoder_net = im_attack.train_decoder(
        client_net=client_net,
        decoder_net=decoder_net,
        train_loader=trainloader,
        test_loader=testloader,
        epochs=20,
    )
else:
    print("=> loading decoder model '{}'".format(decoder_net_route))
    # NOTE(review): the loaded object is used directly as the decoder, so the file
    # is expected to hold a whole pickled module. Only load trusted files; recent
    # PyTorch releases default to weights_only=True and may reject it — verify.
    decoder_net = torch.load(decoder_net_route)

train decoder model...
[('Tanh',), ('D', 1024, 512), ('Tanh',), ('D', 600, 1024)]
purchase100 dataset processing...
datset route: /home/dengruijun/data/FinTech/DATASET/kaggle-dataset/Purchase100//data.npz
original dataset shape:  (197324, 600)
After random selection, dataset shape:  (197324, 600)
After split between classifier and attack: 
training dataset shape:  (157859, 600)
testing dataset shape:  (39465, 600)


  0%|          | 0/4933 [00:00<?, ?it/s]

Data loading finished
----train decoder----
client_net: 
PurchaseClassifier1(
  (linear1): Linear(in_features=600, out_features=1024, bias=True)
  (Tanh1): Tanh()
  (linear2): Linear(in_features=1024, out_features=512, bias=True)
  (Tanh2): Tanh()
)
decoder_net: 
PurchaseDecoder1(
  (delinear1): Linear(in_features=512, out_features=1024, bias=True)
  (Tanh1): Tanh()
  (delinear2): Linear(in_features=1024, out_features=600, bias=True)
  (Tanh2): Tanh()
)
Epoch 0


100%|██████████| 4933/4933 [00:26<00:00, 184.74it/s]
  0%|          | 0/4933 [00:00<?, ?it/s]

--- epoch: 0, train_loss: [0.036627549678087234]
Epoch 1


100%|██████████| 4933/4933 [00:24<00:00, 203.88it/s]
  0%|          | 0/4933 [00:00<?, ?it/s]

--- epoch: 1, train_loss: [0.03575607389211655]
Epoch 2


100%|██████████| 4933/4933 [00:23<00:00, 208.19it/s]
  0%|          | 0/4933 [00:00<?, ?it/s]

--- epoch: 2, train_loss: [0.03371845558285713]
Epoch 3


100%|██████████| 4933/4933 [00:24<00:00, 204.38it/s]
  0%|          | 0/4933 [00:00<?, ?it/s]

--- epoch: 3, train_loss: [0.03246120363473892]
Epoch 4


100%|██████████| 4933/4933 [00:24<00:00, 204.07it/s]
  0%|          | 0/4933 [00:00<?, ?it/s]

--- epoch: 4, train_loss: [0.032881733030080795]
Epoch 5


100%|██████████| 4933/4933 [00:24<00:00, 202.60it/s]
  0%|          | 0/4933 [00:00<?, ?it/s]

--- epoch: 5, train_loss: [0.032808125019073486]
Epoch 6


100%|██████████| 4933/4933 [00:24<00:00, 202.64it/s]
  0%|          | 0/4933 [00:00<?, ?it/s]

--- epoch: 6, train_loss: [0.032848045229911804]
Epoch 7


100%|██████████| 4933/4933 [00:24<00:00, 205.49it/s]
  0%|          | 0/4933 [00:00<?, ?it/s]

--- epoch: 7, train_loss: [0.032901644706726074]
Epoch 8


100%|██████████| 4933/4933 [00:24<00:00, 203.39it/s]
  0%|          | 0/4933 [00:00<?, ?it/s]

--- epoch: 8, train_loss: [0.03116638958454132]
Epoch 9


100%|██████████| 4933/4933 [00:24<00:00, 202.96it/s]
  0%|          | 0/4933 [00:00<?, ?it/s]

--- epoch: 9, train_loss: [0.03199167922139168]
Epoch 10


100%|██████████| 4933/4933 [00:24<00:00, 205.30it/s]
  0%|          | 0/4933 [00:00<?, ?it/s]

--- epoch: 10, train_loss: [0.03230536729097366]
Epoch 11


100%|██████████| 4933/4933 [00:24<00:00, 203.96it/s]
  0%|          | 0/4933 [00:00<?, ?it/s]

--- epoch: 11, train_loss: [0.03261202201247215]
Epoch 12


100%|██████████| 4933/4933 [00:24<00:00, 202.58it/s]
  0%|          | 0/4933 [00:00<?, ?it/s]

--- epoch: 12, train_loss: [0.033292222768068314]
Epoch 13


100%|██████████| 4933/4933 [00:24<00:00, 199.35it/s]
  0%|          | 0/4933 [00:00<?, ?it/s]

--- epoch: 13, train_loss: [0.0322897732257843]
Epoch 14


100%|██████████| 4933/4933 [00:24<00:00, 197.59it/s]
  0%|          | 0/4933 [00:00<?, ?it/s]

--- epoch: 14, train_loss: [0.03349592909216881]
Epoch 15


100%|██████████| 4933/4933 [00:25<00:00, 197.22it/s]
  0%|          | 0/4933 [00:00<?, ?it/s]

--- epoch: 15, train_loss: [0.03360217809677124]
Epoch 16


100%|██████████| 4933/4933 [00:23<00:00, 206.64it/s]
  0%|          | 0/4933 [00:00<?, ?it/s]

--- epoch: 16, train_loss: [0.03368714824318886]
Epoch 17


100%|██████████| 4933/4933 [00:24<00:00, 202.28it/s]
  0%|          | 0/4933 [00:00<?, ?it/s]

--- epoch: 17, train_loss: [0.032218173146247864]
Epoch 18


100%|██████████| 4933/4933 [00:23<00:00, 205.82it/s]
  0%|          | 0/4933 [00:00<?, ?it/s]

--- epoch: 18, train_loss: [0.03365800902247429]
Epoch 19


100%|██████████| 4933/4933 [00:24<00:00, 203.06it/s]

--- epoch: 19, train_loss: [0.033522527664899826]
model saved





In [5]:
# Run the attack: reconstruct every tabular row in the test loader.
trainloader, testloader = preprocess_purchase(batch_size=1)

im_attack.inverse(
    client_net=client_net,
    decoder_net=decoder_net,
    train_loader=trainloader,
    test_loader=testloader,
    save_fake=True,
)

purchase100 dataset processing...
datset route: /home/dengruijun/data/FinTech/DATASET/kaggle-dataset/Purchase100//data.npz
original dataset shape:  (197324, 600)
After random selection, dataset shape:  (197324, 600)
After split between classifier and attack: 
training dataset shape:  (157859, 600)
testing dataset shape:  (39465, 600)


  0%|          | 0/39465 [00:00<?, ?it/s]

Data loading finished
----train decoder----
client_net: 
PurchaseClassifier1(
  (linear1): Linear(in_features=600, out_features=1024, bias=True)
  (Tanh1): Tanh()
  (linear2): Linear(in_features=1024, out_features=512, bias=True)
  (Tanh2): Tanh()
)
decoder_net: 
PurchaseDecoder1(
  (delinear1): Linear(in_features=512, out_features=1024, bias=True)
  (Tanh1): Tanh()
  (delinear2): Linear(in_features=1024, out_features=600, bias=True)
  (Tanh2): Tanh()
)


100%|██████████| 39465/39465 [01:33<00:00, 420.57it/s]


cosine: 0.9504989085429434,               Euclidean: 4.34348811342676,              MSE:0.03172383672123201
average time: 0.000556240434597068 avg infer time:0.0003523296538405767


: 