In [1]:
import torch
from dataloder import *
from argument import *
from model import *
from pretrain import *
from utils import *
from parllutils import *
from functorch import vmap
# Build the experiment configuration Namespace (dataset, attack type, model, ...).
args = argument()
# Prefer the GPU, but fall back to the CPU when CUDA is unavailable instead of
# failing later on the first `.to('cuda')`.
# NOTE(review): helpers imported from the project may still assume CUDA — confirm.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
args

Namespace(adv='PGD', batchsize=128, dataset='binaryMnist', deletebatch=1, deletenum=0, isbatch=False, iterneumann=3, lam=0.0001, model='logistic', parllsize=128, remove_type=2, times=0)

In [2]:
# Unlearning setup:
#   delete_num   - delete 600 points
#   delete_batch - start deleting from the 1st point
#   pass_batch   - batch size taken from args.parllsize
delete_num, delete_batch = 600, 1
pass_batch = args.parllsize
(delete_num, delete_batch, pass_batch)

(600, 1, 128)

## Pre-processing
### 1) load data

In [3]:
# Load the data for this run; `delete_num` and `shuffle=True` are forwarded to Load_Data.
train_data, test_data, re_sequence = Load_Data(args, delete_num, shuffle=True)

# Wrap both splits in loaders sharing args.batchsize (train is built first, then test).
train_loader, test_loader = (
    make_loader(split, batch_size=args.batchsize)
    for split in (train_data, test_data)
)
print(f"total number of train data: {len(train_data[0])}, test data: {len(test_data[0])}")

train labels: tensor([1, 1, 1,  ..., 1, 7, 1])
total number of train data: 13007, test data: 2163


### 2) adversarially trained model (original model w*)

In [4]:
# Adversarial (attack-based) training of the original model; per the original note
# this relies on the `torchattacks` package.
# The second returned value is bound to `training_time`
# (presumably the elapsed training time — confirm against `train`).
model, training_time = train(
    train_loader,
    test_loader,
    args,
    verbose=True,
)

model information: 
LinearModel(
  (fc): Linear(in_features=784, out_features=1, bias=False)
)
training type: PGD, epsilon: 0.25098, alpha: 0.03137, steps: 15
training hyperparameters  lr: 0.010, epochs: 100 
Epoch [1/100] training type PGD, learning rate : 0.0100 loss : 0.78296  adv_type : PGD model : logistic  times : 0
Epoch [2/100] training type PGD, learning rate : 0.0100 loss : 0.59278  adv_type : PGD model : logistic  times : 0
Epoch [3/100] training type PGD, learning rate : 0.0100 loss : 0.55324  adv_type : PGD model : logistic  times : 0
Epoch [4/100] training type PGD, learning rate : 0.0100 loss : 0.52451  adv_type : PGD model : logistic  times : 0
Epoch [5/100] training type PGD, learning rate : 0.0100 loss : 0.50273  adv_type : PGD model : logistic  times : 0
Epoch [6/100] training type PGD, learning rate : 0.0100 loss : 0.48579  adv_type : PGD model : logistic  times : 0
Epoch [7/100] training type PGD, learning rate : 0.0100 loss : 0.47229  adv_type : PGD model : logist

Epoch [70/100] training type PGD, learning rate : 0.0100 loss : 0.36018  adv_type : PGD model : logistic  times : 0
Epoch [71/100] training type PGD, learning rate : 0.0100 loss : 0.35991  adv_type : PGD model : logistic  times : 0
Epoch [72/100] training type PGD, learning rate : 0.0100 loss : 0.35957  adv_type : PGD model : logistic  times : 0
Epoch [73/100] training type PGD, learning rate : 0.0100 loss : 0.35931  adv_type : PGD model : logistic  times : 0
Epoch [74/100] training type PGD, learning rate : 0.0100 loss : 0.35905  adv_type : PGD model : logistic  times : 0
Epoch [75/100] training type PGD, learning rate : 0.0100 loss : 0.35878  adv_type : PGD model : logistic  times : 0
Epoch [76/100] training type PGD, learning rate : 0.0100 loss : 0.35854  adv_type : PGD model : logistic  times : 0
Epoch [77/100] training type PGD, learning rate : 0.0100 loss : 0.35825  adv_type : PGD model : logistic  times : 0
Epoch [78/100] training type PGD, learning rate : 0.0100 loss : 0.35802 

### 3) pre-unlearning
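1) Compute the memory matrix (the total Hessian accumulated over the perturbed training samples)
2) Save the matrix to disk so it can be reloaded later
3) Delete the matrix variable from the notebook namespace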

In [5]:
# Set up the pieces needed to calculate the MUter matrix
# (library entry point: parll_calculate_memory_matrix).
# Fall back to the CPU when CUDA is unavailable instead of failing on `.to('cuda')`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)
_, _, atk_info = training_param(args)

# PGD attack built from the first three entries of atk_info (eps, alpha, steps).
atk = PGD(
    model,
    eps=atk_info[0],
    alpha=atk_info[1],
    steps=atk_info[2],
    lossfun=LossFunction(args.model),
    lam=args.lam,
)

# Vectorise all parameters, i.e. flatten them into a single column
# (784 x 1 for the model used in this run).
weight = vec_param(model.parameters()).detach()
weight.size()

torch.Size([784, 1])

In [6]:
# Inverse of ∂_δδ = w wᵀ, approximated with a Neumann series:
#   I + (I - w wᵀ) + (I - w wᵀ)² + (I - w wᵀ)³ + ...
# `args.iterneumann` is handed to derive_inv as its `iter` setting.
# NOTE(review): w wᵀ is rank-1 here (weight is 784 x 1), so it has no exact inverse;
# the result is whatever the series approximation yields — confirm this is intended.
public_partial_dd = torch.mm(weight, weight.t()).detach()  # w * wᵀ
public_partial_dd_inv = derive_inv(
    public_partial_dd,
    method='Neumann',
    iter=args.iterneumann,
)

In [9]:
def batch_hessian(weight, X, Y, args, batch_size=50000, verbose=False):
    """Regularised logistic Hessian, accumulated over row chunks of ``X``.

    Computes ``X^T (D * X) + args.lam * n * I`` where
    ``D = s * (1 - s)``, ``s = sigmoid(Y * (X @ weight))`` and ``n = X.size(0)``.

    Args:
        weight: parameter matrix that ``X`` is multiplied with
            (a ``(d, 1)`` column in this notebook).
        X: 2-D sample matrix of shape ``(n, d)``.
        Y: labels multiplied elementwise with ``X @ weight``
            (``(n, 1)`` in this notebook's runs — TODO confirm for other callers).
        args: any object exposing ``lam``, the regularisation coefficient.
        batch_size: number of rows of ``X`` processed per accumulation step.
        verbose: when True, print the shape / chunk diagnostics.

    Returns:
        A detached ``(d, d)`` tensor, on the GPU when CUDA is available and on
        the CPU otherwise. For empty ``X`` this is an all-zero matrix.

    Raises:
        ValueError: if ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    # Use the GPU when there is one; a hard-coded 'cuda' fails on CPU-only hosts.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    weight = weight.to(device)
    X = X.to(device)
    Y = Y.to(device)

    n_rows, n_feat = X.size(0), X.size(1)
    # Integer ceil-division: number of chunks needed to cover all rows.
    num_batch = (n_rows + batch_size - 1) // batch_size

    # The result is returned detached, so there is no need to record a graph.
    with torch.no_grad():
        z = torch.sigmoid(Y * X.mm(weight))  # sigmoid(Y*Xw)
        D = z * (1 - z)  # sigmoid(Y*Xw) * (1-sigmoid(Y*Xw))
        if verbose:
            print(f"z.shape: {z.shape}, D.shape: {D.shape}")
            print(f"(in func parllutils.batch_hessian) num_batch : {num_batch}, batch_size : {batch_size}")

        # Start from zeros so that an empty X still yields a valid (d, d) result.
        H = torch.zeros((n_feat, n_feat), dtype=X.dtype, device=device)
        for lower in range(0, n_rows, batch_size):
            upper = min(lower + batch_size, n_rows)
            if verbose:
                print(f"lower: {lower}, upper: {upper}")
            X_i = X[lower:upper]
            H += X_i.t().mm(D[lower:upper] * X_i)  # H += X^T D*X

        # Build the identity directly on the target device and dtype.
        ridge = args.lam * n_rows * torch.eye(n_feat, dtype=X.dtype, device=device)
        return (H + ridge).detach()


In [10]:
# NOTE(review): this whole cell is commented out; the output stored below it comes
# from an earlier run in which it was active. It appears to be a manual expansion of
# parll_calculate_memory_matrix(..., method='MUter') — confirm before re-enabling.
# # Compute the total Hessian over the n training points
# pass_loader = make_loader(train_data, batch_size=pass_batch)
# lenth = len(pass_loader)

# feature = get_featrue(args) # featrue_dict[‘binaryMnist’]=784
# matrix = torch.zeros((feature, feature)).to(device) # 784*784
# # if method == 'MUter':
# parll_partial = batch_indirect_hessian(args) # func
# for index, (image, label) in enumerate(pass_loader):
#     print('process : [{}/{}]'.format(index, lenth))
#     image = image.to(device)
#     label = label.to(device)
#     image = atk(image, label).to(device)
#     # direct Hessian
#     matrix += batch_hessian(weight, image.view(image.shape[0], feature), label, args)
#     # indirect Hessian: uses the previously computed public_partial_dd_inv
#     matrix -= parll_partial(image.view(image.shape[0], feature, 1), label, weight, public_partial_dd_inv).sum(dim=0).detach()


process : [0/102]
z.shape: torch.Size([128, 1]), D.shape: torch.Size([128, 1])
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
lower: 0, upper: 128
process : [1/102]
z.shape: torch.Size([128, 1]), D.shape: torch.Size([128, 1])
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
lower: 0, upper: 128
process : [2/102]
z.shape: torch.Size([128, 1]), D.shape: torch.Size([128, 1])
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
lower: 0, upper: 128
process : [3/102]
z.shape: torch.Size([128, 1]), D.shape: torch.Size([128, 1])
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
lower: 0, upper: 128
process : [4/102]
z.shape: torch.Size([128, 1]), D.shape: torch.Size([128, 1])
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
lower: 0, upper: 128
process : [5/102]
z.shape: torch.Size([128, 1]), D.shape: torch.Size([128, 1])
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
lower: 

z.shape: torch.Size([128, 1]), D.shape: torch.Size([128, 1])
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
lower: 0, upper: 128
process : [55/102]
z.shape: torch.Size([128, 1]), D.shape: torch.Size([128, 1])
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
lower: 0, upper: 128
process : [56/102]
z.shape: torch.Size([128, 1]), D.shape: torch.Size([128, 1])
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
lower: 0, upper: 128
process : [57/102]
z.shape: torch.Size([128, 1]), D.shape: torch.Size([128, 1])
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
lower: 0, upper: 128
process : [58/102]
z.shape: torch.Size([128, 1]), D.shape: torch.Size([128, 1])
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
lower: 0, upper: 128
process : [59/102]
z.shape: torch.Size([128, 1]), D.shape: torch.Size([128, 1])
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
lower: 0, upper: 128

In [5]:
# Loader over the full training data, batched with pass_batch (= args.parllsize).
pass_loader = make_loader(train_data, batch_size=pass_batch)
print(len(pass_loader)) # 102 batches in total; batch_size=128, the last batch is smaller (13007 = 101*128 + 79)

# NOTE(review): the lines below are commented out, but the output stored under this
# cell was produced while they were active. The following cell's load_memory_matrix
# relies on the file they saved, so a fresh run needs that file to exist already.
# # calculate of partial_(wx@xx^{-1}@xw) by parll (set batch_size=args.parllsize(128) for acceleration)
# # process : [x/102] batched pass computing the partial_𝜹𝜹 hessian matrix over all train_data
# matrix = parll_calculate_memory_matrix(model, pass_loader, args, method='MUter')
# store_memory_matrix(matrix, args, method='MUter')
# del matrix

102
weight's shape: torch.Size([784, 1])
process : [0/102]
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
process : [1/102]
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
process : [2/102]
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
process : [3/102]
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
process : [4/102]
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
process : [5/102]
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
process : [6/102]
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
process : [7/102]
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
process : [8/102]
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
process : [9/102]
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
process : [10/102]
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
p

process : [97/102]
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
process : [98/102]
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
process : [99/102]
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
process : [100/102]
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
process : [101/102]
(in func parllutils.batch_hessian) num_batch : 1, batch_size : 50000
matrix shape torch.Size([784, 784]), type <class 'torch.Tensor'>
memory matrix for MUter method using perturb samples to calculate
saving matrix...
done!


In [6]:
# Reload the stored MUter memory matrix and move it onto `device`.
matrix = load_memory_matrix(
    args,
    method='MUter',
).to(device)

loading memory matrix : ../data/MemoryMatrix/dataset_binaryMnist_adv_PGD_model_logistic_method_MUter_sample_perturbed.pt
matrix shape torch.Size([784, 784]), type <class 'torch.Tensor'>
done!
