In [1]:
import torch
import random
import time
import copy
import numpy as np

from torchattacks import PGD

from dataloder import *
from argument import *
from model import *
from pretrain import *
from utils import *
from parllutils import *
from modules import *

# Experiment configuration object built by the project's `argument()` helper.
args = argument()
# Every model and tensor in this notebook is moved to this device; a CUDA GPU is required.
device = 'cuda'

def setup_seed(seed):
    """Seed every random number generator used by the notebook.

    Seeds PyTorch (CPU and all CUDA devices), NumPy and Python's ``random``
    module, and configures cuDNN so that repeated runs select the same
    deterministic kernels.

    Args:
        seed: Integer seed applied to all generators.
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick a different algorithm on each run, which
    # defeats the deterministic flag above; switch it off explicitly.
    torch.backends.cudnn.benchmark = False

# Seed all RNGs with the repetition index of this run (args.times).
setup_seed(args.times)

In [2]:
delete_num = 600 # total number of points to delete
delete_batch = 1 # number of points deleted per unlearning step
pass_batch = args.parllsize # batch size used to compute the total Hessian in parallel

# Timing records: stage 1 is a single measurement; stages 2 and 3 get one entry per deletion.
muter_time1 = 0.0
muter_time2_sequence = []
muter_time3_sequence = []

# Display the configuration as the cell output.
delete_num, delete_batch, pass_batch

(600, 1, 128)

## Pre-processing
### 1) load data

In [3]:
# Load the dataset through the project's Load_Data helper.
# train_data / test_data are indexable: element 0 holds the samples and element 1 the labels
# (see their use below and in the unlearning loop).
# NOTE(review): the meaning of `re_sequence` is not visible in this notebook — confirm in dataloder.
train_data, test_data, re_sequence = Load_Data(args, delete_num, shuffle=True)
# Mini-batch loaders used for training and evaluation.
train_loader = make_loader(train_data, batch_size=args.batchsize)
test_loader = make_loader(test_data, batch_size=args.batchsize)
print(f"total number of train data: {len(train_data[0])}, test data: {len(test_data[0])}")

train labels: tensor([1, 1, 1,  ..., 1, 7, 1])
total number of train data: 13007, test data: 2163


In [5]:
# Train a baseline logistic model on clean (unperturbed) data and report its test accuracy.
lr, epochs, atk_info = training_param(args)

# model
model_clean = LogisticModel(input_featrue=get_featrue(args)).to(device)
# loss function
criterion = LossFunction(args.model).to(device)
# optimizer
optimizer = torch.optim.SGD(model_clean.parameters(), lr=lr)

model_clean.train()

for epoch in range(epochs):
    total_loss = 0.0
    step = 0
    for data, label in train_loader:
        label = label.to(device)
        data = data.to(device)
        output = model_clean(data)
        loss = criterion(output, label)

        if args.lam != 0.0:
            # L2 penalty 0.5 * lam * ||w||^2. The scalars are created on `device`
            # so the whole regulariser lives on the same device as the parameters.
            lam = torch.tensor(0.5 * args.lam, device=device)
            l2_reg = torch.tensor(0.0, device=device)
            for param in model_clean.parameters():
                l2_reg = l2_reg + lam * param.pow(2.0).sum()
            loss = loss + l2_reg

        optimizer.zero_grad()
        step = step + 1
        total_loss += loss.item()
        loss.backward()
        optimizer.step()


model_clean.eval()
correct = 0
total = 0

# Clean test accuracy. Gradients are not needed for evaluation, so autograd is
# disabled to avoid building a graph for every test batch.
with torch.no_grad():
    for data, label in test_loader:
        label = label.to(device)
        data = data.to(device)

        # The model output is rounded to obtain the predicted label.
        predict = model_clean(data).round()
        total = total + data.shape[0]
        correct = correct + (predict == label).sum()

clean_test_acc = float(correct) / total

print('clean test acc : {:.2f}%'.format(clean_test_acc * 100))


clean test acc : 99.21%


### 2) load adversarially trained model (original model w*)

In [6]:
# Adversarial training of the original model via the project's `train` helper.
# The checkpoint path below is disabled; model_path=None is passed instead.
# NOTE(review): presumably model_path=None means "train from scratch, do not load/save" — confirm in pretrain.
# model_path = os.path.join('..', 'data', 'ATM', f"dataset_{args.dataset}_adv_{args.adv}_model_{args.model}_points_{len(train_loader.dataset)}_{args.times}.pth")
model, training_time = train(train_loader, test_loader, args, desc='Pre-Adv Training', verbose=True, model_path=None)
# Display the trained model and the training time returned by `train`.
model, training_time

model information: 
LogisticModel(
  (fc): Linear(in_features=784, out_features=1, bias=False)
)
training type: PGD, epsilon: 0.25098, alpha: 0.03137, steps: 15
training hyperparameters  lr: 0.010, epochs: 100 


Pre-Adv Training: 100%|██████████| 100/100 [01:47<00:00,  1.08s/it, adv_train_type=PGD, loss=0.354, lr=0.01, model=logistic, times=0]

traning PGD model spending 107.63 seconds





(LogisticModel(
   (fc): Linear(in_features=784, out_features=1, bias=False)
 ),
 107.62872791290283)

In [7]:
# Evaluate the adversarially trained model; Test_model returns clean and perturbed test accuracy.
clean_acc, perturb_acc = Test_model(model, test_loader, args) 

clean test acc : 96.90%
perturb test acc : 91.22%


In [8]:
# Stage 1: compute the memory matrix over the whole training set and time it.
# Loader over the training data using the parallel batch size.
pass_loader = make_loader(train_data, batch_size=pass_batch)
# Container for the computed matrix, keyed by method name.
matrices = dict(MUter=None)

method = 'MUter'
isDelta = False
ssr = 'unperturbed'
# NOTE(review): `filename` (and `ssr`, which only feeds it) is built here but never used in this cell.
filename = f'dataset_{args.dataset}_adv_{args.adv}_model_{args.model}_method_{method}_sample_{ssr}_{args.times}.pt'

start_time = time.time()
matrices[method] = parll_calculate_memory_matrix(model, pass_loader, args, method, isDelta)
end_time = time.time()
# Wall-clock time of stage 1, reported at the end of the notebook.
muter_time1 = end_time - start_time
# Display the resulting matrix as the cell output.
matrices[method]

  0%|          | 0/102 [00:00<?, ?it/s]

tensor([[-120426.9531,  -60783.2812,  -60783.3164,  ...,  -60783.3164,
          -60783.2969,  -60783.2969],
        [ -60783.2812, -120426.9297,  -60783.2969,  ...,  -60783.2969,
          -60783.2930,  -60783.2969],
        [ -60783.3164,  -60783.3047, -120427.0078,  ...,  -60783.3281,
          -60783.3281,  -60783.3281],
        ...,
        [ -60783.3164,  -60783.3047,  -60783.3281,  ..., -120427.0391,
          -60783.3281,  -60783.3281],
        [ -60783.2969,  -60783.2969,  -60783.3281,  ...,  -60783.3281,
         -120427.0391,  -60783.3242],
        [ -60783.3047,  -60783.2969,  -60783.3281,  ...,  -60783.3281,
          -60783.3242, -120427.0391]], device='cuda:0')

## Stage II: Unlearning
1) Set up the inner-level attack method;
2) Calculate the shared terms partial_xx and partial_xx_inv for the linear model;
3) Initialize the gradient information.

In [9]:
from torchattacks import PGD
import copy
from utils import cg_solve, derive_inv
import time
from torch.utils.data import DataLoader, TensorDataset

# Inner level attack method
# atk_info[0..2] are passed positionally to PGD; presumably epsilon, alpha and steps — confirm in training_param.
# NOTE(review): `lossfun` and `lam` are not keyword arguments of the upstream torchattacks PGD;
# this presumably relies on a locally patched version — confirm.
_, _, atk_info = training_param(args)
atk = PGD(model, atk_info[0], atk_info[1], atk_info[2], lossfun=LossFunction(args.model), lam=args.lam)

# Calculate the public part partial_xx and partial_xx_inv for linear model
# These terms depend only on the weights, so they are computed once and rescaled per sample later.
feature = get_featrue(args)
weight = vec_param(model.parameters()).detach()
# w w^T
public_partial_xx = (weight.mm(weight.t())).detach()
# NOTE(review): if `weight` is a (feature, 1) column, w w^T has rank one and is singular for
# feature > 1, so the Neumann result can only be an approximation — confirm this is intended.
public_partial_xx_inv = derive_inv(public_partial_xx, method='Neumann', iter=args.iterneumann)

In [10]:
step = 1  # counts the unlearning rounds performed so far

# Removal checkpoints to compare at, per dataset: the first five single
# removals followed by roughly 1%, 2%, 3%, 4% and 5% of the training set.
_REMOVAL_CHECKPOINTS = {
    'binaryMnist': [1, 2, 3, 4, 5, 120, 240, 360, 480, 600],
    'phishing': [1, 2, 3, 4, 5, 100, 200, 300, 400, 500],
    'madelon': [1, 2, 3, 4, 5, 20, 40, 60, 80, 100],
    'covtype': [1, 2, 3, 4, 5, 5000, 10000, 15000, 20000, 25000],
    'epsilon': [1, 2, 3, 4, 5, 4000, 8000, 12000, 16000, 20000],
}
# Any dataset not listed above falls back to the schedule used for splice.
remove_list = _REMOVAL_CHECKPOINTS.get(args.dataset, [1, 2, 3, 4, 5, 10, 20, 30, 40, 50])


In [11]:
def partial_hessian(x, y, weight, public_partial_xx, public_partial_xx_inv, args):
    """Compute the second-order blocks of the loss at a single sample for a linear model.

    Args:
        x: Sample as a column tensor (the caller passes shape ``(feature, 1)``).
        y: Label tensor; it multiplies the logit ``x^T w`` inside the sigmoid.
        weight: Model weights as a column tensor compatible with ``x.t().mm(weight)``.
        public_partial_xx: Sample-independent factor of the xx block
            (the caller passes ``weight @ weight.T``).
        public_partial_xx_inv: (Approximate) inverse of ``public_partial_xx``.
        args: Configuration object; only ``args.lam`` (L2 coefficient) is read.

    Returns:
        Tuple ``(partial_ww, partial_wx, partial_xw, partial_xx, partial_xx_inv)``
        of detached tensors.
    """
    x_size = x.shape[0]
    weight_size = weight.shape[0]

    # Build the identity matrices directly on the inputs' device instead of
    # relying on a notebook-global `device` and a CPU -> device transfer.
    eye_x = torch.eye(x_size, device=x.device)
    eye_w = torch.eye(weight_size, device=weight.device)

    z = torch.sigmoid(y * (x.t().mm(weight)))
    D = z * (1 - z)
    # Coefficient of the identity term shared by the two mixed blocks.
    first_order = (z - 1) * y

    partial_ww = (D * (x.mm(x.t()))) + (args.lam * eye_w)
    partial_wx = (D * (x.mm(weight.t()))) + (first_order * eye_x)
    partial_xw = (D * (weight.mm(x.t()))) + (first_order * eye_w)
    partial_xx = D * public_partial_xx
    # Scaling a matrix by D scales its inverse by 1/D. Note that D tends to 0
    # when the sigmoid saturates, in which case this factor becomes very large.
    partial_xx_inv = (1 / D) * public_partial_xx_inv

    return partial_ww.detach(), partial_wx.detach(), partial_xw.detach(), partial_xx.detach(), partial_xx_inv.detach()
    

In [12]:
# Initialise the accumulated gradient information.
grad = torch.zeros((feature, 1)).to(device)
clean_grad = torch.zeros((feature, 1)).to(device)
parll_partial = batch_indirect_hessian(args)


def _synced_time():
    """Return the wall-clock time after all queued CUDA work has finished.

    CUDA kernels run asynchronously, so reading the clock without a
    synchronisation would attribute GPU work to the wrong stage.
    """
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


# Delete `delete_num` points, one per iteration.
# NOTE(review): indices run from 1 to delete_num, so index 0 is never removed;
# confirm this 1-based indexing matches the ordering produced by Load_Data.
for batch_delete_num in range(1, delete_num+1, 1):
    print('The {}-th delete'.format(batch_delete_num))
    # prepare work
    unlearning_model = copy.deepcopy(model).to(device)  # for MUter method
    # Slice rather than index so the sample and its label keep the leading
    # batch dimension, i.e. the same layout as the batches used for training.
    # Integer indexing dropped that dimension, which is the likely cause of the
    # "target size [1] vs input size [1, 1]" ValueError — TODO confirm by re-running.
    x = train_data[0][batch_delete_num:batch_delete_num + 1].to(device)
    y = train_data[1][batch_delete_num:batch_delete_num + 1].to(device)
    x_delta = atk(x, y).to(device)

    clean_grad += parll_loss_grad(weight, x.view(1, feature), y, args).detach()

    # stage 2: compute delta_w
    start_time = _synced_time()
    ## Unlearning by Schur Complement Conversion
    # Accumulate the gradient of the removed (perturbed) point: Mr[nabla_w] + nabla_w_{r+1}
    # .view(1, feature): single point
    grad += parll_loss_grad(weight, x_delta.view(1, feature), y, args).detach()
    partial_ww, partial_wx, partial_xw, partial_xx, partial_xx_inv = partial_hessian(x_delta.view(feature, 1), y, weight, public_partial_xx, public_partial_xx_inv, args)

    block_matrix = buliding_matrix(matrices['MUter'], partial_ww, partial_wx, -partial_xx, partial_xw)
    grad_cat_zero = torch.cat([grad, torch.zeros((feature, 1)).to(device)], dim=0)

    delta_w_cat_alpha = cg_solve(block_matrix, grad_cat_zero.squeeze(dim=1), get_iters(args))
    # Only the first `feature` entries of the solution are the weight update.
    delta_w = delta_w_cat_alpha[:feature]
    update_w(delta_w, unlearning_model)

    end_time = _synced_time()
    muter_time2 = end_time - start_time
    muter_time2_sequence.append(muter_time2)

    # Evaluation runs after the stage-2 timer has stopped so that test-set
    # inference is not counted as unlearning time.
    clean_acc, perturb_acc = Test_model(unlearning_model, test_loader, args)
    print(clean_acc, perturb_acc)

    # stage 3: update matrix
    start_time = _synced_time()
    ## update matrix M_{r+1}
    matrices['MUter'] -= (partial_ww - (partial_wx.mm(partial_xx_inv.mm(partial_xw))))

    end_time = _synced_time()
    muter_time3 = end_time - start_time
    muter_time3_sequence.append(muter_time3)
    

The 1-th delete


ValueError: Using a target size (torch.Size([1])) that is different to the input size (torch.Size([1, 1])) is deprecated. Please ensure they have the same size.

In [None]:
# Report the timings collected above.
# Stage 1 is a single measurement, so its "average" is just that value.
# Stages 2 and 3 are averaged over the deletions that completed; if the unlearning loop
# did not complete any iteration the lists are empty and np.mean returns nan.
print('avg stage 1 time {:.4f}'.format(np.mean(muter_time1)))
print('avg stage 2 time {:.4f}'.format(np.mean(muter_time2_sequence)))
print('avg stage 3 time {:.4f}'.format(np.mean(muter_time3_sequence)))