In [15]:
# --- Run configuration ---------------------------------------------------
DEVICE = 'cuda:0'                # preferred device string
DIMENSION = 224                  # image size used by the input pipeline
MINI_BS = 100                    # physical mini-batch size fed to the model
BS = 1000                        # logical batch size per optimizer step
LR = 1e-4                        # learning rate
MODEL = 'vit_base_patch16_224'   # model name
CLIPPING_MODE = 'nonDP'          # mode label
MODEL_PATH = './checkpoints/nonDP_model_epoch_4.pth'
# Alternative starting checkpoint:
# MODEL_PATH='./checkpoints/nonDP_model_begin.pth'

# Number of mini-batches whose gradients are accumulated per logical batch.
n_acc_steps = BS // MINI_BS

In [16]:
import torch

# Use the configured device when CUDA is available; otherwise fall back to CPU.
_device_name = DEVICE if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print("device:", device)

device: cuda:0


In [17]:
print('==> Preparing data..')

import torchvision

# Preprocessing shared by both splits: resize, convert to a tensor, then
# normalise each of the three channels with mean 0.5 and std 0.5.
_transform_steps = [
    torchvision.transforms.Resize(DIMENSION),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
transformation = torchvision.transforms.Compose(_transform_steps)

# CIFAR-100 train split first, then the test split, both rooted at data/.
trainset, testset = (
    torchvision.datasets.CIFAR100(
        root='data/', train=is_train, download=True, transform=transformation)
    for is_train in (True, False)
)

# shuffle=True: the training set is reshuffled before batches are drawn.
trainloader = torch.utils.data.DataLoader(
    dataset=trainset, batch_size=MINI_BS, shuffle=True, num_workers=4)

# The test loader keeps dataset order and uses a fixed batch size of 100.
testloader = torch.utils.data.DataLoader(
    dataset=testset, batch_size=100, shuffle=False, num_workers=4)


==> Preparing data..
Files already downloaded and verified
Files already downloaded and verified


In [18]:
import timm
from opacus.validators import ModuleValidator
import torch.nn as nn
import torch.optim as optim

# map_location remaps the stored tensors onto the device selected earlier, so a
# checkpoint written on a GPU can still be restored when the notebook has
# fallen back to the CPU.
checkpoint = torch.load(MODEL_PATH, map_location=device)

num_classes=100
print('==> Building model..', MODEL,'; BatchNorm is replaced by GroupNorm. Mode: ', CLIPPING_MODE)
net = timm.create_model(MODEL,pretrained=True,num_classes=num_classes)
# Original author's note: fix() makes the model usable for DP training.
net = ModuleValidator.fix(net)
# Restore the checkpointed weights before moving the model to the device.
net.load_state_dict(checkpoint['model_state_dict'])
net=net.to(device)

print('Number of total parameters: ', sum([p.numel() for p in net.parameters()]))
print('Number of trainable parameters: ', sum([p.numel() for p in net.parameters() if p.requires_grad]))

criterion = nn.CrossEntropyLoss()

# Re-create Adam over the restored parameters, then restore its saved state so
# training resumes from where the checkpoint left off.
optimizer = optim.Adam(net.parameters(), lr=LR)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])


==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


# 策略
batch size 会影响准确率。每个 logical batch（BS 个样本）对应一个 true gradient（每累积 n_acc_steps 个 mini-batch 得到一次）。针对以下三个环节，我们需要在一个 epoch 内：

 1、计算每个 logical batch 的 true gradient。由于 BS=1000、dataset_size=50000，一个 epoch 总共会产生 50 个 true gradient；将它们保存到文件中，并在每次 optimizer.step() 之后保存一次模型文件。<br>
 2、设置batch_size=1，打开之前训练好的各个模型文件，分别计算per-sample gradient(可以取间隔,例如每个true gradient对应20个点,注意每个true gradient对应1000个sample)，保存到文件中。<br>
 3、最后用一段小程序画图就好

In [19]:
# Train first: save the model checkpoints and collect the true gradients.

from tqdm.notebook import tqdm
import numpy as np
import os

SHOW_STEPS=100  # print a training log line every SHOW_STEPS mini-batches

STAGE='end'     # tag embedded in every checkpoint / gradient file name
NUM=0           # index 0 = model state before any optimizer step in this cell
model_path = f'./nonDP_checkpoints/{STAGE}_{NUM}.pth'
# torch.save does not create missing parent directories, so make sure the
# checkpoint folder exists before the first write.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
torch.save({
    'model_state_dict': net.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
}, model_path)
print(f'Model saved to {model_path}')


def get_gradient(net, param_name='patch_embed.proj.bias'):
    """Return the current gradient of the selected parameter(s) as a flat array.

    Args:
        net: module whose parameters already hold gradients (i.e. ``backward``
            has been called and the gradients have not been cleared yet).
        param_name: exact name, as yielded by ``net.named_parameters()``, of
            the parameter whose gradient is read. Pass ``None`` to concatenate
            the gradients of every parameter that has one, in
            ``named_parameters()`` order.

    Returns:
        A new 1-D NumPy array holding a copy of the selected gradient values.

    Raises:
        ValueError: if no selected parameter currently has a gradient (unknown
            name, or ``backward`` has not populated it).
    """
    pieces = [
        # reshape (rather than view) also works for non-contiguous gradients.
        param.grad.detach().reshape(-1).cpu().numpy()
        for name, param in net.named_parameters()
        if param.grad is not None and (param_name is None or name == param_name)
    ]
    if not pieces:
        target = 'any parameter' if param_name is None else repr(param_name)
        raise ValueError(f'no gradient available for {target}')
    # np.concatenate allocates a fresh array, so the result stays valid after
    # the gradients are zeroed.
    return np.concatenate(pieces)

import os

# One entry per optimizer step: the gradient of the logical batch that
# produced that step.
true_gradient=[]

net.train()
train_loss = 0
correct = 0
total = 0

# Neither torch.save nor np.save creates missing directories.
os.makedirs('./nonDP_checkpoints', exist_ok=True)
os.makedirs('./gradients', exist_ok=True)

num_minibatches = len(trainloader)
pending = 0  # mini-batches accumulated into .grad since the last optimizer step

for batch_idx, (inputs, targets) in enumerate(tqdm(trainloader)):
    inputs, targets = inputs.to(device), targets.to(device)
    outputs = net(inputs)
    loss = criterion(outputs, targets)  # cross-entropy loss of this mini-batch
    loss.backward()                     # adds this mini-batch's gradient to .grad
    pending += 1

    # A logical batch ends every n_acc_steps mini-batches (or at the end of the data).
    if ((batch_idx + 1) % n_acc_steps == 0) or ((batch_idx + 1) == num_minibatches):
        # .grad now holds the SUM of `pending` mini-batch gradients. Divide by
        # `pending` so the stored value is the mean over the logical batch and
        # is on the same scale as a single-sample gradient (exact when the
        # accumulated mini-batches have equal size). Only the recorded value is
        # rescaled; the optimizer still steps on the accumulated gradient, so
        # training itself is unchanged.
        present_true_gradient = get_gradient(net) / pending
        true_gradient.append(present_true_gradient)
        print(len(true_gradient))
        optimizer.step()       # one parameter update per logical batch
        optimizer.zero_grad()
        pending = 0

        # Save the model after each logical batch. Indexing by the number of
        # steps taken keeps file names unique even if the last group is partial.
        NUM = len(true_gradient)
        model_path = f'./nonDP_checkpoints/{STAGE}_{NUM}.pth'
        torch.save({
            'model_state_dict': net.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, model_path)
        print(f'Model saved to {model_path}')

    # Running training statistics over all mini-batches seen so far.
    train_loss += loss.item()
    _, predicted = outputs.max(1)
    total += targets.size(0)
    correct += predicted.eq(targets).sum().item()

    # Periodic log line.
    if ((batch_idx + 1) % SHOW_STEPS == 0) or ((batch_idx + 1) == num_minibatches):
        tqdm.write("----------------------------------------------------------------------------------------")
        tqdm.write('Epoch: {}, step: {}, Train Loss: {:.3f} | Acc: {:.3f}% ({}/{})'.format(
            'none', batch_idx + 1, train_loss / (batch_idx + 1), 100. * correct / total, correct, total))

# The list of equal-length 1-D arrays is written as a single array.
np.save(f'./gradients/nonDP_{STAGE}_true_gradients.npy', true_gradient)


print('Epoch: ', 'none', "total: ", len(trainloader), 'Train Loss: %.3f | Acc: %.3f%% (%d/%d)'
                    % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))



Model saved to ./nonDP_checkpoints/end_0.pth


  0%|          | 0/500 [00:00<?, ?it/s]

1
Model saved to ./nonDP_checkpoints/end_1.pth
2
Model saved to ./nonDP_checkpoints/end_2.pth
3
Model saved to ./nonDP_checkpoints/end_3.pth
4
Model saved to ./nonDP_checkpoints/end_4.pth
5
Model saved to ./nonDP_checkpoints/end_5.pth
6
Model saved to ./nonDP_checkpoints/end_6.pth
7
Model saved to ./nonDP_checkpoints/end_7.pth
8
Model saved to ./nonDP_checkpoints/end_8.pth
9
Model saved to ./nonDP_checkpoints/end_9.pth
10
Model saved to ./nonDP_checkpoints/end_10.pth
----------------------------------------------------------------------------------------
Epoch: none, step: 100, Train Loss: 0.015 | Acc: 99.610% (9961/10000)
11
Model saved to ./nonDP_checkpoints/end_11.pth
12
Model saved to ./nonDP_checkpoints/end_12.pth
13
Model saved to ./nonDP_checkpoints/end_13.pth
14
Model saved to ./nonDP_checkpoints/end_14.pth
15
Model saved to ./nonDP_checkpoints/end_15.pth
16
Model saved to ./nonDP_checkpoints/end_16.pth
17
Model saved to ./nonDP_checkpoints/end_17.pth
18
Model saved to ./nonDP_

In [20]:
# Switch to per-sample gradients: one example per mini-batch and per logical
# batch, to be evaluated against each saved true-gradient checkpoint.
MINI_BS = BS = 1
n_acc_steps = BS // MINI_BS  # gradient accumulation steps

# Rebuild the loaders so the training loader picks up the new mini-batch size;
# shuffle=True reshuffles the dataset before samples are drawn.
trainloader = torch.utils.data.DataLoader(
    dataset=trainset, batch_size=MINI_BS, shuffle=True, num_workers=4)

testloader = torch.utils.data.DataLoader(
    dataset=testset, batch_size=100, shuffle=False, num_workers=4)

In [21]:
import timm
from opacus.validators import ModuleValidator
import torch.nn as nn
import torch.optim as optim

import os

total_per_sample_gradient=[]

NUM_CHECKPOINTS = 50          # checkpoints {STAGE}_0 ... {STAGE}_49, one per true gradient
SAMPLES_PER_CHECKPOINT = 1000 # how far into the shuffled stream each checkpoint reads
RECORD_EVERY = 50             # record every 50th sample -> 20 gradients per checkpoint

# np.save does not create missing directories.
os.makedirs('./gradients', exist_ok=True)

for i in range(NUM_CHECKPOINTS):
    print('i=',i)
    MODEL_PATH='nonDP_checkpoints/'+STAGE+"_"+str(i)+'.pth'
    # map_location keeps the load working when running on the CPU fallback.
    checkpoint = torch.load(MODEL_PATH, map_location=device)
    num_classes=100
    print('==> Building model..', MODEL,'; BatchNorm is replaced by GroupNorm. Mode: ', CLIPPING_MODE)
    net = timm.create_model(MODEL,pretrained=True,num_classes=num_classes)
    # Original author's note: fix() makes the model usable for DP training.
    net = ModuleValidator.fix(net)
    net.load_state_dict(checkpoint['model_state_dict'])
    net=net.to(device)

    print('Number of total parameters: ', sum([p.numel() for p in net.parameters()]))
    print('Number of trainable parameters: ', sum([p.numel() for p in net.parameters() if p.requires_grad]))

    criterion = nn.CrossEntropyLoss()
    # The optimizer is rebuilt so the notebook-level `optimizer` keeps pointing
    # at the current `net`; it is used only to clear gradients below.
    optimizer = optim.Adam(net.parameters(), lr=LR)
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    i_per_sample_gradient=[]

    net.train()
    train_loss = 0
    correct = 0
    total = 0

    for batch_idx, (inputs, targets) in enumerate(tqdm(trainloader)):
        # Only every RECORD_EVERY-th sample is recorded. Because the weights are
        # never updated in this loop, the skipped samples cannot influence the
        # recorded gradients, so their forward/backward pass is not needed.
        if (batch_idx + 1) % RECORD_EVERY == 0:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)  # cross-entropy loss of this single sample
            loss.backward()

            present_per_sample_gradient=get_gradient(net)
            i_per_sample_gradient.append(present_per_sample_gradient)

            # Deliberately no optimizer.step(): every per-sample gradient must
            # be evaluated at the weights of checkpoint i. Clear the gradient
            # so the next sample does not accumulate on top of this one.
            optimizer.zero_grad()

            # Statistics over the recorded samples.
            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

        if(batch_idx+1==SAMPLES_PER_CHECKPOINT):
            break
    print(len(i_per_sample_gradient))
    np.save(f'./gradients/nonDP_{STAGE}_{i}_gradients.npy', i_per_sample_gradient)
    #total_per_sample_gradient.append(i_per_sample_gradient)
    #total_per_sample_gradient.append(i_per_sample_gradient)





i= 0
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 1
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 2
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 3
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 4
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 5
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 6
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 7
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 8
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 9
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 10
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 11
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 12
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 13
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 14
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 15
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 16
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 17
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 18
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 19
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 20
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 21
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 22
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 23
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 24
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 25
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 26
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 27
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 28
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 29
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 30
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 31
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 32
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 33
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 34
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 35
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 36
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 37
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 38
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 39
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 40
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 41
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 42
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 43
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 44
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 45
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 46
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 47
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 48
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
i= 49
==> Building model.. vit_base_patch16_224 ; BatchNorm is replaced by GroupNorm. Mode:  nonDP
Number of total parameters:  85875556
Number of trainable parameters:  85875556


  0%|          | 0/50000 [00:00<?, ?it/s]

20
