In [12]:
# First we need to mount the Google drive
import os
from google.colab import drive
drive.mount('/content/Hadrive')

Drive already mounted at /content/Hadrive; to attempt to forcibly remount, call drive.mount("/content/Hadrive", force_remount=True).


In [13]:

configs = dict({
"1": { "lr_initial": 0.1, "decay": 0.9, "sigma": 0.00000001, "const_C": 1000}
})


for index, config in configs.items():
  print(f"index: {index}")
  print(f"config: {config}")
  for key,value in config.items():
    print(f"key: {key}, value: {value}")



index: 1
config: {'lr_initial': 0.1, 'decay': 0.9, 'sigma': 1e-08, 'const_C': 1000}
key: lr_initial, value: 0.1
key: decay, value: 0.9
key: sigma, value: 1e-08
key: const_C, value: 1000


In [None]:
#!mkdir /content/Hadrive/MyDrive/Test1
#!mkdir /content/Hadrive/MyDrive/Test1/Tutorial1/

In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets
from torchvision import transforms
from torch.func import functional_call, vmap, grad


import pickle
import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict
from collections import defaultdict


In [15]:
data_path = '/content/Hadrive/MyDrive/Test1/Tutorial1/'

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4915, 0.4823, 0.4468), (0.2470, 0.2435, 0.2616))
])

# cifar10 = datasets.CIFAR10(data_path, train=True, download=True, transform=transform)
# cifar10_val = datasets.CIFAR10(data_path, train=False, download=True, transform=transform)

nbsamples = 100

#https://stackoverflow.com/questions/47432168/taking-subsets-of-a-pytorch-dataset
cifar10_org = datasets.CIFAR10(data_path, train=True, download=False, transform=transform)
#cifar10 = torch.utils.data.Subset(cifar10_org, list(range(0, nbsamples)))
cifar10 = cifar10_org

cifar10_val_org = datasets.CIFAR10(data_path, train=False, download=False, transform=transform)
#cifar10_val = torch.utils.data.Subset(cifar10_val_org, list(range(0, nbsamples)))
cifar10_val = cifar10_val_org

print(f"lencifar10: {len(cifar10)}")
print(f"lencifar10_val: {len(cifar10_val)}")

lencifar10: 50000
lencifar10_val: 10000


In [16]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=True)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                           shuffle=True)

In [17]:
# model
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(8 * 8 * 8, 32)
        # bài toán phân loại 10 lớp nên output ra 10 nodes
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
        out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
        # flatten về dạng vector để cho vào neural network
        out = out.view(-1, 8 * 8 * 8)
        out = torch.tanh(self.fc1(out))
        out = self.fc2(out)
        return out


In [22]:
import pickle

import torch
import torchvision
import torch.nn as nn
import numpy as np
import torchvision.transforms as transforms
from collections import OrderedDict
from collections import defaultdict
from torch.func import functional_call, vmap, grad

def generate_private_grad(model,loss_fn,samples,targets,sigma,const_C):
    '''
        We generate private grad given a batch of samples (samples,targets) in batchclipping mode
        The implementation flow is as follows:
            1. samples x0, x1, ..., x(L-1)
            2. compute avg of gradient g = sum(g0, ..., g(L-1))/L
            3. clipped gradient gc
            4. clipped noisy gradient (gc*L + noise)/L

        Finally, we normalize the private gradient and update the model.grad. This step allows optimizer update the model
    '''

    #copute the gradient of the whole batch
    outputs = model(samples)
    loss = loss_fn(outputs, targets)
    model.zero_grad()
    loss.backward()

    #get the list of gradients of the whole batch for computing the total norm
    grads = list()
    for param in model.parameters():
        grads.append(param.grad)

    device = grads[0].device
    norm_type = 2.0
    max_norm = const_C #clipping constant C
    total_norm = torch.norm(torch.stack([torch.norm(grad.detach(), norm_type).to(device) for grad in grads]), norm_type)
    clip_coef = max_norm / (total_norm + 1e-6)
    clip_coef_clamped = torch.clamp(clip_coef, max=1.0)

    #clipping the gradient
    #https://discuss.pytorch.org/t/how-to-clip-grad-norm-grads-from-torch-autograd-grad/137816/2
    for param in model.parameters():
        param.grad.detach().mul_(clip_coef_clamped)

    #generate private grad per layer
    mean = 0
    std = sigma*const_C
    batch_size = len(samples)
    for param in model.parameters():
        grad = param.grad
        #generate the noise ~ N(0,(C\sigma)^2I)
        #std -- is C\sigma as explain this in wikipage https://en.wikipedia.org/wiki/Normal_distribution N(mu,\sigma^2) and sigma is std
        noise = torch.normal(mean=mean, std=std, size=grad.shape)
        #generate private gradient per layer
        param.grad = (grad*batch_size + noise)/batch_size

    return 0

def training_loop(n_epochs, optimizer, model, loss_fn, sigma, const_C, train_loader, val_loader, data_path):
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0

        for imgs, labels in train_loader:

          outputs = model(imgs)
          loss = loss_fn(outputs, labels)
          loss_train += loss.item()

          optimizer.zero_grad()
          '''
            generate_private_grad(model,loss_fn,imgs,labels,sigma,const_C)
              1. Compute the grad for whole batch of samples
              2. Clip the gradient of the batch of samples
              3. Add noise to the clipped grad of the whole batch of samples
              4. Update the model.grad. This helps optimizer.step works as normal.
          '''
          #loss.backward()
          generate_private_grad(model,loss_fn,imgs,labels,sigma,const_C)
          optimizer.step()

        correct = 0
        with torch.no_grad():
            for data in val_loader:
                images, labels = data
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                c = (predicted == labels).squeeze()
                correct += c.sum()
        if epoch == 1 or epoch % 1 == 0:
            print('Epoch {}, Training loss {}, Val accuracy {}'.format(
                epoch,
                loss_train / len(train_loader),
                correct / len(cifar10_val)))

        before_lr = optimizer.param_groups[0]["lr"]
        scheduler.step()
        after_lr = optimizer.param_groups[0]["lr"]
        print("Epoch %d: SGD lr %.4f -> %.4f" % (epoch, before_lr, after_lr))


        #save the model config
        model_state = model.state_dict()
        optimizer_state = optimizer.state_dict()
        scheduler_state = scheduler.state_dict()
        dict_state = dict()
        dict_state["epoch"] = epoch
        dict_state["sigma"] = sigma
        dict_state["const_C"] = const_C
        dict_state["model_state"] = model_state
        dict_state["optimizer_state"] = optimizer_state
        dict_state["scheduler_state"] = scheduler_state
        dict_state["train_loss"] = loss_train / len(train_loader)
        dict_state["val_acc"] = correct / len(cifar10_val)

        try:
            geeky_file = open(data_path + "epoch_" + str(epoch), 'wb')
            pickle.dump(dict_state, geeky_file)
            geeky_file.close()

        except:
            print("Something went wrong")

        #print(f"scheduler state: {scheduler_state}")

In [23]:
for index, config in configs.items():
  print(f"index: {index}")
  data_path_index = data_path + "config_" + str(index) + "_"
  model = Net()
  optimizer = optim.SGD(model.parameters(), lr=config["lr_initial"])
  loss_fn = nn.CrossEntropyLoss()
  '''
    LinearLR =>> new LR = initial LR - nb_epochs*(start_factor-end_factor)/total_iters
    example, initialLR = 0.1, start = 1.0, end_factor = 0.5, total_iters = 20
    (start_factor-end_factor)/total_iters = 0.025.
    ===> epoch 1: 0.1 - 1*0.025 = 0.0975
    ===> epoch 2: 0.1 - 2*0.025 = 0.0950....
  '''
  #scheduler = lr_scheduler.LinearLR(optimizer, start_factor=1.0, end_factor=0.5, total_iters=20)
  '''
   StepLR =>>> new LR = old LR * gamma
  '''
  scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
  '''
    LambdaLR: new LR = initialLR * f(epoch)
    For example: f(epoch) = 1/t
  '''
  # lambda1 = lambda epoch: 1/(epoch+1)
  # scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=[lambda1])

  training_loop(
      n_epochs = 20,
      optimizer = optimizer,
      model = model,
      sigma = config["sigma"],
      const_C = config["const_C"],
      loss_fn = loss_fn,
      train_loader = train_loader,
      val_loader = val_loader,
      data_path = data_path_index
  )


index: 1
Epoch 1, Training loss 1.5678405574215648, Val accuracy 0.4927000105381012
Epoch 1: SGD lr 0.1000 -> 0.0900
Epoch 2, Training loss 1.2261115410138883, Val accuracy 0.5478000044822693
Epoch 2: SGD lr 0.0900 -> 0.0810
Epoch 3, Training loss 1.1107494477420816, Val accuracy 0.5759999752044678
Epoch 3: SGD lr 0.0810 -> 0.0729
Epoch 4, Training loss 1.0450485630718338, Val accuracy 0.603600025177002
Epoch 4: SGD lr 0.0729 -> 0.0656
Epoch 5, Training loss 0.9988533652499508, Val accuracy 0.6177999973297119
Epoch 5: SGD lr 0.0656 -> 0.0590
Epoch 6, Training loss 0.9612446208591656, Val accuracy 0.6176999807357788
Epoch 6: SGD lr 0.0590 -> 0.0531
Epoch 7, Training loss 0.9301041228996824, Val accuracy 0.5440000295639038
Epoch 7: SGD lr 0.0531 -> 0.0478
Epoch 8, Training loss 0.906463612604629, Val accuracy 0.6108999848365784
Epoch 8: SGD lr 0.0478 -> 0.0430
Epoch 9, Training loss 0.8831214247380986, Val accuracy 0.6385999917984009
Epoch 9: SGD lr 0.0430 -> 0.0387
Epoch 10, Training lo

In [None]:
import pandas as pd
epoch = 1
path = data_path + "epoch_" + str(epoch)
obj = pd.read_pickle(path)
print(obj.keys())

dict_keys(['epoch', 'model_state', 'optimizer_state'])
