In [1]:
from utils.train_smt import test, print_cfmtx, NumpyEncoder, check_global_contrastive
from utils.reader import read_jsons
from utils.parser import read_arguments

from pathlib import Path
from torch.utils.data import DataLoader
from utils import fmodule
import torch, json, os, numpy as np, copy
import torch.nn.functional as F

# Prefer the GPU when PyTorch can see one; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [2]:
# Experiment configuration: mini-batch size, number of epochs, and where the
# per-client data split for this experiment is stored.
batch_size = 4
epochs = 100
exp_folder = "./jsons/baseline/simple_8"
dataset = "mnist"

# read_jsons is a project helper; presumably it returns the client count plus the
# per-client train/test splits and the global test set - TODO confirm.
(
    num_client,
    clients_training_dataset,
    clients_testing_dataset,
    global_testing_dataset,
    singleset,
) = read_jsons(exp_folder, dataset)

client_id_list = list(range(num_client))
# Total number of training samples summed over every client's split.
total_sample = np.sum([len(client_split) for client_split in clients_training_dataset])

In [None]:
from utils.fmodule import FModule
import torch.nn as nn

class MLP(FModule):
    """Two-layer perceptron: 784 inputs -> 128 hidden units (ReLU) -> 10 outputs."""

    def __init__(self):
        super().__init__()
        # The attribute names (fc1 / fc3) also name the entries of the state_dict.
        self.fc1 = nn.Linear(784, 128)
        self.fc3 = nn.Linear(128, 10)

    def forward(self, x):
        """Flatten every sample to a vector and return the raw output scores."""
        flat = x.view(x.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc3(hidden)

In [3]:
# Build the shared (global) network, then place it on the selected device.
global_model = MLP()
global_model = global_model.to(device)

In [4]:
# Select the client whose local data is used in the cells below.
client_id = 0
my_training_dataset, my_testing_dataset = (
    clients_training_dataset[client_id],
    clients_testing_dataset[client_id],
)

# The client trains an independent copy, so the global model stays untouched.
local_model = copy.deepcopy(global_model)
train_dataloader = DataLoader(
    my_training_dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=False,
)
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.SGD(local_model.parameters(), lr=1e-4)

In [None]:
def train(dataloader, model, loss_fn, optimizer, device=None):
    """
    Run one pass over ``dataloader`` and update ``model`` in place.

    Each batch's integer labels are one-hot encoded to the width of the model
    output, so ``loss_fn`` receives float32 predictions and float32 one-hot
    targets of the same shape.

    Arguments:
        dataloader: iterable yielding ``(X, y)`` batches; ``y`` holds integer
            class indices.
        model: network to optimise.
        loss_fn: callable ``loss_fn(pred, target)`` returning a scalar tensor.
        optimizer: optimizer already bound to ``model``'s parameters.
        device: device to train on. When omitted, CUDA is used if available
            and the CPU otherwise, so the function also runs on CPU-only hosts.
    Returns:
        A list with the loss value (Python float) of every processed batch.
    """
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    # Model and data must live on the same device; the previous unconditional
    # ``model.cuda()`` failed on machines without a GPU.
    model = model.to(device)
    model.train()
    losses = []

    for X, y in dataloader:
        X, y = X.to(device), y.to(device)

        # Compute prediction error. The one-hot width follows the model output
        # instead of a hard-coded class count.
        pred = model(X)
        target = F.one_hot(y, pred.shape[-1]).to(torch.float32)
        loss = loss_fn(pred.to(torch.float32), target)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses.append(loss.item())

    return losses

In [None]:
from collections import OrderedDict

def flatten_tensors(tensors):
    """
    Reference: https://github.com/facebookresearch/stochastic_gradient_push
    Flatten dense tensors into a contiguous 1D buffer. Assume tensors are of
    same dense type.
    Since inputs are dense, the resulting tensor will be a concatenated 1D
    buffer. Element-wise operation on this buffer will be equivalent to
    operating individually.
    Arguments:
        tensors (Iterable[Tensor] or Tensor): dense tensors to flatten. Any
            iterable is accepted (including generators); a single bare tensor
            is flattened directly.
    Returns:
        A new 1D buffer containing input tensors (never a view of an input).
    Raises:
        RuntimeError: from ``torch.cat`` when an empty iterable is given.
    """
    if isinstance(tensors, torch.Tensor):
        # Some callers pass one tensor instead of a collection; flatten it
        # directly rather than iterating over its rows (which breaks on 0-d
        # tensors).
        return tensors.reshape(-1).clone()

    # Materialise once so generators and other one-shot iterables work.
    tensors = list(tensors)
    if len(tensors) == 1:
        # clone() keeps the result independent of the single input tensor.
        return tensors[0].reshape(-1).clone()
    # reshape (unlike view) also accepts non-contiguous inputs.
    return torch.cat([t.reshape(-1) for t in tensors], dim=0)


def flatten_model(model):
    """
    Concatenate all parameters of ``model`` into a single 1D tensor.

    Parameters are taken in ``model.parameters()`` order and each one is
    flattened as a whole, so parameters of any rank (including 0-d scalars)
    are supported.
    Arguments:
        model: a ``torch.nn.Module``.
    Returns:
        A 1D tensor holding every parameter value.
    """
    return torch.cat([param.reshape(-1) for param in model.parameters()])


def unflatten_tensors(flat, tensors):
    """
    Reference: https://github.com/facebookresearch/stochastic_gradient_push
    Slice a flat 1D buffer back into pieces shaped like the given tensors.

    The pieces are taken consecutively from the start of ``flat``; each piece
    is a view of ``flat`` reshaped to match the corresponding reference tensor.
    Arguments:
        flat (Tensor): flattened dense tensors to unflatten.
        tensors (Iterable[Tensor]): dense tensors whose sizes and shapes are
            used to cut up ``flat``.
    Returns:
        A tuple of tensors with the shapes of ``tensors`` and the values of
        ``flat``.
    """

    def _pieces():
        # Walk the buffer left to right, handing out one shaped view per reference.
        cursor = 0
        for reference in tensors:
            length = reference.numel()
            yield flat.narrow(0, cursor, length).view_as(reference)
            cursor += length

    return tuple(_pieces())


def unflatten_model(flat, model):
    """
    Cut a flat 1D buffer into per-parameter tensors keyed by parameter name.

    Names and shapes are both taken from ``model.named_parameters()``, so each
    slice is always labelled with the parameter it was sized for. Pairing
    ``parameters()`` with ``state_dict()`` keys breaks as soon as the model has
    buffers (e.g. BatchNorm running statistics), because the state dict then
    has more entries than there are parameters.
    Arguments:
        flat (Tensor): 1D buffer laid out in ``model.parameters()`` order.
        model: ``torch.nn.Module`` whose parameter shapes are used.
    Returns:
        An OrderedDict mapping each parameter name to a view of ``flat`` with
        that parameter's shape.
    """
    state = OrderedDict()
    offset = 0
    for name, param in model.named_parameters():
        numel = param.numel()
        state[name] = flat[offset: offset + numel].view_as(param)
        offset += numel
    return state

In [None]:
from utils.fmodule import FModule
import torch

class OptimizerUtilities:
    """
    Finite-difference second-order helpers evaluated on one fixed batch.

    The "vector" arguments and results are model objects. They must support
    the operations used below: ``+``, ``-``, ``*``, ``/``, ``dot``, ``norm``,
    ``zeros_like`` and ``randn_like``.
    """

    def __init__(self, X, Y, loss_fn):
        self.X = X
        self.Y = Y
        self.loss_fn = loss_fn
        # Step size of the finite difference in hessian_vector_product.
        self.r = 0.01

    def _parameter_gradients(self, model):
        """Return one detached gradient tensor per parameter of ``model``."""
        # Clear first: gradients left over from earlier training steps would
        # otherwise be accumulated into the result by backward().
        model.zero_grad()
        pred = model(self.X)
        loss = self.loss_fn(pred, self.Y)
        loss.backward()
        grads = [
            # A parameter that did not take part in the loss has no gradient;
            # treat it as zero.
            torch.zeros_like(p.detach()) if p.grad is None else p.grad.detach().clone()
            for p in model.parameters()
        ]
        # Leave the model without gradients, as callers found it before.
        model.zero_grad()
        return grads

    def _gradient_model(self, model):
        """Return the gradient of ``model`` packed into a model-shaped object."""
        grads = self._parameter_gradients(model)
        # assumes zeros_like() returns an object with the same parameters, in
        # the same order, as ``model`` - TODO confirm against FModule.
        grad_model = model.zeros_like()
        with torch.no_grad():
            for slot, grad in zip(grad_model.parameters(), grads):
                slot.copy_(grad)
        return grad_model

    @staticmethod
    def _project_out(vec, basis):
        """Remove from ``vec`` its components along every vector in ``basis``."""
        for other in basis:
            vec = vec - (other.dot(vec) / other.norm() ** 2) * other
        return vec

    def gradient(self, model: torch.nn.Module):
        """
        Compute the gradient of the input model on the dataset
        Arguments:
            model: A neural network
        Returns:
            The mapping produced by ``unflatten_model``: one gradient tensor
            per parameter, shaped like that parameter.
        """
        grads = self._parameter_gradients(model)
        flat = torch.cat([g.reshape(-1) for g in grads])
        return unflatten_model(flat=flat, model=model)

    def hessian_vector_product(self, model: "FModule", vector: "FModule"):
        """
        Perform approximate estimation of the dot product of the hessian
        of the model parameters and the vector, using the forward difference
        ``(grad(model + r * vector) - grad(model)) / r``.
        Arguments:
            model: A neural network
            vector: Another neural network with the same shape as model
        Returns:
            A model with parameters being the product of the hessian of
            the input model and the input vector (all zeros when ``vector``
            has zero norm).
        """
        if vector.norm() > 0:
            surrogate_model = model + self.r * vector
            # Gradients must be model objects here: they are subtracted and
            # scaled, which plain state-dict mappings do not support.
            surrogate_grad = self._gradient_model(surrogate_model)
            model_grad = self._gradient_model(model)

            estimated_product = (surrogate_grad - model_grad) * 1.0/self.r
            return estimated_product
        else:
            return model.zeros_like()

    def eigens(self, model: "FModule", k: int, M: int):
        """
        Estimate k leading eigenvectors of the hessian and their eigenvalues
        by power iteration with deflation.
        Arguments:
            model: A neural network
            k: The number of eigenvalues/eigenvectors to compute
            M: The number of iteration to estimate an eigenvector (at least 1)
        Returns:
            A list of eigenvectors and a list of corresponding eigenvalues.
            Each eigenvector is a model-shaped object, as returned by
            ``hessian_vector_product``.
        Raises:
            ValueError: if eigenpairs are requested with ``M < 1``.
        """
        if k > 0 and M < 1:
            raise ValueError("M must be at least 1 to estimate an eigenvector")

        eigenvectors = []
        eigenvalues = []

        for _ in range(k):
            # Start orthogonal to the eigenvectors that were already found.
            eigenvec = self._project_out(model.randn_like(), eigenvectors)
            for _ in range(M):
                eigenvec_new = self.hessian_vector_product(model, eigenvec)
                # Deflation: subtract the projection onto each earlier
                # eigenvector so the iteration converges to the next one
                # rather than to the dominant one again.
                eigenvec_new = self._project_out(eigenvec_new, eigenvectors)

                # Rayleigh-quotient estimate of the eigenvalue.
                eigenval = eigenvec_new.dot(eigenvec)/(eigenvec.norm()**2)
                eigenvec = eigenvec_new/eigenval

            eigenvectors.append(eigenvec)
            eigenvalues.append(eigenval)

        return eigenvectors, eigenvalues

    def inverse_hessian_vector_product(self, model: "FModule", vector: "FModule", k=10, M=5):
        """
        Compute the estimate dot product of the inverse hessian matrix of the model
        and the input vector
        Arguments:
            model: A neural network
            vector: Another neural network
            k: The number of eigenvalues/eigenvectors to compute
                default k = 10
            M: The number of iteration to estimate an eigenvector
                default M = 5
        Returns:
            a model with parameters being the dot product of the inverse hessian
            matrix of the input model and the input vector
        Raises:
            IndexError: if ``k`` is 0, since no eigenvalue is available for the
                final scaling.
        """
        eigenvectors, eigenvalues = self.eigens(model, k=k, M=M)

        # Low-rank part: sum over eigenpairs of (e . v) / (|e|^2 * lambda) * e.
        d = model.zeros_like()
        for (eigenvector, eigenvalue) in zip(eigenvectors, eigenvalues):
            d += (eigenvector.dot(vector)) / (eigenvector.norm()**2 * eigenvalue) * eigenvector

        # NOTE(review): this final combination is kept exactly as it was; its
        # derivation is not evident from this file - confirm it matches the
        # intended inverse-hessian approximation.
        res = (eigenvalues[-1] / eigenvalues[0]) * d - vector
        return res