In [3]:
import torch
import numpy as np
import os
import random
import matplotlib.pyplot as plt
import torch.nn as nn

def _vgg_layout(*stages):
    """Expand (width, depth) stage pairs into a flat layer layout.

    Each stage contributes `depth` copies of its channel width followed by
    the pooling marker 'M', e.g. (64, 2) -> [64, 64, 'M'].
    """
    layout = []
    for width, depth in stages:
        layout.extend([width] * depth)
        layout.append('M')
    return layout


# Layer layouts per variant: an integer is the output-channel count of a
# 3x3 convolution, 'M' marks a 2x2 max-pooling step.
cfg = {
    'VGG9':  _vgg_layout((64, 2), (128, 2), (256, 3)),
    'VGG16': _vgg_layout((64, 2), (128, 2), (256, 3), (512, 3), (512, 3)),
    'VGG19': _vgg_layout((64, 2), (128, 2), (256, 4), (512, 4), (512, 4)),
}


class VGG(nn.Module):
    """VGG-style CNN: conv/BN/ReLU feature stack, one hidden FC block, linear head.

    Args:
        vgg_name: Key into the module-level ``cfg`` dict selecting the layout
            (integers are conv output widths, 'M' is a max-pooling step).
        num_classes: Output width of the final linear classifier (default 10).
        input_size: Side length of the square input images (default 32).
        in_channels: Number of channels of the input images (default 3).

    Attributes:
        features: Convolutional feature extractor built from the layout.
        fc: Linear -> BatchNorm1d -> ReLU block on the flattened feature map.
        classifier: Final linear layer mapping ``n_maps`` features to logits.
        n_maps: Width of the last convolution; also the hidden FC width.
        input_size: After construction this holds the side length of the
            feature map *after* all pooling steps (it is updated while the
            feature stack is built), not the original image size.

    Raises:
        KeyError: If ``vgg_name`` is not a key of ``cfg``.
        ValueError: If ``input_size`` is not positive or the layout contains
            no convolution.
    """

    def __init__(self, vgg_name, num_classes=10, input_size=32, in_channels=3):
        super(VGG, self).__init__()
        if vgg_name not in cfg:
            raise KeyError("unknown VGG variant %r; expected one of %s"
                           % (vgg_name, sorted(cfg)))
        if input_size < 1:
            raise ValueError("input_size must be a positive integer, got %r"
                             % (input_size,))
        layout = cfg[vgg_name]
        conv_widths = [entry for entry in layout if entry != 'M']
        if not conv_widths:
            raise ValueError("layout %r contains no convolution" % (vgg_name,))

        # Submodules are created in the order features -> fc -> classifier so
        # parameter ordering and seeded initialisation stay unchanged.
        self.input_size = input_size
        self.features = self._make_layers(layout, in_channels)
        # Width of the last convolution, independent of whether the layout
        # happens to end with a pooling marker.
        self.n_maps = conv_widths[-1]
        self.fc = self._make_fc_layers()
        self.classifier = nn.Linear(self.n_maps, num_classes)

    def forward(self, x, return_feat=False):
        """Compute class logits for a batch of images.

        Args:
            x: Input batch fed to the convolutional stack.
            return_feat: If True, also return the output of the hidden FC
                block (the input to the classifier).

        Returns:
            The logits, or the tuple ``(logits, features)`` when
            ``return_feat`` is True.
        """
        x = self.features(x)
        # flatten(1) keeps the batch dimension and, unlike view, also works
        # when the feature map is not contiguous in memory.
        x = x.flatten(1)
        x = self.fc(x)
        out = self.classifier(x)
        if return_feat:
            return out, x
        return out

    def _make_fc_layers(self):
        """Build the hidden FC block sized for the flattened feature map.

        Relies on ``self.n_maps`` and on ``self.input_size`` already holding
        the post-pooling side length.
        """
        flat_features = self.n_maps * self.input_size * self.input_size
        return nn.Sequential(
            nn.Linear(flat_features, self.n_maps),
            nn.BatchNorm1d(self.n_maps),
            nn.ReLU(inplace=True),
        )

    def _make_layers(self, cfg, in_channels=3):
        """Translate a layout list into the convolutional feature extractor.

        Args:
            cfg: Layout list (shadows the module-level dict of the same name).
            in_channels: Channel count of the network input.

        Side effect: ``self.input_size`` is reduced at every pooling entry so
        that it ends up as the side length of the final feature map.
        """
        layers = []
        for entry in cfg:
            if entry == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True))
                # ceil_mode=True rounds the pooled size up, so the bookkeeping
                # must round up as well (identical to // 2 for even sizes).
                self.input_size = (self.input_size + 1) // 2
            else:
                layers.extend([
                    nn.Conv2d(in_channels, entry, kernel_size=3, padding=1),
                    nn.BatchNorm2d(entry),
                    nn.ReLU(inplace=True),
                ])
                in_channels = entry
        return nn.Sequential(*layers)

def VGG9():
    """Construct a VGG network using the 'VGG9' layout."""
    variant = 'VGG9'
    return VGG(variant)

def VGG16():
    """Construct a VGG network using the 'VGG16' layout."""
    variant = 'VGG16'
    return VGG(variant)

def VGG19():
    """Construct a VGG network using the 'VGG19' layout."""
    variant = 'VGG19'
    return VGG(variant)

In [4]:
# Build the VGG19 variant. No checkpoint is loaded in this cell, so the
# network carries its default (untrained) initialisation.
model = VGG19()

In [5]:

import torchvision.datasets as datasets
import torchvision.transforms as transforms
# CIFAR-10 training split, served in shuffled mini-batches.
batch_size = 256

# Only tensor conversion is applied; no normalisation or augmentation.
transform = transforms.Compose([transforms.ToTensor()])

trainset = datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

Files already downloaded and verified


In [8]:
# import torch
# import os
# import math
# import numpy as np

# from utils import *

# #parameters to set
# nets_dir = 'loss_less_01'
# element_loss = torch.nn.CrossEntropyLoss(reduction='none')
# avg_loss = torch.nn.CrossEntropyLoss()
# with_acc = True
# filename = nets_dir + "/measures/reparametrized_comparison_measures_"
# wd = None

# set_seeds(1)
# testloader = trainloader#load_cifar10(train_batch_size = 50000, test_batch_size = 10000)


# input_dim = 32*32*3
# output_dim = 10
# inputs, labels = iter(trainloader).next()
# x_train = inputs.cuda()
# y_train = labels.cuda()
# inputs, labels = iter(testloader).next()
# x_test = inputs.cuda()
# y_test = labels.cuda()
# train_size = len(x_train)
# test_size = len(x_test)
# print("Data loaded", train_size, "training", test_size, "testing")

# model.cuda()
# print("Model loaded")


# ## test data
# test_output, test_loss = calculate_loss_on_data(model, element_loss, x_test, y_test)
# test_loss *= (train_size//test_size)
# print("Test loss calculated", test_loss)
# if with_acc:
#     acc = softmax_accuracy(test_output, y_test)
#     print("Test accuracy is", acc*1.0/len(y_test))

# ## train data
# train_output, train_loss_overall = calculate_loss_on_data(model, element_loss, x_train, y_train)
# print("Train loss calculated", train_loss_overall)
# if with_acc:
#     acc = softmax_accuracy(train_output, y_train)
#     print("Train accuracy is", acc*1.0/len(y_train))
#     train_acc = acc*1.0/len(y_train)

# train_loss = avg_loss(train_output, y_train)

# params = list(model.parameters())
# feature_layer_idx = -1
# for i in range(len(params)):
#     if params[i] is model.classifier.weight:
#         feature_layer_idx = i

# assert i is not -1
# print(i)
# feature_layer = list(model.parameters())[feature_layer_idx]

# # hessian calculation for the layer of interest
# last_layer_jacobian = grad(train_loss, feature_layer, create_graph=True, retain_graph=True)
# hessian = []
# for n_grd in last_layer_jacobian[0]:
#     for w_grd in n_grd:
#         drv2 = grad(w_grd, feature_layer, retain_graph=True)
#         hessian.append(drv2[0].data.cpu().numpy().flatten())

# weights_norm = 0.0
# for n in feature_layer.data.cpu().numpy():
#     for w in n:
#         weights_norm += w**2
# print("Squared euclidian norm is calculated", weights_norm)

# max_eignv = LA.eigvalsh(hessian)[-1]
# print("Largest eigenvalue is", max_eignv)

# trace = np.trace(hessian)
# print("Trace is", trace)

# ## calculate FisherRao norm
# # analytical formula for crossentropy loss from Appendix of the original paper
# sum_derivatives = 0
# m = torch.nn.Softmax(dim=0)
# for inp in range(len(train_output)):
#     sum_derivatives += \
#         (np.inner(m(train_output[inp]).data.cpu().numpy(), train_output[inp].data.cpu().numpy()) -
#             train_output[inp].data.cpu().numpy()[y_train[inp]]) ** 2
# fr_norm = math.sqrt(((5 + 1) ** 2) * (1.0 / len(train_output)) * sum_derivatives)
# print("Fisher Rao norm is", fr_norm)

# # adapted from https://github.com/nitarshan/robust-generalization-measures/blob/master/data/generation/measures.py
# sigma = pacbayes_sigma(model, trainloader, train_acc, 42)
# weights = get_weights_only(model)
# w_vec = get_vec_params(weights)
# pacbayes_flat = 1.0 / sigma ** 2
# print("PacBayes flatness", pacbayes_flat)
# def pacbayes_bound(reference_vec):
#     return (reference_vec.norm(p=2) ** 2) / (4 * sigma ** 2) + math.log(train_size / sigma) + 10
# pacbayes_orig = pacbayes_bound(w_vec).data.cpu().item()
# print("PacBayes orig", pacbayes_orig)
# #-----------------------------

# # normalization of feature layer
# _, activation = model(x_train,return_feat=True)[1].data.cpu().numpy()
# activation = np.squeeze(activation)
# sigma = np.std(activation, axis=0)

# j = 0
# for p in model.parameters():
#     if feature_layer_idx - 2 == j or feature_layer_idx - 1 == j:
#         for i, sigma_i in enumerate(sigma):
#             if sigma_i != 0.0:
#                 p.data[i] = p.data[i] / sigma_i
#     if feature_layer_idx == j:
#         for i, sigma_i in enumerate(sigma):
#             p.data[:,i] = p.data[:,i] * sigma_i
#         feature_layer = p
#     j += 1
    
# train_output, train_loss_overall = calculate_loss_on_data(model, element_loss, x_train, y_train)
# train_loss = avg_loss(train_output, y_train)

# trace_nm, maxeigen_nm = calculateNeuronwiseHessians_fc_layer(feature_layer, train_loss, wd, normalize = False)
# print("Neuronwise tracial measure is", trace_nm)
# print("Neuronwise max eigenvalue measure is", maxeigen_nm)


Data loaded 256 training 256 testing
Model loaded
Test loss calculated 601.4557495117188
Test accuracy is 0.12890625
Train loss calculated 603.4698
Train accuracy is 0.10546875
69
Squared euclidian norm is calculated 3.388254138469054
Largest eigenvalue is 11.022581
Trace is 225.47794
Fisher Rao norm is 2.311321004221044
PacBayes flatness 0.25001525948758285
PacBayes orig 516.326416015625
got hessian
Neuronwise tracial measure is 78.11812272504903
Neuronwise max eigenvalue measure is 25.68762469291687


NameError: name 'trained_net' is not defined