In [61]:
import torch
from torch import nn, optim
from torch.autograd import grad, Variable
import numpy as np
from torchvision import datasets, transforms
import torchvision.models as models
from copy import deepcopy
from typing import Tuple, Union
from collections import OrderedDict

# Define the simple convolutional network
class ConvNet(nn.Module):
    """Two-stage convolutional classifier producing 10 logits per sample.

    Each stage is a 3x3 convolution (padding 1, so spatial size is kept)
    followed by ReLU and a 2x2 / stride-2 max-pool. The final linear layer
    expects 32 * 7 * 7 features, i.e. single-channel 28x28 inputs after the
    two pooling steps.
    """

    def __init__(self):
        super().__init__()
        # Registration order is kept as conv1, relu, maxpool, conv2, fc.
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.fc = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Map a batch of images ``x`` to a ``(batch, 10)`` tensor of logits."""
        features = x
        # Both stages share the same ReLU and pooling modules.
        for conv in (self.conv1, self.conv2):
            features = self.maxpool(self.relu(conv(features)))
        # Flatten everything except the batch dimension for the classifier.
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

# Cross-entropy loss applied to the network's logits
loss_fn = nn.CrossEntropyLoss()

# MNIST split selected by train=False, converted to tensors and normalised
# with mean 0.1307 / std 0.3081; served in shuffled mini-batches.
batch_size = 32
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)
mnist_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=transform,
    download=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=mnist_dataset,
    batch_size=batch_size,
    shuffle=True,
)

def _loss_gradients(model, loss_fn, x, y):
    """Return the gradients of ``loss_fn(model(x), y)`` w.r.t. every model parameter."""
    loss = loss_fn(model(x), y)
    return torch.autograd.grad(loss, list(model.parameters()))


# Per-batch finite-difference curvature estimate along the gradient direction
def calculate_highest_hessian(model, loss_fn, dataset, delta=1e-3, scale=0.9):
    """Estimate, per batch, the norm of the Hessian-vector product ``H @ g``.

    For every ``(x, y)`` pair yielded by ``dataset`` the function:

    1. computes the gradient ``g`` of the loss w.r.t. the model parameters;
    2. evaluates the gradient again at ``params + delta * g`` and
       ``params - delta * g``;
    3. forms the central difference ``(g_plus - g_minus) / (2 * delta)``,
       a finite-difference approximation of ``H @ g``;
    4. takes the L2 norm over all parameters (the norm of the per-tensor
       norms) and multiplies it by ``scale``.

    Note that ``g`` is not normalised, so despite the function's name the
    returned numbers are scaled values of ``||H g||``, not Hessian eigenvalues.

    Args:
        model: ``nn.Module`` whose parameters are differentiated. Its
            parameters are temporarily overwritten and then restored.
        loss_fn: callable ``loss_fn(logits, targets)`` returning a scalar tensor.
        dataset: iterable of ``(inputs, targets)`` batches.
        delta: finite-difference step size (default ``1e-3``, the value that
            used to be hard-coded).
        scale: factor applied to each norm (default ``0.9``, the value that
            used to be hard-coded; its rationale is not documented in the source).

    Returns:
        list with one 0-dim tensor per batch, in iteration order. Empty if
        ``dataset`` yields nothing.
    """
    hessians = []

    for x, y in dataset:
        grads_1st = _loss_gradients(model, loss_fn, x, y)

        # Snapshot of the unperturbed weights so they can be put back afterwards.
        frz_model_params = deepcopy(model.state_dict())

        params_plus = OrderedDict()
        params_minus = OrderedDict()
        with torch.no_grad():
            # `g` (not `grad`) to avoid shadowing torch.autograd.grad imported at file level.
            for (layer_name, param), g in zip(model.named_parameters(), grads_1st):
                params_plus[layer_name] = param + delta * g
                params_minus[layer_name] = param - delta * g

        try:
            # strict=False: only parameters are supplied, buffers keep their values.
            model.load_state_dict(params_plus, strict=False)
            grads_plus = _loss_gradients(model, loss_fn, x, y)

            model.load_state_dict(params_minus, strict=False)
            grads_minus = _loss_gradients(model, loss_fn, x, y)
        finally:
            # Always restore the caller's weights, even if a forward/backward
            # pass on the perturbed model raises.
            model.load_state_dict(frz_model_params)

        with torch.no_grad():
            # Central difference of the gradients ~= Hessian-vector product H @ g.
            hvp = [(gp - gm) / (2 * delta) for gp, gm in zip(grads_plus, grads_minus)]
            # Norm of the per-tensor norms == L2 norm of the concatenated vector.
            norm = torch.norm(torch.stack([torch.norm(h) for h in hvp]))

        hessians.append(norm * scale)

    return hessians

# Create an instance of the ConvNet defined above (randomly initialised, not trained)
model = ConvNet()

# One value per batch of test_loader, as returned by calculate_highest_hessian
hessian_values = calculate_highest_hessian(model, loss_fn, test_loader)
print(hessian_values)

# Reduce with torch instead of np.max/np.min: the list holds 0-dim tensors, and
# NumPy would have to coerce each of them implicitly. `.item()` yields plain
# Python floats.
stacked_hessian_values = torch.stack(hessian_values)

# Print the highest and lowest per-batch value
max_hessian_value = stacked_hessian_values.max().item()
print("Highest Hessian value:", max_hessian_value)
min_hessian_value = stacked_hessian_values.min().item()
print("Lowest Hessian value:", min_hessian_value)


[tensor(18.9843), tensor(45.6913), tensor(67.4386), tensor(35.9074), tensor(53.2924), tensor(40.2480), tensor(31.6699), tensor(28.4670), tensor(51.0383), tensor(43.0060), tensor(62.8962), tensor(48.9364), tensor(50.9113), tensor(58.8567), tensor(39.8095), tensor(39.7580), tensor(39.3271), tensor(41.9701), tensor(47.1500), tensor(31.8783), tensor(43.8330), tensor(52.6606), tensor(53.5816), tensor(35.8898), tensor(33.3435), tensor(49.0478), tensor(51.5003), tensor(29.4431), tensor(32.9550), tensor(61.4541), tensor(43.9555), tensor(40.9688), tensor(43.1617), tensor(61.3024), tensor(64.3761), tensor(56.4112), tensor(30.0364), tensor(18.5321), tensor(50.1029), tensor(27.8650), tensor(56.9830), tensor(44.8925), tensor(27.1997), tensor(49.6243), tensor(36.2814), tensor(36.2884), tensor(46.9478), tensor(37.8120), tensor(21.9484), tensor(48.2271), tensor(42.6665), tensor(37.3368), tensor(44.3244), tensor(37.9712), tensor(60.6359), tensor(37.4707), tensor(45.6898), tensor(39.4140), tensor(46.018

In [36]:
# Compare the spectral radius of random 200x200 matrices with np.linalg.norm(A)
# (the Frobenius norm for a 2-D array) over 100 trials.
max_list = []  # spectral radius minus norm, per trial
eig_list = []  # spectral radius, per trial
B_list = []    # matrix norm, per trial
for i in range(100):
    A = np.random.rand(200,200)
    # A is not symmetric, so eig_val is in general a complex array.
    eig_val, eig_vec = np.linalg.eig(A)
    B = np.linalg.norm(A)
    # Take the modulus BEFORE the max: np.max on complex values orders by real
    # part first, which is not the largest-magnitude eigenvalue in general.
    spectral_radius = np.max(np.absolute(eig_val))
    eig_list.append(spectral_radius)
    B_list.append(B)
    max_list.append(spectral_radius - B)

def calculate_average(lst):
    """Return the arithmetic mean of ``lst``: the sum of its items divided by its length.

    An empty ``lst`` raises ``ZeroDivisionError`` (division by ``len(lst) == 0``).
    """
    return sum(lst) / len(lst)

# Average each per-trial list, then report them as "<eig> - <norm> - <difference>"
average_max, average_norm, average_eig = map(
    calculate_average, (max_list, B_list, eig_list)
)
print(f"{average_eig} - {average_norm} - {average_max}")

99.97033244955503 - 115.44230402029002 - -15.471971570734993
