In [None]:
%load_ext autoreload
%autoreload 2
%load_ext tensorboard

# Tensorboard

In [None]:
%tensorboard --logdir=reports --port=6004

In [None]:
import torch
from torch.autograd import Variable

def max_eigenvalue(model, loss_fn, data, target):
    """Estimate the dominant eigenvalue of the loss Hessian by power iteration.

    The Hessian is taken with respect to the model's trainable parameters
    (those with ``requires_grad=True``), flattened into one vector. It is never
    materialised: every step uses a Hessian-vector product obtained by
    differentiating the gradient a second time.

    Args:
        model: ``torch.nn.Module`` to analyse. It is switched to evaluation
            mode and left in that mode.
        loss_fn: Callable invoked as ``loss_fn(model(data), target)``; it must
            return a scalar tensor.
        data: Input batch passed to ``model``.
        target: Targets passed to ``loss_fn``.

    Returns:
        0-dim tensor with the Rayleigh quotient ``v . (H v)`` of the last
        iterate ``v``. Power iteration converges to the eigenvalue of largest
        magnitude, which is the largest eigenvalue only when that one also
        dominates in absolute value. Returns 0 when the Hessian-vector
        product vanishes.
    """
    # Set model to evaluation mode
    model.eval()
    params = [p for p in model.parameters() if p.requires_grad]
    # Compute the loss
    loss = loss_fn(model(data), target)
    # Use autograd.grad rather than loss.backward(create_graph=True): backward
    # accumulates into p.grad, so a second call would see the sum of both
    # gradients (and report a doubled eigenvalue) and would keep the graph
    # alive through p.grad.
    grads = torch.autograd.grad(loss, params, create_graph=True)
    flat_grad = torch.cat([g.reshape(-1) for g in grads])

    def hessian_vector_product(vec):
        # d(flat_grad . vec)/d(params) == H @ vec
        hv = torch.autograd.grad(flat_grad, params, grad_outputs=vec, retain_graph=True)
        return torch.cat([h.reshape(-1) for h in hv])

    # Start from a vector of ones on the same device / dtype as the gradients
    v = torch.ones_like(flat_grad)
    Hv = hessian_vector_product(v)
    # Compute the maximum eigenvalue using the power iteration method
    for _ in range(100):
        norm = torch.norm(Hv)
        if norm == 0:
            # H v == 0: normalising would produce NaNs, and the quotient is 0.
            break
        v = Hv / norm
        Hv = hessian_vector_product(v)
    return (v * Hv).sum()

In [None]:
import torch
from torch.autograd import Variable

def max_eigenvalue(model, loss_fn, data, target):
    """Estimate the dominant eigenvalue of the loss Hessian by power iteration.

    Only parameters with ``requires_grad=True`` take part, and the same
    parameter list is used for the gradient and for every Hessian-vector
    product, so partially frozen models are supported.

    Args:
        model: ``torch.nn.Module`` to analyse. It is switched to evaluation
            mode and left in that mode.
        loss_fn: Callable invoked as ``loss_fn(model(data), target)``; it must
            return a scalar tensor.
        data: Input batch passed to ``model``.
        target: Targets passed to ``loss_fn``.

    Returns:
        0-dim tensor with the Rayleigh quotient ``v . (H v)`` of the last
        iterate ``v`` (the eigenvalue of largest magnitude once the iteration
        has converged), or 0 when the Hessian-vector product vanishes.
    """
    # Set model to evaluation mode
    model.eval()
    trainable = [p for p in model.parameters() if p.requires_grad]
    # Compute the loss
    loss = loss_fn(model(data), target)
    # First-order gradients, keeping the graph so they can be differentiated again
    grads = torch.autograd.grad(loss, trainable, create_graph=True)
    grads = torch.cat([g.reshape(-1) for g in grads])

    def hessian_vector_product(vec):
        # Differentiate w.r.t. the SAME filtered list used for `grads`;
        # passing a frozen parameter to autograd.grad raises a RuntimeError.
        hv = torch.autograd.grad(grads, trainable, grad_outputs=vec, retain_graph=True)
        return torch.cat([h.reshape(-1) for h in hv])

    # Start vector of ones, created on the gradients' device and dtype
    v = torch.ones_like(grads)
    Hv = hessian_vector_product(v)
    # Compute the maximum eigenvalue using the power iteration method
    for _ in range(100):
        norm = torch.norm(Hv)
        if norm == 0:
            # Nothing left to normalise; the Rayleigh quotient below is 0.
            break
        v = Hv / norm
        Hv = hessian_vector_product(v)

    return (v * Hv).sum()

# Variant of max_eigenvalue that flattens tensors with .reshape(...) instead of .view(...) to avoid: RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
def max_eigenvalue_corrected(model, loss_fn, data, target):
    """Estimate the dominant Hessian eigenvalue of the loss via power iteration.

    Gradients are flattened with ``reshape`` (which also handles
    non-contiguous tensors), and the Hessian is restricted to parameters with
    ``requires_grad=True``.

    Args:
        model: ``torch.nn.Module`` to analyse. It is switched to evaluation
            mode and left in that mode.
        loss_fn: Callable invoked as ``loss_fn(model(data), target)``; it must
            return a scalar tensor.
        data: Input batch passed to ``model``.
        target: Targets passed to ``loss_fn``.

    Returns:
        0-dim tensor with the Rayleigh quotient ``v . (H v)`` of the last
        iterate ``v`` (the eigenvalue of largest magnitude once the iteration
        has converged), or 0 when the Hessian-vector product vanishes.
    """
    # Set model to evaluation mode
    model.eval()
    params = [p for p in model.parameters() if p.requires_grad]
    # Compute the loss
    loss = loss_fn(model(data), target)
    # Compute the gradients, keeping the graph for the second differentiation
    first_order = torch.autograd.grad(loss, params, create_graph=True)
    flat_grad = torch.cat([g.reshape(-1) for g in first_order])

    def hvp(vec):
        # Hessian-vector product w.r.t. the same filtered parameter list as
        # `flat_grad`; including frozen parameters would make autograd.grad raise.
        pieces = torch.autograd.grad(flat_grad, params, grad_outputs=vec, retain_graph=True)
        return torch.cat([piece.reshape(-1) for piece in pieces])

    # Create a vector of ones with the same size, device and dtype as the gradients
    v = torch.ones_like(flat_grad)
    Hv = hvp(v)
    # Compute the maximum eigenvalue using the power iteration method
    for _ in range(100):
        norm = torch.norm(Hv)
        if norm == 0:
            # Avoid 0 / 0 -> NaN; the Rayleigh quotient below is then 0.
            break
        v = Hv / norm
        Hv = hvp(v)

    return (v * Hv).sum()

In [None]:
from typing import Any, Dict, List, Optional, Tuple

import torch

class SimpleCNN(torch.nn.Module):
    """Small convolutional classifier: stride-2 conv blocks followed by a two-layer head.

    Args:
        layers_dim: ``[in_channels, conv_width_1, ..., conv_width_k, hidden, n_outputs]``.
            Consecutive pairs of the first ``k + 1`` entries define the conv
            blocks; the last two entries size the fully connected head.
        input_size: Optional ``(height, width)`` of the images fed to
            ``forward``. When given, the number of features entering the head
            is derived from it. When omitted, the head keeps the original
            hard-coded 4096 input features.
    """

    # 64 channels * 8 * 8: what 32x32 inputs produce after two stride-2 blocks
    # ending in 64 channels, e.g. layers_dim = [3, 32, 64, hidden, n_outputs].
    _LEGACY_FLATTEN_DIM = 4096

    def __init__(self, layers_dim: List[int], input_size: Optional[Tuple[int, int]] = None):
        super().__init__()
        self.blocks = torch.nn.ModuleList([
            torch.nn.Sequential(torch.nn.Conv2d(layer_dim1, layer_dim2, 3, padding=1),
                                torch.nn.ReLU(),
                                # The stride-2 convolution does the downsampling
                                # (a MaxPool2d(2, 2) was tried here and disabled).
                                torch.nn.Conv2d(layer_dim2, layer_dim2, 3, padding=1, stride=2),
                                torch.nn.ReLU(),
                                )
            for layer_dim1, layer_dim2 in zip(layers_dim[:-3], layers_dim[1:-2])
        ])
        if input_size is None:
            flatten_dim = self._LEGACY_FLATTEN_DIM
        else:
            flatten_dim = self._infer_flatten_dim(layers_dim[-3], len(self.blocks), input_size)
        self.final_layer = torch.nn.Sequential(torch.nn.Linear(flatten_dim, layers_dim[-2]), torch.nn.ReLU(),
                                               torch.nn.Linear(layers_dim[-2], layers_dim[-1]))

    @staticmethod
    def _infer_flatten_dim(channels: int, n_blocks: int, input_size: Tuple[int, int]) -> int:
        """Return channels * H * W of the feature map produced by ``n_blocks`` blocks."""
        height, width = input_size
        for _ in range(n_blocks):
            # Per block: the first conv (3x3, padding 1, stride 1) keeps the size;
            # the second (3x3, padding 1, stride 2) maps n -> floor((n - 1) / 2) + 1.
            height = (height - 1) // 2 + 1
            width = (width - 1) // 2 + 1
        return channels * height * width

    def forward(self, x):
        """Run the conv blocks, flatten everything but the batch axis, apply the head."""
        for block in self.blocks:
            x = block(x)
        x = x.flatten(start_dim=1)
        x = self.final_layer(x)
        return x
    

class MLP(torch.nn.Module):
    """Fully connected network: Linear+ReLU hidden layers and a linear output layer.

    ``layers_dim`` lists the layer widths from input to output. Every
    consecutive pair except the last becomes a ``Linear`` followed by ``ReLU``;
    the last pair becomes the plain ``Linear`` output layer.
    """

    def __init__(self, layers_dim):
        super().__init__()
        hidden_stack = torch.nn.ModuleList()
        for fan_in, fan_out in zip(layers_dim[:-2], layers_dim[1:-1]):
            hidden_stack.append(
                torch.nn.Sequential(torch.nn.Linear(fan_in, fan_out), torch.nn.ReLU())
            )
        self.layers = hidden_stack
        self.final_layer = torch.nn.Linear(layers_dim[-2], layers_dim[-1])

    def forward(self, x):
        """Flatten all non-batch axes, then apply the hidden layers and the output layer."""
        hidden = torch.flatten(x, start_dim=1)
        for hidden_layer in self.layers:
            hidden = hidden_layer(hidden)
        return self.final_layer(hidden)

In [None]:
# Build the CNN (3 input channels, conv widths 32 and 64, 128 hidden units, 10 outputs)
# and end the cell with it so the notebook displays its layer structure.
model = SimpleCNN([3, 32, 64, 128, 10])
model

In [None]:
# NOTE(review): same configuration as the preceding cell; re-running it replaces
# `model` with a newly constructed network.
model = SimpleCNN([3, 32, 64, 128, 10])
model

In [None]:
import os

import torch
import torchvision
import torchvision.transforms as transforms

# Convert images to tensors and normalise every channel as (x - 0.5) / 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Size of the batch drawn below for the Hessian eigenvalue estimate.
batch_size = 512

# The dataset root is read from the CIFAR10_PATH environment variable
# (raises KeyError when it is not set).
trainset = torchvision.datasets.CIFAR10(root=os.environ['CIFAR10_PATH'], train=True,
                                        download=True, transform=transform)
# shuffle=False: batches follow the dataset's own order.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=False, num_workers=2)

# Class names, indexed by label id.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

model = SimpleCNN([3, 32, 64, 128, 10])
optim = torch.optim.SGD(model.parameters(), lr=0.01)

# First batch of the (unshuffled) loader.
x_true, y_true = next(iter(trainloader))

criterion = torch.nn.CrossEntropyLoss()


In [None]:
# optim.zero_grad()
# Estimate the dominant Hessian eigenvalue of the cross-entropy loss on the
# first training batch.
max_eigenvalue_corrected(model, criterion, x_true, y_true)

In [None]:
# p.grad is only populated by backward(); max_eigenvalue_corrected uses
# torch.autograd.grad, which returns gradients without storing them, so
# p.grad can still be None here. Print None instead of raising AttributeError.
for p in model.parameters():
    print(None if p.grad is None else p.grad.shape)

Blurring images

In [None]:
import os

import torch
import torchvision
import torchvision.transforms as transforms

Normal

In [None]:
# Per-channel mean / std used by Normalize below
# (presumably CIFAR-10 training-set statistics - TODO confirm).
mean, std = (0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.262)
transform = transforms.Compose([
        transforms.ToTensor(),
        # Random shift of up to 1/8 of the image size along each axis, no rotation.
        transforms.RandomAffine(degrees=0, translate=(1/8, 1/8)),
        transforms.RandomHorizontalFlip(),
        transforms.Normalize(mean, std),
    ])

# Small batch: just enough images for the preview grid.
batch_size = 4

# The dataset root is read from the CIFAR10_PATH environment variable
# (raises KeyError when it is not set).
trainset = torchvision.datasets.CIFAR10(root=os.environ['CIFAR10_PATH'], train=True,
                                        download=True, transform=transform)
# shuffle=False: batches follow the dataset's own order.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=False, num_workers=2)

# Class names, indexed by label id.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# functions to show an image


def imshow(img, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
    """Display a normalised (channels, height, width) image tensor.

    Args:
        img: Tensor laid out as (channels, height, width) whose channels were
            normalised as ``(x - mean) / std``.
        mean: Per-channel means used for the normalisation.
        std: Per-channel standard deviations used for the normalisation.
            The defaults reproduce the previous ``img / 2 + 0.5`` behaviour.
    """
    npimg = img.numpy()
    per_channel = (-1, 1, 1)  # broadcast the channel statistics over height and width
    scale = np.asarray(std, dtype=npimg.dtype).reshape(per_channel)
    shift = np.asarray(mean, dtype=npimg.dtype).reshape(per_channel)
    npimg = npimg * scale + shift     # unnormalize
    # Clip float round-off so matplotlib receives values inside [0, 1].
    plt.imshow(np.clip(np.transpose(npimg, (1, 2, 0)), 0.0, 1.0))
    plt.show()


# get the first batch of training images (the loader is not shuffled)
dataiter = iter(trainloader)
images, labels = next(dataiter)

# show images, undoing the Normalize(mean, std) applied by the dataset transform
imshow(torchvision.utils.make_grid(images), mean, std)
# print labels
print(' '.join(f'{classes[labels[j]]:5s}' for j in range(batch_size)))

Blurred

In [None]:
from torchvision.transforms import InterpolationMode
# Target size of the intermediate downsampling step used to blur the images.
down_to = 16
transform = transforms.Compose([
        transforms.ToTensor(),
        # Blur: shrink to `down_to`, then scale back up to 32, both bilinear.
        # NOTE(review): what antialias=None does depends on the torchvision version - verify.
        transforms.Resize(down_to, interpolation=InterpolationMode.BILINEAR, antialias=None),
        transforms.Resize(32, interpolation=InterpolationMode.BILINEAR, antialias=None), 
        # Random shift of up to 1/8 of the image size along each axis, no rotation.
        transforms.RandomAffine(degrees=0, translate=(1/8, 1/8)),
        transforms.RandomHorizontalFlip(),
        transforms.Normalize(mean, std),
    ])
# transform = transforms.Compose([transforms.ToTensor(), transforms.Resize(down_to, interpolation=InterpolationMode.BILINEAR, antialias=True), transforms.Resize(32, interpolation=InterpolationMode.BILINEAR, antialias=True), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Small batch: just enough images for the preview grid.
batch_size = 4

# The dataset root is read from the CIFAR10_PATH environment variable
# (raises KeyError when it is not set).
trainset = torchvision.datasets.CIFAR10(root=os.environ['CIFAR10_PATH'], train=True,
                                        download=True, transform=transform)
# shuffle=False: batches follow the dataset's own order.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=False, num_workers=2)

# Class names, indexed by label id.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# functions to show an image


def imshow(img, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
    """Display a normalised (channels, height, width) image tensor.

    Args:
        img: Tensor laid out as (channels, height, width) whose channels were
            normalised as ``(x - mean) / std``.
        mean: Per-channel means used for the normalisation.
        std: Per-channel standard deviations used for the normalisation.
            The defaults reproduce the previous ``img / 2 + 0.5`` behaviour.
    """
    npimg = img.numpy()
    per_channel = (-1, 1, 1)  # broadcast the channel statistics over height and width
    scale = np.asarray(std, dtype=npimg.dtype).reshape(per_channel)
    shift = np.asarray(mean, dtype=npimg.dtype).reshape(per_channel)
    npimg = npimg * scale + shift     # unnormalize
    # Clip float round-off so matplotlib receives values inside [0, 1].
    plt.imshow(np.clip(np.transpose(npimg, (1, 2, 0)), 0.0, 1.0))
    plt.show()


# get the first batch of blurred training images (the loader is not shuffled)
dataiter = iter(trainloader)
images, labels = next(dataiter)

# show images, undoing the Normalize(mean, std) applied by the dataset transform
imshow(torchvision.utils.make_grid(images), mean, std)
# print labels
print(' '.join(f'{classes[labels[j]]:5s}' for j in range(batch_size)))