In [2]:
"""
METHOD

1) Train a model fully with the leNet5 net and the mnist dataset
2) Retrive it's weights (for each parameters and compute it's loss function)

"""
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

# Define the LeNet5 model
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16*4*4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = nn.functional.relu(self.conv1(x))
        x = nn.functional.max_pool2d(x, 2)
        x = nn.functional.relu(self.conv2(x))
        x = nn.functional.max_pool2d(x, 2)
        x = x.view(-1, 16*4*4)
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Load the MNIST dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)

# Define the loss function
criterion = nn.CrossEntropyLoss()

# Initialize a list to store the loss values
loss_values = []

# Train the model and store the loss values
model = LeNet5()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
for epoch in range(5):
    for i, (images, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        loss_values.append(loss.item())


# Ways of plotting th loss function 
Neural networks are trained on a corpus of feature vectors (e.g., images) {xi} and accompanying
labels {yi} by minimizing a loss of the form L(θ) = sum (for i in dataset) Loss (xi, yi; θ), where θ denotes the
parameters (weights) of the neural network, the function `(xi, yi; θ) measures how well the neural
network with parameters θ predicts the label of a data sample, and m is the number of data samples.
Neural nets contain many parameters, and so their loss functions live in a very high-dimensional
space. Unfortunately, visualizations are only possible using low-dimensional 1D (line) or 2D (surface)
plots. Several methods exist for closing this dimensionality gap.
1-Dimensional Linear Interpolation One simple and lightweight way to plot loss functions is
to choose two sets of parameters θ and θ
0
, and plot the values of the loss function along the line
connecting these two points. We can parameterize this line by choosing a scalar parameter α, and
defining the weighted average θ(α) = (1−α)θ+αθ0
. Finally, we plot the function f(α) = L(θ(α)).
This strategy was taken by Goodfellow et al. [13], who studied the loss surface along the line between
a random initial guess, and a nearby minimizer obtained by stochastic gradient descent. This method
has been widely used to study the “sharpness” and “flatness” of different minima, and the dependence. However there is some limitation in the plotting of such a 1D loss landscape.

For 2D visulization : 
Contour Plots & Random Directions To use this approach, one chooses a center point θ in the graph, and chooses two direction vectors, δ and η. One then plots a function of the form
f(α) = L(θ + αδ) in the 1D (line) case, or f(α, β) = L(θ∗ + αδ + βη)

# Interesting approch : Filter-Wise Normalization


In [9]:
model.parameters()

<generator object Module.parameters at 0x00000279E7049070>

In [3]:

# Create a meshgrid of weight values
weights = np.empty((0, len(list(model.parameters()))))
for i, params in enumerate(model.parameters()):
    weights = np.concatenate((weights, params.data.flatten().numpy().reshape(1, -1)), axis=0)
X, Y = np.meshgrid(range(weights.shape[1]), range(weights.shape[0]))

# Evaluate the loss function at each point in the meshgrid
Z = []
for row in range(weights.shape[0]):
    params = torch.from_numpy(weights[row]).float().view_as(params)
    for i, params_i in enumerate(model.parameters()):
        params_i.data = params[i].view_as(params_i.data)
    output = model(images)
    loss = criterion(output, labels)
    Z.append(loss.item())
Z = np.array(Z).reshape(X.shape)

# Create a contour plot of the loss function landscape
plt.contourf(X, Y, Z, levels=50)
plt.colorbar()
plt.show()


ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 10 and the array at index 1 has size 150