In [None]:
#@title Run this cell to setup visualization...
# This cell defines plot_progress() which plots an optimization trace.

import matplotlib
from matplotlib import pyplot as plt

def plot_progress(bowl, track, losses):
    """Plot an optimization trace in two side-by-side panels.

    The left panel shows the path taken by x on top of contour ellipses of the
    objective; the right panel shows the objective value per iteration.

    Args:
        bowl: square 2x2 tensor; its inverse is applied to scaled circles to
            draw the contour ellipses.
        track: list of 1-d tensors (successive values of x); coordinates 0 and
            1 of each entry are plotted.
        losses: sequence of objective values, plotted against their index.
    """
    fig, (path_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # Contours: a circle pushed through the inverse of bowl, drawn at ten
    # evenly spaced scales. The unscaled ellipse does not depend on the scale,
    # so it is computed once.
    theta = torch.linspace(0, 6.3, 100)
    unit_circle = torch.stack([theta.sin(), theta.cos()])
    unit_ellipse = torch.mm(torch.inverse(bowl), unit_circle)
    for scale in torch.linspace(0.1, 1.0, 10):
        contour = unit_ellipse * scale
        path_ax.plot(contour[0, :], contour[1, :], color='skyblue')

    # Left panel: trajectory of x, one column per recorded step.
    positions = torch.stack(track).t()
    path_ax.plot(positions[0, :], positions[1, :], marker='o', markersize=3, linewidth=0.5)
    path_ax.set_title('progress of x')
    path_ax.set_xlabel('x[0]')
    path_ax.set_ylabel('x[1]')
    path_ax.set_xlim(-1.6, 1.6)
    path_ax.set_ylim(-1, 1)

    # Right panel: objective value per iteration, with integer-only x ticks.
    loss_ax.plot(range(len(losses)), losses, marker='o')
    loss_ax.xaxis.set_major_locator(matplotlib.ticker.MaxNLocator(integer=True))
    loss_ax.set_title('progress of y')
    loss_ax.set_xlabel('iteration')
    loss_ax.set_ylabel('objective')

    fig.show()
    

import torch, os, PIL.Image, numpy
from matplotlib import cm
from baukit import show

def rgb_heatmap(data, size=None, colormap='hot', amax=None, amin=None, mode='bicubic', symmetric=False):
    """Render a tensor of values as a color-mapped PIL image.

    Args:
        data: tensor of values; it is given leading batch and channel axes
            before being resized, so a 2-d tensor is expected.
        size: target size specification, normalized by spec_size().
        colormap: name of a colormap attribute on matplotlib.cm.
        amax: value mapped to the top of the colormap (default: data.max()).
        amin: value mapped to the bottom of the colormap (default: data.min()).
        mode: interpolation mode passed to torch.nn.functional.interpolate.
        symmetric: when True, widen [amin, amax] so that it is centered on 0.

    Returns:
        A PIL image built from the colormap output scaled to 0..255 uint8.
    """
    size = spec_size(size)
    mapping = getattr(cm, colormap)
    scaled = torch.nn.functional.interpolate(data[None, None], size=size, mode=mode)[0, 0]
    if amax is None:
        amax = data.max()
    if amin is None:
        amin = data.min()
    if symmetric:
        # After the first line amax >= -amin, so the second line yields amin == -amax.
        amax = max(amax, -amin)
        amin = min(amin, -amax)
    # The small epsilon avoids division by zero for constant input.
    normed = (scaled - amin) / (amax - amin + 1e-10)
    # Tensor.numpy() only works on CPU tensors that do not require grad;
    # detach and move to CPU first so GPU / autograd-tracked data also works.
    normed = normed.detach().cpu().numpy()
    return PIL.Image.fromarray((255 * mapping(normed)).astype('uint8'))

def spec_size(size):
    """Normalize a size specification into a two-element size.

    Accepted forms:
        None            -> the default (224, 224)
        int n           -> (n, n)
        torch.Tensor    -> its first two dimensions (tensor.shape[:2])
        PIL image       -> (image.size[1], image.size[0])
        anything else   -> returned unchanged

    Args:
        size: the size specification, in one of the forms above.

    Returns:
        The normalized size.
    """
    if size is None:
        return (224, 224)
    if isinstance(size, int):
        # Bug fix: the original assigned this pair to an unused local (`dims`),
        # so an int was returned unchanged instead of being expanded.
        return (size, size)
    if isinstance(size, torch.Tensor):
        return size.shape[:2]
    if isinstance(size, PIL.Image.Image):
        # Swap the two entries of the image's size attribute.
        return (size.size[1], size.size[0])
    return size

# Learning Rate vs Loss on PyTorch Optimizers

Here is regular gradient descent with momentum.

In [None]:
import torch

# Starting point; x is a fresh copy that autograd tracks so the optimizer can update it.
x_init = torch.tensor([1.0, 1.1])
x = x_init.clone()
x.requires_grad = True
optimizer = torch.optim.SGD([x], lr=0.1, momentum=0.5)

# The objective is the norm of bowl @ x.
bowl = torch.tensor([[ 0.4410, -1.0317], [-0.2844, -0.1035]])
track, losses = [], []

# The loop variable is deliberately not called `iter`: at notebook scope that
# name would shadow the builtin iter() for every later cell.
for iteration in range(21):
    loss = torch.mm(bowl, x[:,None]).norm()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Record detached copies so later updates to x do not alter the history.
    track.append(x.detach().clone())
    losses.append(loss.detach())

plot_progress(bowl, track, losses)

## Exercise 1.

Copy the code above twice, below, and then modify it in three ways:
    
(1) In one copy, decrease the learning rate by 10x  (i.e., 0.01)
(2) In the other copy, multiply the loss by 1/10 (i.e., * 0.1).  Compare the results.
(3) Now try with and without momentum.

Now with Adam
----------------------

Here is Adam:

In [None]:
# The code below uses Adam.
# It reuses x_init and bowl, which this cell does not define; they must already
# exist in the notebook namespace.
x = x_init.clone()
x.requires_grad = True
optimizer = torch.optim.Adam([x], lr=0.1)

track, losses = [], []

# The loop variable is deliberately not called `iter`: at notebook scope that
# name would shadow the builtin iter() for every later cell.
for iteration in range(21):
    loss = torch.mm(bowl, x[:,None]).norm()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Record detached copies so later updates to x do not alter the history.
    track.append(x.detach().clone())
    losses.append(loss.detach())

plot_progress(bowl, track, losses)

## Exercise 2.

Now copy the Adam code below, but decrease the loss by 1000x (e.g., multiply by 0.001).  What happens?

In [None]:
# TODO: copy and modify the Adam code below, decreasing loss by 1000x.

# Convolutions by hand: the network is a picture of the data

Try the code below.

In [None]:
%%bash
# Fetch the sample input for the convolution cell.
# The download is skipped when the file already exists, so re-running this cell
# neither re-downloads it nor leaves numbered duplicates (e.g. *.pt.1) behind.
# The file is written to a temporary name first and renamed only on success,
# so an interrupted download never leaves a truncated file at the final path.
DATA=midterm_2d_conv_data.pt
if [ ! -f "$DATA" ]; then
    wget -O "$DATA.part" "https://cs7150.baulab.info/2022-Fall/data/$DATA" && mv "$DATA.part" "$DATA"
fi

In [None]:
import torch

class Sign(torch.nn.Module):
    """Module that maps every element of its input to its sign."""

    def forward(self, x):
        return x.sign()


# Autograd is switched off for the whole experiment; the layer's parameters are
# overwritten in place with hand-chosen values.
with torch.no_grad():
    conv = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(5, 5), padding=2)

    # Hand-written 5x5 kernel (one output channel, one input channel).
    conv.weight.copy_(torch.tensor([[[
        [-1.0, -1.0, -1.0, -1.0, -1.0],
        [-1.0, -1.0, -1.0, -1.0, -1.0],
        [-1.0, -1.0, -1.0, -1.0, -1.0],
        [-1.0, -1.0, -1.0, -1.0, -1.0],
        [-1.0, -1.0, -1.0, -1.0, -1.0],
    ]]]))
    # Hand-written bias for the single output channel.
    conv.bias.copy_(torch.tensor([-1.0]))

    net = torch.nn.Sequential(conv, Sign())

    # NOTE(review): torch.load unpickles the file; only load data from a source you trust.
    input_data = torch.load('midterm_2d_conv_data.pt')
    output = net(input_data)

    # Show, side by side: the input, the network output, and their sum.
    show([[
        cell
        for image in (input_data, output, output + input_data)
        for cell in (
            show.style(width=300),
            rgb_heatmap(image[0, 0], 800, mode='nearest', symmetric=True),
        )
    ]])

# Running out of memory

Try and fix the code below.

In [None]:
# Exercise cell: builds a very wide bottleneck network on the GPU and averages
# its squared reconstruction error over many random inputs.
# NOTE(review): the surrounding text presents this cell as one that runs out of
# memory and asks the reader to fix it, so the code is left exactly as written.
assert False  # Don't run this by default - remove this line to try this cell.

import torch
from torch.nn import Sequential, ReLU, Linear
# commented out: # torch.set_grad_enabled(False)
num_pixels = 1000000
bottleneck = 100
# num_pixels -> bottleneck -> num_pixels, moved to the GPU.
# The first Linear has num_pixels * bottleneck weights plus bottleneck biases;
# the second has bottleneck * num_pixels weights and no bias.
net = Sequential(
    Linear(in_features=num_pixels, out_features=bottleneck, bias=True),
    ReLU(),
    Linear(in_features=bottleneck, out_features=num_pixels, bias=False)
).cuda()
# Report how many parameter tensors the network has and how many scalars they hold in total.
print('Parameter tensors', len(list(net.named_parameters())))
print('Parameter elements', sum(p.numel() for p in net.parameters()))
# total_error starts as a Python int; after the first `+=` it holds the result
# of a tensor expression.
total_error = 0
sample_size = 10000
for test_index in range(sample_size):
    # A fresh random input of shape (1, num_pixels), created directly on the GPU.
    test_data = torch.randn(1, num_pixels, device='cuda')
    # Add the mean squared difference between the network output and its input.
    total_error += (net(test_data) - test_data).pow(2).mean()
print('Average error', total_error / sample_size)