In [None]:
# We start by loading PyTorch and looking at its fundamental building blocks (tensors, etc.)

In [None]:
import torch
import numpy as np

# Allocate a 3x4 matrix without initialising its memory (contents are arbitrary).
x = torch.empty((3, 4))
print(type(x))
print(x)

# Tensors filled with a constant value.
zeros = torch.zeros((2, 3, 10))
print(zeros)

ones = torch.ones((2, 3))
print(ones)

# Fix the seed so the random draws below are repeatable.
torch.manual_seed(1729)
random = torch.rand((2, 3))
print(random)

# Leaf tensor that autograd tracks.
a = torch.rand((2, 2)).requires_grad_()
print(a)

# clone() copies the values and stays attached to the autograd graph of `a`.
b = torch.clone(a)
print(b)

# Copy that is cut off from the graph: gradients cannot flow back to `a`.
c = a.clone().detach()
print(c)

# We generate a neural network

Consider a scalar function $f(x)=\cos(x)$. We want to approximate it using a neural network.

What are the dimensions of the input and output?

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Fully connected network: input layer, ``n_layers`` hidden layers, output layer.

    Every linear layer except the last one is followed by the same activation
    (ReLU). The whole object is an ``nn.Module``, so it can be differentiated
    by autograd.

    Args:
        nInput: Number of input features.
        nOutput: Number of output features.
        n_hidden: Width of the input layer's output and of every hidden layer.
        n_layers: Number of hidden (n_hidden -> n_hidden) layers.
    """

    def __init__(self, nInput, nOutput, n_hidden=3, n_layers=2):
        super().__init__()
        self.n_hidden = n_hidden
        width = self.n_hidden

        # Each nn.Linear is an affine map y = Wx + b. Its parameters are drawn
        # from U(-sqrt(k), sqrt(k)) with k = 1/in_features, see
        # https://pytorch.org/docs/stable/generated/torch.nn.Linear.html?highlight=nn+linear#torch.nn.Linear
        # The layers are created in the order input, output, hidden.
        self.input_layer = nn.Linear(nInput, width)
        self.output_layer = nn.Linear(width, nOutput)
        self.hidden_layers = nn.ModuleList(
            [nn.Linear(width, width) for _ in range(n_layers)]
        )

        # Other activation functions and layers can be tried here:
        # https://pytorch.org/docs/stable/nn.html
        self.activation = nn.ReLU()

    def forward(self, x):
        """Map ``x`` through the network; no activation follows the output layer."""
        hidden = self.activation(self.input_layer(x))
        for layer in self.hidden_layers:
            hidden = self.activation(layer(hidden))
        return self.output_layer(hidden)

In [None]:
# Scalar problem: one input feature is mapped to one output feature.
nInput = 1
nOutput = 1
net = Net(nInput, nOutput, n_hidden=3)
print(net)

# Generate one random sample; inputs are laid out as (nDataPoints, size input).
input_tensor = torch.rand(1, nInput)
print(f"Input tensor: {input_tensor}")
# Call the module itself instead of .forward(): __call__ also runs any
# registered hooks, which a direct .forward() call silently skips.
print(f"Network output: {net(input_tensor)}")

In [None]:
# Multi-dimensional problem: ten input features are mapped to ten output features.
nInput = 10
nOutput = 10
net = Net(nInput, nOutput)
print(net)

# Generate one random sample of shape (1, nInput).
input_tensor = torch.rand(1, nInput)
print(f"Input tensor: {input_tensor}")
# Call the module itself instead of .forward(): __call__ also runs any
# registered hooks, which a direct .forward() call silently skips.
print(f"Network output: {net(input_tensor)}")

In [None]:
import torch.utils.data as data
import torch.optim as optim
from datasets import CustomDataset

import warnings # remove warnings (not nice :) )
# NOTE: this silences *every* warning for the rest of the session.
warnings.filterwarnings("ignore")

# Synthetic regression problem: y = 1 - cos(2*pi*x) sampled on [0, 1).
# A seeded generator makes the data identical on every run; 1729 is the same
# value the notebook passes to torch.manual_seed.
data_rng = np.random.default_rng(1729)

nData = 200
x_train = data_rng.uniform(low=0.0, high=1, size=(nData,))
y_train = 1-np.cos(2*np.pi*x_train)

# Sorted evaluation points on the same interval.
x_test = np.sort(data_rng.uniform(low=0.0, high=1.0, size=(50,)))
batch_size = 64

# Wrap the arrays in the project's dataset class and serve shuffled mini-batches.
training_data = CustomDataset(x_train,y_train)
training_loader = data.DataLoader(training_data, batch_size=batch_size, shuffle=True, num_workers=2)

def train_one_epoch(epoch_index, debug=1, scheduler=None):
    """Run one pass over ``training_loader`` and return the summed batch losses.

    The function works on the notebook-level globals ``model``, ``optimizer``,
    ``loss_fn`` and ``training_loader``; they must be defined before calling.

    Args:
        epoch_index: Index of the current epoch. Accepted for the caller's
            bookkeeping; it does not influence the computation.
        debug: When equal to 1, print the flattened gradient of every
            parameter whose name contains "weight" for the first batch.
        scheduler: Optional learning-rate scheduler. When given, its
            ``step()`` is called once after the last batch. Previously the
            function stepped a global ``scheduler`` that the notebook never
            defines, which raises ``NameError``.

    Returns:
        float: Sum of ``loss.item()`` over all batches of the epoch.
    """
    running_loss = 0.

    # enumerate() gives the batch index, used for the first-batch debug print.
    for i, batch in enumerate(training_loader):
        # Every batch is a dict holding inputs under 'x' and labels under 'y'.
        n_samples = batch['x'].shape[0]
        # Reshape to column vectors and cast to float32 on a detached copy.
        # (torch.tensor(existing_tensor) is discouraged and emits a warning.)
        inputs = batch['x'].reshape((n_samples, 1)).detach().clone().to(torch.float32)
        labels = batch['y'].reshape((n_samples, 1)).detach().clone().to(torch.float32)

        # Zero your gradients for every batch!
        optimizer.zero_grad()

        # Make predictions for this batch
        outputs = model(inputs)

        # Compute the loss and its gradients
        loss = loss_fn(outputs, labels)
        loss.backward()
        if debug == 1 and i == 0:
            grads = {name: params.grad.view(-1).cpu().clone().numpy() for name, params in model.named_parameters() if "weight" in name}
            for key in grads.keys():
                print(f"{key}:{grads[key]}")

        # Adjust learning weights
        optimizer.step()

        # Gather data and report
        running_loss += loss.item()

    if scheduler is not None:
        scheduler.step()
    return running_loss

In [None]:
import torch.utils.data as data
import torch.optim as optim

import warnings # remove warnings (not nice :) )
# NOTE: this silences *every* warning for the rest of the session.
warnings.filterwarnings("ignore")

# Synthetic regression problem: y = 1 - cos(2*pi*x) sampled on [0, 1).
# A seeded generator makes the data identical on every run; 1729 is the same
# value the notebook passes to torch.manual_seed.
data_rng = np.random.default_rng(1729)

nData = 200
x_train = data_rng.uniform(low=0.0, high=1, size=(nData,))
y_train = 1-np.cos(2*np.pi*x_train)

# Sorted evaluation points on the same interval.
x_test = np.sort(data_rng.uniform(low=0.0, high=1.0, size=(50,)))
batch_size = 64

# Wrap the arrays in the project's dataset class and serve shuffled mini-batches.
training_data = CustomDataset(x_train,y_train)
training_loader = data.DataLoader(training_data, batch_size=batch_size, shuffle=True, num_workers=2)

def train_one_epoch(epoch_index, debug=1, scheduler=None):
    """Train ``model`` for one epoch and return the summed batch losses.

    Reads the notebook-level globals ``model``, ``optimizer``, ``loss_fn`` and
    ``training_loader``; they must be defined before calling.

    Args:
        epoch_index: Index of the current epoch. Accepted for the caller's
            bookkeeping; it does not influence the computation.
        debug: When equal to 1, print the flattened gradient of every
            parameter whose name contains "weight" for the first batch.
        scheduler: Optional learning-rate scheduler; when given, its
            ``step()`` is called once after the last batch.

    Returns:
        float: Sum of ``loss.item()`` over all batches of the epoch.
    """
    running_loss = 0.

    # enumerate() gives the batch index, used for the first-batch debug print.
    for i, batch in enumerate(training_loader):
        # Every batch is a dict holding inputs under 'x' and labels under 'y'.
        n_samples = batch['x'].shape[0]
        # Reshape to column vectors and cast to float32 on a detached copy.
        # (torch.tensor(existing_tensor) is discouraged and emits a warning.)
        inputs = batch['x'].reshape((n_samples, 1)).detach().clone().to(torch.float32)
        labels = batch['y'].reshape((n_samples, 1)).detach().clone().to(torch.float32)

        # Zero your gradients for every batch!
        optimizer.zero_grad()

        # Make predictions for this batch
        outputs = model(inputs)

        # Compute the loss and its gradients
        loss = loss_fn(outputs, labels)
        loss.backward()
        if debug == 1 and i == 0:
            grads = {name: params.grad.view(-1).cpu().clone().numpy() for name, params in model.named_parameters() if "weight" in name}
            for key in grads.keys():
                print(f"{key}:{grads[key]}")

        # Adjust learning weights
        optimizer.step()

        # Gather data and report
        running_loss += loss.item()

    if scheduler is not None:
        scheduler.step()
    return running_loss

In [None]:
def init_weights_zero(m):
    """Set the weight and bias of an ``nn.Linear`` module to zero, in place.

    Modules of any other type are left untouched, so the function can be
    handed to ``model.apply(...)``.

    Args:
        m: The module to initialise.
    """
    if isinstance(m, nn.Linear):
        nn.init.zeros_(m.weight)
        # Layers created with bias=False carry no bias tensor.
        if m.bias is not None:
            nn.init.zeros_(m.bias)

def init_weights_constant(m):
    """Set the weight and bias of an ``nn.Linear`` module to one, in place.

    Modules of any other type are left untouched, so the function can be
    handed to ``model.apply(...)``.

    Args:
        m: The module to initialise.
    """
    if isinstance(m, nn.Linear):
        nn.init.constant_(m.weight, 1.0)
        # Layers created with bias=False carry no bias tensor.
        if m.bias is not None:
            nn.init.constant_(m.bias, 1.0)

def init_weights_xavier(m):
    """Xavier-uniform initialise an ``nn.Linear`` weight; set its bias to 0.01.

    Modules of any other type are left untouched, so the function can be
    handed to ``model.apply(...)``.

    Args:
        m: The module to initialise.
    """
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight)
        # Layers created with bias=False carry no bias tensor.
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.01)

In [None]:
# Train the same small network from three different starting points and print
# the first-batch weight gradients of every epoch (debug=1).
initialisers = {
    'zeros': init_weights_zero,
    'constant': init_weights_constant,
    'xavier': init_weights_xavier,
}

for initialisation_type, initialisation in initialisers.items():
    # Fresh network; .apply() visits every sub-module with the initialiser.
    model = Net(1, 1, n_hidden=3)
    model.apply(initialisation)

    # train_one_epoch picks these up as globals.
    loss_fn = torch.nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

    print(f"Initialisation type: {initialisation_type}")
    for i in range(10):
        _ = train_one_epoch(i, debug=1)


# Optimizers

In [None]:
from matplotlib import pyplot as plt
import copy

# Compare optimisers and learning rates on the same architecture.
# One loss curve is drawn per (optimizer, lr) pair; the trained networks are
# kept in `models` under the key "<optimizer>_<lr>".
models = {}
fig, ax = plt.subplots()

for optimizer_type in ['SGD','Adam']:
    for lr in [0.1,0.01,0.001]:
        # train_one_epoch reads model / loss_fn / optimizer as globals.
        model = Net(1,1,n_hidden=50, n_layers=3)
        loss_fn = torch.nn.MSELoss()

        if optimizer_type == 'SGD':
            optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
        elif optimizer_type == 'Adam':
            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
        else:
            # Without this branch an unknown name would silently reuse the
            # optimizer (and parameters) left over from the previous run.
            raise ValueError(f"Unknown optimizer type: {optimizer_type}")

        losses = [train_one_epoch(i, debug=0) for i in range(20)]

        ax.semilogy(losses, label=f'{optimizer_type} lr = {lr}', marker='.')
        models[f"{optimizer_type}_{lr}"] = copy.deepcopy(model)

# Decorate the figure once, after all curves have been added.
ax.set_xlabel('epoch')
ax.set_ylabel('summed batch MSE loss')
ax.set_title('Training loss per optimizer and learning rate')
ax.legend();


In [None]:
# Compare the network trained with Adam (lr = 0.01) against the target function.
x_plot = torch.linspace(0, 1, 10)

# Inference only: no autograd graph is needed for plotting.
with torch.no_grad():
    y_pred = models["Adam_0.01"](x_plot.unsqueeze(1)).squeeze(1)

# Target evaluated with torch ops so tensors are not fed into NumPy ufuncs.
y_true = 1 - torch.cos(2 * np.pi * x_plot)

plt.plot(x_plot.numpy(), y_pred.numpy(), label='network (Adam, lr = 0.01)')
plt.plot(x_plot.numpy(), y_true.numpy(), label='1 - cos(2*pi*x)')
plt.xlabel('x')
plt.ylabel('y')
plt.legend();

# Convolutional Neural Networks

In [None]:
# Load MNIST

import torchvision
import torchvision.transforms as transforms

BATCH_SIZE = 32

## transformations: only ToTensor, no normalisation step
transform = transforms.Compose([transforms.ToTensor()])

# Arguments shared by the train and test variants.
mnist_kwargs = dict(root='./data', download=True, transform=transform)
loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=2)

## download and load training dataset (shuffled)
trainset = torchvision.datasets.MNIST(train=True, **mnist_kwargs)
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_kwargs)

## download and load testing dataset (fixed order)
testset = torchvision.datasets.MNIST(train=False, **mnist_kwargs)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_kwargs)

# Class names indexed by label: '0' ... '9'.
classes = tuple(str(digit) for digit in range(10))

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def imshow(img, unnormalize=False):
    """Display a channel-first image tensor with matplotlib.

    Args:
        img: Image tensor laid out as (channels, height, width); it is
            transposed to (height, width, channels) before plotting.
        unnormalize: If True, apply ``img / 2 + 0.5`` first, which undoes a
            normalisation with mean 0.5 and std 0.5. The default is False
            because the notebook's transform is ``ToTensor()`` only;
            rescaling such data squeezes it into [0.5, 1] and the image
            looks washed out.
    """
    if unnormalize:
        img = img / 2 + 0.5
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

# Draw one mini-batch from the shuffled training loader.
dataiter = iter(trainloader)
first_batch = next(dataiter)
images, labels = first_batch

# Tile the batch into a single image and display it.
image_grid = torchvision.utils.make_grid(images)
imshow(image_grid)


In [None]:
# set up CNNs
class Net(nn.Module):
    """Small convolutional classifier producing 10 class scores.

    Architecture: conv(5x5, 6 channels) -> max-pool(2) -> conv(5x5, 16
    channels) -> max-pool(2) -> flatten -> linear(120) + ReLU -> linear(10).

    NOTE: this class re-uses the name ``Net`` and therefore replaces the
    fully connected ``Net`` defined earlier in the notebook.

    Args:
        input_shape: (channels, height, width) of one input image. The size
            of the first linear layer is derived from it, so nothing is
            hard-coded. The default (1, 28, 28) gives 16*4*4 = 256 features,
            the value that used to be hard-coded.
    """

    def __init__(self, input_shape=(1, 28, 28)):
        super().__init__()
        # (in channels, out channels, kernel size) https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html#torch.nn.Conv2d
        self.conv1 = nn.Conv2d(input_shape[0], 6, 5)
        self.pool = nn.MaxPool2d(2, 2) # kernel_size, stride
        self.conv2 = nn.Conv2d(6, 16, 5)

        # Push a dummy image through the convolutional part to learn how many
        # features reach the first linear layer.
        with torch.no_grad():
            n_features = self._extract_features(torch.zeros(1, *input_shape)).shape[1]

        self.fc1 = nn.Linear(n_features, 120)
        self.fc2 = nn.Linear(120, 10)

    def _extract_features(self, x):
        """Apply both conv + pool stages and flatten everything but the batch axis."""
        # No activation is applied after the convolutions; the author observed
        # that wrapping them in F.relu() seems to improve convergence.
        x = self.pool(self.conv1(x))
        x = self.pool(self.conv2(x))
        return torch.flatten(x, 1)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for a batch of images."""
        x = self._extract_features(x)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

net = Net()

In [None]:
# Classification loss applied to the class scores returned by `net`.
criterion = nn.CrossEntropyLoss()

# SGD with momentum over all parameters of the CNN. This rebinds the global
# `optimizer` that train_one_epoch also reads.
sgd_settings = dict(lr=0.001, momentum=0.9)
optimizer = optim.SGD(net.parameters(), **sgd_settings)

In [None]:
# Number of mini-batches between two progress reports. It is also the divisor
# of the accumulated loss, so the printed value is the mean loss per batch.
# (The report used to divide by 2000 while printing every 100 batches.)
LOG_EVERY = 100

for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    # The loop variable is named `batch`, not `data`, so that the module alias
    # `data` (torch.utils.data) is not overwritten at notebook level.
    for i, batch in enumerate(trainloader, 0):
        # each batch is a list of [inputs, labels]
        inputs, labels = batch

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % LOG_EVERY == LOG_EVERY - 1:    # print every LOG_EVERY mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / LOG_EVERY:.3f}')
            running_loss = 0.0

print('Finished Training')


In [None]:
# Images per row: used for the picture grid and for the printed predictions,
# so that text row k lines up with image row k.
GRID_COLUMNS = 8

dataiter = iter(testloader)
images, labels = next(dataiter)

# print images
imshow(torchvision.utils.make_grid(images, nrow=GRID_COLUMNS))

# Inference only: skip building the autograd graph.
with torch.no_grad():
    outputs = net(images)
# The index of the largest score is the predicted class.
_, predicted = torch.max(outputs, 1)

# Print a prediction for every image in the batch. The previous fixed
# range(8) failed for batches smaller than 8 and skipped images beyond 8.
predicted_names = [f'{classes[class_idx]:5s}' for class_idx in predicted.tolist()]
print('Predicted: ')
for row_start in range(0, len(predicted_names), GRID_COLUMNS):
    print(' '.join(predicted_names[row_start:row_start + GRID_COLUMNS]))