## Kod w mniejszym lub większym stopniu wzorowany na kodzie z laboratorium 9,10 na Nauczaniu maszynowym, WMI, UJ, 2019/20, semestr zimowy

Feedforward neural network
trained on the MNIST dataset

In [1]:
import numpy as np
import torch
import torchvision
import matplotlib.pyplot as plt
import torch.nn as nn

from typing import Tuple
from typing import List
from typing import Optional
from copy import deepcopy, copy

In [None]:
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.transforms import Lambda
from torchvision.transforms import Compose, ToTensor

## download data to calculate mean and std

In [None]:
# Raw (untransformed) MNIST splits, used only to compute dataset statistics.
train_data = MNIST(root='.', train=True, download=True)
test_data = MNIST(root='.', train=False, download=True)

# Divide the stored pixel tensors by 255.0 (this also turns them into floats).
for dataset in (train_data, test_data):
    dataset.data = dataset.data / 255.0


def calculate_mean_and_std(data: Optional[torch.Tensor] = None) -> Tuple[float, float]:
    """
    Return the mean and standard deviation taken over all elements of a tensor.

    data: tensor to summarize; when omitted, the module-level
          train_data.data is used (the original behavior)
    returns: (mean, std) as plain Python floats, as the annotation promises
    """
    if data is None:
        data = train_data.data
    if not data.is_floating_point():
        # mean()/std() are not defined for integer tensors
        data = data.float()
    return data.mean().item(), data.std().item()

# Dataset-wide statistics; the normalization transform defined later reads them.
mean, std = calculate_mean_and_std()

## Download data once again + normalization and shape transformation using mean and std

In [None]:
# Per-sample pipeline: convert to a tensor, flatten to 784 values, then
# standardize with the precomputed dataset mean and std.
transform = Compose([
    ToTensor(),
    Lambda(lambda img: img.view(784)),
    Lambda(lambda vec: (vec - mean) / std),
])

# Both splits share the same location and preprocessing.
shared_kwargs = dict(root='.', download=True, transform=transform)
train_data = MNIST(train=True, **shared_kwargs)
test_data = MNIST(train=False, **shared_kwargs)

## Loader + check that it works: x, y - a sample batch.

We may check that each x is a normalized vector of dimension 784,
for example via x[1].std(), x[1].mean()

In [None]:
# Pull a single batch of 100 samples and verify it is a flat (100, 784) matrix.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=100)
first_batch = next(iter(train_loader))
x, y = first_batch

assert x.dim() == 2
assert tuple(x.shape) == (100, 784)

In [None]:
# Show the batch shapes, then the statistics of every 7th sample in the batch.
print(x.shape)
print(y.shape)

print("Checking, whether normalization in transforms is working.")

for i in range(0, 100, 7):
    sample = x[i]
    print("Sample image in bath size:\t mean(x) = {:}\t std(x)={:}".format(sample.mean(), sample.std()))

In [None]:
# Notebook display: size of the tensor stored in the training dataset.
train_data.data.size()

## Neural network

In [None]:
class CustomNetwork(object):

    """
    Simple D-layer fully connected neural network with tanh activations.
    layers_dim = tuple(n0, n1, n2, ...nD)
    n0 = input layer
    n_D = output layer

    tanh is applied after every layer, including the output layer.
    """

    def __init__(self, D, layers_dim):

        """
        Initialize network's weights according to Gaussian iid and network's biases with 0.0 values

        D: expected number of weight layers; must equal len(layers_dim) - 1
        layers_dim: sequence of layer widths, from input to output
        raises AssertionError when D does not match layers_dim
        """

        self.weights = []
        self.biases = []

        self.D = len(layers_dim) - 1
        assert self.D == D

        print("Depth of the network = number of hidden layers + 1:", D)

        for i in range(self.D):

            # weight has shape (fan_out, fan_in); the forward pass multiplies by its transpose
            weight: torch.Tensor = torch.randn((layers_dim[i + 1], layers_dim[i]))
            bias: torch.Tensor = torch.zeros(layers_dim[i + 1])

            # plain tensors are used as parameters, so gradient tracking is enabled by hand
            weight.requires_grad = True
            bias.requires_grad = True

            self.weights.append(weight)
            self.biases.append(bias)

    def __call__(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass through the network

        x: input whose last dimension equals layers_dim[0]
        returns: activations of the last layer
        """

        # One uniform loop over all layers; the previous version special-cased
        # the first layer and left the result undefined for a one-layer network.
        out = x
        for weight, bias in zip(self.weights, self.biases):
            out = torch.tanh(torch.matmul(out, weight.T) + bias)
        return out

    def parameters(self) -> List[torch.Tensor]:
        """
        Returns all trainable parameters: all weights first, then all biases
        """
        return self.weights + self.biases

In [None]:
from torch import nn
from torch.optim import SGD
from torch.nn.functional import cross_entropy


# Network geometry: 784 inputs -> 100 -> 50 -> 10 outputs, i.e. D = 3 weight layers.
input_dim = 784
output_dim = 10
D = 3
layer_sizes = (input_dim, 100, 50, output_dim)

# Build the network.
model: CustomNetwork = CustomNetwork(D=D, layers_dim=layer_sizes)

# SGD hyperparameters.
lr: float = 0.005
momentum: float = 0.99

# Optimizer over the network's tensors, plus the loss used for training.
optimizer: torch.optim.Optimizer = SGD(
    params=model.parameters(),
    lr=lr,
    momentum=momentum,
)
criterion = nn.CrossEntropyLoss()

## Training loop

In [None]:
# some hyperparams
batch_size: int = 6000
epoch: int = 250
    
    
# prepare data loaders, based on the already loaded datasets
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size)


loss_tab = []
test_accuracy_tab = []
parameter_tab =[]


for e in range(epoch):   
    
    for i, (x, y) in enumerate(train_loader):
        # reset the gradients from previouis iteration
        optimizer.zero_grad()

        # pass through the network
        output: torch.Tensor = model(x)        
        loss: torch.Tensor = criterion(output, y)
                
        # backward pass thorught the network
        loss.backward()
        
        # apply the gradients
        optimizer.step()
  
        # log the loss value
        if (i + 1) % 100 == 0:
            print(f"Epoch {e} iter {i+1}/{len(train_data) // batch_size} loss: {loss.item()}", end="\r")           
            
    
    ## at the end of an epoch save weights, biasses and loss on training set
    loss_tab.append(loss.item())
    
    print("\n\n\nepoch e={:}\t iteration i={:}\t model parameters = {:}".format(e,i, model.parameters()))    
    parameter_tab.append(deepcopy(model.parameters()))
    
    
    # at the end of an epoch run evaluation on the test set
    with torch.no_grad():
        # initialize the number of correct predictions
        correct: int = 0 
        for i, (x, y) in enumerate(test_loader):
            # pass through the network            
            output: torch.Tensor = model(x)
            
            # update the number of correctly predicted examples
            pred_labels = torch.argmax(output, 1)
            correct += ( (pred_labels -y) ==0).sum()
        
        test_accuracy_tab.append( float(correct) / len(test_data))
        print(f"\n\n\nTest accuracy: {float(correct) / len(test_data)}")

     

In [None]:
# Two side-by-side panels: training loss (left) and test accuracy (right), per epoch.
fig, axs = plt.subplots(1, 2, figsize=(15, 5))

curves = ((loss_tab, 'loss'), (test_accuracy_tab, 'test accuracy'))
for ax, (values, y_label) in zip(axs, curves):
    ax.plot(values)
    ax.set_xlabel('epoch')
    ax.set_ylabel(y_label)
    ax.grid(True)

plt.show()


## weights/ biases analysis

parameter_tab is a list of length = number of epochs; each item holds the weight and bias values saved at the end of the given epoch. For D = 3 it is
$$parameter\_tab[0] = [weights_1, weights_2, weights_3, biases_1, biases_2, biases_3 ]$$

The dimension of each item is determined by the architecture of our neural network. In general:
$dim(weights_1)\neq dim(weights_2)$  and $dim(biases_1)\neq dim(biases_2)$ 

layers: 784, 100, 50, 10

In [None]:
# Notebook display: number of stored snapshots (the training loop appends one per epoch).
len(parameter_tab)

In [None]:
# Notebook display: number of tensors in the first snapshot (weights followed by biases).
len(parameter_tab[0])

In [None]:
# Inspect the snapshot stored at index 1 of parameter_tab.
params = parameter_tab[1]

# Snapshot layout: three weight matrices followed by three bias vectors.
w1, w2, w3 = params[0], params[1], params[2]
b1, b2, b3 = params[3], params[4], params[5]

weight_sizes = [t.size() for t in (w1, w2, w3)]
bias_sizes = [t.size() for t in (b1, b2, b3)]
print("size: w1 = {:}\tw2 = {:}\tw3 = {:}".format(*weight_sizes))
print("size: b1 = {:}\tb2 = {:}\tb3 = {:}".format(*bias_sizes))

for label, bias in (("\nb1 = ", b1), ("\nb2 = ", b2), ("\nb3 = ", b3)):
    print(label, bias)

In [None]:
# Inspect the snapshot stored at index 1 of parameter_tab.
params = parameter_tab[1]

# Snapshot layout: three weight matrices followed by three bias vectors.
w1, w2, w3 = params[0], params[1], params[2]
b1, b2, b3 = params[3], params[4], params[5]

weight_sizes = [t.size() for t in (w1, w2, w3)]
bias_sizes = [t.size() for t in (b1, b2, b3)]
print("size: w1 = {:}\tw2 = {:}\tw3 = {:}".format(*weight_sizes))
print("size: b1 = {:}\tb2 = {:}\tb3 = {:}".format(*bias_sizes))

for label, bias in (("\nb1 = ", b1), ("\nb2 = ", b2), ("\nb3 = ", b3)):
    print(label, bias)

In [None]:
# Inspect the snapshot stored at index 37 of parameter_tab.
params = parameter_tab[37]

# Snapshot layout: three weight matrices followed by three bias vectors.
w1, w2, w3 = params[0], params[1], params[2]
b1, b2, b3 = params[3], params[4], params[5]

weight_sizes = [t.size() for t in (w1, w2, w3)]
bias_sizes = [t.size() for t in (b1, b2, b3)]
print("size: w1 = {:}\tw2 = {:}\tw3 = {:}".format(*weight_sizes))
print("size: b1 = {:}\tb2 = {:}\tb3 = {:}".format(*bias_sizes))

for label, bias in (("\nb1 = ", b1), ("\nb2 = ", b2), ("\nb3 = ", b3)):
    print(label, bias)