In [None]:
import numpy as np
import torch
import torchvision
from typing import Tuple

In [103]:
from torchvision.datasets import MNIST
from torchvision.transforms import Lambda, ToTensor

def calculate_mean_and_std() -> Tuple[float, float]:
    mean = torch.mean(train_data.data, dtype=torch.float)/255
    std = torch.std(train_data.data.type(torch.FloatTensor))/255
    return mean, std
mean, std = calculate_mean_and_std()

train_data = MNIST(
    root='.',
    download=True,
    train=True,
    transform=torchvision.transforms.Compose([
        ToTensor(),
        Lambda(lambda t : torch.reshape(t, (-1,)))
    ])
)

test_data = MNIST(
    root='.',
    download=True,
    train=False,
    transform=torchvision.transforms.Compose([
        ToTensor(),
        Lambda(lambda t : torch.reshape(t, (-1,)))
    ])
)
# print(train_data.data[0])

In [104]:
mean, std = calculate_mean_and_std()
assert np.isclose(mean, 0.1306, atol=1e-4)
assert np.isclose(std, 0.3081, atol=1e-4)

In [105]:
train_loader = torch.utils.data.DataLoader(train_data, batch_size=10)

x, y = next(iter(train_loader))
assert len(x.shape) == 2
assert x.shape == (10, 784)

In [None]:
# Zadanie 2


# 1
# Można go rozwiązać metodami z poprzednich zajęć. Jest łatwiejszy ponieważ klasy można rozdzielić prostą.
# Bardziej nadaje się sieć z jednym neuronem

# 2
# wystarczy zanurzenie x1 ** 2, x2 ** 2
# przy x1, x2 dobrze działa sieć 3-1
# Nie udało się. Być może dlatego, że złożenie funkcji liniowych jest funkcją liniową.

# 3
# learning_rate=0.003, activation=Tanh, bez regularyzacji, sieć 8-7-7-7-6, zanurzenia - wszystkie poza x1 x2
# rozwiązania dobrze generalizujące "rozumieją" wzorzec. W zaproponowane przeze mnie sieci tak się nie dzieje - górna połowa płaszczyzny jest pomarańczowa a dolna niebieska, gdyby rozwijać dalej spiralę to funkcja kosztu by gwałtownie wzrosła.

In [130]:
from typing import List

class CustomNetwork(object):
    """
    Simple 1-hidden layer linear neural network
    """
    def __init__(self, input_dim, hidden_dim, output_dim):
        """
        Initialize network's weights
        """

        self.weight_1: torch.Tensor = torch.normal(mean=0.0, std=1.0, size=(input_dim, hidden_dim))
        self.bias_1: torch.Tensor = torch.zeros(size=(input_dim, hidden_dim))

        self.weight_2: torch.Tensor = torch.normal(mean=0.0, std=1.0, size=(hidden_dim, output_dim))
        self.bias_2: torch.Tensor = torch.zeros(size=(hidden_dim, output_dim))

    def __call__(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass through the network
        """
        first_layer = torch.tanh_(x @ self.weight_1 + self.bias_1)
        return first_layer @ self.weight_2 + self.bias_2

    def parameters(self) -> List[torch.Tensor]:
        """
        Returns all trainable parameters
        """
        return [self.weight_1, self.bias_1, self.weight_2, self.bias_2]

train_loader = torch.utils.data.DataLoader(train_data, batch_size=64)
model = CustomNetwork(784, 64, 1)
for i, (x, y) in enumerate(train_loader):
    if i == 2:
        print(f'x.size: {x.shape}, weight_1.shape: {model.weight_1.shape}')
        output = model.__call__(x)
        break

x.size: torch.Size([64, 784]), weight_1.shape: torch.Size([784, 64])


RuntimeError: The size of tensor a (64) must match the size of tensor b (784) at non-singleton dimension 0

In [126]:
from torch import nn
from torch.optim import SGD
from torch.nn.functional import cross_entropy

# some hyperparams
batch_size: int = 64
epoch: int = 3


# prepare data loaders, based on the already loaded datasets
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size)

# initialize the model
model: CustomNetwork = CustomNetwork(784, 15, 1)

# initialize the optimizer using the hyperparams below
lr: float = 0.01
momentum: float = 0.9
optimizer: torch.optim.Optimizer = SGD(lr=lr, momentum=momentum, params=model.parameters())

criterion = nn.CrossEntropyLoss()

# training loop
for e in range(epoch):
    for i, (x, y) in enumerate(train_loader):
        # reset the gradients from previouis iteration
        optimizer.zero_grad()
        # pass through the network
        output: torch.Tensor = model.__call__(x=x)
        # calculate loss
        loss: torch.Tensor = criterion(output, y)
        # backward pass thorught the network
        loss.backward()
        # apply the gradients
        optimizer.step()

        # log the loss value
        if (i + 1) % 100 == 0:
            print(f"Epoch {e} iter {i+1}/{len(train_data) // batch_size} loss: {loss.item()}", end="\r")

    # at the end of an epoch run evaluation on the test set
    with torch.no_grad():
        # initialize the number of correct predictions
        correct: int = 0
        for i, (x, y) in enumerate(test_loader):
            # pass through the network
            output: torch.Tensor = model.__call__(x=x)
            # update the number of correctly predicted examples
            correct += torch.sum(output==y)

        print(f"\nTest accuracy: {correct / len(test_data)}")


# this is your test
assert correct / len(test_data) > 0.82, "Subject to random seed you should be able to get >82% accuracy"

x.size: torch.Size([64, 784]), weight_1.shape: torch.Size([784, 15])


RuntimeError: The size of tensor a (64) must match the size of tensor b (784) at non-singleton dimension 0