# Zadanie 6


Celem ćwiczenia jest implementacja perceptronu wielowarstwowego oraz wybranego algorytmu optymalizacji gradientowej z algorytmem propagacji wstecznej.

Następnie należy wytrenować perceptron wielowarstwowy do klasyfikacji zbioru danych [MNIST](http://yann.lecun.com/exdb/mnist/). Zbiór MNIST dostępny jest w pakiecie `scikit-learn`.

Punktacja:
1. Implementacja propagacji do przodu (`forward`) [1 pkt]
2. Implementacja wstecznej propagacji (zademonstrowana na bramce XOR) (`backward`) [2 pkt]
3. Przeprowadzenie eksperymentów na zbiorze MNIST, w tym:
    1. Porównanie co najmniej dwóch architektur sieci [1 pkt]
    2. Przetestowanie każdej architektury na conajmniej 3 ziarnach [1 pkt]
    3. Wnioski 1.[5 pkt]
4. Jakość kodu 0.[5 pkt]

Polecane źródła - teoria + intuicja:
1. [Karpathy, CS231n Winter 2016: Lecture 4: Backpropagation, Neural Networks 1](https://www.youtube.com/watch?v=i94OvYb6noo&ab_channel=AndrejKarpathy)
2. [3 Blude one Brown, Backpropagation calculus | Chapter 4, Deep learning
](https://www.youtube.com/watch?v=tIeHLnjs5U8&t=4s&ab_channel=3Blue1Brown)


In [2]:
from abc import abstractmethod, ABC
from typing import List
import numpy as np


In [96]:
class Layer(ABC):
    """Basic building block of the Neural Network"""

    def __init__(self) -> None:
        self._learning_rate = 0.01
        self.X = None

    @abstractmethod
    def forward(self, x:np.ndarray)->np.ndarray:
        """Forward propagation of x through layer"""
        raise NotImplementedError

    @abstractmethod
    def backward(self, output_error_derivative) ->np.ndarray:
        """Backward propagation of output_error_derivative through layer"""
        raise NotImplementedError

    @property
    def learning_rate(self):
        return self._learning_rate

    @learning_rate.setter
    def learning_rate(self, learning_rate):
        assert learning_rate < 1, f"Given learning_rate={learning_rate} is larger than 1"
        assert learning_rate > 0, f"Given learning_rate={learning_rate} is smaller than 0"
        self._learning_rate = learning_rate

class FullyConnected(Layer):
    def __init__(self, input_size:int, output_size:int) -> None:
        super().__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.weights = np.matrix(np.random.uniform(low=-1/np.sqrt(input_size), high=1/np.sqrt(input_size), size=(input_size, output_size)))
        self.biases = np.random.randn(1, output_size)

    def forward(self, x:np.ndarray)->np.ndarray:
        self.X = x
        return x @ self.weights + self.biases

    def backward(self, output_error_derivative)->np.ndarray:
        input_error = output_error_derivative @ self.weights.T
        weights_error = self.X.T @ output_error_derivative
        self.weights -= self.learning_rate * weights_error
        self.biases -= self.learning_rate * output_error_derivative
        return input_error

class Tanh(Layer):
    def __init__(self) -> None:
        super().__init__()

    def forward(self, x:np.ndarray)->np.ndarray:
        self.X = x
        return np.tanh(x)

    def backward(self, output_error_derivative)->np.ndarray:
        print(self.X.shape)
        return (1 - np.tanh(self.X)**2) * np.mean(output_error_derivative)

class Loss:
    def __init__(self, loss_function:callable, loss_function_derivative:callable)->None:
        self.loss_function = loss_function
        self.loss_function_derivative = loss_function_derivative

    def loss(self, x:np.ndarray, y:np.ndarray)->np.ndarray:
        return self.loss_function(x, y)

    def loss_derivative(self, x:np.ndarray, y:np.ndarray)->np.ndarray:
        """Loss function derivative for a particular x and y"""
        return self.loss_function_derivative(x, y)

class Network:
    def __init__(self, layers:List[Layer], learning_rate:float)->None:
        self.layers = layers
        self.learning_rate = learning_rate

    def compile(self, loss:Loss)->None:
        """Define the loss function and loss function derivative"""
        self.loss = loss.loss
        self.loss_derivative = loss.loss_derivative

    def __call__(self, x:np.ndarray) -> np.ndarray:
        """Forward propagation of x through all layers"""
        layer_output = x
        for layer in self.layers:
            layer_output = layer.forward(layer_output)
        return layer_output

    def fit(self,
            x_train:np.ndarray,
            y_train:np.ndarray,
            epochs:int,
            learning_rate:float,
            verbose:int=0)->None:
        """Fit the network to the training data"""
        number_of_samples = len(x_train)
        for i in range(epochs):
            loss_value = 0
            for j in range(number_of_samples):
                output = self(x_train[j])
                loss_value += self.loss(y_train[j], output)
            error = self.loss_derivative(y_train[j], output)
            for layer in reversed(self.layers):
                error = layer.backward(error)
            loss_value /= number_of_samples
            if verbose > 0 and i % verbose == 0:
                print(f"epoch: {i}; loss_value={loss_value}")
                

In [97]:
np.array([1,2]) ** 2

array([1, 4])

In [98]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split


# Eksperymenty

In [99]:
def mse(x:np.ndarray, y:np.ndarray):
    return np.power(y - x, 2)
def mse_derivative(x: np.ndarray, y: np.ndarray):
    return 2 * (y - x)/len(y)
loss = Loss(mse, mse_derivative)

x_train = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]]])
y_train = np.array([[[0]], [[1]], [[1]], [[0]]])

# network
net = Network([FullyConnected(2, 3), Tanh(), FullyConnected(3, 1), Tanh()], learning_rate=0.1)
net.compile(loss)

net.fit(x_train, y_train, epochs=1000, learning_rate=0.1, verbose=100)

# test
out = net(x_train)
print(out)

(1, 1)
(1, 3)


LinAlgError: Last 2 dimensions of the array must be square

In [69]:
a = np.array([2, 3])
b = np.array([1, 1])
np.mean(a)

2.5

# Wnioski

In [7]:
a = np.matrix(np.random.random((2,3)))
np.c_[a, np.random.random((2, 1))]

matrix([[0.5910934 , 0.02975142, 0.76083433, 0.62899763],
        [0.57939911, 0.11221394, 0.30080693, 0.63085744]])