# Zadanie 5


Celem ćwiczenia jest implementacja perceptronu wielowarstwowego oraz wybranego algorytmu optymalizacji gradientowej z algorytmem propagacji wstecznej.

Następnie należy wytrenować perceptron wielowarstwowy do klasyfikacji zbioru danych [MNIST](http://yann.lecun.com/exdb/mnist/). Zbiór MNIST dostępny jest w pakiecie `scikit-learn`.

Punktacja:
1. Implementacja propagacji do przodu (`forward`) [1 pkt]
2. Implementacja wstecznej propagacji (`backward`) [2 pkt]
3. Przeprowadzenie eksperymentów na zbiorze MNIST, w tym:
    1. Porównanie co najmniej dwóch architektur sieci [1 pkt]
    2. Przetestowanie każdej architektury na co najmniej 3 ziarnach [1 pkt]
    3. Wnioski [2.5 pkt]
4. Jakość kodu [0.5 pkt]

Polecane źródła - teoria + intuicja:
1. [Karpathy, CS231n Winter 2016: Lecture 4: Backpropagation, Neural Networks 1](https://www.youtube.com/watch?v=i94OvYb6noo&ab_channel=AndrejKarpathy)
2. [3Blue1Brown, Backpropagation calculus | Chapter 4, Deep learning](https://www.youtube.com/watch?v=tIeHLnjs5U8&t=4s&ab_channel=3Blue1Brown)


In [None]:
from abc import abstractmethod, ABC
from typing import List
import numpy as np
import matplotlib.pyplot as plt
import copy
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Load the MNIST dataset (784 features per sample) from OpenML.
mnist = fetch_openml('mnist_784', version=1)
# Divide the features by 255 (presumably raw 0-255 pixel intensities, giving
# values in [0, 1] -- TODO confirm) and cast the targets to integers.
x, y = mnist.data / 255.0, mnist.target.astype(int)
y = y.to_numpy()
# One-hot encode the labels as a dense array.
# scikit-learn 1.2 renamed the ``sparse`` keyword to ``sparse_output`` and
# 1.4 removed the old spelling, so prefer the new name and fall back to the
# old one only when the installed release does not know it yet.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
y_encoded = encoder.fit_transform(y.reshape(-1, 1))




  warn(


In [54]:
class Layer(ABC):
    """Abstract base class for every layer a network is assembled from.

    Subclasses must implement ``forward`` and ``backward``. The base class
    itself only stores a learning rate (0.01 until changed) and validates
    every new value assigned to it.
    """

    def __init__(self) -> None:
        self._learning_rate = 0.01

    @property
    def learning_rate(self):
        """Learning rate currently stored on this layer."""
        return self._learning_rate

    @learning_rate.setter
    def learning_rate(self, learning_rate):
        # Only values strictly between 0 and 1 are accepted; the upper bound
        # is checked first, so a value >= 1 reports the "larger than 1" text.
        assert learning_rate < 1, f"Given learning_rate={learning_rate} is larger than 1"
        assert learning_rate > 0, f"Given learning_rate={learning_rate} is smaller than 0"
        self._learning_rate = learning_rate

    @abstractmethod
    def forward(self, x: np.ndarray) -> np.ndarray:
        """Propagate ``x`` forward through the layer."""

    @abstractmethod
    def backward(self, output_error_derivative) -> np.ndarray:
        """Propagate ``output_error_derivative`` backward through the layer."""

class FullyConnected(Layer):
    """Dense (affine) layer computing ``x @ weights + biases``.

    ``weights`` has shape ``(input_size, output_size)`` and ``biases`` has
    shape ``(output_size,)``; both start as standard-normal samples scaled
    by 0.01. The layer updates its own parameters during ``backward``.
    """

    def __init__(self, input_size:int, output_size:int) -> None:
        super().__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.weights = np.random.randn(input_size, output_size) * 0.01
        self.biases = np.random.randn(output_size) * 0.01

    def forward(self, x:np.ndarray)->np.ndarray:
        """Apply the affine map to ``x`` and cache the input for ``backward``.

        Assumes ``x`` is batch-first, i.e. ``(batch, input_size)`` -- this is
        what the ``axis=0`` bias reduction in ``backward`` expects.
        """
        self.input = x
        self.output = np.dot(x, self.weights) + self.biases
        return self.output

    def backward(self, output_error_derivative)->np.ndarray:
        """Take one gradient step and return the gradient w.r.t. the input.

        Args:
            output_error_derivative: gradient of the loss with respect to
                this layer's output from the most recent ``forward`` call.

        Returns:
            Gradient of the loss with respect to the layer's input.
        """
        # The gradient flowing to the previous layer must be computed with
        # the weights that produced the forward output, i.e. BEFORE the
        # update below; using the freshly updated weights would propagate
        # an error that belongs to a different set of parameters.
        input_error_derivative = np.dot(output_error_derivative, self.weights.T)

        # Gradients for weights and biases (only the weight gradient is clipped).
        weights_gradient = np.dot(self.input.T, output_error_derivative)
        weights_gradient = np.clip(weights_gradient, -1.0, 1.0)
        biases_gradient = np.sum(output_error_derivative, axis=0, keepdims=False)

        # Gradient-descent update of the parameters.
        self.weights -= self.learning_rate * weights_gradient
        self.biases -= self.learning_rate * biases_gradient

        return input_error_derivative

class Tanh(Layer):
    """Element-wise hyperbolic-tangent activation; it has no parameters."""

    def forward(self, x:np.ndarray)->np.ndarray:
        """Return ``tanh(x)`` and remember ``x`` for the backward pass."""
        self.input = x
        return np.tanh(x)

    def backward(self, output_error_derivative)->np.ndarray:
        """Scale the incoming gradient by ``1 - tanh(input)^2``."""
        activation = np.tanh(self.input)
        local_slope = 1 - activation ** 2
        return output_error_derivative * local_slope

class Loss:
    """Bundle a loss function together with its derivative.

    Both callables are invoked as ``f(y_pred, y_true)``.
    """

    def __init__(self, loss_function:callable, loss_function_derivative:callable)->None:
        self.loss_function, self.loss_function_derivative = (
            loss_function,
            loss_function_derivative,
        )

    def loss(self, y_pred:np.ndarray, y_true:np.ndarray)->np.ndarray:
        """Evaluate the wrapped loss function on predictions and targets."""
        evaluate = self.loss_function
        return evaluate(y_pred, y_true)

    def loss_derivative(self, y_pred:np.ndarray, y_true:np.ndarray)->np.ndarray:
        """Evaluate the wrapped loss derivative on predictions and targets."""
        differentiate = self.loss_function_derivative
        return differentiate(y_pred, y_true)

class Mse(Loss):
    """Mean squared error, averaged over every element of the arrays."""

    def __init__(self):
        super().__init__(
            loss_function=self.mse,
            loss_function_derivative=self.mse_derivative,
        )

    @staticmethod
    def mse(y_pred, y_true):
        """Return the mean of the squared differences."""
        residual = y_pred - y_true
        return np.mean(residual ** 2)

    @staticmethod
    def mse_derivative(y_pred, y_true):
        """Return the gradient of ``mse`` with respect to ``y_pred``."""
        residual = y_pred - y_true
        return 2 * residual / y_true.size

class Network:
    """Sequential stack of layers trained with full-batch gradient descent."""

    def __init__(self, layers:List["Layer"], learning_rate:float)->None:
        """Store the layers (applied in list order) and the learning rate."""
        self.layers = layers
        self.learning_rate = learning_rate

    def compile(self, loss:"Loss")->None:
        """Define the loss function and loss function derivative.

        Must be called before ``fit``, which reads ``self.loss``.
        """
        self.loss = loss

    def __call__(self, x:np.ndarray) -> np.ndarray:
        """Forward propagation of x through all layers"""
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def fit(self,
            x_train:np.ndarray,
            y_train:np.ndarray,
            epochs:int,
            learning_rate:float,
            verbose:int=0)->tuple:
        """Fit the network to the training data.

        Every epoch performs one forward pass over the whole of ``x_train``
        followed by one backward pass through the layers in reverse order.

        Args:
            x_train: training inputs.
            y_train: one-hot targets; the class is read with ``argmax(axis=1)``.
            epochs: number of full-batch updates to perform.
            learning_rate: when truthy, replaces the network's learning rate
                and is pushed to every layer; otherwise the layers keep the
                rates they already have.
            verbose: when non-zero, print progress every ``verbose`` epochs.

        Returns:
            ``(accuracy_history, loss_history)`` -- two lists with one entry
            per epoch, both measured on the predictions made before that
            epoch's parameter update.
        """
        if learning_rate:
            self.learning_rate = learning_rate
            for layer in self.layers:
                layer.learning_rate = self.learning_rate

        accuracy_history = []
        loss_history = []

        # The target classes do not change between epochs.
        true_classes = y_train.argmax(axis=1)
        sample_count = len(x_train)

        for epoch in range(epochs):
            # Forward pass
            predictions = self(x_train)

            # Loss and training accuracy for this epoch
            loss_value = self.loss.loss(predictions, y_train)
            correct_predictions = (predictions.argmax(axis=1) == true_classes).sum()
            avg_accuracy = correct_predictions / sample_count

            # Backward pass
            loss_gradient = self.loss.loss_derivative(predictions, y_train)
            for layer in reversed(self.layers):
                loss_gradient = layer.backward(loss_gradient)

            accuracy_history.append(avg_accuracy)
            # Record the loss that was actually computed; previously a
            # never-updated zero was stored here.
            loss_history.append(loss_value)

            # Verbose output
            if verbose and epoch % verbose == 0:
                print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss_value:.4f}, Accuracy: {avg_accuracy:.4f}")

        return accuracy_history, loss_history

# Eksperymenty

In [22]:
# Collectors for per-architecture results (empty at this point).
layer_accuracy, layer_loss = [], []

# Architecture 1: 784 -> 128 -> 32 -> 10, tanh after every dense layer.
layers1 = [
    FullyConnected(784, 128), Tanh(),
    FullyConnected(128, 32), Tanh(),
    FullyConnected(32, 10), Tanh(),
]

# Architecture 2: 784 -> 64 -> 10, tanh after every dense layer.
layers2 = [
    FullyConnected(784, 64), Tanh(),
    FullyConnected(64, 10), Tanh(),
]

# Both architectures under comparison, and the random seeds for the runs.
layers = [layers1, layers2]
SEEDS = [0, 42, 100]

In [None]:

# Hold out 20% of the samples for testing; the fixed random_state makes the
# split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y_encoded, test_size=0.2, random_state=42
)
SEEDS = [10, 50, 100]

# Mean squared error and its derivative with respect to the predictions.
loss = Loss(
    loss_function=lambda y_pred, y_true: np.mean((y_pred - y_true) ** 2),
    loss_function_derivative=lambda y_pred, y_true: 2 * (y_pred - y_true) / y_true.size,
)

# Network to train: 784 -> 128 -> 32 -> 10 with tanh activations.
network = Network(
    layers=[
        FullyConnected(784, 128),
        Tanh(),
        FullyConnected(128, 32),
        Tanh(),
        FullyConnected(32, 10),
        Tanh(),
    ],
    learning_rate=0.01,
)

# Attach the loss, then train; the learning_rate passed to fit() replaces the
# one given to the constructor.
network.compile(loss)
network.fit(x_train, y_train, epochs=40, learning_rate=0.1, verbose=1)

Epoch 1/40, Loss: 0.0999, Accuracy: 0.0987
Epoch 2/40, Loss: 0.0995, Accuracy: 0.0987
Epoch 3/40, Loss: 0.0991, Accuracy: 0.0987
Epoch 4/40, Loss: 0.0987, Accuracy: 0.0987
Epoch 5/40, Loss: 0.0984, Accuracy: 0.0987
Epoch 6/40, Loss: 0.0980, Accuracy: 0.0987
Epoch 7/40, Loss: 0.0977, Accuracy: 0.0987
Epoch 8/40, Loss: 0.0974, Accuracy: 0.0987
Epoch 9/40, Loss: 0.0971, Accuracy: 0.0987
Epoch 10/40, Loss: 0.0968, Accuracy: 0.0987
Epoch 11/40, Loss: 0.0965, Accuracy: 0.0987
Epoch 12/40, Loss: 0.0963, Accuracy: 0.0987
Epoch 13/40, Loss: 0.0960, Accuracy: 0.0987


KeyboardInterrupt: 

# Wnioski