# Backpropagation

## We have to consider the following steps

## How to start now?

## Prepare your dataset

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import tqdm

def load_mnist_data(root_path='./data', batch_size=4, num_workers=2):
    """Create the MNIST train and test data loaders.

    The dataset is downloaded into ``root_path`` if it is not already there.
    Every image is converted to a tensor and normalized with mean 0.5 and
    standard deviation 0.5.

    Args:
        root_path: Directory where the MNIST files are stored / downloaded to.
        batch_size: Number of samples per batch for both loaders.
        num_workers: Number of loader worker processes for both loaders.

    Returns:
        A ``(trainloader, testloader)`` tuple. The training loader shuffles
        its samples, the test loader keeps the dataset order.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        # Normalize expects one value per channel. MNIST has a single channel,
        # so pass one-element tuples; `(0.5)` would just be the float 0.5.
        transforms.Normalize((0.5,), (0.5,)),
    ])

    trainset = torchvision.datasets.MNIST(
        root=root_path, train=True, download=True, transform=transform,
    )
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True, num_workers=num_workers,
    )

    testset = torchvision.datasets.MNIST(
        root=root_path, train=False, download=True, transform=transform,
    )
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=batch_size, shuffle=False, num_workers=num_workers,
    )
    return trainloader, testloader

## Building your neural network

In [1]:
import numpy as np
from typing import Any, Callable, Tuple

import numpy as np
from typing import Any, Callable, Tuple

class MyWeightTensor:
    """Small NumPy-backed tensor used for weights and layer outputs.

    ``+`` adds element-wise with NumPy broadcasting. ``*`` is ``np.dot``
    (dot / matrix product), NOT an element-wise product.
    """

    def __init__(self, shape: "Tuple[int, ...] | int | float", init_weight_fn: Callable = np.random.randn, init_weights: "MyWeightTensor | np.ndarray | int | float | None" = None):
        """Create a tensor from explicit values or from an initializer.

        Args:
            shape: Shape of the tensor. An int ``n`` is stored as ``(n,)``.
                A float is only accepted together with a scalar
                ``init_weights``.
            init_weight_fn: Called as ``init_weight_fn(*shape)`` to create the
                values when ``init_weights`` is None.
            init_weights: Optional initial values. Another ``MyWeightTensor``
                shares its ``values`` object with the new tensor (no copy).
                Otherwise a tuple ``shape`` requires an ``np.ndarray`` and a
                scalar ``shape`` requires an int or float.

        Raises:
            AssertionError: If ``shape`` or ``init_weights`` has an
                unsupported type.
            TypeError: If ``shape`` is a float and no ``init_weights`` is
                given, because random values cannot be drawn for it.
        """
        assert isinstance(shape, (tuple, int, float)), f'Allowed shapes: tuple, int, float, got: {type(shape)}'

        # Remember how the shape was passed: it decides which kind of explicit
        # init_weights is accepted below.
        shape_is_tuple = isinstance(shape, tuple)
        # Store an int shape as a 1-tuple on every path so `len(t.shape)` works.
        self.shape = (shape,) if isinstance(shape, int) else shape

        if init_weights is None:
            if not isinstance(self.shape, tuple):
                raise TypeError(f'Cannot create random weights for non-integer shape: {shape!r}')
            self.values = init_weight_fn(*self.shape)
        elif isinstance(init_weights, MyWeightTensor):
            self.values = init_weights.values
        else:
            if shape_is_tuple:
                assert isinstance(init_weights, np.ndarray)
            else:
                assert isinstance(init_weights, int) or isinstance(init_weights, float)

            self.values = init_weights

    @staticmethod
    def _unwrap(other):
        """Return the raw NumPy/scalar operand behind ``other``."""
        if isinstance(other, MyWeightTensor):
            return other.values
        assert isinstance(other, (np.ndarray, int, float, np.number))
        return other

    @property
    def T(self) -> 'MyWeightTensor':
        """Transposed tensor (a scalar or 1-D tensor is returned unchanged in shape)."""
        # asarray lets scalar-valued tensors be transposed as well.
        transposed = np.asarray(self.values).T
        return MyWeightTensor(shape=transposed.shape, init_weights=transposed)

    def __add__(self, other) -> 'MyWeightTensor':
        """Element-wise sum of this tensor and ``other``.

        ``other`` may be a ``MyWeightTensor``, an ``np.ndarray`` or a scalar.
        """
        # Take the shape from the result: broadcasting may make it larger
        # than this tensor's own shape.
        total = np.asarray(self.values + self._unwrap(other))
        return MyWeightTensor(shape=total.shape, init_weights=total)

    def __mul__(self, other) -> 'MyWeightTensor':
        """Dot product ``np.dot(self, other)`` of this tensor and ``other``.

        ``other`` may be a ``MyWeightTensor``, an ``np.ndarray`` or a scalar.
        """
        # np.dot of two 1-D arrays yields a NumPy scalar; asarray turns it into
        # a 0-d array so it can be wrapped like any other result.
        product = np.asarray(np.dot(self.values, self._unwrap(other)))
        return MyWeightTensor(shape=product.shape, init_weights=product)


###############################
# For creating a linear layer #
###############################
class MyLinearLayer:
    """Fully connected layer computing ``weights . x + bias``.

    The latest input and output are cached on the instance; ``derivative``
    needs the cached output.
    """

    def __init__(self, in_features: int, out_features: int, init_weight_fn: Callable = np.random.randn) -> None:
        """Create the layer parameters.

        Args:
            in_features: Size of each input sample.
            out_features: Size of each output sample.
            init_weight_fn: Initializer used for both the weights, of shape
                ``(out_features, in_features)``, and the bias, of shape
                ``(out_features,)``.
        """
        self.in_features = in_features
        self.out_features = out_features

        self.weights = MyWeightTensor(shape=(out_features, in_features), init_weight_fn=init_weight_fn)
        self.bias = MyWeightTensor(shape=out_features, init_weight_fn=init_weight_fn)

        # Filled by __call__; None until the first forward pass.
        self.latest_input = None
        self.latest_output = None

    def __call__(self, tensor: "np.ndarray | MyWeightTensor") -> "MyWeightTensor":
        """Run a forward pass.

        Args:
            tensor: Either one sample of shape ``(in_features,)`` or a batch
                of shape ``(batch_size, in_features)``.

        Returns:
            The layer output, of shape ``(out_features,)`` for one sample or
            ``(batch_size, out_features)`` for a batch.

        Raises:
            ValueError: If ``tensor`` is neither 1-D nor 2-D.
        """
        rank = len(tensor.shape)
        if rank not in (1, 2):
            raise ValueError(f'Expected a 1-D or 2-D input, got shape: {tensor.shape}')

        self.latest_input = tensor
        bias = self.bias.values

        if rank == 2:
            # (out, in) . (in, batch) -> (out, batch). The bias is added as a
            # column so broadcasting applies it to every sample in the batch.
            pre_activation = self.weights * tensor.T + bias.reshape(-1, 1)
        else:
            pre_activation = self.weights * tensor + bias

        # Transpose back so the batch dimension comes first again.
        self.latest_output = pre_activation.T

        return MyWeightTensor(shape=self.latest_output.shape, init_weights=self.latest_output)

    def derivative(self) -> np.ndarray:
        """Derivative of the linear (identity) activation.

        Returns:
            An array of ones with the shape of the latest output.

        Raises:
            AssertionError: If no forward pass has been run yet.
        """
        assert self.latest_output is not None, 'Cannot calculate grad without a single forward pass.'
        # Linear activation: the derivative is 1 everywhere.
        return np.ones(shape=self.latest_output.shape)

## Implement your loss function(s)

In [3]:
# Your code    

## Implement the training loop

In [4]:
# Your code

# Consider the following steps:
# 1) Loop through your training data
#   1.1) Choose the number of epochs (how often do you want to loop through your complete dataset?)
# 2) Forward the data through your network
# 3) Calculate the loss
# 4) Perform backpropagation with SGD and update the weights
#   4.1) Choose a learning rate to update your weights
# Repeat steps 1 to 4 until the training converges or the maximum number of epochs is reached