# Deep Leaning : 

In [36]:
import numpy as np
from abc import ABC, abstractmethod

Let's start with a simple neural network :

$$ z = x^Tw$$

$$ MSE = (Xw - y )^T(Xw - y)$$

In [458]:
class Tensor(np.ndarray):

    def __new__(cls, input: 'Tensor'):
        if not isinstance(input, np.ndarray): input = np.array(input)
        if input.ndim ==1 : input = input.reshape(1, -1)
        
        obj = np.asanyarray(input).view(cls)

        return obj

    @staticmethod
    def rand(*shape: tuple[int]) -> Tensor:
        return Tensor(np.random.rand(*shape))

    @staticmethod
    def zero(*shape: tuple[int]) -> Tensor:
        return Tensor(np.zeros(shape))
    
    @staticmethod
    def dot(t1: Tensor, t2: Tensor) -> Tensor:
        return Tensor(np.dot(t1, t2))

In [459]:
class Layer(ABC):

    @abstractmethod
    def forward(self, input: Tensor) -> Tensor:
        ... 

    def __call__(self, input: Tensor) -> Tensor:
        return self.forward(input)

    def backward(self):
        pass
    
    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.wparam})"

In [552]:
class Linear(Layer):
    
    def __init__(self, *shape: tuple[int]):

        self.grad: Tensor = Tensor.zero(*shape).T
        self.p_shape: tuple[int] = shape
        self.wparam: Tensor = Tensor.rand(*shape)
        self.parameters = [{'Linear': self.wparam}]
        self.x: Tensor = Tensor.zero(*shape)

    def forward(self, input: Tensor):
        assert isinstance(input, Tensor), f"input must be a Tensor, not a {input.__class__.__name__}."
        self.x = input
        return Tensor.dot(input, self.wparam)

Linear is extend from Layer class and has the following formula : 

$$ L(x) = x^Tw$$

and also support matrix : 

$$ L(X) = X^Tw$$

In [553]:
def set_params(fun):
    def wrapper(self):
        
        self.parameters += list(map(lambda key: self.__dict__[key], filter(lambda key: isinstance(self.__dict__[key], Layer), self.__dict__.keys())))
        
        return fun(self)
    return wrapper

In [554]:
class Module:
    
    def __init__(self):
        self.parameters: list[Layer]  = []
        
        self.l1 = Linear(2, 1)

        self.set_params()

    def set_params(self):
        self.parameters += list(map(lambda key: self.__dict__[key], filter(lambda key: isinstance(self.__dict__[key], Layer), self.__dict__.keys())))
    
    def forward(self, input: Tensor) -> Tensor:
        return self.l1(input)

    def __call__(self, input: Tensor) -> Tensor:
        return self.forward(input)

In [555]:
class Optimizer:

    def __init__(self, parameters: list[Layer], lr=0.01):
        self.parameters = parameters 
        self.lr = lr 

    def zero_grad(self):
        for param in self.parameters:
            param.grad = Tensor.zero(*param.p_shape)

    def step(self):
        for param in self.parameters:
            param.wparam -= self.lr * param.grad

In [556]:
class MSELoss:

    def __init__(self, parameters: list[Layer]):
        self.parameters = parameters

    def backward(self, y_pred: Tensor, y: Tensor):
        last = self.parameters[0]
        last.grad = (y_pred - y)*last.x
        print(last.grad.shape)
        last.backward()

In [557]:
data = np.loadtxt("./data.csv")

In [558]:
X, y = data[:, :-1], data[:, -1]

In [559]:
X, y = Tensor(X), Tensor(y).reshape(-1, 1)

In [560]:
model = Module()

In [561]:
model.parameters

[Linear([[0.7160776 ]
  [0.60862983]])]

In [562]:
model(Tensor([1, 2]))

Tensor([[1.93333726]])

In [563]:
opt = Optimizer(model.parameters)

In [564]:
mse = MSELoss(model.parameters)

In [565]:
for i in range(1):

    for xi, yi in zip(X, y):

        opt.zero_grad()
        
        y_pred = model(Tensor(xi))
        
        mse.backward(y_pred, Tensor(yi))

        opt.step()

(1, 2)


ValueError: non-broadcastable output operand with shape (2,1) doesn't match the broadcast shape (2,2)

In [499]:
model.parameters

[Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
 Linear([[0.25720553]
  [0.87054233]]),
