In [37]:
import numpy as np
import itertools

In [252]:
class tensor:
    def __init__(self, fromArray=np.zeros((2,2)), _children = (), _operation = ''):
        fromArray = fromArray if isinstance(fromArray, np.ndarray) else np.array(fromArray)
        #assert len(fromArray.shape) == 2, "Only 2D Tensors or Scalar to 2D Supported!"
        self.matrix = fromArray
        #self.rows = fromArray.shape[0]
        #self.columns = fromArray.shape[1]
        self.shape = fromArray.shape
        self._prev = set(_children)
        self._operation = _operation
        self._backward = lambda : None
        self.grad = None


    def __repr__(self):
        return f"Tensor Values = {self.matrix}"
    
    @classmethod
    def zeros(cls, shape, dtype = np.float32):
        t = tensor()
        t.matrix = np.zeros(shape, dtype=dtype)
        t.shape = shape
        #t.rows = rows
        #t.columns = columns
        return t
    
    @classmethod
    def random(cls, shape, dtype = np.float32):
        t = tensor()
        t.matrix = (np.random.rand(*shape)).astype(dtype=dtype)
        t.shape = shape
        #t.rows = rows
        #t.columns = columns
        return t
    
    @classmethod
    def const(cls, shape, constant=1, dtype = np.float32):
        t = tensor()
        t.matrix = (np.full(shape, constant)).astype(dtype=dtype)
        t.shape = shape
        #t.rows = rows
        #t.columns = columns
        return t
    
    #Operations
    def __add__(self, other):
        other = self.checkOther(other)
        out_matrix = self.matrix + other.matrix

        def _backward():
            self.grad = np.zeros_like(self.matrix) if self.grad is None else self.grad
            other.grad = np.zeros_like(other.matrix) if other.grad is None else other.grad
            out1 = self.return_unbroadcasted(out)
            out2 = other.return_unbroadcasted(out)
            self.grad += out1 #Derivation in the notes. 
            other.grad += out2
        out = tensor(out_matrix, (self, other), '+')
        out._backward = _backward
        return out
    
    def __radd__(self, other):
        return self + other
    
    def __sub__(self, other):
        other = self.checkOther(other)
        return self + (-1 * other)
    
    
    def __rsub__(self, other):
        other = self.checkOther(other)
        return other + (-1 * other)
    

    def __mul__(self, other):
        other = self.checkOther(other)
        out_matrix = self.matrix * other.matrix
        def _backward():
            self.grad = np.zeros_like(out.grad) if self.grad is None else self.grad
            other.grad = np.zeros_like(out.grad) if other.grad is None else other.grad
            out1 = self.return_unbroadcasted(out)
            out2 = other.return_unbroadcasted(out)
            self.grad += out1* other.matrix #Derivation in the notes. 
            other.grad += out2 * self.matrix

        out = tensor(out_matrix, (self, other), '*')
        out._backward = _backward
        return out
    
    def __rmul__(self, other):
        return self * other

    def __matmul__(self, other):
        other = other if isinstance(other, tensor) else tensor(other)
        assert other.shape[-2] == self.shape[-1], f"Dimensions Unsupported for @, {self.shape} vs {other.shape}"
        out_matrix = self.matrix @ other.matrix
        def _backward():
            self.grad = np.zeros_like(self.matrix, dtype=float) if self.grad is None else self.grad
            other.grad = np.zeros_like(other.matrix, dtype=float) if other.grad is None else other.grad
            self.grad += out.grad @ (other.matrix).transpose()#Derivation in the notes.
            other.grad += (self.matrix).transpose() @ out.grad 
        out = tensor(out_matrix, (self, other), '@')
        out._backward = _backward
        return out
    
    def __rmatmul__(self, other):
        other = other if isinstance(other, tensor) else tensor(other)
        return other @ self
    
    def transpose(self):
        out_matrix = self.matrix.transpose()
        
        def _backward():
            self.grad = np.zeros_like(out.grad.transpose()) if self.grad is None else self.grad
            self.grad += (out.grad).transpose() #Not in note, but can be derived similarly.

        out = tensor(out_matrix, (self, ), 'T')
        out._backward = _backward

        return out
    
    def __pow__(self, N):
        assert isinstance(N, int | float), "Can only power up by scalars!"
        out_matrix = self.matrix ** N

        def _backward():
            self.grad = np.zeros_like(self.matrix) if self.grad is None else self.grad
            out1 = self.return_unbroadcasted(out)
            self.grad += N * (self.matrix ** (N-1)) * out1
        
        out = tensor(out_matrix, _children=(self, ), _operation="**")
        out._backward = _backward
        return out
    
    def __truediv__(self, other):
        other = self.checkOther(other)
        return self * (other**-1)
    
    def __rtruediv__(self, other):
        return other * (self**-1)
    
    def sum(self):
        out_matrix = np.array(([[self.matrix.sum()]]))

        def _backward():
            self.grad = np.zeros_like(self.matrix) if self.grad is None else self.grad
            self.grad += np.ones_like(self.matrix) * out.grad

        out = tensor(out_matrix, _children=(self, ), _operation='sum()')
        out._backward = _backward
        return out

    def mean(self):
        N = np.prod(self.shape)
        out_matrix = np.array(([[self.matrix.sum()/(N)]]))

        def _backward():
            self.grad = np.zeros_like(self.matrix) if self.grad is None else self.grad
            self.grad += np.ones_like(self.matrix) * out.grad / N

        out = tensor(out_matrix, _children=(self, ), _operation='mean()')
        out._backward = _backward
        return out
    
    def ReLU(self):
        out_matrix = np.maximum(0,self.matrix)

        def _backward():
            self.grad = np.zeros_like(self.matrix) if self.grad is None else self.grad
            self.grad += (self.matrix > 0).astype(self.matrix.dtype) * out.grad

        out = tensor(out_matrix, (self, ), "ReLU")
        out._backward = _backward
        return out
    
    #Helper Functions
    #def shape(self):
     #   return (self.rows, self.columns)

    def return_unbroadcasted(self, out):  
        added_axis = []
        stretched_axis = []
        for index, (first_no, second_no) in enumerate(itertools.zip_longest(self.shape, out.shape)):
            if ((first_no) == None):
                added_axis.append(index)
            elif ((first_no == 1) and (second_no > 1)):
                stretched_axis.append(index)
        grad = out.grad
        if stretched_axis:
            grad = np.sum(grad, axis = tuple(stretched_axis), keepdims=True)
        if added_axis:
            grad = np.sum(grad, axis = tuple(added_axis), keepdims=False)
        return grad

    def checkOther(self, other):
        if isinstance(other, int | float):
            other = tensor.const(self.shape, other)
        elif not isinstance(other, tensor):
            other = tensor(other)
        #assert other.shape == self.shape, "Operand Tensor sizes dont match"

        return other
    
    def zero_grad(self):
        self.grad = None
        
    def backward(self):
        self.grad = np.ones_like(self.matrix, dtype=float)
        topo = []
        visited = set()
        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)
        build_topo(self)

        for current in reversed(topo):

            current._backward()

In [275]:
class Layer:
    def __init__(self, in_features, out_features): #Naming inspired by torch :)
        self.weights = tensor.random((in_features, out_features))
        self.bias = tensor.random((1, out_features))
        self.parameters = [self.weights, self.bias]

    def __call__(self, X):
        X = X if isinstance(X, tensor) else tensor(X)
        act = (X @ self.weights + self.bias)
        return act
    
    def parameters(self):
        return self.parameters
    
    def zero_grad(self):
        for param in self.parameters:
            param.zero_grad()


In [291]:
class FCN:
    def __init__(self, input_features, output_features_list):
        sizes = [input_features] + output_features_list
        self.layers = [Layer(sizes[i], sizes[i+1]) for i in range(len(sizes)-1)]
        self.parameters = [layer.parameters for layer in self.layers]
    def __call__(self, X):
        for layer in self.layers:
            X = layer(X)
            #print("X before ReLU: ", X)
            #X = X.ReLU()
            #print("X after ReLU: ", X)
        return X
    def zero_grad(self):
        for layer in self.layers:
            layer.zero_grad()

    def forward(self, Y):
        for parameter in list(itertools.chain.from_iterable(self.parameters)):
            parameter += -0.1 * parameter.grad
    
    def fit(self, X, Y, epochs):
        for _ in range(epochs):
            self.zero_grad()
            Ypredicted = self(X)
            Loss = ((Ypredicted - Y)**2).sum()
            Loss.backward()
            for layer in self.layers:
                layer.weights += (-0.0001 * layer.weights.grad)
                layer.bias += (-0.0001 * layer.bias.grad)
            #print("-----------------------")
        print("Ypredicted = ", Ypredicted)
        print("Loss = ", Loss)
        return Loss





In [292]:
input_features = 3
output_features_list = [4, 5, 6, 1]

nn = FCN(input_features, output_features_list)


In [294]:
X = np.array([[0.0, 1.0, 2.0],
              [1.0, 2.0, 3.0],
              [2.0, 3.0, 4.0],
              [3.0, 4.0, 5.0]], dtype=float)
Y = np.array([[1.0], [2.0], [3.0], [4.0]], dtype=float)

#4x3 . 3x4 = 4x4
#4x4 . 4x5 = 4x5
#4x5 . 5x6 = 4x6
#4x6 . 6x1 = 4x1

nn.fit(X, Y, 500)



  other.grad += out2
  self.grad += out1 #Derivation in the notes.


Ypredicted =  Tensor Values = [[0.96886792]
 [1.98606517]
 [3.00326243]
 [4.02045968]]
Loss =  Tensor Values = [[0.00159263]]


Tensor Values = [[0.00159263]]