In [27]:
import numpy as np
from typing import Optional, Tuple, Union
from collections import defaultdict

class Layer:
    
    __names = defaultdict(int)
    __rng   = np.random.default_rng()
    
    def __init__(self, name: str | None = None):
        name = name or type(self).__name__
        self._name = name + (str(Layer.__names[name]) if name in Layer.__names else '')
        Layer.__names[name] += 1
        
        self._shape = None
        self._weights = {}        
        
    @property
    def name(self) -> str:
        return self._name
    
    @property
    def shape(self) -> tuple[int | None, ...] | None:
        return (None, *self._shape) if self._shape else None
    
    @property
    def weights(self) -> tuple[np.ndarray]:
        return tuple(self._weights.values())
        
    def __call__(self, x: np.ndarray) -> np.ndarray:
        raise NotImplementedError()
        
    def __getattr__(self, name: str):
        value = self._weights.get(name, None)
        if value is not None:
            return value
        
        raise AttributeError()
        
    def add_weight(self, shape: tuple[int, ...], name: str) -> np.ndarray:
        if name in self._weights:
            raise ValueError(f'Weight \'{name}\' already exists')

        #weight = Layer.__rng.normal(scale=np.sqrt(2 / np.sum(shape)), size=shape)
        weight = Layer.__rng.uniform(low=-1, high=1, size=shape)
        self._weights[name] = weight
        return weight
    
    def grad(self, grad_output: np.ndarray) -> tuple[np.ndarray, tuple[np.ndarray, ...]]:
        raise NotImplementedError()
        
    def __repr__(self) -> str:
        result = f'{self.name} ({type(self).__name__})'
        
        if self.shape is not None:
            result += f' Shape: {self.shape}'
            
        weights = self._weights
        if weights is not None:
            result += f' Parameters: [{", ".join(f"{name}: {param.shape}" for name, param in weights.items())}]'
            
        return result

class Dense(Layer):
    """Implements fully-connected layer"""

    def __init__(self, n_in: int, n_out: int, use_bias: bool = True, name: str | None = None):
        """Initializes Dense layer.
        The weights are initialized using uniformly distributed values in range [-1, 1]. Bias vector is not initialized if `use_bias` is False.
        Weights matrix has the shape (`n_in`, `n_out`), bias vector has the shape (`n_out`, ).

        Arguments:
            n_in: Positive integer, dimensionality of input space.
            n_out: Positive integer, dimensionality of output space.
            use_bias: Whether the layer uses a bias vector.
            name: Optional base name forwarded to `Layer`; the class name is used when omitted."""
        super().__init__(name)

        self._n_in = n_in
        self._n_out = n_out
        self._use_bias = use_bias
        self.add_weight((self._n_in, self._n_out), name='w')
        if self._use_bias:
            self.add_weight((self._n_out,), name='b')

    @property
    def weights(self) -> tuple[np.ndarray, np.ndarray] | tuple[np.ndarray]:
        """Returns weights used by the layer."""
        return tuple(self._weights.values())

    @property
    def input(self) -> np.ndarray:
        """Returns the last input received by the layer

        Raises:
            AttributeError: If the layer has not been called yet."""
        return self._cached_input()

    def _cached_input(self) -> np.ndarray:
        """Returns the input stored by the last forward pass, or raises a descriptive AttributeError."""
        # `input_x` only exists after `__call__`; look it up in the instance
        # dict so that a missing value yields a readable error message.
        if 'input_x' not in self.__dict__:
            raise AttributeError(f'Layer \'{self.name}\' has no cached input: call the layer before using `input` or `grad`')
        return self.__dict__['input_x']

    def __call__(self, x: np.ndarray) -> np.ndarray:
        """Performs the layer forward pass.

        The input is kept in `input_x` (it is needed by `grad`) and the result in `x`.

        Arguments:
            x: Input array of shape (`batch_size`, `n_in`)

        Returns:
            An array of shape (`batch_size`, `n_out`)"""
        self.input_x = x

        # The matrix product allocates a new array, so adding the bias in place
        # does not modify the caller's input.
        self.x = x @ self.w
        if self._use_bias:
            self.x += self.b

        return self.x

    def grad(self, gradOutput: np.ndarray) -> tuple[np.ndarray, tuple[np.ndarray, np.ndarray]] | tuple[np.ndarray, tuple[np.ndarray]]:
        """Computes layer gradients

        Uses the input cached by the last forward pass.

        Arguments:
            gradOutput: Gradient of loss function with respect to the layer output, an array of shape (`batch_size`, `n_out`).

        Returns:
            A tuple object:
                Gradient of loss function with respect to the layer input, an array of shape (`batch_size`, `n_in`)
                Gradient of loss function with respect to the layer's weights:
                    An array of shape (`n_in`, `n_out`).
                    Optional array of shape (`n_out`, ).

        Raises:
            AttributeError: If the layer has not been called yet."""
        layer_input = self._cached_input()

        grad_input = gradOutput @ self.w.T
        grad_w = layer_input.T @ gradOutput

        if self._use_bias:
            # The bias is added to every sample, so its gradient is summed over the batch axis.
            grad_b = np.sum(gradOutput, axis=0)
            return grad_input, (grad_w, grad_b)
        return grad_input, (grad_w, )

In [28]:
# Smoke test: feed a random 5x5 batch through a 5 -> 10 Dense layer, then run
# the backward pass. The layer output is reused as the upstream gradient only
# to exercise `grad`.
rng = np.random.default_rng()
a = rng.random((5, 5))

dense1 = Dense(n_in=5, n_out=10)
res1 = dense1(a)
grad_out1, (grad_W1, grad_b1) = dense1.grad(gradOutput=res1)

In [29]:
# Display the layer parameters: a tuple holding the `w` matrix followed by the `b` vector.
dense1.weights

(array([[-0.31307034, -0.65816368, -0.34761087, -0.5261666 , -0.96249171,
         -0.68752564,  0.4110944 ,  0.82518695,  0.56189961,  0.46163682],
        [ 0.1929603 , -0.34407455, -0.59018401, -0.18447632, -0.2356574 ,
         -0.13370136,  0.40121672, -0.04128237, -0.55498924,  0.52064579],
        [ 0.65838   ,  0.23204704,  0.48863681, -0.98855984, -0.28413412,
          0.21735185, -0.26960781, -0.18722475, -0.09142955, -0.4084435 ],
        [-0.80885521, -0.06120415,  0.38651936, -0.28550535, -0.08657384,
          0.35139411, -0.63480911, -0.03601418,  0.58593524, -0.6502677 ],
        [ 0.09998344,  0.27938554,  0.90134881, -0.30095014, -0.40918808,
         -0.42236619,  0.79318277,  0.34594806,  0.93682432, -0.85972423]]),
 array([ 0.57577025, -0.72633312, -0.03534729, -0.79564648,  0.32756921,
        -0.23802737,  0.29188115,  0.29920995,  0.05604012,  0.63504642]))