In [40]:
import numpy as np
np.random.seed(42)

In [35]:
import numpy as np
np.random.seed(42)


class Linear():
    def __init__(self, in_dim: int, out_dim: int):
        """
        in_dim : explanatory_variable_dim_
        out_dim : target_variable_dim_
        dw : parameter_gradient_
        db : bias_gradient_
        """
        self.in_dim = in_dim
        self.out_dim = out_dim
        
        self.weight = np.random.randn(out_dim, in_dim)
        self.bias = np.zeros(out_dim, dtype=float)

        self.dx , self.dw, self.db = None, None, None

    def __call__(self, x: np.ndarray) -> np.ndarray:
        """ forward_propagation
        x : input_data_ (batch_size, in_dim)
        output : output_data_ (batch_size, out_dim)
        """
        self.x = x
        # Affine
        output = np.dot(self.x, self.weight.T) + self.bias
        self.param = {'w' : self.weight, 'b' : self.bias}
        return  output

    def backward(self, grad: np.ndarray) -> np.ndarray:
        """ back_propagation
        grad : previous_gradient_ (batch_size, out_dim)
        dx : gradient_ (batch_size, in_dim)
        """
        # transpose x_shape
        if self.x.ndim == 2:
            x_T = self.x.T
        if self.x.ndim == 3:
            x_T = np.transpose(self.x, (0, 2, 1))
        if self.x.ndim == 4:
            x_T = np.transpose(self.x, (0, 1, 3, 2))
        # calculate gradient
        dx = np.dot(grad, self.weight)
        dw = np.dot(x_T, grad)
        db = np.sum(self.bias)
        self.grad_param = {'w' : dw, 'b' : db}
        return dx

In [36]:
batch, channel, indim, outdim = np.arange(2,6)
# create_data
x = np.random.randn(batch, channel, indim)

# forward_propagation
affine = Linear(indim, outdim)
out = affine(x)
print('-------------  Affine_output  ------------\n',out)
print()
# parameters
print('----------------  params  ----------------\n', affine.param)
# demo_grad
grad = np.random.randn(batch, channel, outdim)
#back_propagation
dx = affine.backward(grad)
print('----------------  grad  -------------\n', dx)
print()
# grad_parameters
print('---------------params_grad-----------\n',affine.grad_param)

-------------  Affine_output  ------------
 [[[-0.45902529  2.1733416  -1.18708902 -0.18572742 -0.1903813 ]
  [-1.42784213  0.68021768  0.61286529 -1.5364852  -0.62674877]
  [ 0.67417295 -0.46009647 -0.38013374 -0.63747404 -0.05988715]]

 [[ 1.43017296  0.40914153  1.28807438  5.98025443  0.21959614]
  [ 1.10075507 -1.55293367  0.65859899  0.1006324  -0.16382862]
  [-1.43591716 -3.49412997  2.01396363  0.37840003  1.46482916]]]

----------------  params  ----------------
 {'w': array([[-0.54438272,  0.11092259, -1.15099358,  0.37569802],
       [-0.60063869, -0.29169375, -0.60170661,  1.85227818],
       [-0.01349722, -1.05771093,  0.82254491, -1.22084365],
       [ 0.2088636 , -1.95967012, -1.32818605,  0.19686124],
       [ 0.73846658,  0.17136828, -0.11564828, -0.3011037 ]]), 'b': array([0., 0., 0., 0., 0.])}
----------------  grad  -------------
 [[[ 1.71801043 -1.47953034  0.3122144  -1.22181994]
  [ 1.08062691  1.54857722  2.345836    0.09061054]
  [-0.92925033  1.39308439 -2.064

In [37]:
import torch
import torch.nn as nn

# self_made module
x = np.random.randn(2,3,3,4)
affine = Linear(4,6)
# torch.nn.Linear
xt = torch.tensor(x).float()
linear = nn.Linear(4,6)
# overwrite nn.Linear params with self_made module params
weight = linear.weight.detach().numpy().copy()
affine.weight = weight
bias = linear.bias.detach().numpy().copy()
affine.bias = bias
# output with self_made module
y = affine(x)
# output with nn.Linear
yt = linear(xt).detach().numpy().copy()
print(y[0][0][0][0])
print(yt[0][0][0][0])
np.round(y[0][0], decimals=3) == np.round(yt[0][0], decimals=3)

0.2805776394511505
0.28057763


  from .autonotebook import tqdm as notebook_tqdm


array([[False, False, False, False, False, False],
       [False, False, False, False, False, False],
       [False, False, False, False, False, False]])