In [10]:
import math

from torch import empty , cat , arange
from torch.nn.functional import fold , unfold

kernel = empty((5,5)).normal_()
print(kernel.view(1,-1))
print(kernel)
print(kernel.flip((0,1)))

tensor([[-0.4397,  0.2831,  1.5650, -0.5245, -2.0008,  2.2580, -0.0792, -0.3956,
         -0.0845,  0.1205,  1.9671,  1.1400,  0.6991,  1.2292,  0.2043, -0.0662,
         -0.2540, -1.3839,  0.3893,  0.4615,  0.6654,  0.5382,  0.5478,  1.1590,
         -0.8607]])
tensor([[-0.4397,  0.2831,  1.5650, -0.5245, -2.0008],
        [ 2.2580, -0.0792, -0.3956, -0.0845,  0.1205],
        [ 1.9671,  1.1400,  0.6991,  1.2292,  0.2043],
        [-0.0662, -0.2540, -1.3839,  0.3893,  0.4615],
        [ 0.6654,  0.5382,  0.5478,  1.1590, -0.8607]])
tensor([[-0.8607,  1.1590,  0.5478,  0.5382,  0.6654],
        [ 0.4615,  0.3893, -1.3839, -0.2540, -0.0662],
        [ 0.2043,  1.2292,  0.6991,  1.1400,  1.9671],
        [ 0.1205, -0.0845, -0.3956, -0.0792,  2.2580],
        [-2.0008, -0.5245,  1.5650,  0.2831, -0.4397]])


In [11]:
kernel = empty((3,3)).normal_()
print(kernel)
print(kernel.flip(0,1))

tensor([[ 0.2661, -0.5691, -0.1831],
        [-0.8653, -0.5603, -0.5318],
        [-0.2112,  0.7639, -0.5779]])
tensor([[-0.5779,  0.7639, -0.2112],
        [-0.5318, -0.5603, -0.8653],
        [-0.1831, -0.5691,  0.2661]])


In [85]:
class relu(object) :
    def __init__(self):
        pass
    def forward(self, input) :
        self.input = input
        self.positif_mask = (input > 0)
        return self.positif_mask*(input)
    def backward(self, gradwrtoutput) :
        self.input.grad = self.positif_mask.int()*gradwrtoutput
        return self.input.grad
    def param(self) :
        return []

class sigmoid(object) :
    def forward(self, input) :
        self.input = input
        self.output = 1/(1 + math.e**(-input))
        return  self.output
    def backward(self, gradwrtoutput ) :
        self.input.grad = self.output * (1-self.output) * gradwrtoutput
        return self.input.grad
    def param(self) :
        return []

class convolution(object):
    def __init__(self, in_ch, out_ch, kernel_size = (3,3), padding = 0, stride = 1, use_bias = False):
        self.in_ch = in_ch
        self.out_ch = out_ch
        self.kernel_size = kernel_size
        self.k_1 = self.kernel_size[0]
        self.k_2 = self.kernel_size[1]
        self.use_bias = use_bias
        self.stride = stride
        self.padding = padding
        self.kernel = empty(out_ch, in_ch, self.k_1, self.k_2).normal_()
        self.bias = empty(out_ch).normal_() if use_bias else 0 *empty(out_ch)
        
    def forward(self, x):   
        self.x = x
        self.batch_size = x.size(0)
        X_unf = unfold(x, kernel_size=(self.k_1, self.k_2), padding = self.padding, stride = self.stride)
        K_expand = self.kernel.view(self.out_ch, -1)
        O_expand = K_expand @ X_unf
        s1 = math.ceil((x.size(-2)-self.k_1+1+self.padding*2)/(self.stride))
        s2 = math.ceil((x.size(-1)-self.k_2+1+self.padding*2)/(self.stride))

        O = O_expand.view(self.batch_size, self.out_ch, s1, s2)
        return O + self.bias.view(1, -1, 1, 1) if self.use_bias else O
    
    def backward(self, gradwrtoutput):
        kernel_back = self.kernel.flip(-2, -1).transpose(0,1)
        s1 = self.x.size(-2)
        s2 = self.x.size(-1)
        
        # backward wrt input
        M = self.get_M(s1-self.k_1 + 1 + self.padding*2)
        dL_dO = (M.transpose(0,1) @ gradwrtoutput) @ M

        dL_dO_unf = unfold(dL_dO, kernel_size=(k_1, k_2), padding = (k_1 - 1 - self.padding, k_2-1- self.padding), stride = 1)
        dO_dX_exp = kernel_back.reshape(self.in_ch, -1)
        dL_dX_exp = dO_dX_exp @ dL_dO_unf
        dL_dX = dL_dX_exp.view(self.batch_size, self.in_ch, s1, s2)
        
        self.dL_dO = dL_dO.transpose(0,1) # K
        self.dO_dF = self.x.view(self.in_ch, self.batch_size, s1, s2).transpose(0,1) # X
        
        # backward wrt weights
        dL_dO_unf_F = self.dL_dO.reshape(self.out_ch, -1)
        dO_dF_exp = unfold(self.dO_dF, kernel_size = (s1 - self.k_1 +1 + self.padding*2, s2 - self.k_2 +1 + self.padding*2), padding = self.padding, stride = 1)
        dL_dF_exp = dL_dO_unf_F @ dO_dF_exp
        dL_dF = dL_dF_exp.transpose(0,1).view(self.kernel.size())
        
        # backward wrt bias
        if self.use_bias:
            dL_dO_exp = self.dL_dO.reshape(self.out_ch, -1)
            dO_dB_exp = torch.ones(self.batch_size * (s1 - self.k_1 +1 + self.padding*2) * (s2 - self.k_2 +1 + self.padding*2))
            dL_dB = dL_dO_exp @ dO_dB_exp
        else:
            dL_dB = None
        return dL_dX, dL_dF, dL_dB
    
    def get_M(self, N):
        diag = empty(N)
        eye_N = (diag == diag).float().diag()
        return eye_N[range(0,N,self.stride)]
        
    def param(self) :
        return [self.kernel, self.bias]
    
class mse(object):
    def forward(self, input, target):
        self.input = input
        self.target = target
        return (input - target).pow(2).mean()
    def backward(self, gradwrtoutput):
        self.input.grad = 2*(self.input-self.target)/(self.input.size(-3)*self.input.size(-2)*self.input.size(-1))

In [86]:
input = empty((1,1,5,5)).normal_()
test = relu()
print(test.forward(input))
print(test.backward(input))

tensor([[[[0.6332, 1.3556, 0.9980, 1.1980, 0.0522],
          [-0.0000, 0.8747, -0.0000, 1.3845, -0.0000],
          [0.7899, -0.0000, 0.0772, 0.7219, -0.0000],
          [0.2408, 0.6669, -0.0000, -0.0000, -0.0000],
          [2.5453, -0.0000, 1.1522, 0.0190, 1.1587]]]])
tensor([[[[0.6332, 1.3556, 0.9980, 1.1980, 0.0522],
          [-0.0000, 0.8747, -0.0000, 1.3845, -0.0000],
          [0.7899, -0.0000, 0.0772, 0.7219, -0.0000],
          [0.2408, 0.6669, -0.0000, -0.0000, -0.0000],
          [2.5453, -0.0000, 1.1522, 0.0190, 1.1587]]]])


In [83]:
test = sigmoid()
print(test.forward(input))
print(test.backward(input))

tensor([[[[0.5825, 0.4077, 0.9503, 0.4043, 0.5982],
          [0.5541, 0.2446, 0.5082, 0.6013, 0.5563],
          [0.5330, 0.8787, 0.3567, 0.6566, 0.7053],
          [0.6171, 0.7591, 0.4651, 0.6924, 0.7866],
          [0.8103, 0.4861, 0.5083, 0.4181, 0.6746]]]])
tensor([[[[ 0.0810, -0.0902,  0.1393, -0.0933,  0.0956],
          [ 0.0537, -0.2084,  0.0082,  0.0985,  0.0558],
          [ 0.0329,  0.2111, -0.1353,  0.1461,  0.1814],
          [ 0.1127,  0.2099, -0.0348,  0.1728,  0.2190],
          [ 0.2232, -0.0138,  0.0083, -0.0804,  0.1601]]]])


In [84]:
test = convolution(1,3)
print(test.forward(input))
print(test.backward(input))

tensor([[[[ 6.1688,  0.8555,  2.7351],
          [ 7.6526,  0.7513, -0.9575],
          [ 5.2676,  3.7301, -1.1703]],

         [[ 0.8378, -2.5154,  1.2878],
          [ 4.1992, -3.3236,  0.8735],
          [ 0.7682,  0.3650,  1.6813]],

         [[ 0.1420,  7.7649,  9.1296],
          [ 1.4210,  3.6245,  3.5923],
          [ 7.8587,  4.5758,  1.2232]]]])
tensor([[1, 0, 0],
        [0, 1, 0],
        [0, 0, 1]], dtype=torch.int32)


RuntimeError: mat1 and mat2 shapes cannot be multiplied (5x5 and 3x3)

In [58]:
test = empty(5).normal_().diag()

In [59]:
print(test)

tensor([[-0.0959,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  1.1059,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000, -1.5449,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000, -0.5452,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000, -0.4348]])
