In [158]:
class Module(object):
    
     def forward(self,*input):
        raise NotImplementedError
    
     def backward(self,*gradwrtoutput):
        raise NotImplementedError
    
     def param(self):
        return []

In [159]:
class Sequential(Module):
    
    def __init__(self, modules):
        self.modules = modules
        self.input = None
    
    def forward(self, input):
        self.input = input
        output = self.input
        
        for module in self.modules:
            output = module.forward(output)
        return output
    
    def backward(self,*gradwrtouput):
        gradient = gradwrtouput
        for module in reversed(self.modules):
            gradient = module.backward(gradient)
        self.input = None
        return gradient
    
    def param(self):
        params = []
        for module in self.modules:
            params.append(module.param())
        return params
            

In [160]:
class Optimizer(object):
    def step(self):
        return NotImplementedError
    
    def zero_grad(self):
        return NotImplementedError

In [223]:
class SGD(Optimizer):
    
    def __init__(self,params,lr, mu = 0, tau = 0):
        self.params = params
        self.lr = lr
        
        # parameters in order to add momemtum 
        self.momemtum = mu
        self.dampening = tau
        self.state_momemtum = None
        
    def step(self):
        for x, grad in self.params:
            x.add_(-self.lr * grad) 
    
    def zero_grad(self):
        for x, grad in self.params:
            grad = grad.zero_()  

In [201]:
class MSE(Module):
    
    def forward(self,input,target):
        self.input = input
        self.target = target
        return (self.input - self.target).pow(2).mean() 
    
    def backward(self):
        return 2*(self.input - self.target).div(torch.tensor(self.input.size(0)))
        ## we divide by the batch size as in Pytorch

In [285]:
#Check MSE loss function
mse = MSE()
input = torch.normal(mean=torch.zeros(100,3,10),std = 1)
target = torch.normal(mean=torch.zeros(100,3,10),std = 1)
loss = mse.forward(input,target)
print(loss)
mse_ = torch.nn.MSELoss()
loss_ = mse_.forward(input,target)
print(loss_)

print(mse.backward().size())

tensor(1.9524)
tensor(1.9524)
torch.Size([100, 3, 10])


In [209]:
class Sigmoid(Module):
    
    def forward(self,input):
        self.input = input
        self.sigmoid = 1./(1+(-self.input).exp())
        return  self.sigmoid
    
    def backward(self,*gradwrtouput):
        return gradwrtouput*self.sigmoid*(1-self.sigmoid)
    

In [213]:
input = torch.normal(mean=torch.zeros(100,3,38,38),std = 1)

sig = Sigmoid()
sig_ = torch.nn.Sigmoid()

print(torch.allclose(sig.forward(input),sig_.forward(input)))

True


In [217]:
class ReLU(Module):
    def forward(self, input):
        self.input = input
        return (self.input>0.)*self.input
    
    def backward(self, *gradwrtouput):
        return gradwrtouput*(self.input>=0.)

In [284]:
input = torch.normal(mean=torch.zeros(100,3,380,380),std = 1)

re = ReLU()
re_ = torch.nn.ReLU()

print(torch.allclose(sig.forward(input),sig_.forward(input)))

True


In [2]:
import torch 

In [144]:
def nearest_upsampling(input, scale_factor):
    if isinstance(scale_factor,int):
        scale1 ,scale2 = scale_factor,scale_factor
    if isinstance(scale_factor,tuple):
        scale1 ,scale2 = scale_factor[0],scale_factor[1]
    
    N, C, H, W = tuple(input.size())
    output = torch.empty(N,C,scale1*H,scale2*W)
    
    for i in range(N):
        output[i] = torch.nn.functional.fold(input[i].view(C,1,-1).repeat(1,scale1*scale2,1), 
                                             output_size=(H*scale1,W*scale2), kernel_size = (scale1,scale2), 
                                             stride = (scale1,scale2) ).view(C,H*scale1,W*scale2)
    return output

In [232]:
class NearestUpsampling(Module):
        
    def __init__(self, scale_factor: None ):
        self.scale_factor = scale_factor
        
    
    def forward(self, input):
        self.input = input
        
        if (len(input.size()) == 4) :
            return nearest_upsampling(self.input, self.scale_factor)

        
    
    def backward(self,*gradwrtouput):
        return 3
    
    

In [283]:
input = torch.normal(mean = torch.zeros((100,3,280, 280)),std = 1)

m = torch.nn.Upsample(scale_factor=2, mode='nearest')
mm = m(input)

m_ = NearestUpsampling(2)
mm_ = m_.forward(input)

print(torch.allclose(mm,mm_))


True


In [963]:
from torch.nn.functional import fold, unfold
from torch import Tensor, empty, einsum


def conv2d_(input: Tensor, weight: Tensor, bias=None, stride=1, padding=0, dilation=1, groups=1) -> Tensor:
    # input is 4d tensor
    
    if isinstance(stride, int):
        stride = (stride, stride)
    if isinstance(padding, int):
        padding = (padding, padding)
    if isinstance(dilation, int):
        dilation = (dilation, dilation)
    
    N = input.size(0)
    H_in = input.size(-2)
    W_in = input.size(-1)
    
    kernel_size = (weight.size(-2), weight.size(-1))
    C_out = weight.size(0)
    H_out = int((H_in + 2 * padding[0] - dilation[0] * (kernel_size[0] - 1) - 1) / stride[0] + 1)
    W_out = int((W_in + 2 * padding[1] - dilation[1] * (kernel_size[1] - 1) - 1) / stride[1] + 1)
        
    unfolded = unfold(input, kernel_size=kernel_size, dilation=dilation, stride=stride, padding=padding)
    
    #wxb  = empty(N, C_out, unfolded.size(2))
    #for ind, unfdd in enumerate(unfolded):
    #    wxb[ind] =  weight.view(C_out, -1) @ unfdd + bias.view(-1,1)
    
    #wxb = einsum('nij,njk->nik', weight.view(1, C_out, -1).repeat(N, 1, 1), unfolded) + bias.view(1, -1, 1).repeat(N,1,1)
    
    wxb = weight.view(1, C_out, -1).repeat(N, 1, 1).matmul(unfolded) + bias.view(1, -1, 1).repeat(N,1,1)
    
    return wxb.view(N, C_out, H_out, W_out)

#FALSE METHOD
def grad_conv2d_weight(self,gradwrtouput):

        N = self.input.shape[0]

        grad_ = gradwrtouput.contiguous().repeat(1, self.in_channel // self.groups, 1,
                                                      1)
        grad_ = grad_.contiguous().view(
            grad_.shape[0] * grad_.shape[1], 1, grad_.shape[2],
            grad_.shape[3])

        input = self.input.contiguous().view(1, self.input.shape[0] * self.input.shape[1],
                                        self.input.shape[2], self.input.shape[3])

        grad_weight = self.conv2d(input, grad_, None, self.dilation, self.padding,
                                   self.stride, self.in_channel * N)

        grad_weight = grad_weight.contiguous().view(
            min_batch, grad_weight.shape[1] // min_batch, grad_weight.shape[2],
            grad_weight.shape[3])

        return grad_weight.sum(dim=0).view(
            self.in_channel // self.groups, self.out_channel,
            grad_weight.shape[2], grad_weight.shape[3]).transpose(0, 1).narrow(
                2, 0, self.kernel[2]).narrow(3, 0, self.kernel[3])
    

    
        @staticmethod
    def conv2d(input: Tensor, weight: Tensor, bias=None, stride=1, padding=0, dilation=1, groups=1) -> Tensor:
        # input is 4d tensor

        if isinstance(stride, int):
            stride = (stride, stride)
        if isinstance(padding, int):
            padding = (padding, padding)
        if isinstance(dilation, int):
            dilation = (dilation, dilation)

        N = input.size(0)
        H_in = input.size(-2)
        W_in = input.size(-1)

        kernel_size = (weight.size(-2), weight.size(-1))
        C_out = weight.size(0)
        H_out = int((H_in + 2 * padding[0] - dilation[0] * (kernel_size[0] - 1) - 1) / stride[0] + 1)
        W_out = int((W_in + 2 * padding[1] - dilation[1] * (kernel_size[1] - 1) - 1) / stride[1] + 1)

        unfolded = unfold(input, kernel_size=kernel_size, dilation=dilation, stride=stride, padding=padding)
        
        print(unfolded.size(),weight.view(1, C_out, -1).repeat(N, 1, 1).size())
        
        if bias != None:
            wxb = weight.view(1, C_out, -1).repeat(N, 1, 1).matmul(unfolded.repeat_interleave(groups,1)) #+ ...
            #bias.view(1, -1, 1).repeat(N,1,1)
        else: 
            wxb = weight.view(1, C_out, -1).repeat(N, 1, 1).matmul(unfolded)
        #print(wxb.size())
        return wxb.view(N, C_out, H_out, W_out)



    def conv2d(input: Tensor, weight: Tensor, bias=None, stride=1, padding=0, dilation=1, groups=1) -> Tensor:
        # input is 4d tensor
        

        if isinstance(stride, int):
            stride = (stride, stride)
        if isinstance(padding, int):
            padding = (padding, padding)
        if isinstance(dilation, int):
            dilation = (dilation, dilation)

        N = input.size(0)
        C_in = input.size(1)
        H_in = input.size(-2)
        W_in = input.size(-1)
        
        
        kernel_size = (weight.size(-2), weight.size(-1))
        C_out = weight.size(0)
        H_out = int((H_in + 2 * padding[0] - dilation[0] * (kernel_size[0] - 1) - 1) / stride[0] + 1)
        W_out = int((W_in + 2 * padding[1] - dilation[1] * (kernel_size[1] - 1) - 1) / stride[1] + 1)
        
        #weight = weight.repeat_interleave(groups,1)
        
        inp_unf = unfold(input,kernel,stride,padding,dilation)
        print("weight", weight.view(C_out, -1).t().size())
        print("inp_", inp_unf.transpose(1, 2).size())
        if bias != None:
            out_unf = inp_unf.transpose(1, 2).matmul(weight.view(C_out, -1).t()).transpose(1, 2).add(bias.view(1, -1, 1).repeat(N,1,H_out*W_out))
        else:
            out_unf = inp_unf.transpose(1, 2).matmul(weight.view(C_out, -1).t()).transpose(1, 2)
            
        out = fold(out_unf, (H_out,W_out), (1,1), dilation, padding, stride)
        
        return out



IndentationError: unindent does not match any outer indentation level (<tokenize>, line 65)

In [1185]:
from torch.nn.functional import fold, unfold
from torch import Tensor


class Conv2d(Module):
    
    def __init__(self,in_channel, out_channel, kernel_size = (2,2),stride=1, padding=0, dilation=1, groups=1,weight = None, bias = None):
        if isinstance(stride, int):
            self.stride = (stride, stride)
        else:
            self.stride = stride
        if isinstance(padding, int):
            self.padding = (padding, padding)
        else: 
            self.padding = padding
        if isinstance(dilation, int):
            self.dilation = (dilation, dilation)
        else:
            self.dilation = dilation
        if isinstance(kernel_size, int):
            self.kernel = (kernel_size, kernel_size)
        else:
            self.kernel = kernel_size
        
        self.groups = groups
        self.in_channel = in_channel
        self.out_channel = out_channel
        
        k = self.groups/(self.in_channel*self.kernel.prod())
        if weight == None:
            self.weight = torch.empty((self.out_channel,self.in_channel// self.groups,
                                       self.kernel[0],self.kernel[1])).uniform_(-k,k)
        else:
            self.weight = weight
        if bias == None:
            self.bias = torch.empty(self.out_channel).uniform_(-k,k)
        else:
            self.bias = bias
        
        self.weight_grad = torch.empty((self.kernel[0],self.kernel[1]))
        self.bias_grad = torch.empty(self.out_channel)
        
    @staticmethod
    def conv2d(input: Tensor, weight: Tensor, bias=None, stride=1, padding=0, dilation=1, groups=1) -> Tensor:
        # input is 4d tensor
        

        if isinstance(stride, int):
            stride = (stride, stride)
        if isinstance(padding, int):
            padding = (padding, padding)
        if isinstance(dilation, int):
            dilation = (dilation, dilation)

        N = input.size(0)
        C_in = input.size(1)
        H_in = input.size(-2)
        W_in = input.size(-1)
        
        
        kernel_size = (weight.size(-2), weight.size(-1))
        C_out = weight.size(0)
        H_out = int((H_in + 2 * padding[0] - dilation[0] * (kernel_size[0] - 1) - 1) / stride[0] + 1)
        W_out = int((W_in + 2 * padding[1] - dilation[1] * (kernel_size[1] - 1) - 1) / stride[1] + 1)
        
        inp_unf = unfold(input,kernel_size,dilation,padding,stride)
        if bias != None:
            out_unf = inp_unf.transpose(1, 2).matmul(weight.view(C_out, -1).t()).transpose(1, 2).add(bias.view(1, -1, 1).repeat(N,1,H_out*W_out))
        else:
            out_unf = inp_unf.transpose(1, 2).matmul(weight.view(C_out, -1).t()).transpose(1, 2)
        return out_unf.view(N,C_out,H_out,W_out)


    def forward(self, input):
        self.input = input
        
        if (len(input.size()) == 4) :
            return self.conv2d(self.input,weight = self.weight, bias = self.bias,stride =self.stride,
                                  padding = self.padding, dilation = self.dilation,groups = self.groups)
        
    
    def grad_conv2d_weight(self,input, weight_size, grad_output, stride=1, padding=0, dilation=1, groups=1):

        if isinstance(stride, int):
            stride = (stride, stride)
        if isinstance(padding, int):
            padding = (padding, padding)
        if isinstance(dilation, int):
            dilation = (dilation, dilation)

        N = input.size(0)
        C_in = input.size(1)
        H_in = input.size(-2)
        W_in = input.size(-1)
        
        kernel_size = (weight_size[2], weight_size[3])
        C_out = weight_size[0]
        H_out = int((H_in + 2 * padding[0] - dilation[0] * (kernel_size[0] - 1) - 1) / stride[0] + 1)
        W_out = int((W_in + 2 * padding[1] - dilation[1] * (kernel_size[1] - 1) - 1) / stride[1] + 1)

        grad_w = torch.empty(weight_size)

        for i in range(grad_w.size(0)): 
            for j in range(input.size(1)): 
                grad_w[i,j,:,:] = self.conv2d(self.input[:,j,:,:].view(N,1,H_in,W_in),
                                              weight = grad_output[i,:,:].view(1,1,H_out,W_out), 
                                              bias = None,stride =self.dilation,padding = self.padding, 
                                              dilation = self.stride,groups = self.groups).narrow(2, 0, 
                                              weight_size[-2]).narrow(3, 0, weight_size[-1]).sum(dim=0)
        
        return grad_w
        
    def backward(self,gradwrtouput):
        
        a = self.grad_conv2d_weight(self.input, self.weight.shape, gradwrtouput, self.stride, self.padding, self.dilation, self.groups)
        # the true bacward return the gradient with respect to the input
        return a
    

ModuleNotFoundError: No module named 'torch.empty'

In [1179]:
with torch.no_grad():
    input = torch.normal(mean = torch.zeros(1,4,8,8),std = 1)

    kernel = torch.tensor([4,4])
    stride = 1
    padding = 0
    dilation= 1
    groups = 1

    conv = torch.nn.Conv2d(4, 2, kernel, stride=stride, padding=padding, dilation=dilation, groups=groups)
    weight = conv.state_dict()["weight"]
    bias = conv.state_dict()["bias"]

    conv_ = Conv2d(4,2,kernel,stride, padding, dilation, groups,weight, bias)

    m = conv.forward(input) 
    print("size",m.size())
    m_ = conv_.forward(input)
    #print("qwertz")
    #print(m_.size(),weight.size())
    #m = torch.conv2d(input, weight, None, stride, padding,dilation, groups)
    #print(torch.allclose(m,m_))
    #print(torch.norm(m-m_))


grad_output = torch.normal(mean=torch.zeros(m.size(1),m.size(-2),m.size(-1)),std= 1)
with torch.no_grad():
    grad = conv_.backward(grad_output)
print(grad.size())

input.requires_grad_(True)
weight.requires_grad_(True)
    
#output = torch.nn.functional.conv2d(input, weight)
#grad_weight = torch.autograd.grad(output, filter, grad_output)
print(input.size(), weight.shape, grad_output.size())
torch.nn.functional.grad.conv2d_weight(input, weight.shape, grad_output,stride = stride,dilation=dilation)

size torch.Size([1, 2, 5, 5])
torch.Size([2, 4, 4, 4])
torch.Size([1, 4, 8, 8]) torch.Size([2, 4, 4, 4]) torch.Size([2, 5, 5])


RuntimeError: shape '[4, 5, 4, 4]' is invalid for input of size 128

In [1183]:
input = torch.randn(1,2,8,8, requires_grad=True)
weight = torch.randn(2,2,4,4, requires_grad=True)
output = torch.nn.functional.conv2d(input, weight)
grad_output = torch.randn(output.shape)
grad_input = torch.autograd.grad(output, input, grad_output)
torch.nn.functional.grad.conv2d_input(input.shape, weight, grad_output)

tensor([[[[ -0.8621,  -1.3539,  -1.4050,  -2.2326,  -1.5887,  -2.6613,  -0.9174,
            -2.4680],
          [  0.8768,   1.2882,  -3.1495,  -0.7159,  -2.7948,  -0.0425,  -6.4332,
            -1.3337],
          [  2.1938,   8.8172,   1.2869,   0.8178,  -2.6069,   3.5926,   1.4423,
             3.4306],
          [ -0.2965,   0.4006, -10.5096,  -2.9640,   0.5642,  -0.2168,   2.8331,
            -2.2451],
          [ -0.1506,   5.5349,   2.1488,   3.0071,   1.0212,  -4.3455,   1.9899,
            -4.4598],
          [ -1.7478,  -3.0436,  -3.1800,   4.5011,  -1.3357,  -0.3814,   5.8009,
             1.9209],
          [  0.2352,   1.3158,  -0.2542,  -1.8943,  -4.4092,   3.0154,   0.9633,
             2.4462],
          [ -0.3679,  -0.7948,   0.4415,   4.7018,   1.7258,  -1.8242,  -0.9733,
             0.3006]],

         [[ -1.7960,  -3.2907,   0.3881,  -1.8639,   2.9707,   1.9623,   1.9869,
            -0.7144],
          [  2.5084,   4.0274,   2.4384,  -1.5744,  -0.4300,  -2.5633, 

In [645]:
weight.repeat_interleave(2,1)[0]

tensor([[[-0.1064,  0.1317],
         [ 0.1659, -0.0073],
         [ 0.0467, -0.0940]],

        [[-0.1064,  0.1317],
         [ 0.1659, -0.0073],
         [ 0.0467, -0.0940]],

        [[-0.1078,  0.0370],
         [-0.0660, -0.0301],
         [ 0.0667,  0.0965]],

        [[-0.1078,  0.0370],
         [-0.0660, -0.0301],
         [ 0.0667,  0.0965]],

        [[ 0.0626,  0.0655],
         [ 0.2033,  0.0281],
         [ 0.1692, -0.0333]],

        [[ 0.0626,  0.0655],
         [ 0.2033,  0.0281],
         [ 0.1692, -0.0333]],

        [[-0.0681, -0.1378],
         [ 0.0170, -0.0392],
         [-0.1973,  0.0774]],

        [[-0.0681, -0.1378],
         [ 0.0170, -0.0392],
         [-0.1973,  0.0774]]])

In [642]:
weight[0]

tensor([[[-0.1064,  0.1317],
         [ 0.1659, -0.0073],
         [ 0.0467, -0.0940]],

        [[-0.1078,  0.0370],
         [-0.0660, -0.0301],
         [ 0.0667,  0.0965]],

        [[ 0.0626,  0.0655],
         [ 0.2033,  0.0281],
         [ 0.1692, -0.0333]],

        [[-0.0681, -0.1378],
         [ 0.0170, -0.0392],
         [-0.1973,  0.0774]]])

In [721]:
input = torch.normal(mean = torch.zeros(30,4,8,8),std = 1)

unfolded = unfold(input,kernel,stride,padding,dilation)
print(unfolded.size())

print(weight.size())
w2 = weight.view(weight.size(0),-1)



torch.Size([30, 16, 49])
torch.Size([2, 4, 2, 2])


tensor([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]])

In [827]:
input = torch.normal(mean = torch.zeros(30,4,8,8),std = 1)

kernel = torch.tensor([3,2])
stride = 1
padding = 0
dilation= 1
groups = 1

conv = torch.nn.Conv2d(4, 2, kernel, stride=stride, padding=padding, dilation=dilation, groups=groups)
weight = conv.state_dict()["weight"]
bias = conv.state_dict()["bias"]

m = conv.forward(input) 

inp_unf = torch.nn.functional.unfold(input,kernel,stride,padding,dilation)
out_unf = inp_unf.transpose(1, 2).matmul(weight.view(weight.size(0), -1).t()).transpose(1, 2).add(bias.view(1, -1, 1).repeat(30,1,inp_unf.size(2)))
out = torch.nn.functional.fold(out_unf, (6,7), (1, 1),dilation,padding,stride)
print(out_unf.size())
(m - out).norm()


torch.Size([30, 2, 42])


tensor(3.6492e-06, grad_fn=<CopyBackwards>)

In [894]:
def conv2d(input: Tensor, weight: Tensor, bias=None, stride=1, padding=0, dilation=1, groups=1) -> Tensor:
        # input is 4d tensor
        

        if isinstance(stride, int):
            stride = (stride, stride)
        if isinstance(padding, int):
            padding = (padding, padding)
        if isinstance(dilation, int):
            dilation = (dilation, dilation)

        N = input.size(0)
        C_in = input.size(1)
        H_in = input.size(-2)
        W_in = input.size(-1)
        
        kernel_size = (weight.size(-2), weight.size(-1))
        C_out = weight.size(0)
        H_out = int((H_in + 2 * padding[0] - dilation[0] * (kernel_size[0] - 1) - 1) / stride[0] + 1)
        W_out = int((W_in + 2 * padding[1] - dilation[1] * (kernel_size[1] - 1) - 1) / stride[1] + 1)
        
        weight = weight.repeat_interleave(groups,1)## pas aussi simple on ne somme plus sur toutes les in_channels maintenant
        
        inp_unf = unfold(input,kernel,stride,padding,dilation)#.view(N,H_out*W_out,-1,C_in//groups)
        print("weight", weight.view(C_out,-1, C_in//groups).size())
        print("inp_", inp_unf.size())
        print("input", input.size(),"\n")
        if bias != None:
            
            out_unf = inp_unf.transpose(1,2).matmul(weight.view(C_out,-1).t()).transpose(1,2).add(bias.view(1, -1, 1).repeat(N,1,H_out*W_out))
        else:
            out_unf = inp_unf.transpose(1, 2).matmul(weight.view(C_out, -1).t()).transpose(1, 2)
            
        out = torch.nn.functional.fold(out_unf, (H_out,W_out), (1,1), dilation, padding, stride)
        
        return out
    
#out_unf = inp_unf.transpose(1, 2).matmul(weight.view(C_out, -1).t()).transpose(1, 2).add(bias.view(1, -1, 1).repeat(N,1,H_out*W_out))
       

In [989]:
input = torch.normal(mean = torch.zeros(31,8,8,8),std = 1)

kernel = torch.tensor([3,5])
stride = 1
padding = 0
dilation= 1
groups = 2

conv = torch.nn.Conv2d(8, 4, kernel, stride=stride, padding=padding, dilation=dilation, groups=groups)
weight = conv.state_dict()["weight"]
bias = conv.state_dict()["bias"]
m = conv.forward(input)
print(input.size(), weight.size(), None, dilation, padding,stride, groups)

#m_ = conv2d(input, weight, bias, stride, padding, dilation, groups)
m = torch.conv2d(input, weight, None, stride, padding,dilation, groups)
#print(torch.allclose(m,m_))
#print(torch.norm(m-m_))

torch.Size([31, 8, 8, 8]) torch.Size([4, 4, 3, 5]) None 1 0 1 2


In [1159]:
input = torch.normal(mean = torch.zeros(30,4,8,8),std = 1)

kernel = torch.tensor([2,4])
stride = 1
padding = 0
dilation= 2
groups = 1

conv = torch.nn.Conv2d(4, 2, kernel, stride=stride, padding=padding, dilation=dilation, groups=groups)
weight = conv.state_dict()["weight"]
bias = conv.state_dict()["bias"]
conv_ = Conv2d(4,2,kernel,stride, padding, dilation, groups,weight, bias)

m = conv.forward(input) 
m_ = conv_.forward(input)

dout = torch.normal(mean=torch.zeros(m.size(1),m.size(-2),m.size(-1)),std= 1)
conv_param = {"pad": padding, "stride": stride}
cache = (input.numpy(), weight.numpy(),bias.numpy(), conv_param)

back_w = conv_.backward(dout)
back_w_naive = conv_backward_naive(dout.view(1,dout.size(0),dout.size(1),dout.size(2)).repeat(30,1,1,1).numpy(),cache)

ho=0 wo=0
x_seg=[[[ 0.18548936 -2.0822942   1.4365394   0.16685851]
  [-0.5064098   0.56524706  1.0083085  -1.9693106 ]]

 [[ 0.05567319 -1.4576814   0.9803935   1.5677462 ]
  [-1.2257185  -1.2196308  -1.5194861  -1.0492699 ]]

 [[ 0.07147467 -0.3886078  -0.2822727  -0.75422925]
  [ 1.085428   -0.48508528  0.71002907  0.37662846]]

 [[-1.8497467   1.2027289   0.96429193 -1.7270403 ]
  [ 0.5244083  -0.04580086 -0.7821546   0.0031531 ]]]
dout=0.24331188201904297

ho=0 wo=1
x_seg=[[[-2.0822942   1.4365394   0.16685851 -0.5660924 ]
  [ 0.56524706  1.0083085  -1.9693106   0.06564075]]

 [[-1.4576814   0.9803935   1.5677462  -1.045282  ]
  [-1.2196308  -1.5194861  -1.0492699   1.0463142 ]]

 [[-0.3886078  -0.2822727  -0.75422925 -0.6288503 ]
  [-0.48508528  0.71002907  0.37662846  1.9988246 ]]

 [[ 1.2027289   0.96429193 -1.7270403  -0.7148113 ]
  [-0.04580086 -0.7821546   0.0031531   0.16711627]]]
dout=-0.39551058411598206

ho=0 wo=2
x_seg=[[[ 1.4365394   0.16685851 -0.5660924  -1.6239557 ]

IndexError: index 2 is out of bounds for axis 3 with size 2

In [1158]:
import numpy as np

def conv_backward_naive(dout, cache):
    """A naive implementation of the backward pass for a convolutional layer.
    Inputs:
    - dout: Upstream derivatives.
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive
    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    dx, dw, db = None, None, None
    ###########################################################################
    # TODO: Implement the convolutional backward pass.                        #
    ###########################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    # unpack the cache
    x, w, b, conv_param = cache
    N, C, H, W = x.shape
    F, C, HH, WW = w.shape
    s, pad = conv_param['stride'], conv_param['pad']
    H_out = int(1 + (H + 2 * pad - HH) / s)
    W_out = int(1 + (W + 2 * pad - WW) / s)

    # pad x with 0s for H, W axis only
    # (0, 0) means NO padding, (1, 1) - padding from both sides
    x_pad = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)))
    
    # initialize gradients
    dx = np.zeros_like(x)
    dx_pad = np.zeros_like(x_pad)
    dw = np.zeros_like(w)
    db = np.zeros_like(b)

    # iterate over examples in x
    for n in range(N):
        # iterate over filters in w
        for f in range(F):
            # iterate over x_pad with dout as a filter
            for ho in range(H_out):
                for wo in range(W_out):
                    print(f'ho={ho} wo={wo}')
                    # compute np.sum(x_seg * dout)
                    x_seg = x_pad[n, :, ho*s:ho*s+HH, wo*s:wo*s+WW]
                    print(f'x_seg={x_seg}')
                    dw[f] += x_seg * dout[n, f, ho, wo] 
                    print(f'dout={dout[n, f, ho, wo]}\n')

    # remove padding
    # dx[:, :, :, :] = dx_pad[:, :, pad:-pad, pad:-pad]

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ###########################################################################
    #                             END OF YOUR CODE                            #
    ###########################################################################
    return dx, dw, db

