In [1]:
import time
from termcolor import colored
import torch

from modules.Dcls2d import Dcls2d
from modules.Dcls2d_old import Dcls2d_old

assert torch.cuda.is_available()
cuda_device = torch.device("cuda")  # device object representing GPU

in_channels = 1
out_channels = 1
kernel_size = (3,3)
dilation = (1,1)
stride = (1,1)
padding = (0,0)
groups = 1
bias = False

m = torch.nn.Conv2d(in_channels=in_channels,
              out_channels=out_channels,
              kernel_size=kernel_size,
              dilation=dilation,
              stride=stride,
              padding=padding,
              groups=groups,
              bias=bias).to(cuda_device)

n = Dcls2d(in_channels=in_channels,
              out_channels=out_channels,
              kernel_size=kernel_size,
              dilation=dilation,
              stride=stride,
              padding=padding,
              groups=groups,
              bias=bias).to(cuda_device)

X = torch.nn.Parameter(
                      torch.tensor([[1., 2., 3., 4., 5.],
                                    [6., 7., 8., 9., 10.], 
                                    [11., 12., 13., 14., 15.],
                                    [16., 17., 18., 19., 20.],                                   
                                    [21., 22., 23., 24., 25.],],device=cuda_device).unsqueeze(0).unsqueeze(0),
                      requires_grad = True) 
n.weight = m.weight = torch.nn.Parameter(
                      torch.tensor([[1., 2., 3.],
                                    [4., 5., 6.], 
                                    [7., 8., 9.], ],device=cuda_device).unsqueeze(0).unsqueeze(0),
                      requires_grad = True)

In [2]:
print(X.size())
print(m.weight.size())
print(n.weight.size())

print(m(X).size())
print(m(X))
print(n(X).size())
print(n(X))

print(n.P1)



torch.Size([1, 1, 5, 5])
torch.Size([1, 1, 3, 3])
torch.Size([1, 1, 3, 3])
torch.Size([1, 1, 3, 3])
tensor([[[[411., 456., 501.],
          [636., 681., 726.],
          [861., 906., 951.]]]], device='cuda:0',
       grad_fn=<CudnnConvolutionBackward>)
torch.Size([1, 1, 3, 3])
tensor([[[[410.7680, 455.7680, 500.7680],
          [635.7680, 680.7680, 725.7680],
          [860.7680, 905.7680, 950.7680]]]], device='cuda:0',
       grad_fn=<SurrogateDilationBackward>)
Parameter containing:
tensor([[[[-1., -1., -1.],
          [ 0.,  0.,  0.],
          [ 1.,  1.,  1.]]]], device='cuda:0', requires_grad=True)


In [3]:
%debug

ERROR:root:No traceback has been produced, nothing to debug.


In [4]:
def elementary_test(test_name = "test_elem", print_tensors = False,
                    random_weights = True , batch = 1,
                    in_channels = 1, out_channels = 1, 
                    stride = (1,1), padding = (0,0), 
                    dilation = (1,1), kernel_size = (3,3), 
                    img_size = (8,7), bias = None, 
                    groups = 1):
    
    print("\n"+ test_name + "\n--------------------------")

    m = torch.nn.Conv2d(in_channels=in_channels,
                  out_channels=out_channels,
                  kernel_size=kernel_size,
                  dilation=dilation,
                  stride=stride,
                  padding=padding,
                  groups=groups,
                  bias=bias).to(cuda_device)

    n = Dcls2d(in_channels=in_channels,
                  out_channels=out_channels,
                  kernel_size=kernel_size,
                  dilation=dilation,
                  stride=stride,
                  padding=padding,
                  groups=groups,
                  bias=bias).to(cuda_device)

    o = Dcls2d_old(in_channels=in_channels,
                  out_channels=out_channels,
                  kernel_size=kernel_size,
                  dilation=dilation,
                  stride=stride,
                  padding=padding,
                  groups=groups,
                  bias=bias).to(cuda_device)

    Y1 = torch.nn.Parameter(torch.randn((batch,in_channels,*img_size),device=cuda_device), requires_grad = True)
    Y2 = torch.nn.Parameter(torch.randn((batch,in_channels,*img_size),device=cuda_device), requires_grad = True)
    Y3 = torch.nn.Parameter(torch.randn((batch,in_channels,*img_size),device=cuda_device), requires_grad = True)    
    Y3.data = Y2.data = Y1.data
    W = torch.nn.Parameter(torch.randn((out_channels,in_channels//groups,*kernel_size),device=cuda_device), requires_grad = True)
    B = torch.nn.Parameter(torch.randn((out_channels),device=cuda_device), requires_grad = True)
    #o.weight = torch.nn.Parameter(torch.randn((out_channels,in_channels//groups,*kernel_size),device=cuda_device), requires_grad = True)
    n.weight = torch.nn.Parameter(torch.randn((out_channels,in_channels//groups,*kernel_size),device=cuda_device), requires_grad = True)
    m.weight = torch.nn.Parameter(torch.randn((out_channels,in_channels//groups,*kernel_size),device=cuda_device), requires_grad = True)
    m.bias = torch.nn.Parameter(torch.randn((out_channels),device=cuda_device))
    n.bias = torch.nn.Parameter(torch.randn((out_channels),device=cuda_device))
    #o.bias = torch.nn.Parameter(torch.randn((out_channels),device=cuda_device))

    
    m.weight.data = W.data
    n.weight.data = W.data
    #o.weight.data = W.data
    
    
    m.bias.data = B.data
    n.bias.data = B.data
    #o.bias.data = B.data
    
    height_out = (img_size[0] + 2 * padding[0] - (dilation[0] * (kernel_size[0] - 1) + 1)) / stride[0] + 1;
    width_out = (img_size[1] + 2 * padding[1] - (dilation[1] * (kernel_size[1] - 1) + 1)) / stride[1] + 1;
    back_truth = torch.randn((batch,out_channels,int(height_out),int(width_out)),device=cuda_device)
    

    print("#Forward check")
    
    if (print_tensors):
        print(m(Y1))
        print(n(Y2))

    print(colored('True', 'green')) if torch.all(torch.abs(m(Y1) - n(Y2))/torch.abs(m(Y1)) < 1e-2) else print(colored('False', 'red'))
    #print(colored('True', 'green')) if torch.all(torch.abs(m(Y1) - o(Y3)) < 1e-2) else print(colored('False', 'red'))
   
    print("#Backward check")

    var1 = (m(Y1) - back_truth).norm()
    var2 = (n(Y2) - back_truth).norm()
    #var3 = (o(Y) - backtruth).norm()
    var1.backward();
    var2.backward();
    #var3.backward();

    if (print_tensors):
        print(m.weight.grad)
        print(n.weight.grad)
        #print(o.weight)

    print(colored('True', 'green')) if torch.all(torch.abs(m.weight.grad - n.weight.grad)/torch.abs(m.weight.grad) < 1e-2) else print(colored('False', 'red'))
    print(colored('True', 'green')) if torch.all(torch.abs(Y1.grad - Y2.grad)/torch.abs(Y1.grad) < 1e-2) else print(colored('False', 'red')    )


In [5]:



elementary_test("test_dil", dilation = (6,5), kernel_size = (3,3), img_size=(32,20))
elementary_test("test_dil", dilation = (5,6), kernel_size = (3,3), img_size=(40,50))
elementary_test("test_dil", dilation = (6,6), kernel_size = (3,3), img_size=(49,56))
elementary_test("test_dil", dilation = (5,5), kernel_size = (3,3), img_size=(120,57))

elementary_test("test_batch", batch = 42)
elementary_test("test_ch_in", in_channels = 64, img_size = (100,100))
elementary_test("test_ch_out", out_channels = 128, print_tensors = False)
elementary_test("test_batch_ch_in_out", batch = 42, in_channels = 128, out_channels = 64)

elementary_test("test_kernel_size", kernel_size = (4,3))
elementary_test("test_img_size", img_size = (233,239))
elementary_test("test_padding", padding = (33,22))
elementary_test("test_stride", stride = (2,3))
elementary_test("test_all", batch = 42, in_channels = 64, out_channels = 128, kernel_size = (4,3), 
               img_size = (233,239), padding = (33,22), stride = (2,3))

elementary_test("test_dilation", dilation = (3,2))
elementary_test("test_all_dilation", batch = 42, in_channels = 64, out_channels = 128, kernel_size = (4,3), 
                img_size = (233,239), padding = (33,22), stride = (2,3), dilation = (3,2))

elementary_test("test_groups", in_channels = 64, out_channels = 64, groups = 64)
elementary_test("test_all_groups", batch = 42, in_channels = 64, out_channels = 128, kernel_size = (4,3), 
                img_size = (233,239), padding = (33,22), stride = (2,3), dilation = (3,2), groups = 64)
#elementary_test("test_bias", batch = 42)


test_dil
--------------------------
#Forward check
[32mTrue[0m
#Backward check
[32mTrue[0m
[32mTrue[0m

test_dil
--------------------------
#Forward check
[32mTrue[0m
#Backward check
[32mTrue[0m
[32mTrue[0m

test_dil
--------------------------
#Forward check
[32mTrue[0m
#Backward check
[32mTrue[0m
[32mTrue[0m

test_dil
--------------------------
#Forward check
[32mTrue[0m
#Backward check
[32mTrue[0m
[32mTrue[0m

test_batch
--------------------------
#Forward check
[32mTrue[0m
#Backward check
[32mTrue[0m
[32mTrue[0m

test_ch_in
--------------------------
#Forward check
[32mTrue[0m
#Backward check
[32mTrue[0m
[31mFalse[0m

test_ch_out
--------------------------
#Forward check
[32mTrue[0m
#Backward check
[32mTrue[0m
[32mTrue[0m

test_batch_ch_in_out
--------------------------
#Forward check
[31mFalse[0m
#Backward check
[31mFalse[0m
[31mFalse[0m

test_kernel_size
--------------------------
#Forward check
[32mTrue[0m
#Backward check
[32mTr

In [6]:
batch = 64
in_channels = 64
out_channels = 64
stride = (1,1)
padding = (0,0)
dilation = (4,3)
kernel_size = (4,3)
img_size = (108,64)
bias = False
groups = 1
    

m = torch.nn.Conv2d(in_channels=in_channels,
              out_channels=out_channels,
              kernel_size=kernel_size,
              dilation=dilation,
              stride=stride,
              padding=padding,
              groups=groups,
              bias=bias).to(cuda_device)

n = Dcls2d(in_channels=in_channels,
              out_channels=out_channels,
              kernel_size=kernel_size,
              dilation=dilation,
              stride=stride,
              padding=padding,
              groups=groups,
              bias=bias).to(cuda_device)

o = Dcls2d_old(in_channels=in_channels,
              out_channels=out_channels,
              kernel_size=kernel_size,
              dilation=dilation,
              stride=stride,
              padding=padding,
              groups=groups,
              bias=bias).to(cuda_device)

Y = torch.nn.Parameter(torch.randn((batch,in_channels,*img_size),device=cuda_device))
o.weight = n.weight = m.weight = torch.nn.Parameter(torch.ones((out_channels,in_channels//groups,*kernel_size),device=cuda_device))

In [7]:
forward = 0
backward = 0
for _ in range(10):
    start = time.time()
    a = m(Y)
    torch.cuda.synchronize()
    forward += time.time() - start

    start = time.time()
    (a.sum()).backward()
    torch.cuda.synchronize()
    backward += time.time() - start

print('Forward: {:.3f} us | Backward {:.3f} us'.format(forward * 1e6/1e1, backward * 1e6/1e1))

Forward: 4671.597 us | Backward 15256.667 us


In [8]:
forward = 0
backward = 0
for _ in range(10):
    start = time.time()
    b = n(Y)
    torch.cuda.synchronize()
    forward += time.time() - start

    start = time.time()
    (b.sum() ).backward()
    torch.cuda.synchronize()
    backward += time.time() - start

print('Forward: {:.3f} us | Backward {:.3f} us'.format(forward * 1e6/1e1, backward * 1e6/1e1))

Forward: 1057759.881 us | Backward 1107277.656 us


In [9]:
forward = 0
backward = 0
for _ in range(10):
    start = time.time()
    c = o(Y)
    torch.cuda.synchronize()
    forward += time.time() - start

    start = time.time()
    (c.sum() ).backward()
    torch.cuda.synchronize()
    backward += time.time() - start

print('Forward: {:.3f} us | Backward {:.3f} us'.format(forward * 1e6/1e1, backward * 1e6/1e1))

Forward: 16189.647 us | Backward 233241.177 us
