In [1]:
import time
from termcolor import colored
import torch
import torch.autograd.profiler as profiler

from modules.Swc2d import Swc2d
from modules.Dcls2dFull import Dcls2dFull



# Every cell below places its modules and tensors on the GPU, so stop right
# away with an explicit message when no CUDA device is visible.
assert torch.cuda.is_available(), "This notebook requires a CUDA-capable GPU"
cuda_device = torch.device("cuda")  # device object representing the GPU

In [2]:
# --- Small hand-checkable case: 1 channel, 2x2 kernel, dilation 2, no padding ---
in_channels, out_channels = 1, 1
kernel_size = (2, 2)
dilation = (2, 2)
stride = (1, 1)
padding = (0, 0)
groups = 1
bias = False

# Both layers receive exactly the same constructor arguments.
layer_kwargs = dict(
    in_channels=in_channels,
    out_channels=out_channels,
    kernel_size=kernel_size,
    dilation=dilation,
    stride=stride,
    padding=padding,
    groups=groups,
    bias=bias,
)

m = torch.nn.Conv2d(**layer_kwargs).to(cuda_device)  # PyTorch's built-in convolution
n = Swc2d(**layer_kwargs).to(cuda_device)            # project layer compared against it

# Literal values shared by the paired tensors below.
input_rows = [[[[1., 2., 3., 4.],
                [5., 6., 7., 8.],
                [9., 10., 11., 12.],
                [13., 14., 15., 16.]]]]
weight_rows = [[[[20., 40.],
                 [60., 80.]]]]
target_rows = [[[[1., 2.],
                 [4., 5.]]]]


def _as_parameter(rows):
    """Copy nested float lists into a new trainable parameter on `cuda_device`."""
    return torch.nn.Parameter(torch.tensor(rows, device=cuda_device),
                              requires_grad=True)


# Each layer gets its own input tensor so input gradients accumulate separately.
X1 = _as_parameter(input_rows)
X2 = _as_parameter(input_rows)

# Overwrite the randomly initialised weights with identical known values.
m.weight = _as_parameter(weight_rows)
n.weight = _as_parameter(weight_rows)

back_truth = _as_parameter(target_rows)

# Only the Swc2d forward pass is recorded by the profiler; the Conv2d forward
# pass and both backward passes run outside the profiling context.
with profiler.profile(use_cuda=True, profile_memory=True) as prof:
    var2 = (n(X2) - back_truth).norm()
var1 = (m(X1) - back_truth).norm()

var1.backward()
var2.backward()

In [3]:
# Shapes of the input and of both weight tensors.
for tensor in (X1, m.weight, n.weight):
    print(tensor.size())

# Forward result of each layer: shape first, then the values.
for layer, sample in ((m, X1), (n, X2)):
    print(layer(sample).size())
    print(layer(sample))

# Gradients left by the backward passes: both weights first, then both inputs.
for grad in (m.weight.grad, n.weight.grad, X1.grad, X2.grad):
    print(grad)



torch.Size([1, 1, 4, 4])
torch.Size([1, 1, 2, 2])
torch.Size([1, 1, 2, 2])
torch.Size([1, 1, 2, 2])
tensor([[[[1560., 1760.],
          [2360., 2560.]]]], device='cuda:0',
       grad_fn=<CudnnConvolutionBackward>)
torch.Size([1, 1, 2, 2])
tensor([[[[1560., 1760.],
          [2360., 2560.]]]], device='cuda:0', grad_fn=<swc2dBackward>)
tensor([[[[ 7.6718, 11.5944],
          [23.3621, 27.2847]]]], device='cuda:0')
tensor([[[[0., 0.],
          [0., 0.]]]], device='cuda:0')
tensor([[[[ 7.4323,  8.3810, 14.8646, 16.7620],
          [11.2319, 12.1806, 22.4637, 24.3611],
          [22.2968, 25.1429, 29.7291, 33.5239],
          [33.6956, 36.5417, 44.9274, 48.7222]]]], device='cuda:0')
tensor([[[[0., 0., 0., 0.],
          [0., 0., 0., 0.],
          [0., 0., 0., 0.],
          [0., 0., 0., 0.]]]], device='cuda:0')


In [4]:
# Percentage of non-zero entries in the Swc2d weight (100.0 means fully dense).
# `as_tuple=False` selects the index-tensor overload explicitly; the bare
# `nonzero()` call is deprecated and emits a UserWarning.
n.weight.nonzero(as_tuple=False).size(0) * 100 / n.weight.numel()

	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:882.)
  n.weight.nonzero().size(0)*100/n.weight.numel()


100.0

In [5]:
def conv_output_size(in_size, kernel, dilation, stride, padding):
    """Return the output length of a convolution along one spatial axis.

    Computes ``(in_size + 2*padding - (dilation*(kernel - 1) + 1)) // stride + 1``
    using integer arithmetic only, so the result is an exact ``int``.

    Args:
        in_size: input length along the axis.
        kernel: kernel length along the axis.
        dilation: spacing between kernel taps along the axis.
        stride: step of the sliding window along the axis.
        padding: padding added to each side of the axis.

    Returns:
        The number of output positions along the axis.
    """
    effective_kernel = dilation * (kernel - 1) + 1
    return (in_size + 2 * padding - effective_kernel) // stride + 1


# --- Benchmark configuration --------------------------------------------------
batch = 16
in_channels = 2**9
out_channels = 2**10
kernel_size = (3, 3)
dilation = (8, 8)
stride = (1, 1)
padding = (0, 0)
groups = 1
bias = False
h = 200
w = 200

# Spatial size of the layer output, needed to build a target of matching shape.
h_o = conv_output_size(h, kernel_size[0], dilation[0], stride[0], padding[0])
w_o = conv_output_size(w, kernel_size[1], dilation[1], stride[1], padding[1])

# Layer to profile, built from the hyper-parameters set earlier in this cell.
swc_kwargs = dict(
    in_channels=in_channels,
    out_channels=out_channels,
    kernel_size=kernel_size,
    dilation=dilation,
    stride=stride,
    padding=padding,
    groups=groups,
    bias=bias,
)
n = Swc2d(**swc_kwargs).to(cuda_device)

# Random input batch and a random target whose shape matches the layer output.
X2 = torch.nn.Parameter(
    torch.rand(batch, in_channels, h, w, device=cuda_device),
    requires_grad=True,
)
back_truth = torch.nn.Parameter(
    torch.rand(batch, out_channels, h_o, w_o, device=cuda_device),
    requires_grad=True,
)

# Record both the forward and the backward pass, including memory statistics.
with profiler.profile(use_cuda=True, profile_memory=True) as prof:
    var2 = (n(X2) - back_truth).norm()
    var2.backward()

In [6]:
# Print the abbreviated CUDA memory statistics for the device used in this notebook.
memory_report = torch.cuda.memory_summary(device=cuda_device, abbreviated=True)
print(memory_report)

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |    6768 MB |   15802 MB |   56440 MB |   49672 MB |
|---------------------------------------------------------------------------|
| Active memory         |    6768 MB |   15802 MB |   56440 MB |   49672 MB |
|---------------------------------------------------------------------------|
| GPU reserved memory   |   17172 MB |   17172 MB |   17172 MB |       0 B  |
|---------------------------------------------------------------------------|
| Non-releasable memory |    3535 MB |    4005 MB |   12124 MB |    8588 MB |
|---------------------------------------------------------------

In [7]:
# Average the recorded profiler events per operator and print them as one table
# (row_limit=1000 is far above the number of rows shown, so nothing is cut off).
averaged_events = prof.key_averages()
print(averaged_events.table(row_limit=1000))
# To inspect the timeline in a trace viewer instead, uncomment:
# prof.export_chrome_trace("trace.json")

-----------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                               Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-----------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                              swc2d         1.29%     108.750us        72.35%       6.114ms       6.114ms      79.931us         0.00%        4.477s        4.477s           0 b           0 b       2.07 Gb     -10.06 Gb             1  
                         aten::view         0.60%      50.973u

In [9]:
--------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                              Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
--------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                             swc2d        54.55%      42.442ms        97.30%      75.707ms      75.707ms      42.461ms        34.57%      75.707ms      75.707ms           0 b           0 b       2.07 Gb      -6.71 Gb             1  
                                        aten::view         0.06%      49.061us         0.06%      49.061us       8.177us       0.000us         0.00%       0.000us       0.000us           0 b           0 b           0 b           0 b             6  
                                       aten::empty         5.61%       4.362ms         5.61%       4.362ms     218.108us       0.000us         0.00%       0.000us       0.000us           0 b           0 b      11.62 Gb      11.62 Gb            20  
                                        aten::rand         0.01%       8.924us         5.50%       4.277ms       4.277ms       4.244ms         3.46%      11.145ms      11.145ms           0 b           0 b       3.35 Gb           0 b             1  
                                    aten::uniform_         0.04%      31.211us         0.04%      31.211us      31.211us       6.900ms         5.62%       6.900ms       6.900ms           0 b           0 b           0 b           0 b             1  
                                         aten::eye         0.03%      21.628us         7.02%       5.463ms       2.731ms       9.345us         0.01%      11.951ms       5.976ms           0 b           0 b       6.71 Gb           0 b             2  
                                     aten::resize_         3.52%       2.739ms         3.52%       2.739ms     249.006us       0.000us         0.00%       0.000us       0.000us           0 b           0 b       5.42 Gb       5.42 Gb            11  
                                       aten::zero_         0.08%      60.815us         0.20%     159.104us      39.776us      10.878us         0.01%       8.392ms       2.098ms           0 b           0 b           0 b           0 b             4  
                                       aten::fill_         0.21%     162.000us         0.21%     162.000us      23.143us       8.400ms         6.84%       8.400ms       1.200ms           0 b           0 b           0 b           0 b             7  
                                      aten::stride         0.00%       1.372us         0.00%       1.372us       0.343us       0.000us         0.00%       0.000us       0.000us           0 b           0 b           0 b           0 b             4  
                                  aten::as_strided         0.01%      10.736us         0.01%      10.736us       0.767us       0.000us         0.00%       0.000us       0.000us           0 b           0 b           0 b           0 b            14  
                                   aten::to_sparse         0.05%      40.877us        33.59%      26.135ms      26.135ms      34.236us         0.03%      16.034ms      16.034ms           0 b           0 b     586.50 Kb    -586.50 Kb             1  
                                     aten::nonzero        33.14%      25.790ms        33.17%      25.814ms      25.814ms      15.707ms        12.79%      15.718ms      15.718ms           0 b           0 b     469.00 Kb           0 b             1  
                                  aten::contiguous         0.00%       2.933us         0.00%       2.933us       2.933us       1.342us         0.00%       1.342us       1.342us           0 b           0 b           0 b           0 b             1  
                                           aten::t         0.00%       1.752us         0.01%       4.635us       4.635us       0.000us         0.00%       0.000us       0.000us           0 b           0 b           0 b           0 b             1  
                                   aten::transpose         0.00%       2.686us         0.01%       4.451us       2.225us       0.000us         0.00%       0.000us       0.000us           0 b           0 b           0 b           0 b             2  
                                        aten::set_         0.03%      20.847us         0.03%      20.847us      10.424us      11.805us         0.01%      11.805us       5.902us           0 b           0 b           0 b           0 b             2  
                                       aten::clone         0.02%      15.979us         0.06%      50.166us      25.083us      19.070us         0.02%      44.832us      22.416us           0 b           0 b     586.50 Kb           0 b             2  
                               aten::empty_strided         0.14%     110.099us         0.14%     110.099us      13.762us       0.000us         0.00%       0.000us       0.000us          32 b          32 b       1.24 Gb       1.24 Gb             8  
                                       aten::copy_         0.09%      72.067us         0.09%      72.067us      14.413us      57.223us         0.05%      57.223us      11.445us           0 b           0 b           0 b           0 b             5  
                                       aten::chunk         0.01%       7.593us         0.03%      26.939us      26.939us       8.004us         0.01%      26.816us      26.816us           0 b           0 b           0 b           0 b             1  
                                       aten::split         0.01%       8.789us         0.02%      19.346us      19.346us       8.152us         0.01%      18.812us      18.812us           0 b           0 b           0 b           0 b             1  
                                      aten::narrow         0.01%       7.089us         0.01%      10.557us       5.279us      10.660us         0.01%      10.660us       5.330us           0 b           0 b           0 b           0 b             2  
                                       aten::slice         0.00%       2.324us         0.00%       3.468us       1.734us       0.000us         0.00%       0.000us       0.000us           0 b           0 b           0 b           0 b             2  
                                       aten::index         0.03%      23.917us         0.05%      38.904us      38.904us      39.645us         0.03%      49.312us      49.312us           0 b           0 b     117.50 Kb           0 b             1  
                                     aten::reshape         0.01%       7.349us         0.01%       9.322us       4.661us       9.668us         0.01%       9.668us       4.834us           0 b           0 b           0 b           0 b             2  
                                     aten::squeeze         0.00%       1.897us         0.00%       2.481us       2.481us       0.000us         0.00%       0.000us       0.000us           0 b           0 b           0 b           0 b             1  
                           aten::sparse_coo_tensor         0.05%      39.419us         0.20%     157.886us     157.886us      32.449us         0.03%     157.695us     157.695us           0 b         -32 b           0 b      -2.00 Kb             1  
                                         aten::min         0.03%      19.995us         0.04%      30.820us      30.820us      37.312us         0.03%      37.312us      37.312us           0 b           0 b       1.00 Kb           0 b             1  
                                         aten::max         0.02%      15.761us         0.03%      25.731us      25.731us      28.672us         0.02%      28.672us      28.672us           0 b           0 b       1.00 Kb           0 b             1  
                                          aten::to         0.04%      27.329us         0.08%      62.993us      20.998us      21.406us         0.02%      46.977us      15.659us          32 b           0 b           0 b           0 b             3  
    aten::_sparse_coo_tensor_with_dims_and_tensors         0.01%      10.487us         0.02%      13.047us      13.047us      12.957us         0.01%      12.957us      12.957us           0 b           0 b           0 b           0 b             1  
                                 aten::_coalesced_         0.00%       2.744us         0.00%       2.744us       2.744us       2.980us         0.00%       2.980us       2.980us           0 b           0 b           0 b           0 b             1  
                                        aten::_nnz         0.01%      10.091us         0.01%      10.091us      10.091us      10.910us         0.01%      10.910us      10.910us           0 b           0 b           0 b           0 b             1  
                                     aten::indices         0.04%      29.029us         0.05%      40.048us      20.024us      28.102us         0.02%      39.270us      19.635us           0 b           0 b           0 b           0 b             2  
                                aten::is_coalesced         0.01%       6.990us         0.01%       6.990us       2.330us       7.742us         0.01%       7.742us       2.581us           0 b           0 b           0 b           0 b             3  
                                       aten::alias         0.01%       9.044us         0.01%       9.044us       3.015us       8.031us         0.01%       8.031us       2.677us           0 b           0 b           0 b           0 b             3  
                                      aten::select         0.02%      18.383us         0.03%      19.495us       9.748us      18.625us         0.02%      18.625us       9.312us           0 b           0 b           0 b           0 b             2  
                                      aten::values         0.02%      13.997us         0.02%      19.012us      19.012us      14.047us         0.01%      18.652us      18.652us           0 b           0 b           0 b           0 b             1  
                                         aten::sub         0.05%      35.924us         0.05%      42.716us      42.716us      12.824ms        10.44%      12.824ms      12.824ms           0 b           0 b       2.07 Gb           0 b             1  
                              aten::frobenius_norm         0.02%      13.852us         0.15%     115.578us     115.578us       6.031us         0.00%       4.655ms       4.655ms           0 b           0 b       1.00 Kb           0 b             1  
                                        aten::norm         0.08%      64.747us         0.09%      71.100us      71.100us       4.643ms         3.78%       4.643ms       4.643ms           0 b           0 b         512 b           0 b             1  
                                   aten::ones_like         0.02%      12.675us         0.04%      32.108us      32.108us       5.211us         0.00%       8.188us       8.188us           0 b           0 b         512 b           0 b             1  
                                  aten::empty_like         0.03%      26.705us         0.15%     116.908us      29.227us       0.000us         0.00%       0.000us       0.000us           0 b           0 b       1.24 Gb           0 b             4  
                        torch::autograd::GraphRoot         0.00%       3.138us         0.00%       3.138us       3.138us       0.703us         0.00%       0.703us       0.703us           0 b           0 b           0 b           0 b             1  
                    torch::autograd::CopyBackwards         0.09%      73.814us         0.11%      87.938us      87.938us       3.297us         0.00%       3.969us       3.969us           0 b           0 b           0 b           0 b             1  
                                     NormBackward1         0.21%     163.425us         0.84%     654.880us     654.880us       9.414us         0.01%       9.145ms       9.145ms           0 b           0 b       2.07 Gb      -1.00 Kb             1  
                                         aten::div         0.12%      96.114us         0.15%     120.260us     120.260us       7.938us         0.01%       7.938us       7.938us           0 b           0 b         512 b           0 b             1  
                                          aten::eq         0.20%     152.497us         0.32%     246.627us     123.314us      11.938us         0.01%      16.070us       8.035us           0 b           0 b       1.00 Kb           0 b             2  
                                aten::masked_fill_         0.09%      72.085us         0.09%      72.085us      72.085us       6.141us         0.00%       6.141us       6.141us           0 b           0 b           0 b           0 b             1  
                                         aten::mul         0.28%     214.059us         0.32%     251.052us     125.526us      18.102ms        14.74%      18.102ms       9.051ms           0 b           0 b       4.13 Gb           0 b             2  
                                      SubBackward0         0.06%      42.875us         0.40%     311.953us     311.953us       5.820us         0.00%      17.995ms      17.995ms           0 b           0 b       2.07 Gb      -2.07 Gb             1  
                                         aten::neg         0.15%     117.979us         0.27%     212.790us     106.395us       8.997ms         7.32%      17.988ms       8.994ms           0 b           0 b       4.13 Gb           0 b             2  
                   torch::autograd::AccumulateGrad         0.14%     108.556us         0.28%     220.850us      55.213us      11.344us         0.01%      27.469us       6.867us           0 b           0 b           0 b           0 b             4  
                                      aten::detach         0.09%      71.968us         0.14%     112.294us      28.073us      12.117us         0.01%      16.125us       4.031us           0 b           0 b           0 b           0 b             4  
                                            detach         0.05%      40.326us         0.05%      40.326us      10.082us       4.008us         0.00%       4.008us       1.002us           0 b           0 b           0 b           0 b             4  
                                     swc2dBackward         0.25%     195.908us         0.82%     634.351us     634.351us       9.906us         0.01%       2.464ms       2.464ms           0 b           0 b       1.24 Gb    -132.50 Kb             1  
                                  aten::zeros_like         0.09%      69.403us         0.40%     310.665us     103.555us      10.000us         0.01%       2.444ms     814.724us           0 b           0 b       1.24 Gb           0 b             3  
                                        aten::ones         0.03%      25.867us         0.11%      87.405us      87.405us       3.367us         0.00%       9.477us       9.477us           0 b           0 b     132.50 Kb           0 b             1  
--------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
Self CPU time total: 77.811ms
CUDA time total: 122.831ms



IndentationError: unindent does not match any outer indentation level (<tokenize>, line 5)

In [None]:
-----------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                               Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-----------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                              swc2d         1.29%     108.750us        72.35%       6.114ms       6.114ms      79.931us         0.00%        4.477s        4.477s           0 b           0 b       2.07 Gb     -10.06 Gb             1  
                         aten::view         0.60%      50.973us         0.60%      50.973us      12.743us       0.000us         0.00%       0.000us       0.000us           0 b           0 b           0 b           0 b             4  
                        aten::empty        34.44%       2.911ms        34.44%       2.911ms     223.887us       0.000us         0.00%       0.000us       0.000us           0 b           0 b      14.97 Gb      14.97 Gb            13  
                         aten::rand         0.11%       9.586us         0.54%      45.252us      45.252us      13.824us         0.00%       6.928ms       6.928ms           0 b           0 b       3.35 Gb           0 b             1  
                     aten::uniform_         0.35%      29.214us         0.35%      29.214us      29.214us       6.914ms         0.15%       6.914ms       6.914ms           0 b           0 b           0 b           0 b             1  
                          aten::eye         0.38%      32.294us        71.68%       6.058ms       3.029ms       7.393us         0.00%      11.999ms       6.000ms           0 b           0 b       6.71 Gb           0 b             2  
                      aten::resize_        35.23%       2.977ms        35.23%       2.977ms     744.353us       0.000us         0.00%       0.000us       0.000us           0 b           0 b       5.42 Gb       5.42 Gb             4  
                        aten::zero_         0.89%      75.369us         2.41%     204.009us      51.002us      11.763us         0.00%       8.201ms       2.050ms           0 b           0 b           0 b           0 b             4  
                        aten::fill_         2.49%     210.491us         2.49%     210.491us      30.070us       8.208ms         0.18%       8.208ms       1.173ms           0 b           0 b           0 b           0 b             7  
                       aten::stride         0.02%       1.881us         0.02%       1.881us       0.376us       0.000us         0.00%       0.000us       0.000us           0 b           0 b           0 b           0 b             5  
                   aten::as_strided         0.05%       4.637us         0.05%       4.637us       2.319us       0.000us         0.00%       0.000us       0.000us           0 b           0 b           0 b           0 b             2  
                           aten::mm         1.79%     150.980us        34.44%       2.910ms       2.910ms        4.464s        98.79%        4.464s        4.464s           0 b           0 b       3.35 Gb           0 b             1  
                          aten::sub         0.46%      38.492us         0.55%      46.634us      46.634us      11.613ms         0.26%      11.613ms      11.613ms           0 b           0 b       2.07 Gb           0 b             1  
               aten::frobenius_norm         0.21%      17.544us         1.60%     135.011us     135.011us       5.500us         0.00%       3.684ms       3.684ms           0 b           0 b       1.00 Kb           0 b             1  
                         aten::norm         0.90%      75.887us         0.98%      82.475us      82.475us       3.672ms         0.08%       3.672ms       3.672ms           0 b           0 b         512 b           0 b             1  
                        aten::copy_         0.32%      26.860us         0.32%      26.860us      26.860us       6.500us         0.00%       6.500us       6.500us           0 b           0 b           0 b           0 b             1  
                    aten::ones_like         0.17%      14.191us         0.45%      38.136us      38.136us       5.500us         0.00%       8.000us       8.000us           0 b           0 b         512 b           0 b             1  
                   aten::empty_like         0.41%      34.608us         1.49%     125.759us      31.440us       0.000us         0.00%       0.000us       0.000us           0 b           0 b       1.24 Gb           0 b             4  
                aten::empty_strided         1.08%      91.151us         1.08%      91.151us      22.788us       0.000us         0.00%       0.000us       0.000us           0 b           0 b       1.24 Gb       1.24 Gb             4  
         torch::autograd::GraphRoot         0.04%       3.497us         0.04%       3.497us       3.497us       1.000us         0.00%       1.000us       1.000us           0 b           0 b           0 b           0 b             1  
     torch::autograd::CopyBackwards         1.02%      86.409us         1.32%     111.341us     111.341us       1.500us         0.00%       3.500us       3.500us           0 b           0 b           0 b           0 b             1  
                           aten::to         0.30%      24.932us         0.30%      24.932us      24.932us       2.000us         0.00%       2.000us       2.000us           0 b           0 b           0 b           0 b             1  
                      NormBackward1         2.04%     172.593us         8.68%     733.331us     733.331us       8.000us         0.00%       8.044ms       8.044ms           0 b           0 b       2.07 Gb      -1.00 Kb             1  
                          aten::div         1.34%     113.339us         1.73%     146.214us     146.214us       8.000us         0.00%       8.000us       8.000us           0 b           0 b         512 b           0 b             1  
                           aten::eq         1.83%     154.649us         3.18%     268.835us     134.418us      14.000us         0.00%      19.000us       9.500us           0 b           0 b       1.00 Kb           0 b             2  
                 aten::masked_fill_         1.17%      99.092us         1.17%      99.092us      99.092us       6.000us         0.00%       6.000us       6.000us           0 b           0 b           0 b           0 b             1  
                          aten::mul         2.57%     216.812us         3.14%     265.106us     132.553us      15.995ms         0.35%      15.995ms       7.997ms           0 b           0 b       4.13 Gb           0 b             2  
                       SubBackward0         0.64%      54.098us         4.16%     351.768us     351.768us       6.000us         0.00%      15.970ms      15.970ms           0 b           0 b       2.07 Gb      -2.07 Gb             1  
                          aten::neg         1.60%     135.001us         2.80%     236.451us     118.226us       7.978ms         0.18%      15.951ms       7.976ms           0 b           0 b       4.13 Gb           0 b             2  
    torch::autograd::AccumulateGrad         1.30%     110.242us         2.57%     217.009us      54.252us      10.500us         0.00%      26.000us       6.500us           0 b           0 b           0 b           0 b             4  
                       aten::detach         0.79%      67.093us         1.26%     106.767us      26.692us      10.500us         0.00%      15.500us       3.875us           0 b           0 b           0 b           0 b             4  
                             detach         0.47%      39.674us         0.47%      39.674us       9.918us       5.000us         0.00%       5.000us       1.250us           0 b           0 b           0 b           0 b             4  
                      swc2dBackward         2.45%     206.893us         8.28%     700.168us     700.168us       7.000us         0.00%       2.247ms       2.247ms           0 b           0 b       1.24 Gb    -132.50 Kb             1  
                   aten::zeros_like         0.87%      73.824us         4.01%     339.139us     113.046us      11.500us         0.00%       2.231ms     743.500us           0 b           0 b       1.24 Gb           0 b             3  
                         aten::ones         0.38%      32.183us         1.31%     110.534us     110.534us       3.500us         0.00%       9.000us       9.000us           0 b           0 b     132.50 Kb           0 b             1  
-----------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
Self CPU time total: 8.451ms
CUDA time total: 4.518s