In [1]:
import timeit
import torch
from torch import nn
from separable_conv import SeparableConv2d
from statistics import mean

In [5]:
def benchmark(in_channels, out_channels, kernel_size, groups, H, W, batch_size, conv_type=None):
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    # time variables
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    
    start_forward = torch.cuda.Event(enable_timing=True)
    end_forward = torch.cuda.Event(enable_timing=True)
    
    start_backward = torch.cuda.Event(enable_timing=True)
    end_backward = torch.cuda.Event(enable_timing=True)
    # to start record time
    start.record()
    # choose separable or conv2d
    if conv_type == 'depth':
        conv = SeparableConv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size)
    else:
        conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, groups=groups)
    # create toy image dataset
    xs = torch.rand((batch_size, in_channels, H, W))
    
    conv.to(device)
    
    xs = xs.to(device)
    # to start record forward operation
    start_forward.record()
    torch.cuda.empty_cache()
    xs.requires_grad = True
    
    ys = conv(xs)
    # to end record forward operations
    end_forward.record()
    # calculate memory in MB
    mb = 1024 * 1024
    forward_memory = torch.cuda.memory_allocated(device) / mb
    # to start record backward operations
    start_backward.record()
    torch.cuda.empty_cache()
    ys.backward(torch.ones_like(ys))
    # to end record backward operations
    end_backward.record()
    
    backward_memory = torch.cuda.memory_allocated()/mb
    # to end record whole time
    end.record()
    # wait while finish all operations
    torch.cuda.synchronize()
    # calculate whole, forward and backward execution times
    execution_time = start.elapsed_time(end) / 1000
    execution_time_forward = start_forward.elapsed_time(end_forward) / 1000
    execution_time_backward = start_backward.elapsed_time(end_backward) / 1000
    
    return forward_memory, backward_memory, execution_time, execution_time_forward, execution_time_backward

In [6]:
in_channels = [16, 32]
out_channels = 64
kernel_size = [3, 5]
groups = 4
H = [160, 224]
W = [160, 224]
batch_size = [16, 32]

In [7]:
regular = {'forward':[], 'backward':[], 'time':[], 'time_forward':[], 'time_backward':[]}
grouped = {'forward':[], 'backward':[], 'time':[], 'time_forward':[], 'time_backward':[]}
separabled = {'forward':[], 'backward':[], 'time':[], 'time_forward':[], 'time_backward':[]}

def dict_time_memory(vocabulary, f_memory, b_memory, whole_time, f_time, b_time):
    vocabulary['forward'].append(f_memory)
    vocabulary['backward'].append(b_memory)
    vocabulary['time'].append(whole_time)
    vocabulary['time_forward'].append(f_time)
    vocabulary['time_backward'].append(b_time)
    
    return vocabulary

In [8]:
for i in range(100):
    # regular convolution
    f_memory_reg, b_memory_reg, execution_time_reg, execution_time_f_reg, execution_time_b_reg = benchmark(in_channels=in_channels[0], out_channels=out_channels,
                                                                                                                                   kernel_size=kernel_size[0], groups=1, H=H[0], W=W[0], batch_size=batch_size[0])
    
    dict_regular = dict_time_memory(regular, f_memory_reg, b_memory_reg, execution_time_reg, execution_time_f_reg, execution_time_b_reg)
    # grouped convolution
    f_memory_grouped, b_memory_grouped, execution_time_grouped, execution_time_f_grouped, execution_time_b_grouped = benchmark(in_channels=in_channels[0], out_channels=out_channels,
                                                                                                                                         kernel_size=kernel_size[0], groups=4, H=H[0], W=W[0], batch_size=batch_size[0])
    
    dict_grouped = dict_time_memory(grouped, f_memory_grouped, b_memory_grouped, execution_time_grouped, execution_time_f_grouped, execution_time_b_grouped)
    # separable convolution
    f_memory_sep, b_memory_sep, execution_time_sep, execution_time_f_sep, execution_time_b_sep = benchmark(in_channels=in_channels[0], out_channels=out_channels,
                                                                                                                  kernel_size=kernel_size[0], groups=1, H=H[0], W=W[0],
                                                                                                                  batch_size=batch_size[0],conv_type='depth')
    
    dict_separabled = dict_time_memory(separabled, f_memory_sep, b_memory_sep, execution_time_sep, execution_time_f_sep, execution_time_b_sep)

In [9]:
print('Среднее значение памяти')
print('FORWARD-BACKWARD')
print('Обычная свертка:', mean(dict_regular['forward']), 'mb', ',', mean(dict_regular['backward']), 'mb')
print('Групповая свертка:', mean(dict_grouped['forward']), 'mb', ',',  mean(dict_grouped['backward']), 'mb')
print('Сепарабельная свертка:', mean(dict_separabled['forward']), 'mb', ',', mean(dict_separabled['backward']), 'mb')

Среднее значение памяти
FORWARD-BACKWARD
Обычная свертка: 124.03564453125 mb , 150.0712890625 mb
Групповая свертка: 124.00927734375 mb , 150.0185546875 mb
Сепарабельная свертка: 152.0048828125 mb , 152.009765625 mb


In [10]:
print('Среднее время выполнения')
print('ОБЩЕЕ-FORWARD-BACKWARD')
print('Обычная свертка:', mean(dict_regular['time']), 'sec', ',', mean(dict_regular['time_forward']), 'sec', ',', mean(dict_regular['time_backward']), 'sec')
print('Групповая свертка:', mean(dict_grouped['time']), 'sec', ',', mean(dict_grouped['time_forward']), 'sec', ',', mean(dict_grouped['time_backward']), 'sec')
print('Сепарабельная свертка:', mean(dict_separabled['time']), 'sec', ',', mean(dict_separabled['time_forward']), 'sec', ',', mean(dict_separabled['time_backward']), 'sec')

Среднее время выполнения
ОБЩЕЕ-FORWARD-BACKWARD
Обычная свертка: 0.06699037525177003 sec , 0.012474384155273438 sec , 0.005242756481170654 sec
Групповая свертка: 0.0534506273651123 sec , 0.0012544864010810852 sec , 0.004362536010742188 sec
Сепарабельная свертка: 0.05114467216491699 sec , 0.0008200121593475341 sec , 0.002119809592962265 sec
