In [1]:
%load_ext autoreload
%env CUDA_VISIBLE_DEVICES = 2

env: CUDA_VISIBLE_DEVICES=2


In [2]:
import contextlib

import numpy as np
import pandas as pd

import torch
import torch.nn as nn

from ptflops import get_model_complexity_info

%autoreload 2
pd.set_option("display.precision", 2)

In [3]:
device = torch.device('cuda')
device

device(type='cuda')

In [4]:
def get_memory(reset_memory=True, device=None):
    """Take current max allocated memory, either with or without resetting"""
    if reset_memory:
        torch.cuda.reset_peak_memory_stats()
            
    max_memory = torch.cuda.max_memory_allocated(device)
        
    return max_memory
    
    
def make_initialization_inputs(inputs, device=None):
    """ Take either tensor, shape tuple or list of them, and always return tensor or list of them. """
    if isinstance(inputs, torch.Tensor):
        pass
    elif isinstance(inputs, tuple):
        inputs = torch.rand(*inputs, device=device)
    elif isinstance(inputs, list):
        inputs = [make_initialization_inputs(item, device=device) for item in inputs]
    return inputs
    
    
def tracker(module, inputs, repeats=300, warmup=40, device=None, track_backward=True, channels_last=False, amp=False) -> dict:
    """Track module #macs, #parameters, time and memory consumption on forward and backward pass for a given inputs tensor or inputs shape"""
    MB_CONSTANT = 2 ** 20
    
    memory_format = torch.channels_last if channels_last else torch.contiguous_format
    result = {}
    
    torch.cuda.empty_cache()
    
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    
    inputs = make_initialization_inputs(inputs=inputs, device=device)
    if channels_last:
        inputs.to(memory_format=memory_format)
    module.to(device, memory_format=memory_format)
        
        
    # calculate macs and parameters number
    macs, params = get_model_complexity_info(module, tuple(inputs.shape[1:]), as_strings=False, print_per_layer_stat=False)
    result['macs'] = macs
    result['parameters'] = float(params)
    
    
    # calculate memory for forward operation
    forward_start_memory = get_memory(device=device)
    outputs = module(inputs)
    forward_end_memory = get_memory(reset_memory=False, device=device) - forward_start_memory
    result['forward memory(MB)'] = forward_end_memory / MB_CONSTANT
    
    
    # calculate memory for backward operation if needed
    if track_backward:
        backward_start_memory = get_memory(device=device)
        outputs.backward(outputs)
        backward_end_memory = get_memory(reset_memory=False, device=device) - backward_start_memory
        backward_timings = []
        result['backward memory(MB)'] = backward_end_memory / MB_CONSTANT

    forward_timings = []
    
    for i in range(repeats):
        
        with (torch.cuda.amp.autocast() if amp else contextlib.nullcontext()):
            while i < warmup:
                outputs = module(inputs)
                del outputs
                i += 1
                continue

            start.record()
            outputs = module(inputs)
            end.record()

            torch.cuda.synchronize()

            forward_timings.append(start.elapsed_time(end)) 
        
        if track_backward:
            start.record()
            outputs.backward(outputs)
            end.record()

            torch.cuda.synchronize()
            
            backward_timings.append(start.elapsed_time(end))
            del outputs
   
    result['forward time mean(ms)'] = np.mean(forward_timings)
    result['forward time std(ms)'] = np.std(forward_timings)
   
    if track_backward:
        result['backward time mean(ms)'] = np.mean(backward_timings)
        result['backward time std(ms)'] = np.std(backward_timings)
    
    return result

In [5]:
shape = (1, 64, 128, 128)

module_collection = {'conv_64_512_1x1': nn.Conv2d(kernel_size=1, in_channels=64, out_channels=512), 
                     'conv_64_512_3x3': nn.Conv2d(kernel_size=3, in_channels=64, out_channels=512), 
                     'bottleneck_64_512_3x3': nn.Sequential(*[nn.Conv2d(kernel_size=1, in_channels=64, out_channels=32), 
                                                             nn.Conv2d(kernel_size=3, in_channels=32, out_channels=512),
                                                              ]),
                      'conv_64_512_3x3_g2': nn.Conv2d(kernel_size=7, in_channels=64, out_channels=512, groups=2),
                      'conv_64_512_3x3_g8': nn.Conv2d(kernel_size=7, in_channels=64, out_channels=512, groups=8),
                      'conv_padding': nn.Conv2d(kernel_size=3, in_channels=64, out_channels=512, padding=1),
                      'conv_nn_Padding': nn.Sequential(nn.ZeroPad2d(1),
                                                       nn.Conv2d(kernel_size=3, in_channels=64, out_channels=512))
                    }

In [6]:
# contiguous
module_collection_stats = pd.DataFrame(index=module_collection.keys(), 
                                               columns=['forward time mean(ms)', 'forward time std(ms)',
                                                        'backward time mean(ms)', 'backward time std(ms)',
                                                        'forward memory(MB)','backward memory(MB)',
                                                        'macs', 'parameters'])
        
for module_name, module_value in module_collection.items():
    module_collection_stats.loc[module_name] = tracker(module_value, inputs=shape, device=device, track_backward=True)

module_collection_stats

Unnamed: 0,forward time mean(ms),forward time std(ms),backward time mean(ms),backward time std(ms),forward memory(MB),backward memory(MB),macs,parameters
conv_64_512_1x1,0.4,0.22,0.42,0.13,32.22,0.13,545259520.0,33280.0
conv_64_512_3x3,0.82,0.04,1.31,0.1,35.13,1.13,4690151424.0,295424.0
bottleneck_64_512_3x3,0.77,0.18,1.52,0.13,35.57,2.56,2383218688.0,150048.0
conv_64_512_3x3_g2,1.99,0.03,2.73,0.08,33.23,3.06,11956733952.0,803328.0
conv_64_512_3x3_g8,0.67,0.08,1.51,0.11,31.46,0.77,2994898944.0,201216.0
conv_padding,0.75,0.03,1.02,0.21,35.13,87.13,4840226816.0,295424.0
conv_nn_Padding,0.82,0.13,1.35,0.08,39.38,1.13,4840226816.0,295424.0


In [7]:
# channels last
module_collection_stats = pd.DataFrame(index=module_collection.keys(), 
                                               columns=['forward time mean(ms)', 'forward time std(ms)',
                                                        'backward time mean(ms)', 'backward time std(ms)',
                                                        'forward memory(MB)','backward memory(MB)',
                                                        'macs', 'parameters'])
        
for module_name, module_value in module_collection.items():
    module_collection_stats.loc[module_name] = tracker(module_value, inputs=shape, device=device, track_backward=True, channels_last=True)

module_collection_stats

Unnamed: 0,forward time mean(ms),forward time std(ms),backward time mean(ms),backward time std(ms),forward memory(MB),backward memory(MB),macs,parameters
conv_64_512_1x1,0.36,0.02,0.3,0.08,36.47,34.0,545259520.0,33280.0
conv_64_512_3x3,0.99,0.04,1.37,0.05,37.22,34.0,4690151424.0,295424.0
bottleneck_64_512_3x3,0.81,0.14,2.45,0.2,38.66,34.0,2383218688.0,150048.0
conv_64_512_3x3_g2,1.9,0.06,13.52,0.21,37.42,39.2,11956733952.0,803328.0
conv_64_512_3x3_g8,1.05,0.04,12.08,0.11,35.64,34.77,2994898944.0,201216.0
conv_padding,0.96,0.02,2.08,0.11,37.41,34.0,4840226816.0,295424.0
conv_nn_Padding,1.03,0.04,1.39,0.04,41.47,34.0,4840226816.0,295424.0


In [8]:
# amp + channels last
module_collection_stats = pd.DataFrame(index=module_collection.keys(), 
                                               columns=['forward time mean(ms)', 'forward time std(ms)',
                                                        'backward time mean(ms)', 'backward time std(ms)',
                                                        'forward memory(MB)','backward memory(MB)',
                                                        'macs', 'parameters'])
        
for module_name, module_value in module_collection.items():
    module_collection_stats.loc[module_name] = tracker(module_value, inputs=shape, device=device, track_backward=True, channels_last=True, amp=True)

module_collection_stats

Unnamed: 0,forward time mean(ms),forward time std(ms),backward time mean(ms),backward time std(ms),forward memory(MB),backward memory(MB),macs,parameters
conv_64_512_1x1,0.36,0.05,0.45,0.21,36.22,34.0,545259520.0,33280.0
conv_64_512_3x3,0.46,0.08,0.57,0.08,37.22,34.0,4690151424.0,295424.0
bottleneck_64_512_3x3,0.7,0.12,1.24,0.24,38.66,34.0,2383218688.0,150048.0
conv_64_512_3x3_g2,10.57,0.05,12.0,0.07,37.23,39.2,11956733952.0,803328.0
conv_64_512_3x3_g8,2.51,0.03,10.0,0.08,35.46,34.77,2994898944.0,201216.0
conv_padding,0.49,0.08,0.66,0.16,37.22,34.0,4840226816.0,295424.0
conv_nn_Padding,0.5,0.08,0.58,0.15,41.47,34.0,4840226816.0,295424.0
