In [1]:
%load_ext autoreload
%autoreload 2
%env CUDA_VISIBLE_DEVICES = 3

env: CUDA_VISIBLE_DEVICES=3


In [2]:
from batchflow.models.torch.utils import make_initialization_inputs

In [3]:
import pandas as pd
import torch.nn as nn

from benchmark import get_module_performance

pd.set_option("display.precision", 2)

In [4]:
device = 'cuda:0'
device

'cuda:0'

In [5]:
shape = (1, 64, 128, 128)

module_collection = {'conv_64_512_1x1': nn.Conv2d(kernel_size=1, in_channels=64, out_channels=512), 
                     'conv_64_512_3x3': nn.Conv2d(kernel_size=3, in_channels=64, out_channels=512), 
                     'bottleneck_64_512_3x3': nn.Sequential(*[nn.Conv2d(kernel_size=1, in_channels=64, out_channels=32), 
                                                             nn.Conv2d(kernel_size=3, in_channels=32, out_channels=512),
                                                              ]),
                      'conv_64_512_3x3_g2': nn.Conv2d(kernel_size=7, in_channels=64, out_channels=512, groups=2),
                      'conv_64_512_3x3_g8': nn.Conv2d(kernel_size=7, in_channels=64, out_channels=512, groups=8),
                      'conv_padding': nn.Conv2d(kernel_size=3, in_channels=64, out_channels=512, padding=1),
                      'conv_nn_Padding': nn.Sequential(nn.ZeroPad2d(1),
                                                       nn.Conv2d(kernel_size=3, in_channels=64, out_channels=512))
                    }

In [6]:
# contiguous
module_collection_stats = pd.DataFrame(index=module_collection.keys(), 
                                       columns=['forward time mean, ms', 'forward time std, ms',
                                                'backward time mean, ms', 'backward time std, ms',
                                                'forward memory, KB','backward memory, KB',
                                                'macs', 'parameters', 'time total, ms'])
        
for module_name, module_value in module_collection.items():
    module_collection_stats.loc[module_name] = get_module_performance(module_value, inputs=shape, device=device, 
                                                               track_backward=True, memory_unit='KB')

module_collection_stats

Unnamed: 0,"forward time mean, ms","forward time std, ms","backward time mean, ms","backward time std, ms","forward memory, KB","backward memory, KB",macs,parameters,"time total, ms"
conv_64_512_1x1,0.48,0.04,0.57,0.08,32994.5,128.0,545259520.0,33280.0,1482.58
conv_64_512_3x3,0.86,0.02,1.34,0.02,35972.0,1152.0,4690151424.0,295424.0,792.1
bottleneck_64_512_3x3,0.9,0.03,1.57,0.02,36420.0,2624.0,2383218688.0,150048.0,883.08
conv_64_512_3x3_g2,1.99,0.02,2.75,0.03,34032.5,3136.0,11956733952.0,803328.0,1650.64
conv_64_512_3x3_g8,0.7,0.0,1.54,0.02,32210.0,784.0,2994898944.0,201216.0,799.49
conv_padding,0.8,0.0,1.02,0.02,35972.0,89216.0,4840226816.0,295424.0,657.17
conv_nn_Padding,0.89,0.01,1.36,0.02,40324.0,1152.0,4840226816.0,295424.0,805.87


In [7]:
# channels last
module_collection_stats = pd.DataFrame(index=module_collection.keys(), 
                                       columns=['forward time mean(ms)', 'forward time std(ms)',
                                                'backward time mean(ms)', 'backward time std(ms)',
                                                'forward memory','backward memory',
                                                'macs', 'parameters','time total(ms)'])
        
for module_name, module_value in module_collection.items():
    module_collection_stats.loc[module_name] = get_module_performance(module_value, inputs=shape, device=device,
                                                               track_backward=True, channels_last=True, memory_unit='MB')

module_collection_stats

Unnamed: 0,forward time mean(ms),forward time std(ms),backward time mean(ms),backward time std(ms),forward memory,backward memory,macs,parameters,time total(ms)
conv_64_512_1x1,,,,,,,545259520.0,33280.0,
conv_64_512_3x3,,,,,,,4690151424.0,295424.0,
bottleneck_64_512_3x3,,,,,,,2383218688.0,150048.0,
conv_64_512_3x3_g2,,,,,,,11956733952.0,803328.0,
conv_64_512_3x3_g8,,,,,,,2994898944.0,201216.0,
conv_padding,,,,,,,4840226816.0,295424.0,
conv_nn_Padding,,,,,,,4840226816.0,295424.0,


In [8]:
# amp
module_collection_stats = pd.DataFrame(index=module_collection.keys(), 
                                       columns=['forward time mean(ms)', 'forward time std(ms)',
                                                'backward time mean(ms)', 'backward time std(ms)',
                                                'forward memory','backward memory',
                                                'macs', 'parameters','time total(ms)'])
        
for module_name, module_value in module_collection.items():
    module_collection_stats.loc[module_name] = get_module_performance(module_value, inputs=shape, device=device,
                                                               track_backward=True, amp=True, memory_unit='GB')

module_collection_stats

Unnamed: 0,forward time mean(ms),forward time std(ms),backward time mean(ms),backward time std(ms),forward memory,backward memory,macs,parameters,time total(ms)
conv_64_512_1x1,,,,,,,545259520.0,33280.0,
conv_64_512_3x3,,,,,,,4690151424.0,295424.0,
bottleneck_64_512_3x3,,,,,,,2383218688.0,150048.0,
conv_64_512_3x3_g2,,,,,,,11956733952.0,803328.0,
conv_64_512_3x3_g8,,,,,,,2994898944.0,201216.0,
conv_padding,,,,,,,4840226816.0,295424.0,
conv_nn_Padding,,,,,,,4840226816.0,295424.0,


In [9]:
# amp + channels last
module_collection_stats = pd.DataFrame(index=module_collection.keys(), 
                                       columns=['forward time mean(ms)', 'forward time std(ms)',
                                                'backward time mean(ms)', 'backward time std(ms)',
                                                'forward memory','backward memory',
                                                'macs', 'parameters','time total(ms)'])
        
for module_name, module_value in module_collection.items():
    module_collection_stats.loc[module_name] = get_module_performance(module_value, inputs=shape, device=device,
                                                               track_backward=True, channels_last=True, amp=True, memory_unit='B')

module_collection_stats

Unnamed: 0,forward time mean(ms),forward time std(ms),backward time mean(ms),backward time std(ms),forward memory,backward memory,macs,parameters,time total(ms)
conv_64_512_1x1,,,,,,,545259520.0,33280.0,
conv_64_512_3x3,,,,,,,4690151424.0,295424.0,
bottleneck_64_512_3x3,,,,,,,2383218688.0,150048.0,
conv_64_512_3x3_g2,,,,,,,11956733952.0,803328.0,
conv_64_512_3x3_g8,,,,,,,2994898944.0,201216.0,
conv_padding,,,,,,,4840226816.0,295424.0,
conv_nn_Padding,,,,,,,4840226816.0,295424.0,
