In [None]:
import time

import numpy as np

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn

from models.densenet import densenet88
from models.c3d import C3D
from models.convlstm import ConvLSTM

In [None]:
def measure(model, x, y=None):
    """Time one forward pass of ``model`` on ``x`` and, optionally, one backward pass.

    Args:
        model: Callable invoked as ``model(x)``. ``model.zero_grad()`` is called
            before the backward pass, so it must provide that method when ``y``
            is given.
        x: Input handed to the model unchanged.
        y: Optional gradient passed to ``y_pred.backward(y)``. When ``None``
            only the forward pass is timed.

    Returns:
        Tuple ``(elapsed_fp, elapsed_bp)`` in seconds. ``elapsed_bp`` is ``None``
        when ``y`` is ``None``.
    """
    # CUDA work is queued asynchronously, so the clock is only read after the
    # device has drained its queue. Without a GPU there is nothing to wait for.
    sync = torch.cuda.synchronize if torch.cuda.is_available() else (lambda: None)

    # forward pass
    sync()
    t0 = time.perf_counter()
    y_pred = model(x)
    sync()
    elapsed_fp = time.perf_counter() - t0
    # identity check: `== None` would dispatch to the tensor's __eq__
    if y is None:
        return elapsed_fp, None

    # zero gradients, then synchronize so that any work queued by zero_grad()
    # is not attributed to the backward pass
    model.zero_grad()
    sync()
    t0 = time.perf_counter()
    y_pred.backward(y)
    sync()
    elapsed_bp = time.perf_counter() - t0
    return elapsed_fp, elapsed_bp

def benchmark(model, x, y=None, n_warmup=5, n_runs=10):
    """Collect per-run forward/backward timings for ``model`` on ``x``.

    Args:
        model: Model forwarded to ``measure``.
        x: Input forwarded to ``measure``.
        y: Optional gradient forwarded to ``measure``; when ``None`` only the
            forward pass is timed.
        n_warmup: Number of untimed dry runs executed first (default 5).
        n_runs: Number of timed runs (default 10).

    Returns:
        Tuple ``(t_forward, t_backward)`` of lists with one entry per timed run,
        holding whatever ``measure`` returned for that run. The entries of
        ``t_backward`` are ``None`` when ``y`` is ``None``.
    """
    # dry runs to warm-up; their timings are discarded
    for _ in range(n_warmup):
        measure(model, x, y)

    print('DONE WITH DRY RUNS, NOW BENCHMARKING')

    # start benchmarking
    t_forward = []
    t_backward = []
    for _ in range(n_runs):
        t_fp, t_bp = measure(model, x, y)
        t_forward.append(t_fp)
        t_backward.append(t_bp)

    return t_forward, t_backward

def result(t_list):
    """Print and return the mean and standard deviation of ``t_list``.

    Both statistics are multiplied by 1e3 before being printed with an ``ms``
    label and returned, i.e. the inputs are treated as seconds.

    Args:
        t_list: Sequence of numeric timings.

    Returns:
        Tuple ``(avg, std)`` of the scaled mean and standard deviation.
    """
    timings = np.asarray(t_list)
    avg = timings.mean() * 1e3
    std = timings.std() * 1e3

    print(avg, 'ms', '+/-', std, 'ms')

    return avg, std
    

In [None]:
use_cuda = True
# NOTE(review): `multigpus` is not read by any code visible in this notebook
multigpus = True

# set cudnn backend to benchmark config
cudnn.benchmark = True
# honour `use_cuda`, and fall back to the CPU when no CUDA device is available
# instead of failing later when the models are moved to the device
device = torch.device('cuda' if use_cuda and torch.cuda.is_available() else 'cpu')

In [None]:
# Bind the DenseNet instance to its own name: assigning it to `densenet88`
# would shadow the imported constructor and make this cell fail when re-run.
densenet88_model = densenet88(num_classes=2, sample_size=112, sample_duration=16).eval().to(device)
c3d = C3D(num_classes=2).eval().to(device)
convlstm = ConvLSTM(256, device).eval().to(device)

# (model, per-sample input shape) pairs; their order fixes the column order
# of every row appended to `result_list`
model_inputs = [
    (densenet88_model, (3, 16, 112, 112)),
    (c3d, (3, 16, 112, 112)),
    (convlstm, (3, 2, 224, 224)),
]

# one row per batch size, one averaged forward time (ms) per model
result_list = []

# only forward passes are timed here, so autograd bookkeeping is switched off
with torch.no_grad():
    for batch_size in [1, 8, 16]:
        batch_list = []
        for model, sample_shape in model_inputs:
            # allocate the random input directly on the target device
            x = torch.rand(batch_size, *sample_shape, device=device)
            t_forward, _ = benchmark(model, x)
            avg, _ = result(t_forward)
            batch_list.append(avg)
        result_list.append(batch_list)

In [None]:
import numpy as np

# nested list -> 2-D array: one row per benchmarked batch size,
# one column per benchmarked model
data = np.asarray(result_list)

In [None]:
# bare last expression: the notebook renders the timing array as the cell output
data