In [1]:
import torch
import time
from typing import Callable


In [2]:
def vanilla(x: torch.Tensor, q: torch.Tensor = None):
    copy = x.detach().clone().abs()
    max = torch.max(copy)
    mean = torch.mean(copy)
    std = torch.std(copy)
    return max > mean + 1_000 * std

def boost_max(x: torch.Tensor, q: torch.Tensor = None):
    copy = x.detach().clone().abs()
    copy = copy.view(512, -1).amax(dim=(1,))
    q1, q3 = torch.quantile(copy, q)
    return copy.max() > q3 + 1_000 * (q3 - q1)

def boost_sum(x: torch.Tensor, q: torch.Tensor = None):
    copy = x.detach().clone()
    copy = copy.view(512, -1).sum(dim=(1,)).abs()
    q1, q3 = torch.quantile(copy, q)
    return copy.max() > q3 + 1_000 * (q3 - q1)

In [3]:
def test(x: torch.Tensor, f: Callable[[torch.Tensor], bool], iters: int, q: torch.Tensor = None):
    start_time = time.time()
    for _ in range(iters):
        f(x, q)
    return time.time() - start_time

In [4]:
# [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288]

x = torch.normal(mean=0, std=1e-9, size=(32768, 16384)) * 1e-8
x[0][0] = 1e35
q = torch.tensor([0.25, 0.75])
x = x.cuda()
q = q.cuda()

result_boost_sum = test(x, boost_sum, 100, q)
result_boost_max = test(x, boost_max, 100, q)
result_vanilla = test(x, vanilla, 100, q)

print('time for vanilla: {}'.format(result_vanilla))
print('time for boost_max: {}'.format(result_boost_max))
print('time for boost_sum: {}'.format(result_boost_sum))

print('optimization for boost_max: {}%'.format((result_boost_max / result_vanilla - 1) * 100))
print('optimization for boost_sum: {}%'.format((result_boost_sum / result_vanilla - 1) * 100))

time for vanilla: 1.573725700378418
time for boost_max: 2.236877679824829
time for boost_sum: 1.482189416885376
optimization for boost_max: 42.138981354053605%
optimization for boost_sum: -5.816533559249315%


# Results

In [5]:
# CPU:
# time for vanilla: 69.03535866737366
# time for boost_max: 49.39474010467529
# time for boost_sum: 27.623595237731934
# optimization for boost_max: 39.76257091560109%
# optimization for boost_sum: 149.9144592629858%

# GPU:
# time for vanilla: 1.5768744945526123
# time for boost_max: 2.2301137447357178
# time for boost_sum: 1.5734341144561768
# optimization for boost_max: 41.426204332668924%
# optimization for boost_sum: -0.21817716681451493%