### Overheads in funsor implementation
Converting some Pyro objects to funsor objects, and then reducing the resulting funsors, can incur noticeable time and memory overhead compared with operating on the underlying tensors directly.

In [4]:
import sys
import pathlib

path = pathlib.Path().resolve()
parent = str(path.parent)
grandparent = str(path.parent.parent)
sys.path.append(parent)
sys.path.append(grandparent)
sys.dont_write_bytecode = True

import torch
import funsor
import pyro.distributions as dist
from pyro.contrib.funsor.handlers.primitives import to_funsor

from util import time_it

device = "cuda" if torch.cuda.is_available() else "cpu"
torch.set_default_device(device)


def _sync_device():
    """Block until all queued CUDA work has finished; do nothing on CPU.

    CUDA kernels are launched asynchronously, so without this call a
    wall-clock timer can stop before the GPU has actually done the work.
    """
    if device == "cuda":
        torch.cuda.synchronize()


probs = torch.ones([16, 16, 1, 1, 1, 50, 127, 1, 16]) / 16
D = dist.Categorical(probs=probs)
# Make sure the setup above is not billed to the first measurement.
_sync_device()

# NOTE(review): each timed section synchronizes as its last statement on the
# assumption that time_it stops its clock when the `with` block exits --
# confirm against util.time_it.

# Takes non-negligible time to convert the distribution to a funsor
with time_it() as t:
    X = to_funsor(
        D,
        output=funsor.Real,
        dim_to_name={-8: "prev", -7: "curr", -3: "sequences", -2: "lengths"},
    )(value="a")
    _sync_device()
print(t.time)

# Takes non-negligible time to reduce the funsor
with time_it() as t:
    X.reduce(funsor.ops.logaddexp, "a")
    _sync_device()
print(t.time)

# Same reduce operation on a tensor, takes much less time
with time_it() as t:
    probs.logsumexp(-1, keepdim=True)
    _sync_device()
print(t.time)

0.012957535743713378
0.016504735946655275
0.00217907190322876


In [5]:
%reset -f
import torch
import funsor
import pyro.distributions as dist
from pyro.contrib.funsor.handlers.primitives import to_funsor

# This cell reads CUDA allocator statistics, so it is only meaningful on a GPU.
if torch.cuda.is_available():
    # Set the default device here instead of relying on the value left behind
    # by an earlier cell: `%reset -f` clears the namespace but not torch's
    # process-wide default device, so on a fresh kernel the tensors below
    # would otherwise be created on the CPU and never show up in these stats.
    torch.set_default_device("cuda")

    # Reset the peak counter so the readings below reflect this cell only.
    torch.cuda.reset_peak_memory_stats()
    print(torch.cuda.max_memory_allocated())  # baseline peak right after the reset
    A = dist.Categorical(torch.ones(100, 100, 100))
    print(torch.cuda.max_memory_allocated())  # peak after building the distribution
    T = to_funsor(A, output=funsor.Real, dim_to_name={-1: "a", -2: "b"})  # Additional memory is required to convert a distribution to a funsor
    print(torch.cuda.max_memory_allocated())  # peak after the funsor conversion

    # Drop the references so the GPU memory can be reused by later cells.
    del A, T
else:
    print("CUDA is not available; skipping the GPU memory measurement.")

0
9011200
16002048
