In [1]:
import torch

# Parameters of the 1-D affine map x = a * z + b used by the functions in this cell.
a = torch.tensor(0.7)  # scale; inverse() divides by it and log_prob_x() takes log|a|
b = torch.tensor(2.0)  # shift

# Base density over the latent z: Normal with mean 0.0 and std 1.0.
base = torch.distributions.Normal(0.0, 1.0)

def forward(z):
    """Map a latent value to data space with the affine transform x = a * z + b.

    Reads the module-level scale ``a`` and shift ``b``.
    """
    scaled = a * z
    return scaled + b

def inverse(x):
    """Map a data value back to latent space: z = f^{-1}(x) = (x - b) / a.

    Reads the module-level scale ``a`` and shift ``b``.
    """
    shifted = x - b
    return shifted / a

def log_prob_x(x):
    """Log-density of ``x`` under the flow, via the change-of-variables formula.

    log p_x(x) = log p_z(f^{-1}(x)) - log|a|, where ``base`` supplies p_z and
    log|a| is the log absolute derivative of the affine map.
    """
    log_abs_scale = torch.log(torch.abs(a))
    latent = inverse(x)
    return base.log_prob(latent) - log_abs_scale

def reconstruct_x(x):
    """Round-trip ``x`` through the flow (x -> z -> x_hat) and return x_hat."""
    return forward(inverse(x))

# Seed the global RNG so the sampled z (and every value printed below) is the
# same on each Restart & Run All.
torch.manual_seed(0)

# Draw latents from the base density and push them through the flow.
z = base.sample((5,))
x = forward(z)

# Invertibility check: x -> z -> x_hat should reproduce x up to float round-off.
x_hat = reconstruct_x(x)

err = (x_hat - x).abs()

print("z:", z)
print("x:", x)
print("x_hat:", x_hat)
print("abs error:", err)
print("max abs error:", err.max().item())

# Log-density of each sample under the flow.
print("log p(x):", log_prob_x(x))


z: tensor([-0.3040,  0.2222,  0.8658,  0.2976, -1.6826])
x: tensor([1.7872, 2.1556, 2.6060, 2.2083, 0.8222])
x_hat: tensor([1.7872, 2.1556, 2.6060, 2.2083, 0.8222])
abs error: tensor([0., 0., 0., 0., 0.])
max abs error: 0.0
log p(x): tensor([-0.6085, -0.5870, -0.9370, -0.6065, -1.9778])


In [None]:
import math
import torch
import torch.nn as nn

class AffineFlow1D(nn.Module):
    """One-dimensional affine flow x = a * z + b over a standard-normal base.

    The scale is stored as ``s`` with ``a = exp(s)``, so ``a`` is always positive
    and ``s`` is exactly the log-derivative term needed by ``log_prob``.
    """

    def __init__(self):
        super().__init__()
        # Both parameters start at 0.0, i.e. a = exp(0) = 1 and b = 0.
        self.s = nn.Parameter(torch.tensor(0.0))
        self.b = nn.Parameter(torch.tensor(0.0))

        # Base density over the latent z.
        self.base = torch.distributions.Normal(loc=0.0, scale=1.0)

    @property
    def a(self):
        """Scale of the affine map, computed as exp(s)."""
        return self.s.exp()

    def sample(self, n: int, device=None):
        """Draw ``n`` samples x = a * z + b with z drawn from the base.

        ``device`` is where z is moved before the transform is applied; when it
        is None the device of ``self.s`` is used.
        """
        target_device = self.s.device if device is None else device
        latent = self.base.sample((n,)).to(target_device)
        return self.a * latent + self.b

    def log_prob(self, x: torch.Tensor):
        """Return log p_x(x) = log p_z((x - b) / a) - s.

        Since a = exp(s), subtracting ``s`` subtracts log(a), the log-derivative
        of the affine map.
        """
        latent = (x - self.b) / self.a
        base_logp = self.base.log_prob(latent)
        return base_logp - self.s

def main():
    """Fit AffineFlow1D to samples from Normal(2.0, 0.7) by minimizing the mean NLL.

    Prints the loss and parameters every 100 steps, then compares the target
    mean/std with the mean/std of samples drawn from the fitted flow.
    """
    torch.manual_seed(0)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Ground-truth Gaussian that generates the training set.
    true_mu, true_sigma = 2.0, 0.7
    num_points = 5000
    observations = torch.distributions.Normal(true_mu, true_sigma).sample((num_points,)).to(device)

    flow = AffineFlow1D().to(device)
    optimizer = torch.optim.Adam(flow.parameters(), lr=5e-2)

    for step in range(1, 501):
        optimizer.zero_grad()
        nll = -flow.log_prob(observations).mean()
        nll.backward()
        optimizer.step()

        # Only report every 100th step.
        if step % 100 != 0:
            continue

        # For x = a * z + b with standard-normal z, b is the mean and a the std of x.
        with torch.no_grad():
            est_mu = flow.b.item()
            est_sigma = flow.a.item()
        print(f"step {step:4d} | NLL {nll.item():.4f} | a={flow.a.item():.4f} b={flow.b.item():.4f} "
              f"| est_mu={est_mu:.4f} est_sigma={est_sigma:.4f}")

    # Empirical check: moments of a large sample drawn from the fitted flow.
    with torch.no_grad():
        draws = flow.sample(200000, device=device)
        learned_mu = draws.mean().item()
        learned_sigma = draws.std(unbiased=False).item()
    print("\nTarget:   mu=%.3f sigma=%.3f" % (true_mu, true_sigma))
    print("Learned:  mu=%.3f sigma=%.3f" % (learned_mu, learned_sigma))
    print("DONE")
        
# Entry point: run the training demo when this code is executed directly
# (not when it is imported as a module).
if __name__ == "__main__":
    main()


step  100 | NLL 1.0543 | a=0.7008 b=1.9704 | est_mu=1.9704 est_sigma=0.7008
step  200 | NLL 1.0536 | a=0.6940 b=1.9948 | est_mu=1.9948 est_sigma=0.6940
step  300 | NLL 1.0536 | a=0.6940 b=1.9947 | est_mu=1.9947 est_sigma=0.6940
step  400 | NLL 1.0536 | a=0.6940 b=1.9947 | est_mu=1.9947 est_sigma=0.6940
step  500 | NLL 1.0536 | a=0.6940 b=1.9947 | est_mu=1.9947 est_sigma=0.6940

Target:   mu=2.000 sigma=0.700
Learned:  mu=1.994 sigma=0.695
DONE


In [None]:
import math
import torch
import torch.nn as nn

class PoissonLatentAffine(nn.Module):
    """Poisson model whose log-rate is an affine function of a standard-normal latent.

    Generative process: z ~ Normal(0, 1), log_rate = a * z + b,
    x ~ Poisson(exp(log_rate)). ``log_prob`` estimates the marginal log p(x)
    by Monte Carlo over z.
    """

    def __init__(self):
        super().__init__()
        self.a = nn.Parameter(torch.tensor(0.0))  # slope of the log-rate in z
        self.b = nn.Parameter(torch.tensor(0.0))  # log-rate at z = 0
        self.base = torch.distributions.Normal(0.0, 1.0)

    def log_prob(self, x, mc_samples: int = 32):
        """Monte Carlo estimate of log p(x) for every element of ``x``.

        For each element, ``mc_samples`` latents z are drawn from the base and
        the estimate is log(mean_k Poisson(x | exp(a * z_k + b))). Fresh latents
        are drawn on every call, so the result is stochastic; as the log of an
        averaged likelihood it is, in expectation, a lower bound on log p(x).

        Args:
            x: tensor of counts of any shape (scalar, 1-D, or higher); it is
                moved to the device of the parameters.
            mc_samples: number of latent draws per element; must be >= 1.

        Returns:
            Tensor with the same shape as ``x`` holding the estimated log p(x).

        Raises:
            ValueError: if ``mc_samples`` is smaller than 1.
        """
        if mc_samples < 1:
            raise ValueError(f"mc_samples must be a positive integer, got {mc_samples}")

        x = x.to(self.a.device)
        # Draw latents for the full shape of x (not only its first dimension) so
        # scalar and multi-dimensional inputs work; for 1-D x this is the same
        # (mc_samples, len(x)) draw as before.
        z = self.base.sample((mc_samples, *x.shape)).to(self.a.device)

        log_rate = self.a * z + self.b
        rate = torch.exp(log_rate)

        dist = torch.distributions.Poisson(rate=rate)
        logp = dist.log_prob(x.unsqueeze(0).expand_as(rate))

        # log-mean-exp over the Monte Carlo axis, evaluated stably in log space.
        return torch.logsumexp(logp, dim=0) - math.log(mc_samples)

    def sample(self, n: int):
        """Draw ``n`` counts: z ~ base, then x ~ Poisson(exp(a * z + b))."""
        z = self.base.sample((n,)).to(self.a.device)
        rate = torch.exp(self.a * z + self.b)
        return torch.distributions.Poisson(rate=rate).sample()

def main():
    """Fit PoissonLatentAffine to samples from Poisson(3.0) by minimizing the mean NLL.

    The NLL uses the model's Monte Carlo ``log_prob`` with 32 latent draws per
    data point. Prints the loss and parameters every 100 steps, then compares
    the target rate with the mean of samples drawn from the fitted model.
    """
    torch.manual_seed(0)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Training counts come from a plain Poisson with a fixed rate.
    true_rate = 3.0
    rate_tensor = torch.tensor(true_rate, device=device)
    counts = torch.distributions.Poisson(rate=rate_tensor).sample((5000,))

    model = PoissonLatentAffine().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=5e-2)

    for step in range(1, 501):
        optimizer.zero_grad()
        nll = -model.log_prob(counts, mc_samples=32).mean()
        nll.backward()
        optimizer.step()

        # Only report every 100th step.
        if step % 100 != 0:
            continue
        print(f"step {step:4d} | NLL {nll.item():.4f} | a={model.a.item():.4f} b={model.b.item():.4f}")

    # Empirical check: mean of a large sample drawn from the fitted model.
    with torch.no_grad():
        draws = model.sample(200000).to(device)
        model_mean = draws.float().mean().item()
    print("\nTarget rate:", true_rate)
    print("Sample mean (model):", model_mean)
    print("DONE")

# Entry point: run the training demo when this code is executed directly
# (not when it is imported as a module).
if __name__ == "__main__":
    main()


step  100 | NLL 1.9235 | a=0.0407 b=1.0901
step  200 | NLL 1.9234 | a=0.0091 b=1.0899
step  300 | NLL 1.9235 | a=-0.0017 b=1.0899
step  400 | NLL 1.9235 | a=-0.0114 b=1.0899
step  500 | NLL 1.9234 | a=0.0098 b=1.0899

Target rate: 3.0
Sample mean (model): 2.9680349826812744
DONE
