In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Normal

# Define the base distribution (Gaussian noise)
class BaseDistribution:
    def __init__(self):
        self.base_dist = Normal(torch.tensor(0.0), torch.tensor(1.0))
        
    def sample(self, n_samples, dim):
        return self.base_dist.sample((n_samples, dim))
    
    def log_prob(self, samples):
        return self.base_dist.log_prob(samples)

# Define a normalizing flow transformation
class NormalizingFlow(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.input_dim = input_dim
        
        # Define a simple affine coupling layer
        self.fc1 = nn.Linear(input_dim // 2, 128)
        self.fc2 = nn.Linear(128, input_dim // 2)

    def forward(self, z):
        z1, z2 = z.chunk(2, dim=1)
        h = torch.tanh(self.fc1(z1))
        shift = self.fc2(h)
        z2_transformed = z2 + shift
        return torch.cat((z1, z2_transformed), dim=1)

# Define the Flow Model
class FlowModel(nn.Module):
    def __init__(self, input_dim, n_flows):
        super().__init__()
        self.flows = nn.ModuleList([NormalizingFlow(input_dim) for _ in range(n_flows)])
        self.base_dist = BaseDistribution()
        self.input_dim = input_dim
    
    def forward(self, z):
        for flow in self.flows:
            z = flow(z)
        return z
    
    def sample(self, n_samples):
        z = self.base_dist.sample(n_samples, self.input_dim)  # Shape: (n_samples, input_dim)
        return self.forward(z)

    def log_prob(self, x):
        for flow in reversed(self.flows):
            x = flow(x)
        return self.base_dist.log_prob(x)

# Define the function g(y, e)
def g(y, e):
    return 2 * y + torch.sin(e)

# Observed variables
n_samples = 1000
y_observed = torch.randn(n_samples, 1)  # Shape: (1000, 1)
d_observed = g(y_observed, torch.randn_like(y_observed))  # Shape: (1000, 1)

# Normalize the data
y_observed = (y_observed - y_observed.mean()) / y_observed.std()
d_observed = (d_observed - d_observed.mean()) / d_observed.std()

# Initialize the flow model
input_dim = 2  # Assumes inputs [y, e]
n_flows = 5
flow_model = FlowModel(input_dim, n_flows)
flow_model.apply(init_weights)  # Apply custom weight initialization
optimizer = optim.AdamW(flow_model.parameters(), lr=1e-4, weight_decay=1e-5)

# Training loop
epochs = 1000
for epoch in range(epochs):
    # Sample from the flow model
    e_samples = flow_model.sample(n_samples)  # Shape: (1000, 2)
    
    # Split e_samples into y and e components
    e_only = e_samples[:, 1:]  # Shape: (1000, 1), selects the noise
    g_samples = g(y_observed, e_only)  # Shape: (1000, 1), calculate g(y, e)
    
    # Compute the loss
    loss = torch.mean((g_samples - d_observed) ** 2)
    
    # Optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

NameError: name 'init_weights' is not defined