In [1]:
import seaborn as sns
import torch
import numpy as np
import torch.nn as nn
from torch.distributions.normal import Normal
from torch.distributions.dirichlet import Dirichlet

In [2]:
def compute_expec_loss_cond_epsilon(Z_i_star, V, sigma):
    """
    Closed-form smoothed loss and its derivative with respect to V.

    With t = -Z_i_star^T V / sigma (one entry per column of V), the loss is
    sum_j sigma * (phi(t_j) + t_j * Phi(t_j)), where phi and Phi are the
    standard normal pdf and cdf. When the columns of V have unit norm (e.g. V
    orthogonal) this is the expectation over epsilon ~ N(0, sigma^2 I) of
    sum(relu((-Z_i_star - epsilon)^T V)).

    Args:
        Z_i_star (torch.Tensor): The Z_{i*} column vector, shape (p, 1).
        V (torch.Tensor): The V matrix, shape (p, p).
        sigma (float): The standard deviation (scalar).

    Returns:
        tuple[torch.Tensor, torch.Tensor]: (loss, deriv). loss is a 0-d tensor;
        deriv = -Z_i_star @ Phi(t) has shape (p, p) and is the derivative of
        loss with respect to V.
    """
    std_normal = Normal(0, 1)

    # Standardized argument, one per column of V; shape (1, p).
    t = -Z_i_star.T @ V / sigma
    pdf_t = torch.exp(std_normal.log_prob(t))  # phi(t), shape (1, p)
    cdf_t = std_normal.cdf(t)                  # Phi(t), shape (1, p)

    # Per-column contribution sigma * (phi(t) + t * Phi(t)).
    per_column = sigma * (pdf_t + t * cdf_t)
    loss = per_column.sum()

    # d/dt [phi(t) + t * Phi(t)] = Phi(t), and dt/dV contributes -Z_i_star / sigma,
    # so the sigma factors cancel and the derivative is an outer product.
    deriv = -Z_i_star @ cdf_t

    return loss, deriv


    



def compute_raw_loss(Z_i_star, V, sigma):
    """
    Evaluate the ReLU loss for one fresh draw of Gaussian noise.

    Args:
        Z_i_star (torch.Tensor): The Z_{i*} column vector, shape (p, 1).
        V (torch.Tensor): The V matrix, shape (p, p).
        sigma (float): Standard deviation of each noise entry (scalar, > 0).

    Returns:
        torch.Tensor: 0-d tensor equal to sum(relu((-Z_i_star - epsilon)^T V)),
        where epsilon has p i.i.d. N(0, sigma^2) entries drawn on every call
        (so repeated calls give different values unless the RNG is re-seeded).
    """
    dim = Z_i_star.shape[0]

    # One noise column of shape (dim, 1), sampled from the global torch RNG.
    epsilon = Normal(0, sigma).sample((dim,)).view(dim, 1)

    # Row vector (1, dim) of pre-activations, one per column of V.
    pre_activation = (-Z_i_star - epsilon).T @ V
    return torch.relu(pre_activation).sum()
    
    
def proj_Op_tangent_space_at_W(M, W):
    """
    Project M onto the tangent space of the orthogonal group O_p at W.

    The result is W @ skew(W^T M), with skew(A) = (A - A^T) / 2, i.e. W times
    the skew-symmetric part of W^T M.

    Parameters:
        M (torch.Tensor or np.ndarray): Matrix to project, shape (p, p).
        W (torch.Tensor or np.ndarray): Point of O_p, shape (p, p).

    Returns:
        torch.Tensor or np.ndarray: Projection of M, shape (p, p).
    """
    # Twice the skew-symmetric part of W^T M; the halving is applied after the
    # final product, as in W @ (...) / 2.
    twice_skew = W.T @ M - M.T @ W
    return W @ twice_skew / 2
    

def sample_Z_i_star(p):
    """
    Samples Z_i_star uniformly from the standard basis of R^p.

    Args:
        p (int): Dimension of the space R^p (must be >= 1; torch.randint
            rejects an empty range).

    Returns:
        torch.Tensor: 1-D tensor of shape (p,) in the default float dtype,
        with a single 1 at a uniformly chosen index and 0 elsewhere.
    """
    # Index of the non-zero coordinate; the upper bound of randint is exclusive.
    idx = torch.randint(0, p, (1,)).item()

    # Build the one-hot vector directly: O(p) memory instead of materializing
    # a (p, p) identity matrix only to read a single row from it.
    Z_i_star = torch.zeros(p)
    Z_i_star[idx] = 1.0

    return Z_i_star



In [3]:
def compute_expec_deriv_discrete_2D(a, b, p, theta, sigma):
    """
    Projected expected derivative for a two-point distribution in R^2.

    Z takes the value z1 = (a, b) with weight p and z2 = (b, a) with weight
    1 - p. V is the 2x2 matrix [[cos(theta), sin(theta)],
    [-sin(theta), cos(theta)]]. The Euclidean term
        z1 @ Phi(-z1^T V / sigma) * p + z2 @ Phi(-z2^T V / sigma) * (1 - p)
    (Phi = standard normal cdf) is projected with proj_Op_tangent_space_at_W.

    Args:
        a (float): First coordinate of z1 (second coordinate of z2).
        b (float): Second coordinate of z1 (first coordinate of z2).
        p (float): Mixture weight on z1 (a probability, not a dimension).
        theta (float): Angle in radians defining V.
        sigma (float): Scalar divisor inside the cdf (noise standard deviation).

    Returns:
        torch.Tensor: The projected derivative, shape (2, 2), dtype float32.
    """
    normal = Normal(0, 1)

    # Assemble V with torch.stack from 0-d tensors rather than calling
    # torch.tensor on a nested list of one-element tensors, which relies on
    # implicit tensor-to-scalar conversion.
    angle = torch.tensor(float(theta), dtype=torch.float32)
    cos_theta = torch.cos(angle)
    sin_theta = torch.sin(angle)
    V = torch.stack([
        torch.stack([cos_theta, sin_theta]),
        torch.stack([-sin_theta, cos_theta]),
    ])

    # Explicit dtype (instead of the legacy torch.Tensor constructor) keeps z1/z2
    # compatible with V even if the global default dtype has been changed.
    z1 = torch.tensor([a, b], dtype=V.dtype).view(2, 1)
    z2 = torch.tensor([b, a], dtype=V.dtype).view(2, 1)

    term_1 = normal.cdf(-z1.T @ V / sigma)  # shape (1, 2)
    term_2 = normal.cdf(-z2.T @ V / sigma)  # shape (1, 2)

    # NOTE(review): compute_expec_loss_cond_epsilon in this notebook returns
    # -Z @ Phi(...) as the derivative, whereas this expression has no leading
    # minus sign. Norms of the result are unaffected; confirm the intended sign
    # before using the output as a gradient direction.
    # The tensor stays on the left of `* p` so a NumPy scalar p is handled by torch.
    eu_deriv = z1 @ term_1 * p + z2 @ term_2 * (1 - p)

    re_deriv = proj_Op_tangent_space_at_W(eu_deriv, V)

    return re_deriv

In [8]:
import torch
import numpy as np
import plotly.graph_objects as go
from torch.distributions.normal import Normal

# Assuming proj_Op_tangent_space_at_W is already defined

def compute_expec_deriv_discrete_2D(a, b, p, theta, sigma):
    """
    Projected expected derivative for a two-point distribution in R^2.

    NOTE(review): a function with the same name and behavior is also defined in
    an earlier cell of this notebook; this definition shadows it. Consider
    keeping a single definition.

    Z takes the value z1 = (a, b) with weight p and z2 = (b, a) with weight
    1 - p. V is the 2x2 matrix [[cos(theta), sin(theta)],
    [-sin(theta), cos(theta)]]. The Euclidean term
        z1 @ Phi(-z1^T V / sigma) * p + z2 @ Phi(-z2^T V / sigma) * (1 - p)
    (Phi = standard normal cdf) is projected with proj_Op_tangent_space_at_W.

    Args:
        a (float): First coordinate of z1 (second coordinate of z2).
        b (float): Second coordinate of z1 (first coordinate of z2).
        p (float): Mixture weight on z1 (a probability, not a dimension).
        theta (float): Angle in radians defining V.
        sigma (float): Scalar divisor inside the cdf (noise standard deviation).

    Returns:
        torch.Tensor: The projected derivative, shape (2, 2), dtype float32.
    """
    normal = Normal(0, 1)

    # Assemble V with torch.stack from 0-d tensors rather than calling
    # torch.tensor on a nested list of one-element tensors, which relies on
    # implicit tensor-to-scalar conversion.
    angle = torch.tensor(float(theta), dtype=torch.float32)
    cos_theta = torch.cos(angle)
    sin_theta = torch.sin(angle)
    V = torch.stack([
        torch.stack([cos_theta, sin_theta]),
        torch.stack([-sin_theta, cos_theta]),
    ])

    # Explicit dtype (instead of the legacy torch.Tensor constructor) keeps z1/z2
    # compatible with V even if the global default dtype has been changed.
    z1 = torch.tensor([a, b], dtype=V.dtype).view(2, 1)
    z2 = torch.tensor([b, a], dtype=V.dtype).view(2, 1)

    term_1 = normal.cdf(-z1.T @ V / sigma)  # shape (1, 2)
    term_2 = normal.cdf(-z2.T @ V / sigma)  # shape (1, 2)

    # NOTE(review): compute_expec_loss_cond_epsilon in this notebook returns
    # -Z @ Phi(...) as the derivative, whereas this expression has no leading
    # minus sign. Norms of the result are unaffected; confirm the intended sign
    # before using the output as a gradient direction.
    # The tensor stays on the left of `* p` so a NumPy scalar p is handled by torch.
    eu_deriv = z1 @ term_1 * p + z2 @ term_2 * (1 - p)
    re_deriv = proj_Op_tangent_space_at_W(eu_deriv, V)

    return re_deriv

# Parameters
# NOTE(review): with a == b the two support points z1 = (a, b) and z2 = (b, a)
# coincide, so the mixture weight p cancels out and the surface below is
# constant along the p axis. Choose a != b to see a dependence on p.
a = 0.5
b = 0.5
sigma = 0.2

# Grid for p (x-axis) and theta (y-axis)
p_values = np.linspace(0, 1, 100)  # 100 points between 0 and 1
theta_values = np.linspace(0, 2 * np.pi, 100)  # 100 points from 0 to 2*pi

# Create a meshgrid: P[i, j] = p_values[j], Theta[i, j] = theta_values[i]
P, Theta = np.meshgrid(p_values, theta_values)

# Compute Z (z-axis values): Frobenius norm of the projected derivative at each grid point.
# Grid values are passed straight to the function so the loop does not rebind
# the notebook-level names `p` and `theta` (`p` is used as a dimension in other cells).
Z = np.zeros_like(P)
for i in range(P.shape[0]):
    for j in range(P.shape[1]):
        result = compute_expec_deriv_discrete_2D(a, b, P[i, j], Theta[i, j], sigma)
        Z[i, j] = result.norm().item()  # Use the norm of the resulting tensor for scalar plotting

# Create the Plotly figure directly from the 2D grids
fig = go.Figure(data=[go.Surface(z=Z, x=P, y=Theta, colorscale='Viridis')])

# Update layout for axis labels and title
fig.update_layout(
    title='3D Plot of compute_expec_deriv_discrete_2D',
    scene=dict(
        xaxis_title='p (x-axis)',
        yaxis_title='theta (y-axis)',
        zaxis_title='Result norm (z-axis)',
    ),
)

# Show the plot
fig.show()


In [3]:
# Example usage
p = 2
Z_i_star = sample_Z_i_star(p).view(p, 1)  # Example Z_{i*} vector (p=10)
# Z_i_star = torch.randn(p).view(p, 1)  # Example Z_{i*} vector (p=10)
V = torch.eye(p)        # Example V vector (p=10)
sigma = 1.0                 # Example scalar sigma

compute_raw_loss(Z_i_star, V, sigma),compute_expec_loss_cond_epsilon(Z_i_star, V, sigma)[0]

(tensor(0.0276), tensor(0.4823))

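Next, we verify empirically that the closed-form expectation and the analytic derivative are correct: the expectation is compared with a Monte Carlo average of the raw loss, and the derivative is compared with the gradient computed by autograd.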

In [19]:
# Fixed seed so the Monte Carlo gap printed below is reproducible on re-run.
torch.manual_seed(0)

n, p = 10000, 2
sigma = 1.0 
V = torch.eye(p) 


raw_loss = []
conditional_epsilon = []

for i in range(n):
    Z_i_star = sample_Z_i_star(p).view(p, 1)
    # Z_i_star = dir_distr.sample((1,)).view(p, 1)  # Alternative: Dirichlet Z_{i*} (needs dir_distr to be defined)
    conditional_epsilon.append(compute_expec_loss_cond_epsilon(Z_i_star, V, sigma)[0])
    raw_loss.append(compute_raw_loss(Z_i_star, V, sigma))
    
# Difference in mean. torch.stack joins the 0-d loss tensors directly instead of
# round-tripping each one through a Python scalar via torch.tensor(list).
mean_gap = torch.stack(conditional_epsilon).mean() - torch.stack(raw_loss).mean()
print(f"the difference between the mean raw loss and the mean loss conditional on epsilon is {mean_gap}" )

# Variance of the two lists
# print(torch.var(torch.stack(conditional_epsilon)), torch.var(torch.stack(raw_loss)))

# Derivative check against autograd. This deliberately reuses the Z_i_star left
# over from the last loop iteration; V is rebuilt so it can track gradients.
V = torch.eye(p, requires_grad=True)
loss, analytic_deriv = compute_expec_loss_cond_epsilon(Z_i_star, V, sigma)
loss.backward()
# detach(): the analytic derivative is only compared numerically, not differentiated.
print(f"the difference between computed derivative and autograd is {torch.norm(V.grad - analytic_deriv.detach())}")

the difference between the mean raw loss and the mean loss conditional on epsilon is -0.004753679037094116
the difference between computed derivative and autograd is 1.4901161193847656e-08


In [41]:
# Fixed seed so the Dirichlet samples (and the printed norm) are reproducible on re-run.
torch.manual_seed(0)

n, p = 10000, 10
sigma = 0.1
V = torch.eye(p) 
dir_distr = Dirichlet(torch.ones(p)*5)  # symmetric Dirichlet, concentration 5 per coordinate


# Running sum of the (p, p) analytic derivatives over the n samples.
derivative_across_n = torch.zeros(p, p)

for i in range(n):
    # Z_i_star = sample_Z_i_star(p).view(p, 1)
    Z_i_star = dir_distr.sample((1,)).view(p, 1)  # Dirichlet-distributed Z_{i*} column (p=10)
    derivative_across_n = derivative_across_n + compute_expec_loss_cond_epsilon(Z_i_star, V, sigma)[1]
    
avg_deriv = derivative_across_n/n

# Project once and reuse the result (it was previously computed a second time for the print).
proj_avg_deriv = proj_Op_tangent_space_at_W(avg_deriv, V)

print(torch.norm(proj_avg_deriv, p = "fro"))

tensor(0.0011)
