In [None]:
import torch
from methods.clustering.faster_mix_k_means_pytorch import pairwise_distance
from project_utils.lorentz import pairwise_dist as lorentz_pairwise_dist
from project_utils.lorentz import pairwise_inner as lorentz_pairwise_inner

In [9]:
embeds = torch.randn(10, 512)
centers = torch.randn(5, 512)
pair_dist = pairwise_distance(embeds, centers)
dist2 = torch.matmul(embeds, centers.t())
hyp_pair_dist = lorentz_pairwise_dist(embeds, centers, 2) 

In [10]:
print(pair_dist.shape)
print(dist2.shape)
print(hyp_pair_dist.shape)


torch.Size([10, 5])
torch.Size([10, 5])
torch.Size([10, 5])


In [11]:
print(pair_dist)
print(dist2)

tensor([[1012.7142, 1023.2885,  989.1441, 1057.6973,  995.8853],
        [1068.1846,  971.4751, 1031.1577, 1067.6117, 1085.8468],
        [ 992.2310,  993.9666, 1010.7922,  958.0060,  971.3592],
        [1059.5291,  995.1831,  894.7130, 1066.6942, 1057.3076],
        [1044.4165,  980.5476,  969.7465, 1113.5642, 1017.2137],
        [1189.9812,  985.1036, 1032.6238, 1051.2634, 1079.8756],
        [1015.4780,  873.4585, 1013.3519, 1067.2152,  979.9601],
        [1213.7441, 1045.0063, 1086.8579, 1049.1482, 1095.7402],
        [1123.5165, 1035.3019, 1034.4357, 1037.2007, 1046.1222],
        [1134.2799, 1051.1781, 1027.2856, 1138.2362, 1128.4786]])
tensor([[ 29.3874, -25.6107, -14.6008, -13.9754,   8.1716],
        [ 19.3349,  17.9787, -17.9249,  -1.2499, -19.1265],
        [ 33.2585, -17.3201, -31.7953,  29.4998,  14.0641],
        [  6.3797, -11.1581,  33.0146, -18.0740, -22.1398],
        [ 11.1189,  -6.6574,  -7.3192, -44.3261,  -4.9099],
        [-35.1894,  17.5385, -12.2839,  13.2982, 

In [38]:
embed = torch.arange(0, 7).float().view(-1, 1)
center = torch.arange(-3, 4).float().view(-1, 1)
torch.sqrt(pairwise_distance(embed, center))
lorentz_pairwise_dist(embed, center, 11)

tensor([[0.9025, 0.7812, 0.5771, 0.0000, 0.5771, 0.7812, 0.9025],
        [1.4796, 1.3583, 1.1542, 0.5771, 0.0000, 0.2041, 0.3254],
        [1.6837, 1.5624, 1.3583, 0.7812, 0.2041, 0.0000, 0.1213],
        [1.8050, 1.6837, 1.4796, 0.9025, 0.3254, 0.1213, 0.0000],
        [1.8914, 1.7701, 1.5660, 0.9889, 0.4118, 0.2077, 0.0864],
        [1.9585, 1.8372, 1.6331, 1.0560, 0.4789, 0.2748, 0.1535],
        [2.0134, 1.8921, 1.6880, 1.1109, 0.5338, 0.3297, 0.2084]])

In [28]:
logits = torch.arange(0, 1, 0.1)
logits = torch.zeros(10)
logits[9] = 1
target = torch.tensor([9])
torch.nn.functional.cross_entropy(logits.view(1, -1), torch.tensor([target]).view(1), reduction='none')

tensor([1.4612])

In [None]:
#logits = -torch.arange(0, 1, 0.1)
logits = torch.zeros(10)
logits[9] = -1
target = torch.tensor([9])
torch.nn.functional.cross_entropy(logits.view(1, -1), torch.tensor([target]).view(1), reduction='none')

tensor([3.1244])

# Testing mappings between Euclidean, Lorentz and Klein models

In [4]:
import project_utils.lorentz as L

curv = 1.0

a = 2*torch.tensor([1.0, 1.0, 1.0, 1.0])

get_time = lambda x: torch.sqrt(1 / curv + torch.sum(x**2, dim=-1, keepdim=True))

x_space = L.exp_map0(a, curv)
x_time = get_time(x_space)

x_K = x_space / x_time
K_to_B = lambda x: (1/(1+torch.sqrt(1-torch.sum(x**2, dim=-1)))) * x
x_B = K_to_B(x_K)
x_BL = x_space / (1+x_time)

#time_from_K = torch.sqrt(1 / (curv*(1 + torch.sum(x_K**2, dim=-1, keepdim=True))))
time_from_K = torch.sqrt(1 / (curv*(1 - torch.sum(x_K**2, dim=-1, keepdim=True))))

print(x_space)
print(torch.cat([x_time, x_space], dim=-1))
print(x_K)
print(x_B)
#print(x_BL)
print(time_from_K)
print(torch.sum(x_K**2, dim=-1, keepdim=True))
print(torch.sum(x_B**2, dim=-1, keepdim=True))
#print(torch.sum(x_BL**2, dim=-1, keepdim=True))


tensor([13.6450, 13.6450, 13.6450, 13.6450])
tensor([27.3082, 13.6450, 13.6450, 13.6450, 13.6450])
tensor([0.4997, 0.4997, 0.4997, 0.4997])
tensor([0.4820, 0.4820, 0.4820, 0.4820])
tensor([27.3079])
tensor([0.9987])
tensor([0.9293])


Note: Mapped values can easily reach the edges of the circle in the Klein model. Euclidean vectors with length 3 are already reaching the edge. Embedding vectors need to be really clamped to be able to work with Einstein midpoint.

I should consider just mapping to Euclidean to find the mean and then map the center. Maybe this will be an interesting ablation, but I will need severe normalization if I am to use Einstein midpoint.

# Poincare to Klein

In [17]:
import math

def poincare_exp_map0(x, curv: float = 1.0, eps: float = 1e-8):
    """
    Map points from the tangent space at the vertex of hyperboloid, on to the
    hyperboloid. This mapping is done using the exponential map of Poincare model.

    Args:
        x: Tensor of shape `(B, D)` giving batch of Euclidean vectors to project
            onto the hyperboloid. These vectors are interpreted as velocity
            vectors in the tangent space at the hyperboloid vertex.
        curv: Positive scalar denoting negative hyperboloid curvature.
        eps: Small float number to avoid division by zero.

    Returns:
        Tensor of same shape as `x`, giving space components of the mapped
        vectors on the hyperboloid.
    """

    xnorm = torch.linalg.vector_norm(x, dim=-1, keepdim=True)
    rc_xnorm = curv**0.5 * xnorm

    # Ensure numerical stability in sinh by clamping input.
    h_input = torch.clamp(rc_xnorm, min=eps, max=math.asinh(2**15))
    _output = (torch.sinh(h_input)/(torch.cosh(h_input)+1)) * (x / torch.clamp(xnorm, min=eps))
    return _output

x_B = poincare_exp_map0(a, curv)
B_to_K = lambda x: (2/(1+torch.sum(x**2, dim=-1))) * x
K_to_B = lambda x: (1/(1+torch.sqrt(1-torch.sum(x**2, dim=-1)))) * x
x_K = B_to_K(x_B)
print(x_B)
print(x_K)
print(K_to_B(x_K))
print(torch.sum(x_K**2, dim=-1, keepdim=True))
print(torch.sum(x_B**2, dim=-1, keepdim=True))

tensor([0.4038, 0.0000, 0.4038, 0.4038])
tensor([0.5423, 0.0000, 0.5423, 0.5423])
tensor([0.4038, 0.0000, 0.4038, 0.4038])
tensor([0.8823])
tensor([0.4891])


Notes: For some reason the vector I get going from Lorentz to Klein is the same one I get from the Poincare exponential map. **What gives?**

**I FOUND OUT** Turns out this is actually the Klein exponential map for curvature of 1. The derivation can be found in my notes, and I changed it to the Klein map in the next code block.

In [23]:
def klein_exp_map0(x, curv: float = 1.0, eps: float = 1e-8):
    """
    Map points from the tangent space at the vertex of hyperboloid, on to the
    hyperboloid. This mapping is done using the exponential map of Poincare model.

    Args:
        x: Tensor of shape `(B, D)` giving batch of Euclidean vectors to project
            onto the hyperboloid. These vectors are interpreted as velocity
            vectors in the tangent space at the hyperboloid vertex.
        curv: Positive scalar denoting negative hyperboloid curvature.
        eps: Small float number to avoid division by zero.

    Returns:
        Tensor of same shape as `x`, giving space components of the mapped
        vectors on the hyperboloid.
    """

    xnorm = torch.linalg.vector_norm(x, dim=-1, keepdim=True)
    rc_xnorm = curv**0.5 * xnorm

    # Ensure numerical stability in sinh by clamping input.
    tanh_input = torch.clamp(rc_xnorm, min=eps, max=math.asinh(2**15))
    _output = torch.tanh(tanh_input) * x / torch.clamp(xnorm, min=eps)
    return _output

x_K = klein_exp_map0(a, curv)
# These conversions don't really work since they do not account for curvature
# Nevermind they work even without having an explicit curvature term, but that does not mean they are correct
B_to_K = lambda x: (2/(1+torch.sum(x**2, dim=-1))) * x
K_to_B = lambda x: (1/(1+torch.sqrt(1-torch.sum(x**2, dim=-1)))) * x
# These conversions are from the survey and have curvature in them
# But these don't make sense, since going from Klein to Poincare
# Requires Klein points to have a max dot product of 1/c
#B_to_K = lambda x: (2/(1+curv*torch.sum(x**2, dim=-1))) * x
#K_to_B = lambda x: (1/(1+torch.sqrt(1-curv*torch.sum(x**2, dim=-1)))) * x
x_B = K_to_B(x_K)
print(x_K)
print(x_B)
print(B_to_K(x_B))
print(torch.sum(x_K**2, dim=-1, keepdim=True))
print(torch.sum(x_B**2, dim=-1, keepdim=True))

tensor([0.5688, 0.0000, 0.5688, 0.5688])
tensor([0.4856, 0.0000, 0.4856, 0.4856])
tensor([0.5688, 0.0000, 0.5688, 0.5688])
tensor([0.9706])
tensor([0.7074])


# Working with functions from the survey

In [6]:
import project_utils.lorentz as L
from math import sqrt

curv = 2.0

a = 1*torch.tensor([1.0, 1.0, 1.0, 1.0])

get_time = lambda x: torch.sqrt(1 / curv + torch.sum(x**2, dim=-1, keepdim=True))

x_space = L.exp_map0(a, curv)
x_time = get_time(x_space)

x_temp = x_space / (sqrt(curv)*x_time)
x_K = 2/(1+curv*torch.sum(x_temp**2, dim=-1, keepdim=True)) * x_temp
K_to_B = lambda x: (1/(1+curv*torch.sqrt(1-torch.sum(x**2, dim=-1)))) * x
x_B = K_to_B(x_K)
x_BL = x_space / (1+x_time)

#time_from_K = torch.sqrt(1 / (curv*(1 + torch.sum(x_K**2, dim=-1, keepdim=True))))
time_from_K = torch.sqrt(1 / (curv*(1 - torch.sum(x_K**2, dim=-1, keepdim=True))))

print(x_space)
print(torch.cat([x_time, x_space], dim=-1))
print(x_K)
print(x_B)
#print(x_BL)
print(time_from_K)
print(torch.sum(x_K**2, dim=-1, keepdim=True))
print(torch.sum(x_B**2, dim=-1, keepdim=True))
#print(torch.sum(x_BL**2, dim=-1, keepdim=True))


tensor([2.9804, 2.9804, 2.9804, 2.9804])
tensor([6.0026, 2.9804, 2.9804, 2.9804, 2.9804])
tensor([0.3535, 0.3535, 0.3535, 0.3535])
tensor([0.1464, 0.1464, 0.1464, 0.1464])
tensor([1.0000])
tensor([0.5000])
tensor([0.0858])


In [22]:
def poincare_exp_map0_old(x, curv: float = 1.0, eps: float = 1e-8):
    """
    Map points from the tangent space at the vertex of hyperboloid, on to the
    hyperboloid. This mapping is done using the exponential map of Poincare model.

    Args:
        x: Tensor of shape `(B, D)` giving batch of Euclidean vectors to project
            onto the hyperboloid. These vectors are interpreted as velocity
            vectors in the tangent space at the hyperboloid vertex.
        curv: Positive scalar denoting negative hyperboloid curvature.
        eps: Small float number to avoid division by zero.

    Returns:
        Tensor of same shape as `x`, giving space components of the mapped
        vectors on the hyperboloid.
    """

    xnorm = torch.linalg.vector_norm(x, dim=-1, keepdim=True)
    rc_xnorm = curv**0.5 * xnorm

    # Ensure numerical stability in sinh by clamping input.
    tanh_input = torch.clamp(rc_xnorm, min=eps, max=math.asinh(2**15))
    _output = torch.tanh(tanh_input) * x / torch.clamp(rc_xnorm, min=eps)
    return _output

x_B = poincare_exp_map0_old(a, curv)
# These conversions don't really work since they do not account for curvature
# Nevermind they work even without having an explicit curvature term
B_to_K = lambda x: (2/(1+curv*torch.sum(x**2, dim=-1))) * x
K_to_B = lambda x: (1/(1+torch.sqrt(1-curv*torch.sum(x**2, dim=-1)))) * x
x_K = B_to_K(x_B)
print(x_B)
print(x_K)
print(K_to_B(x_K))
print(torch.sum(x_K**2, dim=-1, keepdim=True))
print(torch.sum(x_B**2, dim=-1, keepdim=True))

tensor([0.4022, 0.0000, 0.4022, 0.4022])
tensor([0.4082, 0.0000, 0.4082, 0.4082])
tensor([0.4022, 0.0000, 0.4022, 0.4022])
tensor([0.4999])
tensor([0.4853])


# Testing Centroid Functions

In [1]:
import torch
import project_utils.lorentz as L
X = 1*torch.randn(512, 10)
print(X.mean(dim=0))

tensor([-0.0750, -0.0724, -0.0049,  0.0612, -0.0479,  0.0207, -0.0229,  0.0406,
         0.0103,  0.0100])


In [2]:
curv = 1.0
EM = L.einstein_midpoint(X, curv)
EM2 = L.einstein_midpoint2(X, curv)
C = L.centroid(X, curv)
print(EM)
print(EM2)
print(C)
print(L.pairwise_dist(X, EM, curv).sum())
print(L.pairwise_dist(X, EM2, curv).sum())
print(L.pairwise_dist(X, C, curv).sum())


torch.Size([512, 10]) torch.Size([512, 1])
tensor([-0.0233, -0.0225, -0.0015,  0.0190, -0.0149,  0.0064, -0.0071,  0.0126,
         0.0032,  0.0031])
tensor([-0.0233, -0.0225, -0.0015,  0.0190, -0.0149,  0.0064, -0.0071,  0.0126,
         0.0032,  0.0031])
tensor([-38.3865, -37.0484,  -2.5233,  31.3243, -24.5357,  10.5811, -11.7083,
         20.7877,   5.2845,   5.1183])
tensor(929.1268)
tensor(929.1268)
tensor(3082.0398)


  xyl = x @ y.T - x_time @ y_time.T


**Conclusion:** Use the Einstein midpoint without transforming to Klein