# Distance Losses - Comparison

## Imports

In [1]:
from utils.D_SimCLR import *

  if not hasattr(numpy, tp_name):
  "lr_options": generate_power_seq(LEARNING_RATE_CIFAR, 11),
  contrastive_task: Union[FeatureMapContrastiveTask] = FeatureMapContrastiveTask("01, 02, 11"),
  self.nce_loss = AmdimNCELoss(tclip)


In [2]:
# Seed every RNG this notebook draws from so that Restart Kernel -> Run All
# reproduces the same tensors and loss values.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
# The test tensors further down are created with torch.randn, so torch's
# generator has to be seeded as well.
torch.manual_seed(SEED)
# Python reads PYTHONHASHSEED at interpreter start-up; setting it here only
# affects child processes, not the hashing of this running kernel.
os.environ["PYTHONHASHSEED"] = str(SEED)

## Base InfoNCE Loss (SimCLR)

$$
\ell_{i,j} = -\log \frac{
    \exp\left(\text{sim}(\mathbf{z}_i, \mathbf{z}_j) / \tau\right)
}{
    \sum\limits_{k \neq i} \exp\left(\text{sim}(\mathbf{z}_i, \mathbf{z}_k) / \tau\right)
}
$$


## Inverse Distance Weighting (D-SimCLR)

Important: the weight $w_{i,j}$ is computed from the positive pair $(i, j)$ only; the negative terms in the denominator are not re-weighted.

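Let $w_{i,j} = \frac{1}{d_{i,j} + \epsilon}$, where $d_{i,j}$ is the distance between samples $i$ and $j$ and $\epsilon > 0$ is a small constant that keeps the weight finite when $d_{i,j} = 0$.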

$$
\ell_{i,j}^{\text{inv}} = -w_{i,j} \cdot \log \frac{
    \exp\left(\text{sim}(\mathbf{z}_i, \mathbf{z}_j) / \tau\right)
}{
    \sum\limits_{k \neq i} \exp\left(\text{sim}(\mathbf{z}_i, \mathbf{z}_k) / \tau\right)
}
$$


## Distance-Aware Softmax Denominator (D-SimCLR)

$$
\ell_{i,j}^{\text{softdenom}} = -\log \frac{
    \exp\left(\text{sim}(\mathbf{z}_i, \mathbf{z}_j) / \tau\right)
}{
    \sum\limits_{k \neq i} \exp\left(\left[\text{sim}(\mathbf{z}_i, \mathbf{z}_k) - \lambda d_{i,k}\right] / \tau\right)
}
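$$

Here $\lambda$ scales the distance penalty $d_{i,k}$ that is subtracted from each similarity in the denominator.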


## Distance-Aware Temperature Scaling (D-SimCLR)

Let $\tau_{i,j} = \tau + d_{i, j}$

$$
\ell_{i,j}^{\text{temp}} = -\log \frac{
    \exp\left(\text{sim}(\mathbf{z}_i, \mathbf{z}_j) / \tau_{i, j}\right)
}{
    \sum\limits_{k \neq i} \exp\left(\text{sim}(\mathbf{z}_i, \mathbf{z}_k) / \tau_{i, k}\right)
}
$$


## Tests

In [3]:
# Build three pairs of projection batches (random / very close / very far) plus
# a normalised distance matrix, move them to the compute device, and print them.
batch_size = 2
embedding_dim = 8
device = "cuda" if torch.cuda.is_available() else "cpu"

# Random example pair (neutral baseline): independent unit-norm rows.
proj_1_random = F.normalize(torch.randn(batch_size, embedding_dim), dim=1)
proj_2_random = F.normalize(torch.randn(batch_size, embedding_dim), dim=1)

# Very close pair: both views are the same unit base vector plus small
# (0.01-scaled) Gaussian noise, so their first rows are almost identical.
base_vec = F.normalize(torch.randn(embedding_dim), dim=0)
proj_1_very_close = base_vec.unsqueeze(0) + 0.01 * torch.randn(1, embedding_dim)
proj_2_very_close = base_vec.unsqueeze(0) + 0.01 * torch.randn(1, embedding_dim)

# "Very far" pair: view 1 is the unperturbed base vector, view 2 adds the same
# small noise as above, so the embeddings are again highly similar.
# NOTE(review): nothing in this cell makes this pair "far" - it is built like
# the close pair, and the same `distances` matrix is passed to the losses for
# every case. Confirm how "high distance" was meant to be encoded.
base_vec_far = F.normalize(torch.randn(embedding_dim), dim=0)
proj_1_very_far = base_vec_far.unsqueeze(0)
proj_2_very_far = base_vec_far.unsqueeze(0) + 0.01 * torch.randn(1, embedding_dim)

# Append one unrelated random row to each view -> shape [2, embedding_dim].
proj_1_very_close = torch.cat([proj_1_very_close, torch.randn(1, embedding_dim)], dim=0)
proj_2_very_close = torch.cat([proj_2_very_close, torch.randn(1, embedding_dim)], dim=0)

proj_1_very_far = torch.cat([proj_1_very_far, torch.randn(1, embedding_dim)], dim=0)
proj_2_very_far = torch.cat([proj_2_very_far, torch.randn(1, embedding_dim)], dim=0)

# One row-wise normalisation at the end covers both the perturbed base row and
# the appended random row, so no earlier per-row normalisation is needed.
proj_1_very_close = F.normalize(proj_1_very_close, dim=1)
proj_2_very_close = F.normalize(proj_2_very_close, dim=1)
proj_1_very_far = F.normalize(proj_1_very_far, dim=1)
proj_2_very_far = F.normalize(proj_2_very_far, dim=1)

# Distance matrix, manually scaled so that the largest entry becomes 1.
distances = torch.tensor([
    [0.0, 0.1, 2.0, 3.0],
    [0.1, 0.0, 4.0, 5.0],
    [2.0, 4.0, 0.0, 20.0],
    [3.0, 5.0, 20.0, 0.0]
])
distances = distances / torch.max(distances)

# Invariants: the close/far tensors are hard-coded to two rows and the matrix
# to 4x4, so fail loudly if batch_size / embedding_dim are changed without
# updating them.
for _name, _proj in [
    ("proj_1_random", proj_1_random),
    ("proj_2_random", proj_2_random),
    ("proj_1_very_close", proj_1_very_close),
    ("proj_2_very_close", proj_2_very_close),
    ("proj_1_very_far", proj_1_very_far),
    ("proj_2_very_far", proj_2_very_far),
]:
    assert _proj.shape == (batch_size, embedding_dim), f"{_name} has shape {tuple(_proj.shape)}"
# Presumably the losses index distances over the 2 * batch_size concatenated
# views - TODO confirm against utils.D_SimCLR.
assert distances.shape == (2 * batch_size, 2 * batch_size), "distance matrix does not match batch_size"
assert torch.equal(distances, distances.T), "distance matrix must be symmetric"
assert bool(torch.all(torch.diagonal(distances) == 0)), "self-distances must be zero"


# Send to device
proj_1_random = proj_1_random.to(device)
proj_2_random = proj_2_random.to(device)
proj_1_very_far = proj_1_very_far.to(device)
proj_2_very_far = proj_2_very_far.to(device)
proj_1_very_close = proj_1_very_close.to(device)
proj_2_very_close = proj_2_very_close.to(device)
distances = distances.to(device)

# Print summaries
print("=== Setup Summary ===")
print(f"Projection 1 Random:\n{proj_1_random}")
print(f"Projection 2 Random:\n{proj_2_random}")
print(f"Projection 1 Close:\n{proj_1_very_close}")
print(f"Projection 2 Close:\n{proj_2_very_close}")
print(f"Projection 1 Far:\n{proj_1_very_far}")
print(f"Projection 2 Far:\n{proj_2_very_far}")
print(f"Distances:\n{distances}")


=== Setup Summary ===
Projection 1 Random:
tensor([[ 0.3024, -0.1965, -0.0225,  0.0477,  0.0751,  0.0743,  0.8901, -0.2524],
        [ 0.4753, -0.0790, -0.1545,  0.4062, -0.1795, -0.7145,  0.1082,  0.1566]],
       device='cuda:0')
Projection 2 Random:
tensor([[ 0.1844,  0.1396,  0.2600,  0.2230, -0.4860,  0.4117, -0.6174,  0.2057],
        [ 0.5711, -0.1661,  0.2878,  0.0842, -0.4568,  0.3208,  0.2501,  0.4268]],
       device='cuda:0')
Projection 1 Close:
tensor([[ 0.2814, -0.0761, -0.0718, -0.0060,  0.6319,  0.5267, -0.0701,  0.4777],
        [ 0.1863,  0.4175, -0.3607,  0.3855,  0.0147, -0.1160, -0.0511,  0.7043]],
       device='cuda:0')
Projection 2 Close:
tensor([[ 0.2676, -0.0403, -0.0604, -0.0332,  0.6326,  0.5423, -0.0728,  0.4715],
        [ 0.0986, -0.3830, -0.4297, -0.3707,  0.4443,  0.3562,  0.3892, -0.2141]],
       device='cuda:0')
Projection 1 Far:
tensor([[-0.3910,  0.0029,  0.0753,  0.0123, -0.7946, -0.4404, -0.1171,  0.0472],
        [ 0.1674, -0.2130, -0.1206,  0.1

In [4]:
# Evaluate ContrastiveLoss on the random, close and far projection pairs
# (in that order); this variant is called without the distance matrix.
loss = ContrastiveLoss(batch_size, device)
result_random, result_close, result_far = (
    loss(view_a, view_b)
    for view_a, view_b in (
        (proj_1_random, proj_2_random),
        (proj_1_very_close, proj_2_very_close),
        (proj_1_very_far, proj_2_very_far),
    )
)
print(f"Results Random: {result_random}, Results Close: {result_close}, Results Far: {result_far}")

Results Random: 1.771040439605713, Results Close: 1.344935417175293, Results Far: 1.06294846534729


In [5]:
# Evaluate InverseDistanceContrastiveLoss on the random, close and far
# projection pairs (in that order), passing the same distance matrix each time.
loss = InverseDistanceContrastiveLoss(batch_size, device)
result_random, result_close, result_far = (
    loss(view_a, view_b, distances)
    for view_a, view_b in (
        (proj_1_random, proj_2_random),
        (proj_1_very_close, proj_2_very_close),
        (proj_1_very_far, proj_2_very_far),
    )
)
print(f"Results Random: {result_random}, Results Close: {result_close}, Results Far: {result_far}")

Results Random: 14.378768920898438, Results Close: 6.752707481384277, Results Far: 4.880659103393555


In [6]:
# Evaluate SoftDenominatorContrastiveLoss on the random, close and far
# projection pairs (in that order), passing the same distance matrix each time.
loss = SoftDenominatorContrastiveLoss(batch_size, device)
result_random, result_close, result_far = (
    loss(view_a, view_b, distances)
    for view_a, view_b in (
        (proj_1_random, proj_2_random),
        (proj_1_very_close, proj_2_very_close),
        (proj_1_very_far, proj_2_very_far),
    )
)
print(f"Results Random: {result_random}, Results Close: {result_close}, Results Far: {result_far}")

Results Random: 1.1697614192962646, Results Close: 0.9429987072944641, Results Far: 0.6880234479904175


In [7]:
# Evaluate TempretureScalingContrastiveLoss (class name spelled as exported by
# the project module) on the random, close and far projection pairs, in that
# order, passing the same distance matrix each time.
loss = TempretureScalingContrastiveLoss(batch_size, device)
result_random, result_close, result_far = (
    loss(view_a, view_b, distances)
    for view_a, view_b in (
        (proj_1_random, proj_2_random),
        (proj_1_very_close, proj_2_very_close),
        (proj_1_very_far, proj_2_very_far),
    )
)
print(f"Results Random: {result_random}, Results Close: {result_close}, Results Far: {result_far}")

Results Random: 1.5604825019836426, Results Close: 1.167198896408081, Results Far: 0.9504165649414062
