In [3]:
import os
import sys
import subprocess
from pathlib import Path


def get_project_root():
    """Return the top-level directory of the enclosing git repository.

    Runs ``git rev-parse --show-toplevel`` and wraps the reported path in a
    ``pathlib.Path``.

    Returns:
        Path: the path printed by git, with surrounding whitespace removed.

    Raises:
        subprocess.CalledProcessError: if the git command exits with a
            non-zero status (raised by ``subprocess.check_output``).
    """
    git_cmd = ["git", "rev-parse", "--show-toplevel"]
    raw_output = subprocess.check_output(git_cmd)
    # git terminates its output with a newline; strip it before decoding.
    toplevel = raw_output.strip().decode("utf-8")
    return Path(toplevel)

# Resolve the repository root and put it on the import path so that
# project-local packages can be imported from this notebook.
project_root = get_project_root()
root_as_str = str(project_root)
sys.path.append(root_as_str)

# Dataset selection; `base_path` is the normalized parent directory of the
# repository root.
dataset = "waymo"
data_dir = f"{dataset}_data"
parent_of_root = os.path.join(project_root, "..")
base_path = os.path.normpath(parent_of_root)

# Relative output directory for this dataset.
out_reldir = f"out/control-vectors/{dataset}/"

In [4]:
import os
from glob import glob
from utils.embs_all import load_embeddings
from utils.embs_contrastive import load_contrastive_embed_pairs


# Locate the dataset directory, then collect the input files and the
# target-embedding files in sorted order.
data_path = os.path.join(base_path, "data", data_dir)

paths_inputs = glob(f"{data_path}/input*")
paths_inputs.sort()

paths_embeds = glob(f"{data_path}/target_embs*")
paths_embeds.sort()

# Load the embeddings (per the original note: stacked with respect to types).
embs = load_embeddings(paths_inputs, paths_embeds)

100%|██████████| 204/204 [00:00<00:00, 540.22it/s]


In [5]:
import torch
import numpy as np


# Step 1: Classes to compare, grouped by the top-level key used in `embs`.
# The insertion order of this mapping defines the row order of the tensors
# below, so labels and rows cannot drift apart.
CLASS_GROUPS = {
    "speed": ["low", "moderate", "high"],
    "acceleration": ["decelerate", "constant", "accelerate"],
    "direction": ["stationary", "straight", "right", "left"],
    "agent": ["vehicle", "pedestrian", "cyclist"],
}
class_keys = [
    (group, name) for group, names in CLASS_GROUPS.items() for name in names
]
labels = [name for _, name in class_keys]

# Number of columns per row; value kept from the original cell.
# NOTE(review): presumably the embedding width -- confirm against the model.
EMB_DIM = 128

# Step 2: Per-class statistics. Row i of `mean_tensor` / `vars_tensor` holds
# the `.mean` / `.var` attribute of the class named `labels[i]`.
mean_tensor = torch.empty((len(labels), EMB_DIM))
vars_tensor = torch.empty((len(labels), EMB_DIM))
for row, (group, name) in enumerate(class_keys):
    class_stats = embs[group][name]
    mean_tensor[row] = class_stats.mean
    vars_tensor[row] = class_stats.var



**Within-Class Variance**

$
\text{Var}_{\text{within}}(c) = \frac{1}{N_c} \sum_{i \in c} \| x_i - \mu_c \|^2
$

$
S_w = \frac{1}{C} \sum_{c=1}^{C} \text{Var}_{\text{within}}(c)
$

where:

- $S_w$: average within-class variance
- $C$: total number of classes


---


**Between-Class Distances**

Squared Euclidean distance between each pair of class means ($d_b(c_1, c_2)$):

$
d_b(c_1, c_2) = \| \mu_{c_1} - \mu_{c_2} \|^2
$

Average between-class squared distance ($S_b$):

$
S_b = \frac{2}{C(C - 1)} \sum_{c_1 < c_2} d_b(c_1, c_2)
$

The factor $\frac{2}{C(C - 1)}$ ensures averaging over all unique class pairs.



---


**Class-Distance Normalized Variance (CDNV)**

$
\text{CDNV} = \frac{S_w}{S_b}
$

A lower CDNV value indicates that the within-class variance is small relative to the average squared distance between class means, implying better class separability in the embedding space.


In [6]:
from future_motion.utils.interpretability.neural_collapse import CDNV


# Evaluate CDNV on the per-class mean and variance tensors; the bare name on
# the last line makes the result the cell's displayed output.
cdnv_score = CDNV(mean_tensor, vars_tensor)
cdnv_score

tensor(0.9498)