# Cosine similarity of optimal directions

This notebook loads the optimal direction vectors for arithmetic-base8, chess, and programming, together with the combined (`combined_balanced`) direction, and computes all pairwise cosine similarities between them.


In [1]:
import os
import torch
import numpy as np

# Absolute paths to the four optimal direction tensors: three single-task
# directions plus the combined one.
# NOTE(review): these are machine-specific absolute paths; consider deriving
# them from a configurable base directory so the notebook runs elsewhere.
prog_path = "/home/jholshuijsen/reasoning-reciting-probing/outputs/optimal_directions/programming/optimal_direction.pth"
chess_path = "/home/jholshuijsen/reasoning-reciting-probing/outputs/optimal_directions/chess/optimal_direction.pth"
arith_path = "/home/jholshuijsen/reasoning-reciting-probing/outputs/optimal_directions/arithmetic-base8/optimal_direction.pth"
combined_path = "/home/jholshuijsen/reasoning-reciting-probing/outputs/optimal_directions/combined_balanced/optimal_direction.pth"

# Fail fast, before loading anything, if any input file is missing. The
# combined direction is checked as well, since it is loaded below just like
# the other three.
for p in [prog_path, chess_path, arith_path, combined_path]:
    if not os.path.exists(p):
        raise FileNotFoundError(f"Missing file: {p}")


def _load_flat(path):
    """Load the object stored at `path` onto the CPU and return it as a 1D float32 tensor.

    NOTE(security): torch.load unpickles the file; only use it on files you
    trust (or pass weights_only=True if your torch version supports it).
    """
    # reshape(-1) flattens like view(-1) but also works for non-contiguous tensors.
    return torch.load(path, map_location="cpu").reshape(-1).float()


def _unit(vec):
    """Return `vec` scaled by 1 / (L2 norm + 1e-12); the epsilon avoids division by zero."""
    return vec / (vec.norm(p=2) + 1e-12)


# Flattened 1D float tensors
prog = _load_flat(prog_path)
chess = _load_flat(chess_path)
arith = _load_flat(arith_path)
combined = _load_flat(combined_path)

# torch.dot requires equal-length 1D inputs; report a mismatch explicitly.
_sizes = {
    "programming": prog.numel(),
    "chess": chess.numel(),
    "arithmetic-base8": arith.numel(),
    "combined": combined.numel(),
}
if len(set(_sizes.values())) != 1:
    raise ValueError(f"Direction vectors have different lengths: {_sizes}")

# Normalize to unit vectors
prog_n = _unit(prog)
chess_n = _unit(chess)
arith_n = _unit(arith)
combined_n = _unit(combined)

# Cosine similarity of two unit vectors is their dot product
cos_prog_chess = torch.dot(prog_n, chess_n).item()
cos_prog_arith = torch.dot(prog_n, arith_n).item()
cos_chess_arith = torch.dot(chess_n, arith_n).item()
cos_prog_combined = torch.dot(prog_n, combined_n).item()
cos_chess_combined = torch.dot(chess_n, combined_n).item()
cos_arith_combined = torch.dot(arith_n, combined_n).item()

# Assemble a symmetric matrix for display; row/column order follows `labels`
# and the diagonal (self-similarity) is set to exactly 1.0.
labels = ["programming", "chess", "arithmetic-base8", "combined"]
cos_matrix = np.array([
    [1.0,               cos_prog_chess,     cos_prog_arith,     cos_prog_combined],
    [cos_prog_chess,    1.0,                cos_chess_arith,    cos_chess_combined],
    [cos_prog_arith,    cos_chess_arith,    1.0,                cos_arith_combined],
    [cos_prog_combined, cos_chess_combined, cos_arith_combined, 1.0],
])

print("Labels:", labels)
print("Cosine similarity matrix:\n", np.round(cos_matrix, 6))


Labels: ['programming', 'chess', 'arithmetic-base8', 'combined']
Cosine similarity matrix:
 [[ 1.        0.003805 -0.017509  0.297196]
 [ 0.003805  1.        0.004627  0.044612]
 [-0.017509  0.004627  1.        0.391008]
 [ 0.297196  0.044612  0.391008  1.      ]]


In [2]:
# Optional: pretty print with pandas
import pandas as pd

# Reuse the `labels` list defined together with `cos_matrix` rather than
# re-declaring it here, so the row/column names cannot drift out of sync
# with the order in which the matrix was assembled.
df = pd.DataFrame(cos_matrix, index=labels, columns=labels)
# Last expression of the cell: renders the table with six decimal places.
df.style.format("{:.6f}")


Unnamed: 0,programming,chess,arithmetic-base8,combined
programming,1.0,0.003805,-0.017509,0.297196
chess,0.003805,1.0,0.004627,0.044612
arithmetic-base8,-0.017509,0.004627,1.0,0.391008
combined,0.297196,0.044612,0.391008,1.0
