# Setup

In [None]:
%%capture

import sys
import os

# Work inside a dedicated project directory so downloaded modules and results
# stay together: an absolute path on Colab, a sub-directory of the current
# directory elsewhere.
if "google.colab" in sys.modules:
    os.makedirs("/content/qk-spectral-analysis", exist_ok=True)
    os.chdir("/content/qk-spectral-analysis")
elif os.path.basename(os.getcwd()) != "qk-spectral-analysis":
    # The relative path is only entered once: re-running this cell from inside
    # the project directory must not create a nested copy of it.
    os.makedirs("qk-spectral-analysis", exist_ok=True)
    os.chdir("qk-spectral-analysis")

# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install papermill

# Run this to download figures, ESDs, and code.
# !git clone https://github.com/SD-interp/qk-spectral-analysis.git

# Run this to fetch only the Python modules this notebook imports.
# -nc (no-clobber) keeps a file that is already present, so re-running the
# cell does not pile up utils.py.1 / models.py.1 copies.
! wget -nc https://raw.githubusercontent.com/SD-interp/qk-spectral-analysis/refs/heads/main/utils.py
! wget -nc https://raw.githubusercontent.com/SD-interp/qk-spectral-analysis/refs/heads/main/models.py


# Compute ESD and generate plots

This notebook downloads pretrained transformer weights (e.g., Qwen3-4B),
extracts the Query and Key projection matrices (`W_Q`, `W_K`),
computes their singular-value spectra, and generates plots for `Classification_A` and `Classification_B`.

The target model family can be specified by setting the `family` variable.
Model lists for each family are defined in `models.py`.

You can extend support for new models or families by editing `models.py`.
Each model should follow the standard file and layer naming conventions
and be accessible as `.safetensors` files.

Processed results are saved to `{family}/data/` inside the working directory (`/content/qk-spectral-analysis/{family}/data/` on Colab), one `.pkl` file per model.

In [None]:
# Model family to process; it is passed to get_model_names() to obtain the model list.
# Values listed by the author: Qwen3, Gemma2, Llama, Mistral
family = "Qwen3"

In [None]:
import torch as t
from safetensors.torch import load_file, load
from huggingface_hub import hf_hub_download, HfApi
from transformers import AutoConfig
import einops
from tqdm.notebook import tqdm_notebook
import pickle
import numpy as np
from collections import defaultdict
import re
import threading
import queue
import random
from functools import partial
import time
from models import get_model_names
from utils import robust_lowrank_singular_values, get_stats

In [None]:
# Gradients are switched off globally for the whole notebook.
t.set_grad_enabled(False)

# Fixed seeds for torch and for Python's `random` module.
random.seed(0)
t.manual_seed(0)

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
if t.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Hugging Face Hub client, used later to list the files of each model repo.
api = HfApi()

# Models belonging to the selected family, and the folder their results go to.
models = get_model_names(family)
os.makedirs(f"{family}/data", exist_ok=True)

In [None]:
"""
Downloading and processing run on separate threads so they can overlap.
Downloaded shards are written to /dev/shm (kept in RAM) to reduce write times.
Each shard file is deleted as soon as it has been loaded, to free the space.
"""

# Results for the model being processed, filled in by gpu_worker as
# records[<metric name>][<layer index>]. The processing loop below replaces it
# with a fresh, empty mapping before each model.
records = defaultdict(dict)

def download_worker():
    """Producer thread: download each shard of the current model and queue its path.

    Reads the notebook globals ``files_to_download``, ``model_name`` and
    ``file_queue``. Every file is fetched with ``hf_hub_download`` into
    ``/dev/shm`` and its local path is put on ``file_queue``.

    The queue always receives a terminator, so the consumer can never wait
    forever: ``None`` after the last shard, or - if a download fails - the
    exception object itself, after which the exception is re-raised in this
    thread.
    """
    try:
        for filename in files_to_download:
            print(f"⬇️ Downloading {filename} ...")
            path = hf_hub_download(
                repo_id=model_name, filename=filename, local_dir="/dev/shm")
            file_queue.put(path)  # send to GPU worker
            print(f"✅ Downloaded {filename}\n")
    except BaseException as exc:
        # Without a terminator gpu_worker would block on get() indefinitely.
        # Queue the failure instead of None so that partial results are not
        # mistaken for a finished model.
        file_queue.put(exc)
        raise
    file_queue.put(None)  # signal completion


def gpu_worker():
    """Consumer thread: turn each downloaded shard into per-layer spectra.

    Reads the notebook globals ``file_queue``, ``device``, ``n_heads``,
    ``n_kv_heads``, ``records``, ``family`` and ``model_name``.

    Each queued shard is loaded onto ``device`` and its file is deleted. Every
    2-D ``q_proj`` tensor is paired with the ``k_proj`` tensor of the same
    name; both are split per head and passed to
    ``robust_lowrank_singular_values``. The returned singular values, their
    squares (stored as "eigen_values") and the ``get_stats`` summary of each
    are written to ``records`` under the layer index parsed from the tensor
    name.

    When the ``None`` sentinel arrives, ``records`` is pickled to
    ``{family}/data/<last component of model_name>.pkl``.

    Raises:
        RuntimeError: if the download thread queued an exception. Nothing is
            pickled in that case.
        AssertionError: if a ``q_proj`` tensor has no ``k_proj`` partner in
            the same shard, or the repeated key heads are not adjacent.
    """
    while True:
        item = file_queue.get()
        if item is None:  # download finished
            file_queue.put(None)  # leave the sentinel on the queue
            break
        if isinstance(item, BaseException):  # download failed
            file_queue.put(item)
            raise RuntimeError(
                f"download failed for {model_name}; no results were saved") from item

        path = item
        print(f"🚩 Starting processing {os.path.basename(path)}")

        tensors = load_file(path, device=device)
        os.remove(path) # delete file after loading

        for tensor_name in tensors:
            if "q_proj" not in tensor_name:
                continue
            # The per-head split below needs a 2-D matrix; skip any other
            # tensor that mentions q_proj (e.g. a 1-D bias vector).
            if tensors[tensor_name].ndim != 2:
                continue

            q_name = tensor_name
            k_name = tensor_name.replace("q_proj", "k_proj") # look for the corresponding k_proj
            layer = int(re.search(r"layers\.(\d+)\.", q_name).group(1))

            assert k_name in tensors, f"key tensor not found for layer {layer}"

            # Using t.float64 does not have noticeable difference
            W_Q = tensors[q_name].to(t.float32)
            W_K = tensors[k_name].to(t.float32)

            # Split the stacked rows into one (d_head, d_model) matrix per head.
            W_Q = einops.rearrange(W_Q,
                "(q_head d_head) d_model -> q_head d_head d_model", q_head=n_heads)
            W_K = einops.rearrange(W_K,
                "(k_head d_head) d_model -> k_head d_head d_model", k_head=n_kv_heads)

            # With GQA, repeat each key head so W_K has as many heads as W_Q
            if n_heads != n_kv_heads:
                ratio = n_heads//n_kv_heads

                W_K = t.repeat_interleave(W_K, dim=0, repeats=ratio)
                # copies of one key head must sit next to each other
                assert t.equal(W_K[0], W_K[1]), "incorrect repetition order"

            # compute singular and eigen values
            singular_values = robust_lowrank_singular_values(W_Q, W_K)
            eigen_values = singular_values ** 2

            records['singular_values'][layer] = singular_values.cpu().numpy()
            svd_stats = get_stats(singular_values)
            records['singular_values_stats'][layer] = svd_stats

            records["eigen_values"][layer] = eigen_values.cpu().numpy()
            eigen_stats = get_stats(eigen_values)
            records['eigen_values_stats'][layer] = eigen_stats

        print(f"☑️ Completed processing {os.path.basename(path)}\n")

        # Drop the reference to this shard's tensors so they are not kept
        # alive while waiting for, and loading, the next shard.
        del tensors

    with open(f"{family}/data/{model_name.split('/')[-1]}.pkl", "wb") as f:
        pickle.dump(records, f)

def run():
    """Run the download and GPU workers side by side and wait for both.

    Each worker is started on its own daemon thread; the call returns once
    both threads have finished.
    """
    workers = [
        threading.Thread(target=partial(download_worker), daemon=True),
        threading.Thread(target=gpu_worker, daemon=True),
    ]

    # Launch both before waiting on either, so the two stages overlap.
    for worker in workers:
        worker.start()

    for worker in workers:
        worker.join()

In [None]:
# Models whose results file already exists are skipped, so this cell can be
# re-run to resume an interrupted sweep.
done = os.listdir(f"{family}/data")
models = [x for x in models if x.split('/')[-1]+".pkl" not in done]

for model_name in tqdm_notebook(models):
    # Only the .safetensors files whose name mentions "model" are fetched.
    files_to_download = [x for x in api.list_repo_files(model_name) if x.endswith(".safetensors") and "model" in x.casefold()]
    cfg = AutoConfig.from_pretrained(model_name) # get model config dict

    d_model = cfg.hidden_size
    n_heads = cfg.num_attention_heads
    n_kv_heads = getattr(cfg, "num_key_value_heads", n_heads)
    # Some configs have no `head_dim` attribute (or leave it as None); fall
    # back to the usual hidden_size / num_attention_heads split in that case.
    d_head = getattr(cfg, "head_dim", None) or d_model // n_heads

    # Fresh queue and result store for this model; the worker threads read
    # these globals.
    file_queue = queue.Queue()
    records = defaultdict(dict)
    run()

    # Worker exceptions stay inside their threads, so check that the results
    # file was actually written before reporting success.
    result_file = f"{family}/data/{model_name.split('/')[-1]}.pkl"
    if os.path.exists(result_file):
        print(f"completed {model_name}\n\n")
    else:
        print(f"⚠️ no results saved for {model_name} - see the worker traceback above\n\n")
    time.sleep(0.5)

## For Colab
Use the cell below to execute the `Classification_A` and `Classification_B` notebooks with papermill if you cannot run them directly.

In [None]:
import papermill as pm
# Execute each classification notebook and write the executed copy next to it.
# The return value is assigned to `_` so the cell prints no output.
_ = pm.execute_notebook(
    input_path="/content/Classification_A.ipynb",
    output_path="/content/Classification_A_out.ipynb",
)
_ = pm.execute_notebook(
    input_path="/content/Classification_B.ipynb",
    output_path="/content/Classification_B_out.ipynb",
)