# 1.8e: Black Hole Masks

We discovered 4 black holes in the core (2,179 tokens sitting at r = 0.00007553). Together they hold 2,178 of the core tokens:
1. BH1: 866 tokens at (x=-ε, y=0, z=0)
2. BH2: 734 tokens at (x=+ε, y=+ε, z=0)
3. BH3: 329 tokens at (x=+ε, y=0, z=0)
4. BH4: 249 tokens at (x=-ε, y=+ε, z=0)

Where ε ≈ 1.526e-5 (1 ULP in bfloat16).

**Goal:** Create reusable masks and metadata for the black hole tokens.

## Parameters

In [1]:
# Model to analyze. The name selects the ../tensors/<MODEL_NAME>/ directory
# that later cells read W and the core data from and write the mask file to.
MODEL_NAME = "Qwen3-4B-Instruct-2507"

## Imports

In [2]:
import torch
import ml_dtypes
import numpy as np
from safetensors.torch import load_file, save_file
from pathlib import Path
from collections import defaultdict

## Helper Function

In [3]:
def torch_bf16_to_numpy_bf16(tensor):
    """Return a NumPy array of dtype ``ml_dtypes.bfloat16`` for a PyTorch tensor.

    The tensor is moved to the CPU, its elements are reinterpreted as uint16,
    the result is exported to NumPy, and the array is then reinterpreted as
    ``ml_dtypes.bfloat16``. Both reinterpretations are dtype views, not
    numeric conversions.
    """
    raw_bits = tensor.cpu().view(torch.uint16)
    return raw_bits.numpy().view(ml_dtypes.bfloat16)

## Load Data

In [4]:
# Load W in bfloat16
W_path = Path(f"../tensors/{MODEL_NAME}/W.safetensors")
W_bf16 = load_file(W_path)["W"]

# Load core data
core_path = Path(f"../tensors/{MODEL_NAME}/1.8a_core.safetensors")
core_data = load_file(core_path)

core_mask = core_data["core_mask"].to(torch.bool)
core_token_ids = core_data["core_token_ids"].to(torch.int64)
n_core = core_data["n_core"].item()

# Later cells pair row i of W_bf16[core_mask] with core_token_ids[i], so the
# stored IDs must list the True positions of core_mask in ascending order and
# n_core must match the mask. Fail loudly here instead of mislabeling tokens.
assert int(core_mask.sum()) == n_core, (
    f"core_mask selects {int(core_mask.sum())} tokens but n_core is {n_core}"
)
assert torch.equal(core_token_ids, torch.nonzero(core_mask, as_tuple=True)[0]), (
    "core_token_ids does not match the ascending True positions of core_mask"
)

# Basis indices: the embedding dimensions used as the z, x and y axes below
north_idx = core_data["north_idx"].item()
meridian_idx = core_data["meridian_idx"].item()
equinox_idx = core_data["equinox_idx"].item()

print(f"Loaded core: {n_core:,} tokens")
print(f"Basis dimensions: x={meridian_idx}, y={equinox_idx}, z={north_idx}")

Loaded core: 2,179 tokens
Basis dimensions: x=322, y=1564, z=163


## Compute Centered Coordinates (bfloat16)

In [5]:
print("\nComputing centered coordinates...\n")

# Select the core rows of W and subtract their mean (all arithmetic stays in
# the dtype of W_bf16)
W_core_bf16 = W_bf16[core_mask]
core_centroid_bf16 = torch.mean(W_core_bf16, dim=0)
W_core_centered_bf16 = torch.sub(W_core_bf16, core_centroid_bf16)

# Pull out the three basis columns as x, y, z
x_bf16, y_bf16, z_bf16 = (
    W_core_centered_bf16[:, axis_idx]
    for axis_idx in (meridian_idx, equinox_idx, north_idx)
)

# Squared radius, summed in x, y, z order
r_squared_bf16 = x_bf16.pow(2) + y_bf16.pow(2) + z_bf16.pow(2)

print("✓ Computed centered coordinates in bfloat16")


Computing centered coordinates...

✓ Computed centered coordinates in bfloat16


## Group by Coordinates to Find Black Holes

In [6]:
print("\nGrouping tokens by coordinates...\n")

# One row per core token, columns (r^2, x, y, z), exported to NumPy
coords_bf16 = torch.stack((r_squared_bf16, x_bf16, y_bf16, z_bf16), dim=1)
coords_np_bf16 = torch_bf16_to_numpy_bf16(coords_bf16)

# Bucket core-row indices by their coordinate tuple
coord_groups = defaultdict(list)
for row in range(n_core):
    coord_groups[tuple(coords_np_bf16[row])].append(row)

# A black hole is any bucket holding more than one token; largest first
# (sorted() is stable, so equal-sized buckets keep their insertion order)
black_holes = sorted(
    ((key, members) for key, members in coord_groups.items() if len(members) > 1),
    key=lambda group: len(group[1]),
    reverse=True,
)

n_black_holes = len(black_holes)
print(f"Found {n_black_holes} black holes")
for rank, (key, members) in enumerate(black_holes, start=1):
    # key is (r^2, x, y, z); only the spatial coordinates are reported
    _, bh_x, bh_y, bh_z = (float(component) for component in key)
    print(f"  BH{rank}: {len(members):4,} tokens at (x={bh_x:.6e}, y={bh_y:.6e}, z={bh_z:.6e})")


Grouping tokens by coordinates...

Found 4 black holes
  BH1:  866 tokens at (x=-1.525879e-05, y=0.000000e+00, z=0.000000e+00)
  BH2:  734 tokens at (x=1.525879e-05, y=1.525879e-05, z=0.000000e+00)
  BH3:  329 tokens at (x=1.525879e-05, y=0.000000e+00, z=0.000000e+00)
  BH4:  249 tokens at (x=-1.525879e-05, y=1.525879e-05, z=0.000000e+00)


## Create Masks and Metadata

In [7]:
print("\nCreating masks and metadata...\n")

# Get vocab size from W (one row per token)
vocab_size = W_bf16.shape[0]

# Vocab-wide outputs: membership mask and per-token black hole number
black_hole_mask = torch.zeros(vocab_size, dtype=torch.bool)
black_hole_labels = torch.full((vocab_size,), -1, dtype=torch.int32)  # -1 = not a BH

# Black hole sizes, in the same order as `black_holes`
black_hole_sizes = torch.tensor([len(indices) for _, indices in black_holes], dtype=torch.int32)

# Token IDs for each black hole, keyed "bh<k>_token_ids" (saved as separate tensors)
bh_token_ids = {}

# Fill in the masks; black holes are numbered from 1
for bh_idx, (coord, core_indices) in enumerate(black_holes, 1):
    # core_indices index into the core subset, not the vocabulary;
    # map them back to full-vocab token IDs
    token_ids = core_token_ids[core_indices]

    # Mark them in the masks
    black_hole_mask[token_ids] = True
    black_hole_labels[token_ids] = bh_idx

    # Store token IDs
    bh_token_ids[f"bh{bh_idx}_token_ids"] = token_ids

    print(f"BH{bh_idx}: {len(token_ids):4,} tokens (IDs {token_ids.min().item()}-{token_ids.max().item()})")

n_bh_tokens = black_hole_mask.sum().item()

# Each token must belong to exactly one black hole. If core_token_ids held
# duplicate IDs, labels would be silently overwritten and the mask would
# count fewer tokens than the sizes claim.
assert n_bh_tokens == int(black_hole_sizes.sum()), (
    f"mask marks {n_bh_tokens} tokens but black hole sizes sum to {int(black_hole_sizes.sum())}"
)

print(f"\nTotal black hole tokens: {n_bh_tokens:,} ({n_bh_tokens/vocab_size*100:.3f}% of vocab)")


Creating masks and metadata...

BH1:  866 tokens (IDs 80091-149445)
BH2:  734 tokens (IDs 125-151934)
BH3:  329 tokens (IDs 124-151919)
BH4:  249 tokens (IDs 123939-151935)

Total black hole tokens: 2,178 (1.433% of vocab)


## Save to Safetensors

In [8]:
print("\nSaving masks and metadata...\n")

output_path = Path(f"../tensors/{MODEL_NAME}/1.8e_black_hole_masks.safetensors")

# Tensors written to the file: the vocab-wide mask and labels, the black hole
# count as a scalar tensor, and the per-black-hole sizes
save_dict = {
    'black_hole_mask': black_hole_mask,
    'black_hole_labels': black_hole_labels,
    'n_black_holes': torch.tensor(n_black_holes, dtype=torch.int32),
    'black_hole_sizes': black_hole_sizes,
}

# Add individual BH token IDs (keys "bh<k>_token_ids")
save_dict.update(bh_token_ids)

# Save
save_file(save_dict, str(output_path))

print(f"✓ Saved to {output_path}")
print(f"\nContents:")
print(f"  black_hole_mask: {black_hole_mask.shape} (boolean mask for all BH tokens)")
# The label range follows the number of black holes actually found rather than
# a hard-coded "1-4", so the listing stays correct for other models
print(f"  black_hole_labels: {black_hole_labels.shape} (BH number 1-{n_black_holes}, or -1 for non-BH)")
print(f"  n_black_holes: scalar ({n_black_holes})")
print(f"  black_hole_sizes: {black_hole_sizes.shape} (sizes: {black_hole_sizes.tolist()})")
for i in range(1, n_black_holes + 1):
    key = f"bh{i}_token_ids"
    print(f"  {key}: {save_dict[key].shape} (token IDs for BH{i})")


Saving masks and metadata...

✓ Saved to ../tensors/Qwen3-4B-Instruct-2507/1.8e_black_hole_masks.safetensors

Contents:
  black_hole_mask: torch.Size([151936]) (boolean mask for all BH tokens)
  black_hole_labels: torch.Size([151936]) (BH number 1-4, or -1 for non-BH)
  n_black_holes: scalar (4)
  black_hole_sizes: torch.Size([4]) (sizes: [866, 734, 329, 249])
  bh1_token_ids: torch.Size([866]) (token IDs for BH1)
  bh2_token_ids: torch.Size([734]) (token IDs for BH2)
  bh3_token_ids: torch.Size([329]) (token IDs for BH3)
  bh4_token_ids: torch.Size([249]) (token IDs for BH4)


## Summary

In [9]:
# Horizontal rule used above and below the summary
banner = "=" * 60

print("\n" + banner)
print("SUMMARY: BLACK HOLE MASKS")
print(banner)
print()
print(f"Created masks for {n_black_holes} black holes")
print(f"Total tokens in black holes: {n_bh_tokens:,}")
print()
print("Black hole sizes:")
# Sizes are listed in black hole order, numbered from 1
for rank, n_tokens in enumerate(black_hole_sizes.tolist(), start=1):
    print(f"  BH{rank}: {n_tokens:,} tokens")
print()
print(f"Saved to: {output_path}")
print()
print(banner)


SUMMARY: BLACK HOLE MASKS

Created masks for 4 black holes
Total tokens in black holes: 2,178

Black hole sizes:
  BH1: 866 tokens
  BH2: 734 tokens
  BH3: 329 tokens
  BH4: 249 tokens

Saved to: ../tensors/Qwen3-4B-Instruct-2507/1.8e_black_hole_masks.safetensors

