# Neuron Theme Extraction: Build Per-Layer Query Sets (Layers 17–23)

This notebook loads precomputed neuron activation statistics (`stats`) and a QID→query mapping (`qids_to_queries`), then exports **per-layer** dictionaries that map each neuron to the queries that activated it. These outputs are used for **LLM-based subject annotation** of neuron themes.

**Inputs**
- `stats["by_layer"]["layer_XX"][neuron]["entries"]` → list of QIDs per neuron
- `qids_to_queries[QID]` → original query string

**Outputs (written)**
- `neurons_and_queries/layer_XX_neurons_and_queries.json`
- `neurons_and_queries/layer_XX_neurons_and_queries_small.json` (only neurons with 10–1000 queries)


In [None]:
import json
import numpy as np
import torch

## Load activation statistics (`stats`)

In [None]:
def _to_int(x):
    """Best-effort int conversion for scalars, 0-d tensors/np scalars, or single-item lists."""
    try:
        if hasattr(x, 'item'):
            return int(x.item())
        return int(x)
    except Exception:
        try:
            if isinstance(x, np.generic):
                return int(x)
        except Exception:
            pass
        if isinstance(x, (list, tuple)) and len(x) == 1:
            try:
                return int(x[0])
            except Exception:
                return None
        return None

def load_result_and_stats(result_path: str='activated_neurons_train_val.json', stats_path: str='neuron_activation_stats.json'):
    """Load the per-query activation results and the neuron statistics from JSON.

    The results file may hold the query→record mapping directly or wrap it
    under a top-level ``'data'`` key. Every dict record is normalised in place:

    - ``correct_doc_id`` is coerced with ``_to_int``; if that fails and the
      value is a non-empty list/tuple, its first element is tried instead.
      The field ends up as ``None`` when nothing converts.
    - ``relevant_docs`` keeps only the items ``_to_int`` can convert.
    - every list inside ``activated_neurons`` is filtered the same way; the
      key is created (as ``{}``) when the record does not have it.

    The stats mapping receives defaults for ``total_entries`` (the number of
    result records), ``by_layer`` and ``global`` when those keys are absent.

    Returns:
        A ``(results, stats)`` tuple.
    """

    def _ints_only(values):
        # Drop everything `_to_int` cannot convert, preserving order.
        return [as_int for as_int in map(_to_int, values) if as_int is not None]

    with open(result_path, 'r', encoding='utf-8') as fh:
        results = json.load(fh)

    # Unwrap the optional {'data': {...}} envelope.
    if isinstance(results, dict) and isinstance(results.get('data'), dict):
        results = results['data']

    # Records are mutated in place, so the outer mapping needs no reassignment.
    for record in list(results.values()):
        if not isinstance(record, dict):
            continue

        doc_id = record.get('correct_doc_id')
        if doc_id is not None:
            converted = _to_int(doc_id)
            if converted is None and isinstance(doc_id, (list, tuple)) and doc_id:
                converted = _to_int(doc_id[0])
            record['correct_doc_id'] = converted

        relevant = record.get('relevant_docs')
        if relevant is not None:
            record['relevant_docs'] = _ints_only(relevant)

        neurons_by_layer = record.get('activated_neurons', {})
        if isinstance(neurons_by_layer, dict):
            for layer_name, neuron_ids in list(neurons_by_layer.items()):
                if isinstance(neuron_ids, list):
                    neurons_by_layer[layer_name] = _ints_only(neuron_ids)
        record['activated_neurons'] = neurons_by_layer

    with open(stats_path, 'r', encoding='utf-8') as fh:
        stats = json.load(fh)

    # Fill in only the keys the stats file does not already provide.
    fallbacks = (
        ('total_entries', len(results)),
        ('by_layer', {}),
        ('global', {}),
    )
    for key, fallback in fallbacks:
        stats.setdefault(key, fallback)

    return results, stats

In [None]:
# Load the normalised per-query activation records together with the
# per-neuron activation statistics.
result_dict, stats = load_result_and_stats(
    result_path='activated_neurons_test_copy.json',
    stats_path='neuron_activation_stats_with_entries_test_data.json',
)

## Load QID → query mapping (`qids_to_queries`)

In [None]:
# Mapping from QID to the original query string.
# NOTE(review): the file carries a `.json` extension but is read with
# `torch.load`, which expects a file produced by `torch.save`. Confirm the
# on-disk format; a plain-text JSON file would need `json.load` instead.
# `torch.load` unpickles its input, so only point it at trusted files.
qids_to_queries = torch.load('../q_ids_to_queries.json')

In [None]:
# Sanity checks for required inputs: both names must already be bound in the
# notebook's global namespace by the loading cells.
assert 'stats' in globals(), "Missing `stats`. Load the activation stats before running the export."
assert 'qids_to_queries' in globals(), "Missing `qids_to_queries`. Load the QID→query mapping first."

# Minimal structural checks (won't be exhaustive): only the presence of the
# per-layer keys is verified, not the neuron records stored under them.
assert 'by_layer' in stats, "`stats` must have a 'by_layer' key."
# Layers 17..23 inclusive, the same range the export loop iterates.
for L in range(17, 24):
    lk = f"layer_{L}"
    assert lk in stats['by_layer'], f"Missing {lk} in stats['by_layer']."


## Export per-layer neuron→queries dictionaries

In [None]:
# Export one pair of JSON files per layer (17..23 inclusive).
for layer_idx in range(17, 24):
    layer_key = f"layer_{layer_idx}"

    # Resolve each neuron's QIDs to query strings; `subjects` starts empty.
    layer_neurons_and_queries = {
        neuron_id: {
            "queries": [qids_to_queries[qid] for qid in neuron_stats["entries"]],
            "subjects": [],
        }
        for neuron_id, neuron_stats in stats["by_layer"][layer_key].items()
    }

    # The "small" variant keeps only neurons with 10 to 1000 queries (inclusive).
    layer_neurons_and_queries_small = {}
    for neuron_id, payload in layer_neurons_and_queries.items():
        query_count = len(payload["queries"])
        if query_count < 10 or query_count > 1000:
            continue
        layer_neurons_and_queries_small[neuron_id] = payload

    # Write the full mapping first, then the filtered one, each to its own file.
    outputs = (
        (f"{layer_key}_neurons_and_queries.json", layer_neurons_and_queries),
        (f"{layer_key}_neurons_and_queries_small.json", layer_neurons_and_queries_small),
    )
    for out_path, mapping in outputs:
        with open(out_path, "w") as out_file:
            json.dump(mapping, out_file)