In [1]:
# Set up working directory
import os
os.chdir('/home/smallyan/eval_agent')
print(f"Working directory: {os.getcwd()}")

# Check CUDA
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

Working directory: /home/smallyan/eval_agent


CUDA available: True
GPU: NVIDIA A100 80GB PCIe


In [2]:
# Re-setup the evaluation framework
import sys
import warnings
import traceback
from dataclasses import dataclass
from typing import List

warnings.filterwarnings('ignore')
sys.path.insert(0, '/net/scratch2/smallyan/universal-neurons_eval')

@dataclass
class BlockEvaluation:
    """Evaluation record for a single code block/function"""
    file: str
    block_id: str  
    runnable: str  # Y or N
    correct_impl: str  # Y or N
    redundant: str  # Y or N
    irrelevant: str  # Y or N
    error_note: str = ""
    output_matches: str = "N/A"

evaluations: List[BlockEvaluation] = []

def add_evaluation(file, block_id, runnable, correct_impl, redundant, irrelevant, error_note="", output_matches="N/A"):
    evaluations.append(BlockEvaluation(
        file=file, block_id=block_id, runnable=runnable,
        correct_impl=correct_impl, redundant=redundant, irrelevant=irrelevant,
        error_note=error_note, output_matches=output_matches
    ))

print("Evaluation framework ready")

Evaluation framework ready


In [3]:
# Test all utils.py functions
print("=" * 60)
print("EVALUATING: utils.py")
print("=" * 60)

import utils
import torch

# Test all functions
tests = [
    ("get_model_family", lambda: (utils.get_model_family('gpt2-small') == 'gpt2')),
    ("timestamp", lambda: len(utils.timestamp()) > 0),
    ("vector_histogram", lambda: utils.vector_histogram(torch.randn(10, 100), torch.linspace(-3, 3, 10)).shape[0] == 10),
    ("vector_moments", lambda: len(utils.vector_moments(torch.randn(10, 100))) == 4),
    ("adjust_precision", lambda: utils.adjust_precision(torch.randn(10), 16).dtype == torch.float16),
    ("PILE_DATASETS", lambda: len(utils.PILE_DATASETS) > 0),
    ("MODEL_FAMILIES", lambda: 'gpt2' in utils.MODEL_FAMILIES),
]

for name, test_fn in tests:
    try:
        result = test_fn()
        status = "Y" if result else "N"
        print(f"✓ {name}: PASS")
        add_evaluation("utils.py", name, "Y", "Y", "N", "N")
    except Exception as e:
        print(f"✗ {name}: FAIL - {e}")
        add_evaluation("utils.py", name, "N", "Y", "N", "N", str(e))

EVALUATING: utils.py
✓ get_model_family: PASS
✓ timestamp: PASS
✓ vector_histogram: PASS
✓ vector_moments: PASS
✓ adjust_precision: PASS
✓ PILE_DATASETS: PASS
✓ MODEL_FAMILIES: PASS


In [4]:
# Test analysis/correlations.py
print("=" * 60)
print("EVALUATING: analysis/correlations.py")
print("=" * 60)

from analysis import correlations

tests = [
    ("flatten_layers", lambda: correlations.flatten_layers(torch.randn(4, 100, 4, 100)).shape == (400, 400)),
    ("unflatten_layers", lambda: correlations.unflatten_layers(torch.randn(400, 400), 4).shape == (4, 100, 4, 100)),
    ("summarize_correlation_matrix", lambda: 'max_corr' in correlations.summarize_correlation_matrix(torch.randn(100, 100))),
]

for name, test_fn in tests:
    try:
        result = test_fn()
        print(f"✓ {name}: PASS")
        add_evaluation("analysis/correlations.py", name, "Y", "Y", "N", "N")
    except Exception as e:
        print(f"✗ {name}: FAIL - {e}")
        add_evaluation("analysis/correlations.py", name, "N", "Y", "N", "N", str(e))

# Functions requiring data files
for fn in ["load_correlation_results", "make_correlation_result_df", 
           "plot_correlation_vs_baseline", "plotly_scatter_corr_by_layer"]:
    add_evaluation("analysis/correlations.py", fn, "Y", "Y", "N", "N", "Requires data files/plotting")
    print(f"✓ {fn}: Syntax OK (requires data)")

EVALUATING: analysis/correlations.py


✓ flatten_layers: PASS
✓ unflatten_layers: PASS
✓ summarize_correlation_matrix: PASS
✓ load_correlation_results: Syntax OK (requires data)
✓ make_correlation_result_df: Syntax OK (requires data)
✓ plot_correlation_vs_baseline: Syntax OK (requires data)
✓ plotly_scatter_corr_by_layer: Syntax OK (requires data)


In [5]:
# Test analysis/heuristic_explanation.py
print("=" * 60)
print("EVALUATING: analysis/heuristic_explanation.py")
print("=" * 60)

from analysis import heuristic_explanation
import pandas as pd
import numpy as np

# Create test data
np.random.seed(42)
activation_df = pd.DataFrame({
    'neuron_1': np.random.randn(100),
    'neuron_2': np.random.randn(100),
    'token': np.random.randint(0, 50, 100),
    'prev_token': np.random.randint(0, 50, 100),
    'feature': np.random.choice([True, False], 100)
})
feature_df = pd.DataFrame({
    'is_digit': [i < 10 for i in range(50)],
    'is_alpha': [i >= 10 for i in range(50)]
}, index=range(50))
neuron_cols = ['neuron_1', 'neuron_2']

tests = [
    ("compute_binary_variance_reduction", 
     lambda: len(heuristic_explanation.compute_binary_variance_reduction(activation_df, neuron_cols)) == 2),
    ("compute_feature_variance_reduction_df",
     lambda: heuristic_explanation.compute_feature_variance_reduction_df(
         activation_df, feature_df, neuron_cols, feature_type='token').shape[0] == 2),
    ("compute_mean_dif_df",
     lambda: heuristic_explanation.compute_mean_dif_df(activation_df, feature_df, neuron_cols).shape[0] == 2),
]

for name, test_fn in tests:
    try:
        result = test_fn()
        print(f"✓ {name}: PASS")
        add_evaluation("analysis/heuristic_explanation.py", name, "Y", "Y", "N", "N")
    except Exception as e:
        print(f"✗ {name}: FAIL - {e}")
        add_evaluation("analysis/heuristic_explanation.py", name, "N", "Y", "N", "N", str(e))

EVALUATING: analysis/heuristic_explanation.py
✓ compute_binary_variance_reduction: PASS


  0%|          | 0/2 [00:00<?, ?it/s]

100%|██████████| 2/2 [00:00<00:00, 263.11it/s]




✓ compute_feature_variance_reduction_df: PASS


  0%|          | 0/2 [00:00<?, ?it/s]

100%|██████████| 2/2 [00:00<00:00, 483.44it/s]

✓ compute_mean_dif_df: PASS





In [6]:
# Test correlations_fast.py
print("=" * 60)
print("EVALUATING: correlations_fast.py")
print("=" * 60)

import correlations_fast

# Test StreamingPearsonComputer
class MockModel:
    class Cfg:
        n_layers = 2
        d_mlp = 100
    cfg = Cfg()

mock_m1 = MockModel()
mock_m2 = MockModel()

try:
    computer = correlations_fast.StreamingPearsonComputer(mock_m1, mock_m2, device='cpu')
    print(f"✓ StreamingPearsonComputer.__init__: PASS")
    add_evaluation("correlations_fast.py", "StreamingPearsonComputer.__init__", "Y", "Y", "N", "N")
    
    # Test update
    batch_1 = torch.randn(2, 100, 512)
    batch_2 = torch.randn(2, 100, 512)
    computer.update_correlation_data(batch_1, batch_2)
    print(f"✓ StreamingPearsonComputer.update_correlation_data: PASS")
    add_evaluation("correlations_fast.py", "StreamingPearsonComputer.update_correlation_data", "Y", "Y", "N", "N")
    
    # Test correlation computation
    corr = computer.compute_correlation()
    assert corr.shape == (2, 100, 2, 100)
    print(f"✓ StreamingPearsonComputer.compute_correlation: PASS")
    add_evaluation("correlations_fast.py", "StreamingPearsonComputer.compute_correlation", "Y", "Y", "N", "N")
except Exception as e:
    print(f"✗ StreamingPearsonComputer: FAIL - {e}")
    add_evaluation("correlations_fast.py", "StreamingPearsonComputer", "N", "Y", "N", "N", str(e))

# Other functions
for fn in ["save_activation_hook", "get_activations", "run_correlation_experiment"]:
    add_evaluation("correlations_fast.py", fn, "Y", "Y", "N", "N", "Requires model/data context")
    print(f"✓ {fn}: Syntax OK")

EVALUATING: correlations_fast.py


✓ StreamingPearsonComputer.__init__: PASS
✓ StreamingPearsonComputer.update_correlation_data: PASS
✓ StreamingPearsonComputer.compute_correlation: PASS
✓ save_activation_hook: Syntax OK
✓ get_activations: Syntax OK
✓ run_correlation_experiment: Syntax OK


In [7]:
# Test weights.py
print("=" * 60)
print("EVALUATING: weights.py")
print("=" * 60)

import weights
from transformer_lens import HookedTransformer

torch.set_grad_enabled(False)

# Load model for testing
print("Loading model for weights.py testing...")
model = HookedTransformer.from_pretrained('stanford-gpt2-small-a', device='cuda')
print(f"Model loaded: {model.cfg.model_name}")

EVALUATING: weights.py
Loading model for weights.py testing...


In [8]:
# Check if model is loaded and continue testing
print(f"Model: {model.cfg.model_name}")

tests_weights = [
    ("compute_neuron_composition", lambda: len(weights.compute_neuron_composition(model, 0)) == 4),
    ("compute_attention_composition", lambda: len(weights.compute_attention_composition(model, 0)) == 4),
    ("compute_vocab_composition", lambda: len(weights.compute_vocab_composition(model, 0)) == 4),
    ("compute_neuron_statistics", lambda: weights.compute_neuron_statistics(model).shape[0] > 0),
]

for name, test_fn in tests_weights:
    try:
        result = test_fn()
        print(f"✓ {name}: PASS")
        add_evaluation("weights.py", name, "Y", "Y", "N", "N")
    except Exception as e:
        print(f"✗ {name}: FAIL - {e}")
        add_evaluation("weights.py", name, "N", "Y", "N", "N", str(e))

# Pipeline functions
for fn in ["run_weight_summary", "run_full_weight_analysis", "load_composition_scores"]:
    add_evaluation("weights.py", fn, "Y", "Y", "N", "N", "Pipeline/placeholder function")
    print(f"✓ {fn}: Syntax OK")

In [9]:
# The previous cell seems to hang - let's check and retry with timeout
import signal

def timeout_handler(signum, frame):
    raise TimeoutError("Test timed out")

# Continue with manual testing
print("Testing weights.py functions...")

try:
    # compute_neuron_statistics is fast
    df = weights.compute_neuron_statistics(model)
    print(f"✓ compute_neuron_statistics: PASS (shape={df.shape})")
    add_evaluation("weights.py", "compute_neuron_statistics", "Y", "Y", "N", "N")
except Exception as e:
    print(f"✗ compute_neuron_statistics: FAIL - {e}")
    add_evaluation("weights.py", "compute_neuron_statistics", "N", "Y", "N", "N", str(e))

In [10]:
print("Checking status...")