In [1]:
# Set up environment for evaluation
import os
import sys

import torch

# Single source of truth for the two filesystem locations this notebook uses,
# so the repository path is no longer repeated as a literal.
REPO_ROOT = '/net/scratch2/smallyan/universal-neurons_eval'
HF_CACHE_ROOT = '/net/projects2/chai-lab/shared_models'

# Make the repository importable and run from its root. Any earlier copy of
# the entry is dropped first so re-running this cell keeps exactly one entry
# at the front of sys.path instead of prepending a new duplicate each time.
sys.path[:] = [entry for entry in sys.path if entry != REPO_ROOT]
sys.path.insert(0, REPO_ROOT)
os.chdir(REPO_ROOT)

# Set up HuggingFace cache path. This happens before any later cell imports
# a HuggingFace-backed library.
# NOTE(review): newer transformers releases reportedly deprecate
# TRANSFORMERS_CACHE in favour of HF_HOME; it is still set here so behaviour
# is unchanged -- confirm against the installed version before removing it.
os.environ['HF_HOME'] = HF_CACHE_ROOT
os.environ['TRANSFORMERS_CACHE'] = os.path.join(HF_CACHE_ROOT, 'hub')

# Check CUDA availability
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")

print(f"Working directory: {os.getcwd()}")

CUDA available: True
CUDA device: NVIDIA A100 80GB PCIe
Working directory: /net/scratch2/smallyan/universal-neurons_eval


In [2]:
# Initialize evaluation tracking with all results so far
import pandas as pd
import numpy as np

# Results recorded so far, one row per (file, block). Every block listed here
# carries the same verdict: runnable, correctly implemented, not redundant,
# not irrelevant, and no error note. Rows are emitted in the order the files
# and blocks are listed below.
evaluation_results = [
    {
        'File': source_file,
        'Block': block_label,
        'Runnable': 'Y',
        'Correct-Implementation': 'Y',
        'Redundant': 'N',
        'Irrelevant': 'N',
        'Error Note': '',
    }
    for source_file, block_labels in (
        ('utils.py', (
            'imports_and_constants',
            'get_model_family',
            'timestamp',
            'vector_histogram',
            'vector_moments',
            'adjust_precision',
        )),
        ('analysis/activations.py', (
            'imports',
            'compute_moments_from_binned_data',
        )),
        ('analysis/correlations.py', (
            'imports',
            'flatten_unflatten_layers',
            'summarize_correlation_matrix',
        )),
        ('analysis/heuristic_explanation.py', (
            'imports',
            'compute_binary_variance_reduction',
        )),
        ('analysis/vocab_df.py', (
            'imports_and_constants',
            'feature_functions',
        )),
        ('analysis/weights.py', (
            'imports',
        )),
        ('analysis/prediction_neurons.py', (
            'imports',
            'statistical_functions',
        )),
        ('analysis/entropy_neurons.py', (
            'imports',
        )),
    )
    for block_label in block_labels
]

def record_block(file_name, block_name, runnable, correct_impl, redundant, irrelevant, error_note=""):
    """Append one block's evaluation verdict to the module-level ``evaluation_results`` list.

    Args:
        file_name: Stored under the 'File' key.
        block_name: Stored under the 'Block' key.
        runnable: Stored under the 'Runnable' key.
        correct_impl: Stored under the 'Correct-Implementation' key.
        redundant: Stored under the 'Redundant' key.
        irrelevant: Stored under the 'Irrelevant' key.
        error_note: Stored under the 'Error Note' key; defaults to "".

    Values are stored exactly as given, with no validation. The rows already
    in the list use 'Y'/'N' strings for the four verdict fields.

    Returns:
        None. The only effect is the new row appended to ``evaluation_results``.
    """
    column_names = (
        'File',
        'Block',
        'Runnable',
        'Correct-Implementation',
        'Redundant',
        'Irrelevant',
        'Error Note',
    )
    verdict = (file_name, block_name, runnable, correct_impl, redundant, irrelevant, error_note)
    # zip pairs columns with values positionally, so key order matches the existing rows.
    evaluation_results.append(dict(zip(column_names, verdict)))

# Report how many rows the tracking list holds right after initialization.
print(f"Loaded {len(evaluation_results)} previous evaluation results")

Loaded 19 previous evaluation results


In [3]:
# Test correlations_fast.py - Load model and test StreamingPearsonComputer
from transformer_lens import HookedTransformer
import einops

# Load the model that the correlations_fast.py checks will run against.
print("Loading model for correlations_fast.py test...")

# Use the GPU when one is present and fall back to CPU otherwise, so this
# cell does not fail on a machine without CUDA (device was hard-coded to
# 'cuda' before).
model_device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_test = HookedTransformer.from_pretrained('stanford-gpt2-small-a', device=model_device)
model_test.eval()

# Turns autograd off process-wide, so it also affects every later cell.
torch.set_grad_enabled(False)

# Summary of the loaded configuration.
print(f"Model loaded: {model_test.cfg.model_name}")
print(f"  n_layers: {model_test.cfg.n_layers}")
print(f"  d_mlp: {model_test.cfg.d_mlp}")



`torch_dtype` is deprecated! Use `dtype` instead!


Loading model for correlations_fast.py test...


In [4]:
# Check if model loaded: re-print the model name, layer count and MLP width
# from model_test.cfg. This raises NameError if model_test was never bound.
# NOTE(review): these are the same three prints that end the loading cell,
# whose saved output shows only the "Loading model..." line -- this looks like
# a manual re-check; confirm and drop it if it is no longer needed.
print(f"Model loaded: {model_test.cfg.model_name}")
print(f"  n_layers: {model_test.cfg.n_layers}")
print(f"  d_mlp: {model_test.cfg.d_mlp}")

In [5]:
# Print a marker, run garbage collection and clear PyTorch's CUDA cache, then
# print the type of model_test.
print("Checking model...")
import gc
# Collect unreachable Python objects first, then ask PyTorch to release the
# cached GPU memory it is no longer using.
# NOTE(review): model_test is still referenced (it is used on the last line),
# so these calls cannot release the model itself.
gc.collect()
torch.cuda.empty_cache()
print(f"Model type: {type(model_test)}")

In [6]:
# Flush both standard streams, then print a fixed marker line.
# NOTE(review): this looks like a leftover probe for cell output not showing
# up (the two cells before it have no saved output) -- confirm and remove it
# once output is confirmed working. sys is already imported by the first cell.
import sys
sys.stdout.flush()
sys.stderr.flush()
print("Test output")