In [1]:
import os
os.chdir('/home/smallyan/eval_agent')
print(f"Current working directory: {os.getcwd()}")

Current working directory: /home/smallyan/eval_agent


# Circuit Analysis Code Evaluation

This notebook evaluates the code implementation in `/net/scratch2/smallyan/relations_eval` for circuit analysis.

## Setup and Initial Exploration

In [2]:
# Check if CUDA is available
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU device: {torch.cuda.get_device_name(0)}")
    print(f"Number of GPUs: {torch.cuda.device_count()}")

CUDA available: True
GPU device: NVIDIA A100 80GB PCIe
Number of GPUs: 1


In [3]:
# Explore the repository structure
repo_path = '/net/scratch2/smallyan/relations_eval'
print("Repository contents:")
for item in os.listdir(repo_path):
    full_path = os.path.join(repo_path, item)
    if os.path.isdir(full_path):
        print(f"  üìÅ {item}/")
    else:
        print(f"  üìÑ {item}")

Repository contents:
  üìÅ hparams/
  üìÑ pyproject.toml
  üìÅ data/
  üìÑ experiments.py
  üìÑ CodeWalkthrough.md
  üìÑ plan.md
  üìÅ notebooks/
  üìÅ src/
  üìÑ requirements.txt
  üìÑ schematic-wide.png
  üìÑ invoke.yaml
  üìÑ LICENSE
  üìÑ .gitignore
  üìÅ tests/
  üìÅ .git/
  üìÅ demo/
  üìÑ tasks.py
  üìÅ scripts/
  üìÑ documentation.pdf


In [4]:
# Explore the demo directory for notebooks
demo_path = os.path.join(repo_path, 'demo')
print("Demo directory contents:")
for item in os.listdir(demo_path):
    print(f"  {item}")

# Explore src directory  
src_path = os.path.join(repo_path, 'src')
print("\nSrc directory contents:")
for item in os.listdir(src_path):
    full_path = os.path.join(src_path, item)
    if os.path.isdir(full_path):
        print(f"  üìÅ {item}/")
    else:
        print(f"  üìÑ {item}")

# Explore notebooks directory
notebooks_path = os.path.join(repo_path, 'notebooks')
print("\nNotebooks directory contents:")
for item in os.listdir(notebooks_path):
    print(f"  {item}")

# Explore scripts directory
scripts_path = os.path.join(repo_path, 'scripts')
print("\nScripts directory contents:")
for item in os.listdir(scripts_path):
    print(f"  {item}")

Demo directory contents:
  attribute_lens.ipynb
  demo.ipynb
  attribute_lens.png
  causality-crop.png

Src directory contents:
  üìÑ data.py
  üìÑ operators.py
  üìÑ __init__.py
  üìÑ models.py
  üìÑ corner.py
  üìÑ editors.py
  üìÑ metrics.py
  üìÑ functional.py
  üìÑ hparams.py
  üìÑ sweeps.py
  üìÑ benchmarks.py
  üìÑ lens.py
  üìÅ utils/
  üìÅ attributelens/

Notebooks directory contents:
  icl.ipynb
  evaluate_demo.ipynb
  figures
  attn_layer_localization.ipynb
  j_subspace.ipynb
  interpolation_experiment.ipynb
  sandbox.ipynb
  saving_sweep_hparams.ipynb
  editing.ipynb
  vignette.ipynb
  representation_editing_low_rank.ipynb
  attributelens.ipynb
  discriminate_j.ipynb
  dataset_stats.ipynb
  layer_search.ipynb
  imaginary_j.ipynb
  corner_demo.ipynb
  efficacy_puzzle.ipynb
  explain_beta.ipynb
  corner_visualization.ipynb

Scripts directory contents:
  sweep_n_icl.py
  knowns.py
  dataset_stats.py
  sweep_prompt.py
  caching
  baselines
  __init__.py
  evaluate

## Code Structure Overview

Based on the CodeWalkthrough.md, the main demo notebooks are:
1. `demo/demo.ipynb` - Shows LRE approximation and evaluation metrics (faithfulness and causality)
2. `demo/attribute_lens.ipynb` - Demonstrates Attribute Lens application

The src/ directory contains the core modules:
- `data.py` - Dataset handling
- `operators.py` - LRE operators and transformations
- `models.py` - Model loading and handling
- `functional.py` - Core functional operations
- `metrics.py` - Evaluation metrics
- `editors.py` - Representation editing
- `lens.py` - Lens/probing functionality
- And more supporting modules

Let's evaluate the key code blocks systematically.

## Evaluating demo/demo.ipynb

This notebook demonstrates:
1. Loading GPT-J model
2. Loading dataset
3. Creating LRE estimator and operator
4. Computing faithfulness metric
5. Computing causality metric with representation editing

Let's run each code block and evaluate.

In [5]:
# Add repo to path
import sys
sys.path.insert(0, repo_path)

# Initialize evaluation tracking
evaluation_results = []

def record_evaluation(block_id, runnable, correct_impl, redundant, irrelevant, error_note=""):
    """Record evaluation for a code block"""
    evaluation_results.append({
        'block_id': block_id,
        'runnable': runnable,
        'correct_implementation': correct_impl,
        'redundant': redundant,
        'irrelevant': irrelevant,
        'error_note': error_note
    })
    
print("Evaluation tracking initialized")

Evaluation tracking initialized


In [6]:
# Block 1: demo/demo.ipynb - Cell 1 (Imports)
block_id = "demo/demo.ipynb:cell_1_imports"
try:
    import torch
    from src import models, data, lens, functional
    from src.utils import experiment_utils
    # baukit.Menu and show are for interactive widgets - skip in evaluation
    runnable = True
    error_note = ""
except Exception as e:
    runnable = False
    error_note = str(e)

# This is correct - imports are standard and necessary
record_evaluation(block_id, runnable=runnable, correct_impl=True, redundant=False, irrelevant=False, error_note=error_note)
print(f"Block: {block_id}")
print(f"  Runnable: {'Y' if runnable else 'N'}")
print(f"  Error: {error_note if error_note else 'None'}")

Block: demo/demo.ipynb:cell_1_imports
  Runnable: N
  Error: Failed to import transformers.models.gpt2.modeling_gpt2 because of the following error (look up to see its traceback):
No module named 'torch._custom_ops'


In [7]:
# Check torch version and fix import issue
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

# Let's check if the issue is with a specific package version
import subprocess
result = subprocess.run(['pip', 'list'], capture_output=True, text=True)
print("\nRelevant packages:")
for line in result.stdout.split('\n'):
    if any(pkg in line.lower() for pkg in ['torch', 'transformers', 'baukit']):
        print(f"  {line}")

PyTorch version: 1.13.1+cu117
CUDA available: True



Relevant packages:
  baukit                    0.0.1
  onnx2torch                1.5.15
  sentence-transformers     5.1.2
  torch                     1.13.1
  torchtext                 0.18.0
  torchvision               0.20.1
  transformers              4.46.1


In [8]:
# The torch version is 1.13.1 which doesn't have torch._custom_ops
# This is a compatibility issue - let's check if we can use a different approach
# First let's understand what the src modules actually import

import os

# Read the models.py to understand what's needed
models_path = os.path.join(repo_path, 'src', 'models.py')
with open(models_path) as f:
    content = f.read()
    
# Check the imports section
print("models.py imports:")
for line in content.split('\n')[:30]:
    print(line)

models.py imports:
"""Functions for loading and interacting with pretrained language models.

This module is designed to house all the annoying branching logic
that comes with supporting analysis of many slightly different model
implementations.
"""
import argparse
import logging
from contextlib import contextmanager
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Iterator, Literal, Optional, Sequence, overload

from src.utils import env_utils, tokenizer_utils
from src.utils.typing import Device, Layer, Model, ModelInput, Tokenizer

import torch
import transformers

logger = logging.getLogger(__name__)

GPT_J_NAME_SHORT = "gptj"  # A useful alias for the CLI.
GPT_J_NAME = "EleutherAI/gpt-j-6B"

GPT_NEO_X_NAME_SHORT = "neox"
GPT_NEO_X_NAME = "EleutherAI/gpt-neox-20b"

LLAMA_13B_NAME = "llama-13b"
LLAMA_30B_NAME = "llama-30b"
LLAMA_NAME_SHORT = "llama"


In [9]:
# Let's try importing transformers directly to understand the error better
try:
    import transformers
    print(f"Transformers version: {transformers.__version__}")
except Exception as e:
    print(f"Error importing transformers: {e}")

# Check if we can import GPT2 specifically
try:
    from transformers import GPT2LMHeadModel
    print("GPT2LMHeadModel imported successfully")
except Exception as e:
    print(f"Error importing GPT2LMHeadModel: {e}")

Transformers version: 4.46.1
Error importing GPT2LMHeadModel: Failed to import transformers.models.gpt2.modeling_gpt2 because of the following error (look up to see its traceback):
No module named 'torch._custom_ops'


In [10]:
# The issue is a version mismatch: transformers 4.46.1 requires a newer PyTorch version
# PyTorch 1.13.1 doesn't have torch._custom_ops which is required by transformers 4.46.1
# This is a critical environment setup issue

# Let's check requirements.txt for the expected versions
requirements_path = os.path.join(repo_path, 'requirements.txt')
with open(requirements_path) as f:
    print("requirements.txt contents:")
    print(f.read())

requirements.txt contents:
git+https://github.com/davidbau/baukit
accelerate
dataclasses-json
# TODO(evan): Needed?
huggingface-hub
invoke
jupyter
jupyter_http_over_ws
matplotlib
numpy
protobuf==3.20.0
requests
seaborn
sentencepiece
tqdm
transformers
torch
plotly

# dev dependencies
black
isort
mypy
pytest
types-requests



In [11]:
# Let's try to upgrade torch to a compatible version
import subprocess

# Check if we can upgrade torch in this environment
print("Attempting to upgrade PyTorch to a compatible version...")
result = subprocess.run(
    ['pip', 'install', 'torch>=2.0.0', '--quiet'], 
    capture_output=True, 
    text=True
)
print(f"stdout: {result.stdout}")
print(f"stderr: {result.stderr}")
print(f"Return code: {result.returncode}")

Attempting to upgrade PyTorch to a compatible version...


stdout: 
  You can safely remove it manually.[0m[33m
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchvision 0.20.1 requires torch==2.5.1, but you have torch 2.9.1 which is incompatible.
transformer-lens 1.2.1 requires torch<2.0,>=1.10, but you have torch 2.9.1 which is incompatible.[0m[31m
[0m
Return code: 0


In [12]:
# We upgraded torch - let's restart the kernel to pick up the new version
# Since we can't restart in this session, let's try importing again

# Force reimport
import importlib
import sys

# Remove cached modules
modules_to_remove = [key for key in sys.modules.keys() if 'torch' in key or 'transformers' in key]
for mod in modules_to_remove:
    del sys.modules[mod]

# Now try importing again
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

RuntimeError: function '_has_torch_function' already has a docstring