# RePENG Pattern Steering Test - Google Colab Version

This notebook clones the repository and runs RePENG pattern steering tests in Google Colab.

## Setup: Clone Repository and Install Dependencies

In [None]:
# Clone the repository
import os
import subprocess

# Check if already cloned
if not os.path.exists('/content/turn_point'):
    print('Cloning repository...')
    !git clone https://github.com/ChuloIva/turn_point.git /content/turn_point
    print('Repository cloned successfully!')
else:
    print('Repository already exists, pulling latest changes...')
    !cd /content/turn_point && git pull

# Change to the project directory
os.chdir('/content/turn_point')
print(f'Current directory: {os.getcwd()}')

# List directory contents to verify
!ls -la

In [None]:
# Install required packages
print('Installing dependencies...')

# Install nnsight and other dependencies
!pip install nnsight torch transformers datasets accelerate

# Install the local package in development mode
!cd /content/turn_point/NNsight_selfie && pip install -e .

print('Dependencies installed successfully!')

## Main Code: RePENG Pattern Steering Test

This section contains the adapted code from the original notebook, optimized for Google Colab.

In [None]:
# Google Colab optimized imports and setup
import os
import sys
import torch
import nnsight

# Choose the compute device: CUDA when PyTorch can see a GPU, CPU otherwise.
# `device` is the torch.device object, `device_type` its plain-string name.
device_type = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_type)

if device_type != "cuda":
    print("Using CPU (GPU not available)")
else:
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")

print(f"Using device: {device} ({device_type})")

# Device-agnostic memory clearing function
def clear_cache():
    """Release memory that is no longer referenced, on whichever device is in use.

    Python's garbage collector runs first so that tensors kept alive only by
    reference cycles are freed; then, if CUDA is available, PyTorch's caching
    allocator is asked to release its unoccupied blocks. The CUDA check is
    made directly instead of reading a notebook-level variable, so the
    function works regardless of which cells have been executed.

    Returns:
        None
    """
    import gc  # local import keeps the function self-contained

    # empty_cache() can only release blocks whose tensors are already freed.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    # CPU doesn't need explicit cache clearing

# Set up paths for Colab: make the cloned repository the working directory
# so that relative paths used later resolve inside it.
REPO_ROOT = '/content/turn_point'
os.chdir(REPO_ROOT)

# Add the repository root to the front of the Python import path (only once,
# so re-running the cell does not add duplicate entries).
if REPO_ROOT not in sys.path:
    sys.path.insert(0, REPO_ROOT)

# Import the local package.
# First attempt: rely on the package already being importable (presumably via
# the editable install from the setup cell — confirm if that cell is changed).
try:
    from nnsight_selfie import (
        InterpretationPrompt,
        compute_pattern_steering_vectors,
        inject_with_interpretation_prompt,
        list_patterns,
    )
    print('Successfully imported nnsight_selfie package')
except ImportError as e:
    print(f'Import error: {e}')
    print('Trying alternative import method...')
    
    # Fallback: put the NNsight_selfie project directory itself on sys.path
    # and retry. A second ImportError is not caught and will stop the cell.
    sys.path.insert(0, os.path.join(REPO_ROOT, 'NNsight_selfie'))
    from nnsight_selfie import (
        InterpretationPrompt,
        compute_pattern_steering_vectors,
        inject_with_interpretation_prompt,
        list_patterns,
    )
    print('Successfully imported with alternative method')

# Set model and patterns path
MODEL_NAME = os.environ.get('MODEL_NAME', 'google/Gemma-3-4b-it')

# Find patterns file
PATTERNS_PATH = None
pattern_candidates = [
    os.path.join(REPO_ROOT, 'data/final/positive_patterns.jsonl'),
    os.path.join(REPO_ROOT, 'NNsight_selfie/data/final/positive_patterns.jsonl'),
    os.path.join(REPO_ROOT, '../data/final/positive_patterns.jsonl'),
]

for p in pattern_candidates:
    if os.path.exists(p):
        PATTERNS_PATH = p
        print(f'Found patterns file: {PATTERNS_PATH}')
        break

if not PATTERNS_PATH:
    print('❌ Patterns file not found! Searching directory structure...')
    !find /content/turn_point -name "positive_patterns.jsonl" -type f
else:
    print(f'✅ Using patterns file: {PATTERNS_PATH}')

In [None]:
# Load model (optimized for Colab)
print('Loading model (bfloat16)...')
print(f'Model: {MODEL_NAME}')

# Use device_map='auto' for efficient GPU usage in Colab
model = nnsight.LanguageModel(
    MODEL_NAME,
    device_map='auto',
    dtype=torch.bfloat16,
    low_cpu_mem_usage=True,  # Important for Colab memory management
)
tokenizer = model.tokenizer

# Apply Gemma 3 4B-it vision filter behavior.
# The model is tagged with its name; later cells read the tag back with
# getattr(model, "model_name", "unknown").
_model_name_lc = MODEL_NAME.lower()
if all(token in _model_name_lc for token in ('gemma', '3', '4b', 'it')):
    try:
        model.model_name = MODEL_NAME
    except Exception as err:
        # Tagging is best-effort, but a failure must be visible: without the
        # tag, later cells fall back to the model type "unknown".
        print(f'⚠️ Could not tag model as Gemma 3 4B-it: {err}')
    else:
        print('Gemma 3 4B-it detected; extractor will filter out vision components.')

print('✅ Model loaded successfully!')

# Check GPU memory usage
if device_type == "cuda":
    print(f'GPU memory allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GB')
    print(f'GPU memory cached: {torch.cuda.memory_reserved() / 1024**3:.2f} GB')

In [None]:
# Quick test with minimal memory usage (Colab optimized).
# Builds a steering vector from one example of one pattern and, on success,
# publishes it as `test_bundle` for the injection cell.
from types import SimpleNamespace

print('=== COLAB OPTIMIZED QUICK TEST ===')
print('Running minimal test with 1 pattern and 1 example')

# Use specific layers to minimize memory usage
specific_layers = [15]  # Single layer for testing
print(f'Extracting from layer: {specific_layers}')

# Remove any bundle left over from an earlier run, so that a failure below
# cannot leave a stale steering vector for the injection cell to pick up.
globals().pop('test_bundle', None)

try:
    # Import RePENG modules
    from nnsight_selfie.repeng.repeng_activation_extractor import RepengActivationExtractor
    from nnsight_selfie.repeng.repeng_steering_vectors import RepengSteeringVectorGenerator
    from nnsight_selfie.repeng.patterns_dataset import build_all_datasets

    # Load patterns
    patterns = list_patterns(PATTERNS_PATH)
    print(f'Available patterns: {len(patterns)}')

    # Create extractor with minimal layers
    extractor = RepengActivationExtractor(model, tokenizer, layer_indices=specific_layers)

    # Get minimal dataset (1 pattern, 1 example)
    datasets = build_all_datasets(PATTERNS_PATH, ['pos-neg'], max_patterns=1)
    if not datasets:
        # Fail with a clear message instead of an IndexError further down.
        raise ValueError(f'No datasets could be built from {PATTERNS_PATH}')
    pattern_name = next(iter(datasets))
    dataset = datasets[pattern_name]['pos-neg'][:1]  # Only first example

    print(f"Processing 1 example from pattern: {pattern_name}")

    # Extract activations
    activations, inputs = extractor.extract_dataset_activations(
        dataset, batch_size=1, show_progress=True
    )

    # Generate steering vector
    generator = RepengSteeringVectorGenerator(model_type=getattr(model, "model_name", "unknown"))
    steering = generator.generate_steering_vectors(activations, method='pca_diff')

    print(f'✅ SUCCESS: Generated steering vector for {len(steering.directions)} layers')

    # Store for testing: a plain attribute container read by the injection cell
    test_bundle = SimpleNamespace(
        steering_vector=steering,
        pattern_name=pattern_name,
        layers=list(steering.directions.keys()),
    )

    print('Ready for injection testing!')

except Exception as e:
    # Notebook-level boundary: report the failure with its traceback but keep
    # the kernel alive so the cleanup below still runs.
    print(f'❌ ERROR: {e}')
    import traceback
    traceback.print_exc()

# Clear memory after test
clear_cache()
if device_type == "cuda":
    print(f'GPU memory after cleanup: {torch.cuda.memory_allocated() / 1024**3:.2f} GB')

In [None]:
# Pattern injection test: runs only when the quick-test cell has published
# `test_bundle`; otherwise it just tells the user what to run first.
if 'test_bundle' not in locals():
    print('❌ No test bundle available. Run the quick test cell first.')
else:
    print('=== TESTING PATTERN INJECTION ===')

    # Interpretation prompt that the steering vector is injected into
    interp = InterpretationPrompt.create_simple(
        tokenizer,
        prefix='This neural pattern represents ',
        suffix=' in emotion',
    )
    prompt_text = interp.get_prompt()

    print(f'Prompt: {prompt_text}')
    print(f'Pattern: {test_bundle.pattern_name}')
    print(f'Layers: {test_bundle.layers}')

    # Injection strengths to compare, weakest first
    strengths = [0.5, 1.0, 2.0]

    for strength in strengths:
        # A failure at one strength is reported and the sweep continues.
        try:
            clear_cache()

            print(f'\nTesting injection strength: {strength}')

            # Greedy generation (do_sample=False) with the vector injected
            res = inject_with_interpretation_prompt(
                model,
                tokenizer,
                prompt_text=prompt_text,
                steering_vector=test_bundle.steering_vector,
                interpretation_prompt=interp,
                injection_strength=strength,
                max_new_tokens=30,
                do_sample=False,
            )

            generated = res['generated_text']
            print(f'Generated ({strength}x): "{generated}"')

        except Exception as e:
            print(f'Error at strength {strength}: {str(e)[:100]}...')

    print('\n✅ Injection testing complete!')

# Final cleanup (runs whether or not the test was executed)
clear_cache()

In [None]:
# Full dataset generation - ENABLED for Colab.
# For every pattern: extract activations on the target layers, derive a
# 'pca_diff' steering vector, and pickle the results batch by batch so that
# progress survives a crash. On success the vectors are reloaded into `bundles`.
import pickle
from datetime import datetime
from types import SimpleNamespace

# Configuration optimized for Colab
target_layers = list(range(18, 31))  # Layers 18-30 (13 layers)
patterns_per_batch = 1  # Process 1 pattern at a time for memory safety
examples_per_pattern = 8  # Reduced for Colab memory limits
total_patterns = len(list_patterns(PATTERNS_PATH))
# Ceiling division: number of batches needed to cover every pattern
total_batches = (total_patterns + patterns_per_batch - 1) // patterns_per_batch
# Derived from target_layers so messages stay correct if the range is edited
layer_span = f'{target_layers[0]}-{target_layers[-1]}'

print('=== FULL DATASET GENERATION FOR COLAB ===')
print(f'Generating steering vectors for ALL patterns with layers {layer_span}')

print(f'Target layers: {target_layers} ({len(target_layers)} layers)')
print(f'Processing {patterns_per_batch} pattern per batch, {examples_per_pattern} examples each')
print(f'Total patterns to process: {total_patterns}')

# Create cache directory for Colab (timestamped, relative to the working dir)
cache_dir = f"colab_full_dataset_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
os.makedirs(cache_dir, exist_ok=True)
print(f'Created cache directory: {cache_dir}')

# Import required modules
from nnsight_selfie.repeng.repeng_activation_extractor import RepengActivationExtractor
from nnsight_selfie.repeng.repeng_steering_vectors import RepengSteeringVectorGenerator
from nnsight_selfie.repeng.patterns_dataset import build_all_datasets

batch_files = []
processed_patterns = []
failed_patterns = []

print('\n🚀 Starting full dataset generation for Colab...')
print(f'Estimated time: ~{total_patterns * 3} minutes (3 min per pattern)')

try:
    # Build the prompt-pair datasets ONCE and slice them per batch.
    # Calling build_all_datasets(..., max_patterns=patterns_per_batch) inside
    # the loop passes no offset, so every batch would get the same leading
    # pattern(s) again.
    # NOTE(review): assumes max_patterns caps the number of patterns returned
    # and that the result is a dict keyed by pattern name in a stable order —
    # confirm against build_all_datasets.
    if total_patterns:
        all_datasets = build_all_datasets(PATTERNS_PATH, ['pos-neg'], max_patterns=total_patterns)
    else:
        all_datasets = {}
    pattern_items = list(all_datasets.items())

    for batch_start in range(0, total_patterns, patterns_per_batch):
        batch_end = min(batch_start + patterns_per_batch, total_patterns)
        batch_num = batch_start // patterns_per_batch + 1

        print(f'\n--- Batch {batch_num}/{total_batches}: Pattern {batch_start+1}/{total_patterns} ---')

        # Clear memory before each batch (critical for Colab)
        clear_cache()

        # Monitor GPU memory in Colab
        if device_type == "cuda":
            mem_before = torch.cuda.memory_allocated() / 1024**3
            print(f'  GPU memory before batch: {mem_before:.2f} GB')

        # Patterns belonging to this batch
        batch_items = pattern_items[batch_start:batch_end]

        if not batch_items:
            print(f'  ⚠️ No datasets found for batch {batch_num}')
            continue

        # Create extractor and generator for this batch
        extractor = RepengActivationExtractor(model, tokenizer, layer_indices=target_layers)
        generator = RepengSteeringVectorGenerator(model_type=getattr(model, "model_name", "unknown"))

        # Process the pattern(s) in this batch
        batch_bundles = []
        for pattern_name, pair_map in batch_items:
            if 'pos-neg' not in pair_map:
                continue

            dataset = pair_map['pos-neg'][:examples_per_pattern]  # Limit examples for Colab

            print(f'  Processing: {pattern_name}')
            print(f'    Examples: {len(dataset)} (limited to {examples_per_pattern})')
            print(f'    Layers: {len(target_layers)} layers ({layer_span})')

            try:
                # Extract activations for this pattern
                activations, inputs = extractor.extract_dataset_activations(
                    dataset, batch_size=1, show_progress=True
                )

                # Generate steering vector
                steering = generator.generate_steering_vectors(activations, method='pca_diff')

                # Create bundle (plain dict so it pickles without custom classes)
                bundle = {
                    'steering_vector': steering,
                    'pattern_name': pattern_name,
                    'num_examples': len(dataset),
                    'layers': list(steering.directions.keys()),
                    'batch_num': batch_num,
                    'target_layers': target_layers,
                    'method': 'pca_diff',
                }

                batch_bundles.append(bundle)
                processed_patterns.append(pattern_name)

                print(f'    ✅ SUCCESS: Generated steering vector ({len(steering.directions)} layers)')

                # Clear activations immediately after processing (critical for Colab)
                del activations, inputs, steering
                clear_cache()

                # Check memory usage after processing
                if device_type == "cuda":
                    mem_after = torch.cuda.memory_allocated() / 1024**3
                    print(f'    GPU memory after processing: {mem_after:.2f} GB')

                    if mem_after > 12:  # Warning if using more than 12GB in Colab
                        print(f'    ⚠️ HIGH MEMORY USAGE: {mem_after:.2f} GB - may crash soon!')

            except Exception as e:
                # One bad pattern must not abort the whole run: record and move on.
                print(f'    ❌ FAILED: {str(e)[:100]}...')
                failed_patterns.append((pattern_name, str(e)))
                continue

        # Save this batch to disk immediately (even if empty)
        batch_file = os.path.join(cache_dir, f'batch_{batch_num:03d}.pkl')
        batch_data = {
            'batch_num': batch_num,
            'bundles': batch_bundles,
            'model_name': MODEL_NAME,
            'target_layers': target_layers,
            'examples_per_pattern': examples_per_pattern,
            'timestamp': datetime.now().isoformat(),
            'processed_patterns': [b['pattern_name'] for b in batch_bundles],
        }

        with open(batch_file, 'wb') as f:
            pickle.dump(batch_data, f)

        batch_files.append(batch_file)

        if batch_bundles:
            print(f'  💾 Saved batch {batch_num} to disk ({len(batch_bundles)} patterns)')
        else:
            print(f'  💾 Saved empty batch {batch_num} to disk')

        # Aggressively clear memory after each batch (critical for Colab)
        del extractor, generator, batch_items, batch_bundles, batch_data
        clear_cache()

        # Progress update
        progress_pct = (batch_num / total_batches) * 100
        print(f'  📊 Progress: {progress_pct:.1f}% ({len(processed_patterns)} processed, {len(failed_patterns)} failed)')

        # Final memory check for Colab
        if device_type == "cuda":
            mem_final = torch.cuda.memory_allocated() / 1024**3
            print(f'  🧹 GPU memory after cleanup: {mem_final:.2f} GB')

    # Create comprehensive index file
    index_file = os.path.join(cache_dir, 'colab_dataset_index.pkl')
    index_data = {
        'batch_files': batch_files,
        'total_patterns_attempted': total_patterns,
        'successful_patterns': len(processed_patterns),
        'failed_patterns': len(failed_patterns),
        'target_layers': target_layers,
        'examples_per_pattern': examples_per_pattern,
        'model_name': MODEL_NAME,
        'method': 'pca_diff',
        'timestamp': datetime.now().isoformat(),
        'cache_dir': cache_dir,
        'processed_pattern_names': processed_patterns,
        'failed_pattern_details': failed_patterns,
        'colab_optimized': True,
    }

    with open(index_file, 'wb') as f:
        pickle.dump(index_data, f)

    print('\n🎉 FULL DATASET GENERATION COMPLETE!')
    print(f'📂 Cache directory: {cache_dir}')
    print('📊 Results:')
    print(f'   - Successful: {len(processed_patterns)}/{total_patterns} patterns')
    print(f'   - Failed: {len(failed_patterns)}/{total_patterns} patterns')
    print(f'   - Layers: {len(target_layers)} layers ({layer_span})')
    print(f'   - Examples per pattern: {examples_per_pattern}')
    print(f'   - Batch files: {len(batch_files)}')

    # Calculate cache size
    cache_files = [f for f in os.listdir(cache_dir) if f.endswith('.pkl')]
    total_size = sum(os.path.getsize(os.path.join(cache_dir, f)) for f in cache_files)
    print(f'   - Total cache size: {total_size / 1024 / 1024:.2f} MB')

    if failed_patterns:
        print('\n⚠️ Failed patterns:')
        for pattern, error in failed_patterns[:5]:  # Show first 5 failures
            print(f'   - {pattern}: {error[:80]}...')
        if len(failed_patterns) > 5:
            print(f'   ... and {len(failed_patterns) - 5} more failures')

    print(f'\n📋 Index file: {index_file}')
    print('💾 Ready for analysis and testing!')

    # Store results in global variables for testing.
    # The pickles read here were written by this very cell; never unpickle
    # files from an untrusted source.
    colab_bundles = []
    for batch_file in batch_files:
        with open(batch_file, 'rb') as f:
            batch_data = pickle.load(f)
        for bundle_dict in batch_data['bundles']:
            colab_bundles.append(SimpleNamespace(**bundle_dict))

    bundles = colab_bundles  # Make available for testing cells
    print(f'✅ Loaded {len(bundles)} steering vectors for testing')

except Exception as e:
    # Notebook-level boundary: report, then persist whatever was completed.
    print(f'\n💥 CRITICAL ERROR: {e}')
    import traceback
    traceback.print_exc()

    # Save partial results
    if processed_patterns:
        partial_index = os.path.join(cache_dir, 'partial_colab_index.pkl')
        with open(partial_index, 'wb') as f:
            pickle.dump({
                'partial_results': True,
                'processed_patterns': processed_patterns,
                'failed_patterns': failed_patterns,
                'batch_files': batch_files,
                'target_layers': target_layers,
                'error': str(e),
                'colab_optimized': True,
            }, f)
        print(f'💾 Saved partial results to: {partial_index}')

# Final cleanup
clear_cache()
print('\n🧹 Memory cleanup complete')

# Colab-specific memory summary
if device_type == "cuda":
    print(f'🖥️  Final GPU memory: {torch.cuda.memory_allocated() / 1024**3:.2f} GB')
    print('💡 If you encounter memory issues, restart runtime and try with fewer examples_per_pattern')
    print(f'💡 If you encounter memory issues, restart runtime and try with fewer examples_per_pattern')

## Summary

This notebook provides a Google Colab-compatible version of the RePENG Pattern Steering Test.

### What it does:
1. **Clones the repository** from GitHub
2. **Installs dependencies** automatically
3. **Loads the model** with Colab-optimized settings
4. **Runs quick tests** with minimal memory usage
5. **Tests pattern injection** with different strengths
6. **Generates steering vectors for the full pattern set** (memory intensive; skip the last code cell if you only need the quick test)

### Key Colab optimizations:
- Automatic repository cloning and setup
- Memory-efficient model loading
- GPU memory monitoring
- Reduced batch sizes for stability
- Clear memory management

### Usage tips:
- Run cells in order
- Start with quick test before full generation
- Use Colab Pro for better memory limits
- Monitor GPU memory usage