# Cognitive Pattern Analysis Notebook

This notebook demonstrates the complete pipeline for analyzing cognitive patterns in neural network activations: activations are captured with TransformerLens and then analyzed with PCA, a sparse autoencoder (SAE) interface, selfie interpretation, and activation arithmetic.

## 1. Setup and Imports

In [6]:
import os
import yaml
import torch
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Set style for plots
plt.style.use('default')
sns.set_palette("husl")

# Import our modules
from model_loader import ModelLoader
from activation_capture import ActivationCapturer
from data.data_loader import DataLoader
from analysis.pca_analysis import PCAAnalyzer
from analysis.sae_interface import SAEInterface
from analysis.interpretation import SelfieInterpreter, ActivationArithmetic
from utils.device_detection import get_device_manager, detect_and_print_devices

# Report which compute backends the device manager found on this machine
print("=== Device Detection ===")
device_manager = get_device_manager()
device_manager.print_device_info()

# Confirm the environment: library version plus the device the manager selected
print("✅ Imports successful")
print("PyTorch version:", torch.__version__)
print("Optimal device:", device_manager.get_device('auto'))
print("Device type:", device_manager.optimal_device[0])

=== Device Detection ===
=== Device Detection Results ===
Platform: Darwin arm64
Python: 3.13.5
PyTorch: 2.8.0
Optimal Device: mps (mps)

❌ CUDA: False
✅ MPS: True
✅ MLX: True
    CPU: Apple M4 Pro
    MLX Installed: False
❌ ROCM: False
✅ CPU: True

✅ Imports successful
PyTorch version: 2.8.0
Optimal device: mps
Device type: mps


## 2. Load Configuration

In [7]:
# Parse the YAML configuration that every later section reads from
config = yaml.safe_load(Path('./config/config.yaml').read_text())

# Echo the settings this notebook relies on, one "label: value" line each
print("Configuration loaded:")
for label, section, key in (
    ("Model", 'model', 'name'),
    ("Local path", 'model', 'local_path'),
    ("Layers", 'model', 'layers'),
    ("Cognitive patterns", 'data', 'cognitive_patterns'),
    ("Analysis methods", 'analysis', 'methods'),
):
    print(f"{label}: {config[section][key]}")

Configuration loaded:
Model: google/gemma-2-2b-it
Local path: google/gemma-2-2b-it
Layers: [17, 21]
Cognitive patterns: ['positive', 'negative', 'transition']
Analysis methods: ['pca', 'sae', 'selfie', 'arithmetic']


## 3. Initialize Data Loader and Load Patterns

In [8]:
# Build the loader rooted at the configured dataset directory
data_loader = DataLoader(base_path=config['data']['base_path'])

# Read the main dataset into a mapping of pattern name -> list of strings
print("Loading cognitive patterns...")
cognitive_patterns = data_loader.load_cognitive_pattern_types(config['data']['main_dataset'])

# Per-pattern sample count and average length, as reported by the loader
stats = data_loader.get_pattern_stats()
print("\nDataset Statistics:")
for pattern, stat in stats.items():
    print("  {}: {} samples, avg length: {:.1f}".format(pattern, stat['count'], stat['avg_length']))

# Preview the first two samples of every non-empty pattern,
# truncating anything longer than 100 characters
print("\nSample data:")
for pattern_name, strings in cognitive_patterns.items():
    if not strings:
        continue
    print(f"\n{pattern_name} (first 2 samples):")
    for rank, sample in enumerate(strings[:2], start=1):
        preview = f"{sample[:100]}..." if len(sample) > 100 else sample
        print(f"  {rank}. {preview}")

Loading cognitive patterns...

Dataset Statistics:
  positive: 520 samples, avg length: 493.5
  negative: 520 samples, avg length: 258.5
  transition: 520 samples, avg length: 414.8

Sample data:

positive (first 2 samples):
  1. I'm recognizing that my energy levels are flagging today, which is totally normal. I've been pushing...
  2. I've noticed how often my mind drifts to the idea of death as a coping mechanism when I'm feeling ov...

negative (first 2 samples):
  1. Ugh, just the thought of checking my email is draining me already. It's like trying to lift a heavy ...
  2. Ugh, there they go again - those incessant whispers about what would be better if only I were dead. ...

transition (first 2 samples):
  1. I need to take a deep breath and acknowledge that my mind feels overwhelmed. That's okay; it doesn't...
  2. I remember the day it started, marked 'Beginning' on my timeline. Facts: I got into an argument with...


## 4. Initialize Model and Activation Capture

In [9]:
# Create the capturer for the configured model and device setting
print("Initializing activation capturer...")
model_cfg = config['model']
activation_capturer = ActivationCapturer(
    model_name=model_cfg['name'],
    device=model_cfg['device'],
)

# Load the model weights from the configured local path
print("Loading model...")
local_path = model_cfg['local_path']
activation_capturer.load_model(local_path)

# Print every field the capturer reports about the loaded model
model_info = activation_capturer.get_model_info()
print("\nModel Information:")
for field in model_info:
    print(f"  {field}: {model_info[field]}")

# Show which device the capturer ended up on.
# NOTE(review): device_info is fetched but never read in this cell - confirm whether it is still needed.
device_info = activation_capturer.device_manager.get_device_info()
print("\nUsing device: {}".format(activation_capturer.device))
print("Device type: {}".format(activation_capturer.device_type))

print("\n✅ Model loaded successfully")

Initializing activation capturer...
Loading model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loaded pretrained model google/gemma-2-2b-it into HookedTransformer

Model Information:
  n_layers: 26
  d_model: 2304
  n_heads: 8
  d_head: 256
  vocab_size: 256000
  context_length: 8192

Using device: mps
Device type: mps

✅ Model loaded successfully


## 5. Capture Activations

In [None]:
# Capture activations for every cognitive pattern at the configured layers.
# Results are stored in activations_dict as pattern name -> mapping returned by
# capture_activations (each value is printed below with its .shape).
print("Capturing activations...")

layers = config['model']['layers']
position = config['capture']['position']
max_samples = config['data'].get('max_samples_per_pattern', 1)  # Limit for notebook

activations_dict = {}

for pattern_name, strings in cognitive_patterns.items():
    if not strings:
        print(f"Skipping {pattern_name} - no data")
        continue

    print(f"\nProcessing pattern: {pattern_name}")

    # Limit samples for notebook demo
    limited_strings = strings[:max_samples]
    print(f"  Processing {len(limited_strings)} samples")

    # Capture activations. A failure here is re-raised with the pattern and
    # capture settings attached, because the underlying error (e.g. torch.stack
    # complaining about unequal tensor sizes) does not say which input caused it.
    try:
        activations = activation_capturer.capture_activations(
            strings=limited_strings,
            layer_nums=layers,
            cognitive_pattern=pattern_name,
            position=position
        )
    except RuntimeError as err:
        # NOTE(review): a "stack expects each tensor to be equal size" error
        # presumably means the samples yielded different numbers of token
        # positions - confirm how ActivationCapturer handles `position`.
        raise RuntimeError(
            f"Activation capture failed for pattern '{pattern_name}' "
            f"({len(limited_strings)} samples, layers={layers}, position={position!r}): {err}. "
            "If the error mentions unequal tensor sizes, check that "
            "config['capture']['position'] selects the same number of token "
            "positions for every sample."
        ) from err

    activations_dict[pattern_name] = activations

    # Show activation shapes
    for key, tensor in activations.items():
        print(f"    {key}: {tensor.shape}")

print("\n✅ Activation capture complete")

Capturing activations...

Processing pattern: positive
  Processing 520 samples


RuntimeError: stack expects each tensor to be equal size, but got [107, 2304] at entry 0 and [120, 2304] at entry 1

## 6. PCA Analysis

In [None]:
# Run PCA on every captured pattern and report how much variance the leading
# principal components explain for each layer key.
pca_analyzer = PCAAnalyzer(n_components=config['analysis']['pca']['n_components'])

print("Running PCA analysis...")
pca_results = {}

for pattern_name, activations in activations_dict.items():
    print(f"\nPCA for pattern: {pattern_name}")

    # Compute PCA
    pca_result = pca_analyzer.compute_pca(
        activations=activations,
        pattern_name=pattern_name,
        standardize=config['analysis']['pca']['standardize']
    )

    pca_results[pattern_name] = pca_result

    # Show explained variance for each layer
    for layer_key in activations.keys():
        explained_var = pca_analyzer.get_explained_variance_ratio(pattern_name, layer_key)
        if len(explained_var) > 0:
            cumulative_var = pca_analyzer.get_cumulative_explained_variance(pattern_name, layer_key)
            # Fewer than 5 components can exist (small n_components or very few
            # samples), so clamp the index instead of hard-coding position 4.
            n_reported = min(5, len(cumulative_var))
            if n_reported:
                print(
                    f"  {layer_key}: First {n_reported} PCs explain "
                    f"{cumulative_var[n_reported - 1]:.1%} of variance"
                )

print("\n✅ PCA analysis complete")

In [None]:
# Draw one explained-variance figure per (pattern, layer key) pair
print("Creating PCA visualizations...")

for pattern_name, activations in activations_dict.items():
    # Only the first two layer keys are plotted to keep the notebook short
    first_two_layers = list(activations.keys())[:2]
    for layer_key in first_two_layers:
        print("Plotting {} - {}".format(pattern_name, layer_key))
        pca_analyzer.plot_explained_variance(pattern_name, layer_key)
        plt.show()

In [None]:
# PCA scatter plot comparing patterns on a layer key they all share, followed by
# a separation score for the first two patterns.
if len(activations_dict) > 1:
    print("Creating PCA comparison plots...")

    # Layer keys present for every pattern
    common_layers = None
    for pattern_name, activations in activations_dict.items():
        if common_layers is None:
            common_layers = set(activations.keys())
        else:
            common_layers &= set(activations.keys())

    if common_layers:
        # Sets have no stable order, so sort before picking; otherwise the
        # plotted layer could change from one kernel session to the next.
        layer_to_plot = sorted(common_layers, key=str)[0]
        pattern_names = list(activations_dict.keys())

        pca_analyzer.plot_pca_scatter(
            pattern_names=pattern_names,
            layer_key=layer_to_plot,
            components=(0, 1)
        )
        plt.show()

        # Compute pattern separation (at least two patterns are guaranteed by
        # the outer check)
        separation = pca_analyzer.get_pattern_separation(
            pattern_names[0], pattern_names[1], layer_to_plot
        )
        print(f"Pattern separation ({pattern_names[0]} vs {pattern_names[1]}): {separation:.2f}")
    else:
        # NOTE(review): the activation arithmetic section looks keys up as
        # f"{pattern_name}_layer_{layer_num}"; if keys really carry the pattern
        # name, no key can ever be shared here - confirm the key format.
        print("No layer key is shared by all patterns - skipping comparison plots")
else:
    print("Need multiple patterns for comparison plots")

## 7. SAE Analysis (Placeholder)

In [None]:
# Run the (placeholder) SAE feature analysis on one captured pattern and print
# sparsity, reconstruction error and the top three features per layer.
sae_interface = SAEInterface(sae_model_path=config['analysis']['sae']['model_path'])

print("Running SAE analysis (placeholder implementation)...")
sae_results = {}

# Fail with an actionable message instead of an IndexError when the capture
# step produced nothing (e.g. it raised part-way through).
if not activations_dict:
    raise RuntimeError(
        "No activations available for SAE analysis - "
        "run the activation capture section successfully first."
    )

# Analyze one pattern as demonstration: the first one that was captured
demo_pattern = next(iter(activations_dict))
demo_activations = activations_dict[demo_pattern]

print(f"\nAnalyzing SAE features for: {demo_pattern}")
sae_result = sae_interface.analyze_pattern_features(
    activations=demo_activations,
    pattern_name=demo_pattern,
    top_k=config['analysis']['sae']['top_k_features']
)

sae_results[demo_pattern] = sae_result

# Display results
print(f"\nSAE Analysis Results for {demo_pattern}:")
for layer_key, layer_result in sae_result['layers'].items():
    print(f"\n  {layer_key}:")
    print(f"    Sparsity: {layer_result['sparsity']:.3f}")
    print(f"    Reconstruction error: {layer_result['reconstruction_error']:.3f}")
    print(f"    Top 3 features:")
    # Each entry is unpacked as (feature index, activation value)
    for feat_idx, activation in layer_result['top_features'][:3]:
        interpretation = layer_result['interpretations'].get(feat_idx, "No interpretation")
        print(f"      Feature {feat_idx}: {activation:.3f} - {interpretation}")

print("\n✅ SAE analysis complete")

## 8. Selfie Interpretation

In [None]:
# Interpret a handful of captured activations with the selfie interpreter and
# run its validation on the resulting texts.
selfie_interpreter = SelfieInterpreter(model=activation_capturer.model)

print("Running selfie interpretation analysis...")

# Choose the demo pattern from the patterns that actually have activations.
# Patterns without data are skipped during capture, so the first key of
# cognitive_patterns is not guaranteed to exist in activations_dict.
if not activations_dict:
    raise RuntimeError(
        "No activations available for selfie interpretation - "
        "run the activation capture section successfully first."
    )
demo_pattern = next(iter(activations_dict))
demo_activations = activations_dict[demo_pattern]

# Limit to one layer and at most 5 samples for the demo
layer_keys = list(demo_activations.keys())[:1]
limited_activations = {k: demo_activations[k][:5] for k in layer_keys}

# Keep contexts and activations the same length: fewer than 5 samples may have
# been captured. Assumes the first tensor axis indexes samples, as the [:5]
# slice above already does - TODO confirm against ActivationCapturer.
n_demo = min([5] + [len(tensor) for tensor in limited_activations.values()])
demo_strings = cognitive_patterns[demo_pattern][:n_demo]
limited_activations = {k: tensor[:n_demo] for k, tensor in limited_activations.items()}

print(f"\nSelfie analysis for: {demo_pattern}")
print(f"Analyzing {len(demo_strings)} samples...")

selfie_interpretations = selfie_interpreter.batch_interpret_activations(
    activations=limited_activations,
    contexts=demo_strings,
    pattern_name=demo_pattern
)

# Display interpretations (first three per layer, truncated to 200 characters)
print(f"\nSelfie Interpretations for {demo_pattern}:")
for layer_key, interpretations in selfie_interpretations.items():
    print(f"\n  {layer_key}:")
    for i, interpretation in enumerate(interpretations[:3]):
        print(f"    Sample {i+1}: {interpretation[:200]}...")

# Validate interpretations
validation_results = {}
for layer_key, layer_interpretations in selfie_interpretations.items():
    validation = selfie_interpreter.validate_interpretations(
        layer_interpretations, demo_pattern
    )
    validation_results[layer_key] = validation
    print(f"\nValidation for {layer_key}:")
    print(f"  Keyword match ratio: {validation['keyword_match_ratio']:.2f}")
    print(f"  Avg interpretation length: {validation['avg_interpretation_length']:.0f} chars")

print("\n✅ Selfie interpretation complete")

## 9. Activation Arithmetic

In [None]:
# Helper object that performs the vector arithmetic on activations
activation_arithmetic = ActivationArithmetic()

print("Running activation arithmetic analysis...")

# Re-index the captured activations as pattern -> layer number -> activations,
# keeping only layers whose "<pattern>_layer_<n>" key exists for that pattern
layers = config['model']['layers']
pattern_activations = {
    name: {
        num: acts[f"{name}_layer_{num}"]
        for num in layers
        if f"{name}_layer_{num}" in acts
    }
    for name, acts in activations_dict.items()
}

print(f"Prepared activations for {len(pattern_activations)} patterns")

# Per layer: compare all patterns that have data for it
arithmetic_results = {}
for layer_num in layers:
    print(f"\nAnalyzing layer {layer_num}:")

    layer_patterns = {
        name: per_layer[layer_num]
        for name, per_layer in pattern_activations.items()
        if layer_num in per_layer
    }

    # A comparison needs at least two patterns
    if len(layer_patterns) <= 1:
        print("  Not enough patterns for comparison")
        continue

    similarities = activation_arithmetic.compute_similarity_matrix(layer_patterns)

    print("  Pattern similarities (cosine):")
    for (p1, p2), sim in similarities.items():
        # Print only the ordering where p1 sorts before p2, so each pair appears once
        if p1 < p2:
            print(f"    {p1} <-> {p2}: {sim:.3f}")

    arithmetic_results[f"layer_{layer_num}"] = {'similarities': similarities}

# Demonstrate difference, transition and interpolation on the first layer
# using the first two patterns, when both have data for that layer
pattern_names = list(pattern_activations.keys())
if len(pattern_names) >= 2:
    print("\nPattern arithmetic operations:")

    demo_layer = layers[0]
    pattern1, pattern2 = pattern_names[:2]

    if all(demo_layer in pattern_activations[p] for p in (pattern1, pattern2)):
        act1 = pattern_activations[pattern1][demo_layer]
        act2 = pattern_activations[pattern2][demo_layer]

        # Difference vector between the two patterns
        diff_vector = activation_arithmetic.compute_pattern_difference(act1, act2)
        print(f"  Difference vector ({pattern1} - {pattern2}): shape {diff_vector.shape}")
        print(f"  Difference magnitude: {torch.norm(diff_vector).item():.3f}")

        # Transition vector from the first pattern to the second
        transition = activation_arithmetic.find_transition_vector(act1, act2)
        print(f"  Transition vector ({pattern1} -> {pattern2}): magnitude {torch.norm(transition).item():.3f}")

        # Intermediate points between the two patterns
        interpolated = activation_arithmetic.interpolate_patterns(act1, act2, steps=5)
        print(f"  Created {len(interpolated)} interpolation steps")

print("\n✅ Activation arithmetic complete")

## 10. Summary and Results

In [None]:
# Print a recap of the configuration, the captured data and which analyses
# produced results. Relies on names defined by the earlier sections.
banner = "=" * 60
print(banner)
print("COGNITIVE PATTERN ANALYSIS SUMMARY")
print(banner)

model_cfg = config['model']
print(f"\nModel: {model_cfg['name']}")
print(f"Local path: {model_cfg['local_path']}")
print(f"Layers analyzed: {model_cfg['layers']}")

# One line per captured pattern: number of layer keys and the first tensor's shape
print("\nPatterns analyzed:")
for pattern_name, activations in activations_dict.items():
    if activations:
        sample_shape = next(iter(activations.values())).shape
    else:
        sample_shape = "N/A"
    print(f"  - {pattern_name}: {len(activations)} layer combinations, shape: {sample_shape}")

# A method counts as completed when its result container is non-empty
# (selfie: when its result variable exists at all)
print("\nAnalysis methods completed:")
method_status = (
    (pca_results, "✅ PCA Analysis"),
    (sae_results, "✅ SAE Analysis (placeholder)"),
    ('selfie_interpretations' in locals(), "✅ Selfie Interpretation"),
    (arithmetic_results, "✅ Activation Arithmetic"),
)
completed_methods = [label for done, label in method_status if done]

for method in completed_methods:
    print(f"  {method}")

print("\nKey findings:")
print(f"  - Successfully captured activations from {len(layers)} layers")
print("  - PCA analysis shows variance structure across cognitive patterns")
if len(pattern_names) >= 2 and arithmetic_results:
    # Range of similarities between distinct patterns, across all layers
    all_similarities = [
        sim
        for layer_result in arithmetic_results.values()
        for (p1, p2), sim in layer_result['similarities'].items()
        if p1 != p2
    ]
    if all_similarities:
        max_sim = max(all_similarities)
        min_sim = min(all_similarities)
        print(f"  - Pattern similarities range from {min_sim:.3f} to {max_sim:.3f}")

print("\n🎯 Analysis pipeline completed successfully!")
print("\nNext steps:")
for step in (
    "  1. Download actual SAE models to replace placeholder",
    "  2. Experiment with different layers and cognitive patterns",
    "  3. Use activation arithmetic for pattern steering",
    "  4. Validate interpretations with domain experts",
):
    print(step)

## 11. Save Results (Optional)

In [None]:
# Optional: save activations, PCA results and similarity scores under
# ./notebook_results after asking the user.
try:
    save_results = input("Save results to files? (y/n): ").strip().lower() == 'y'
except (EOFError, NotImplementedError):
    # No interactive stdin (e.g. the notebook is executed headlessly): IPython
    # raises StdinNotImplementedError, a NotImplementedError subclass, and a
    # closed stdin raises EOFError. Default to not saving instead of aborting.
    save_results = False

if save_results:
    # Create results directory
    results_dir = Path("./notebook_results")
    results_dir.mkdir(parents=True, exist_ok=True)

    print(f"Saving results to {results_dir}...")

    # Save activations, one file per pattern
    for pattern_name, activations in activations_dict.items():
        torch.save(activations, results_dir / f"{pattern_name}_activations.pt")

    # Save PCA results
    if pca_results:
        torch.save(pca_results, results_dir / "pca_results.pt")

    # Save arithmetic results. The similarity mapping is keyed by
    # (pattern, pattern) tuples, which YAML can only express with
    # Python-specific tags; flatten to "p1__p2" string keys and plain floats so
    # the file can be read back with yaml.safe_load.
    # Assumes each similarity is a scalar that float() accepts - TODO confirm.
    if arithmetic_results:
        serializable_results = {
            layer_name: {
                'similarities': {
                    f"{p1}__{p2}": float(sim)
                    for (p1, p2), sim in layer_result['similarities'].items()
                }
            }
            for layer_name, layer_result in arithmetic_results.items()
        }
        with open(results_dir / "arithmetic_results.yaml", 'w', encoding='utf-8') as f:
            yaml.safe_dump(serializable_results, f)

    print(f"✅ Results saved to {results_dir}")
else:
    print("Results not saved.")