In [4]:
import os
# Restrict this process to GPUs 6 and 7. The variable is set before `torch`
# is imported further down, so it is already in place when CUDA is queried
# (the cell output reports a device count of 2).
os.environ["CUDA_VISIBLE_DEVICES"] = "6,7"

import sys
from pathlib import Path

# Path().resolve() is the kernel's current working directory (presumably the
# notebook's own folder — verify if the kernel is started elsewhere). The
# directory two levels above it is appended to sys.path once, presumably so
# that project-local packages can be imported from the notebook.
notebook_path = Path().resolve()
project_root = notebook_path.parents[1]
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

import torch
import numpy as np
from PIL import Image
from transformers import AutoModelForImageTextToText, AutoProcessor

# Report GPU visibility before attempting to load the model.
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device count: {torch.cuda.device_count()}")

# Seed the global torch and NumPy RNGs (np.random is used for shuffling
# when the image dataset is sampled).
torch.manual_seed(0)
np.random.seed(0)

model_id = "Qwen/Qwen3-VL-32B-Instruct"

print(f"\nLoading {model_id}...")
# NOTE(review): trust_remote_code=True allows code shipped with the
# checkpoint repository to run locally — confirm this is still required.
model = AutoModelForImageTextToText.from_pretrained(
    model_id,
    device_map="auto", 
    torch_dtype=torch.bfloat16, 
    trust_remote_code=True,
    # No attention implementation is requested here (flash_attention_2 was
    # removed), so the library's default attention is used.
)

processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
# NOTE(review): model_name is not referenced by any other cell visible in
# this notebook; the save call at the end uses a different literal.
model_name = "qwen3_vl_32b"
print("Model loaded successfully!\n")

# Root directory of the image dataset, relative to the working directory;
# it is expected to contain `liberal/` and `conservative/` subdirectories.
dataset_path = Path("../imgs")

def prepare_vlm_dataset(dataset_path, num_samples=200):
    """
    Build processor-ready inputs and binary labels from an image directory.

    For each category sub-directory (``liberal`` -> label 0, ``conservative``
    -> label 1) the image files found recursively are shuffled with the
    global ``np.random`` state, at most ``num_samples`` of them are loaded,
    paired with a fixed descriptive prompt, and run through the module-level
    ``processor``.

    Args:
        dataset_path: Directory containing the category sub-directories.
            A ``str`` is accepted and converted to ``Path``.
        num_samples: Maximum number of images to use per category.

    Returns:
        Tuple ``(inputs, labels)``: ``inputs`` is a list with one processor
        output per successfully loaded image, ``labels`` the matching list
        of integer labels. Missing category directories are skipped with a
        warning; images that fail to load or process are reported and
        skipped.
    """
    dataset_path = Path(dataset_path)
    inputs = []
    labels = []

    categories = {
        'liberal': {'label': 0, 'desc': 'left and liberal viewpoint'},
        'conservative': {'label': 1, 'desc': 'right and conservative viewpoint'}
    }
    extensions = ('jpg', 'jpeg', 'png', 'JPG', 'JPEG', 'PNG')

    for cat_name, info in categories.items():
        cat_dir = dataset_path / cat_name
        if not cat_dir.exists():
            print(f"Warning: {cat_dir} does not exist")
            continue

        # Collect into a set: on case-insensitive filesystems the lower- and
        # upper-case patterns match the same file, which would otherwise be
        # sampled twice. Directories that merely look like images are dropped.
        unique_files = set()
        for ext in extensions:
            unique_files.update(
                p for p in cat_dir.rglob(f"*.{ext}") if p.is_file()
            )

        # Sort before shuffling so that the seeded shuffle yields the same
        # sample regardless of the order the filesystem lists entries in.
        img_files = sorted(unique_files)
        np.random.shuffle(img_files)
        print(f"Processing {cat_name}: found {len(img_files)} images, using {min(num_samples, len(img_files))}")

        for img_path in img_files[:num_samples]:
            try:
                # The context manager closes the underlying file handle;
                # convert() returns an independent, fully loaded image.
                with Image.open(img_path) as raw_img:
                    img = raw_img.convert("RGB")
                prompt_text = f"This image shows a {info['desc']}."

                messages = [
                    {"role": "user", "content": [
                        {"type": "image"},
                        {"type": "text", "text": prompt_text}
                    ]}
                ]

                text = processor.apply_chat_template(
                    messages,
                    tokenize=False,
                    add_generation_prompt=False
                )

                processed = processor(
                    text=[text],
                    images=[img],
                    return_tensors="pt"
                )

                inputs.append(processed)
                labels.append(info['label'])

            except Exception as e:
                # Best effort: a single unreadable image must not abort the run.
                print(f"Error loading {img_path}: {e}")

    return inputs, labels

# Build the dataset with the default cap of 200 images per class and report
# the class balance (label 0 = liberal, label 1 = conservative).
print("Loading and processing VLM dataset...")
vlm_inputs, vlm_labels = prepare_vlm_dataset(dataset_path)
print(f"\nLoaded {len(vlm_inputs)} samples total.")
print(f"Liberal samples: {vlm_labels.count(0)}")
print(f"Conservative samples: {vlm_labels.count(1)}")

CUDA available: True
CUDA device count: 2

Loading Qwen/Qwen3-VL-32B-Instruct...


Loading checkpoint shards: 100%|██████████| 14/14 [00:12<00:00,  1.09it/s]


Model loaded successfully!

Loading and processing VLM dataset...
Processing liberal: found 500 images, using 200
Processing conservative: found 500 images, using 200

Loaded 400 samples total.
Liberal samples: 200
Conservative samples: 200


In [11]:
from neural_controllers import NeuralController

class VLMNeuralController(NeuralController):
    """
    NeuralController variant for vision-language models whose layer count
    lives in a nested ``text_config`` rather than on the top-level config.
    """

    def __init__(self, model, tokenizer, **kwargs):
        """
        Resolve the layer count, make it visible on ``model.config`` and
        defer to the parent constructor.

        Args:
            model: Loaded model exposing a ``config`` attribute.
            tokenizer: Tokenizer forwarded unchanged to the parent class.
            **kwargs: Forwarded unchanged to ``NeuralController.__init__``.
        """
        # Prefer the nested text config; otherwise fall back to the
        # top-level attributes, and finally to 40 layers.
        if hasattr(model.config, "text_config"):
            self.num_layers = model.config.text_config.num_hidden_layers
        else:
            legacy_count = getattr(model.config, "num_layers", 40)
            self.num_layers = getattr(model.config, "num_hidden_layers", legacy_count)

        # The parent constructor presumably reads
        # model.config.num_hidden_layers, so add it when absent. Note that
        # this patch is permanent: it is never removed from the config.
        missing_on_config = not hasattr(model.config, "num_hidden_layers")
        if missing_on_config:
            model.config.num_hidden_layers = self.num_layers

        super().__init__(model, tokenizer, **kwargs)

        # Override whatever the parent stored: negative indices
        # -1, -2, ..., -(num_layers - 1).
        self.hidden_layers = list(range(-1, -self.num_layers, -1))

    def compute_vlm_directions(self, vlm_inputs, labels):
        """
        Compute directions from pre-processed VLM inputs.

        Calls ``self.toolkit._compute_directions`` with ``is_vlm=True`` and
        ``None`` for both validation arguments, then stores the first three
        returned values on ``self.directions``, ``self.signs`` and
        ``self.detector_coefs``; the fourth returned value is discarded.

        Args:
            vlm_inputs: Sequence of processor outputs, one per sample.
            labels: Labels aligned with ``vlm_inputs``.
        """
        results = self.toolkit._compute_directions(
            vlm_inputs,
            labels,
            None,  # validation inputs
            None,  # validation labels
            self.model,
            self.tokenizer,
            self.hidden_layers,
            self.hyperparams,
            is_vlm=True,
        )
        self.directions, self.signs, self.detector_coefs, _unused = results

# Build the controller around the loaded model. The text tokenizer is taken
# from the multimodal processor; the keyword arguments select the 'rfm'
# control method with 8 RFM iterations and a batch size of 1.
vlm_controller = VLMNeuralController(
    model, 
    processor.tokenizer, 
    control_method='rfm', 
    rfm_iters=8, 
    batch_size=1
)

# hidden_layers holds negative layer indices; print how many there are and
# the first and last index.
print(f"Success! Controller initialized with {len(vlm_controller.hidden_layers)} layers.")
print(f"Layer range: {vlm_controller.hidden_layers[0]} to {vlm_controller.hidden_layers[-1]}")

n_components: 5
Hidden layers KA: [-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39, -40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -50, -51, -52, -53, -54, -55, -56, -57, -58, -59, -60, -61, -62, -63]
Hidden layers: [-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39, -40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -50, -51, -52, -53, -54, -55, -56, -57, -58, -59, -60, -61, -62, -63]

Controller hyperparameters:
control_method       : rfm
rfm_iters            : 8
forward_batch_size   : 1
M_batch_size         : 2048
n_components         : 5
calibrate            : False

Success! Controller initialized with 63 layers.
Layer range: -1 to -63


In [26]:
import torch
from tqdm import tqdm
from pathlib import Path
import numpy as np
from PIL import Image

# Step 1: Define the collection function
def collect_image_paths(dataset_path, num_samples=200):
    """
    Collect image paths from the dataset directory, grouped by topic.

    Each of the ``liberal`` and ``conservative`` sub-directories is searched
    recursively (any depth) for ``.jpg``/``.jpeg``/``.png`` files in lower-
    or upper-case spelling. Every image is filed under the name of the first
    path component below its viewpoint directory.

    Args:
        dataset_path: Directory containing the viewpoint sub-directories.
            A ``str`` is accepted and converted to ``Path``.
        num_samples: Currently unused; retained so existing callers that
            pass it keep working.

    Returns:
        ``{'liberal': {topic: [Path, ...]}, 'conservative': {...}}``. Both
        viewpoint keys are always present; a viewpoint whose directory is
        missing or contains no images maps to an empty dict. Paths within a
        topic are sorted.
    """
    dataset_path = Path(dataset_path)
    image_data = {
        'liberal': {},
        'conservative': {}
    }
    patterns = ('*.jpg', '*.jpeg', '*.png', '*.JPG', '*.JPEG', '*.PNG')

    for viewpoint, topics in image_data.items():
        viewpoint_dir = dataset_path / viewpoint
        if not viewpoint_dir.exists():
            print(f"Warning: {viewpoint_dir} does not exist")
            continue

        # A set removes the duplicates produced when a case-insensitive
        # filesystem matches the same file for both spellings of a pattern;
        # is_file() drops directories whose names merely look like images.
        found = set()
        for pattern in patterns:
            found.update(p for p in viewpoint_dir.rglob(pattern) if p.is_file())

        if not found:
            print(f"Warning: No images found in {viewpoint_dir}")
            continue

        # Sorted iteration makes the result independent of filesystem order.
        for img_path in sorted(found):
            try:
                relative = img_path.relative_to(viewpoint_dir)
            except ValueError:
                # Not located under viewpoint_dir after all; skip it.
                continue
            # NOTE(review): an image placed directly in the viewpoint
            # directory is filed under its own file name as the topic.
            topic_name = relative.parts[0]
            topics.setdefault(topic_name, []).append(img_path)

        # Print summary
        for topic, paths in topics.items():
            print(f"Found {len(paths)} images in {viewpoint}/{topic}")

    return image_data

# Step 2: Define the processing function
def get_vlm_activations(image_data, processor, num_samples_per_class=100):
    """
    Turn grouped image paths into processor outputs ready for activation
    extraction.

    For each viewpoint, all ``(path, topic)`` pairs are pooled, shuffled with
    the global ``np.random`` state and truncated to
    ``num_samples_per_class``. Sampling is uniform over the pooled images; it
    is NOT stratified by topic, so topics with more images contribute more
    samples. Each image is paired with a prompt naming the viewpoint and the
    lower-cased topic.

    Args:
        image_data: Mapping ``{viewpoint: {topic: [image_path, ...]}}``.
            Viewpoints other than ``'liberal'`` and ``'conservative'`` are
            ignored.
        processor: Object providing ``apply_chat_template`` and a call
            interface accepting ``text``, ``images`` and ``return_tensors``.
        num_samples_per_class: Maximum number of images per viewpoint.

    Returns:
        Tuple ``(processed_inputs, labels)`` of equal length, with label 0
        for liberal and 1 for conservative. Images that fail to load or
        process are reported and skipped.
    """
    processed_inputs = []
    labels = []

    # Map the viewpoint names to binary labels
    mapping = {'liberal': 0, 'conservative': 1}

    for viewpoint, topics in image_data.items():
        if viewpoint not in mapping:
            continue
        label = mapping[viewpoint]

        # Flatten all images with their topics
        all_imgs_with_topics = [
            (img_path, topic_name)
            for topic_name, img_paths in topics.items()
            for img_path in img_paths
        ]

        # Shuffle and sample
        np.random.shuffle(all_imgs_with_topics)
        sampled = all_imgs_with_topics[:num_samples_per_class]

        print(f"\nProcessing {viewpoint} images: {len(sampled)} samples")

        num_ok = 0
        for img_path, topic_name in tqdm(sampled, desc=f"{viewpoint}"):
            try:
                # Close the file handle as soon as the pixels are converted;
                # convert() returns an independent, fully loaded image.
                with Image.open(img_path) as raw_img:
                    img = raw_img.convert("RGB")

                # Create topic-specific prompt
                clean_topic = topic_name.lower()
                prompt_text = f"This photo represents a {viewpoint} standpoint on {clean_topic}"

                messages = [
                    {"role": "user", "content": [
                        {"type": "image"},
                        {"type": "text", "text": prompt_text}
                    ]}
                ]

                text = processor.apply_chat_template(
                    messages,
                    tokenize=False,
                    add_generation_prompt=False
                )

                inputs = processor(text=[text], images=[img], return_tensors="pt")

            except Exception as e:
                # Best effort: one bad image must not abort the whole class.
                print(f"Error processing {img_path}: {e}")
                continue

            processed_inputs.append(inputs)
            labels.append(label)
            num_ok += 1

        print(f"Successfully processed {num_ok} {viewpoint} images")

    return processed_inputs, labels

# Driver: collect image paths, turn them into processor inputs, then compute
# the steering directions. Each stage runs only if the previous one produced
# data.
print("="*60)
print("STEP 1: Collecting image paths...")
print("="*60)

dataset_path = Path("../imgs")
image_data = collect_image_paths(dataset_path)

# Total image counts per viewpoint, summed over all topics
total_liberal = sum(len(paths) for paths in image_data['liberal'].values())
total_conservative = sum(len(paths) for paths in image_data['conservative'].values())
print(f"\nTotal liberal images found: {total_liberal}")
print(f"Total conservative images found: {total_conservative}")

if total_liberal == 0 and total_conservative == 0:
    # Nothing to process: print the resolved path to help diagnose a wrong
    # working directory.
    print("\n❌ ERROR: No images found!")
    print("Checking if path is correct...")
    print(f"Dataset path: {dataset_path.resolve()}")
    print(f"Path exists: {dataset_path.exists()}")
else:
    print("\n" + "="*60)
    print("STEP 2: Processing images for activation extraction...")
    print("="*60)
    
    # Up to 200 images per viewpoint (overrides the function default of 100).
    vlm_train_inputs, vlm_train_labels = get_vlm_activations(
        image_data, 
        processor, 
        num_samples_per_class=200
    )
    
    print(f"\n{'='*60}")
    print(f"STEP 2 COMPLETE: Dataset ready!")
    print(f"{'='*60}")
    print(f"Total samples: {len(vlm_train_inputs)}")
    print(f"Liberal samples: {vlm_train_labels.count(0)}")
    print(f"Conservative samples: {vlm_train_labels.count(1)}")
    
    if len(vlm_train_inputs) > 0:
        print(f"\n{'='*60}")
        print("STEP 3: Computing steering directions...")
        print("="*60)
        # Relies on `vlm_controller` created in an earlier cell.
        vlm_controller.compute_vlm_directions(vlm_train_inputs, vlm_train_labels)
        print("\n✅ Directions computed successfully!")
    else:
        print("\n❌ ERROR: No samples were processed successfully!")

STEP 1: Collecting image paths...
Found 27 images in liberal/Welfare
Found 21 images in liberal/isis
Found 30 images in liberal/unemployment
Found 18 images in liberal/black lives matter
Found 27 images in liberal/racism
Found 29 images in liberal/Border Security
Found 27 images in liberal/terrorism
Found 25 images in liberal/abortion
Found 18 images in liberal/Minimum Wage
Found 23 images in liberal/war on drugs
Found 4 images in liberal/Vaccines
Found 24 images in liberal/Fracking
Found 23 images in liberal/lgbt
Found 39 images in liberal/religion
Found 11 images in liberal/Homelessness
Found 32 images in liberal/immigration
Found 32 images in liberal/Climate Change
Found 26 images in liberal/Animal Rights
Found 35 images in liberal/blue lives matter
Found 29 images in liberal/Gun Control
Found 27 images in conservative/Welfare
Found 26 images in conservative/isis
Found 40 images in conservative/unemployment
Found 24 images in conservative/black lives matter
Found 29 images in conser

liberal:   0%|          | 0/200 [00:00<?, ?it/s]

liberal: 100%|██████████| 200/200 [00:02<00:00, 74.99it/s] 


Successfully processed 200 liberal images

Processing conservative images: 200 samples


conservative: 100%|██████████| 200/200 [00:01<00:00, 137.95it/s]


Successfully processed 200 conservative images

STEP 2 COMPLETE: Dataset ready!
Total samples: 400
Liberal samples: 200
Conservative samples: 200

STEP 3: Computing steering directions...
Computing directions for 400 samples...
Label distribution: 200 vs 200
Tuning metric: auc
Error during direction computation: Could not infer dtype of NoneType

Trying alternative format...
Tuning metric: auc


RuntimeError: CUDA error: no kernel image is available for execution on the device
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [25]:
# Re-run of the direction computation on the inputs built by the pipeline
# cell above.
# NOTE(review): this repeats STEP 3 of that cell, and its execution count
# (25) is lower than that cell's (26), so it was run out of order. The
# recorded output ends in the same CUDA "no kernel image" error. Consider
# removing this cell once the error is resolved.
print("\nComputing steering directions...")
vlm_controller.compute_vlm_directions(vlm_train_inputs, vlm_train_labels)
print("Directions computed successfully!")


Computing steering directions...
Computing directions for 400 samples...
Label distribution: 200 vs 200
Tuning metric: auc
Error during direction computation: Could not infer dtype of NoneType

Trying alternative format...
Tuning metric: auc


RuntimeError: CUDA error: no kernel image is available for execution on the device
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
# Compute directions across all 63 hidden layers reported by the controller.
# NOTE(review): the earlier cells already call compute_vlm_directions, so a
# top-to-bottom run repeats this computation here.
vlm_controller.compute_vlm_directions(vlm_train_inputs, vlm_train_labels)

# Save the political steering directions under ../directions/.
# NOTE(review): the literal 'qwen3_32b_vlm' differs from the `model_name`
# variable ("qwen3_vl_32b") defined in the first cell — confirm which name
# downstream loaders expect.
vlm_controller.save(
    concept='political_lean', 
    model_name='qwen3_32b_vlm', 
    path='../directions/'
)