# VLM Political Steering: Qwen3-VL-32B

## Overview

This notebook demonstrates steering Qwen3-VL-32B responses along a political axis using image-based steering directions.

### Dataset Structure
```
vlm_political/imgs/
├── conservative/
│   ├── website_name_1/ → *.jpg, *.png
│   ├── website_name_2/ → *.jpg, *.png
│   └── ...
└── liberal/
    ├── website_name_1/ → *.jpg, *.png
    ├── website_name_2/ → *.jpg, *.png
    └── ...
```

### Key Concept

We compute a steering vector by:
1. Extracting embeddings from conservative vs liberal political images (organized by source/website)
2. Using logistic regression to find the political axis
3. Applying the direction to model hidden states via forward hooks
4. Generating text with controlled political bias

In [None]:
import os
import sys
from pathlib import Path

# Setup paths
notebook_path = Path().resolve()
project_root = notebook_path.parents[1]
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

import torch
import numpy as np
import pickle
from tqdm import tqdm
from PIL import Image
from transformers import AutoModelForImageTextToText, AutoProcessor
from neural_controllers import NeuralController

# Set GPUs
os.environ["CUDA_VISIBLE_DEVICES"] = "6,7"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seeds
torch.manual_seed(0)
np.random.seed(0)

In [None]:
import os
import torch
from transformers import AutoModelForImageTextToText, AutoProcessor
from PIL import Image

# 1. Set visibility BEFORE importing torch/transformers
os.environ["CUDA_VISIBLE_DEVICES"] = "6,7"

model_name = "Qwen/Qwen3-VL-32B-Instruct"

# 2. Load Model
model = AutoModelForImageTextToText.from_pretrained(
    model_name,
    device_map="auto", # Automatically balances layers across GPUs 6 and 7
    torch_dtype=torch.bfloat16, 
    trust_remote_code=True,
    attn_implementation="flash_attention_2" 
)

processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)

# 3. PREPARE INPUTS (Crucial Step)
# This is where most people fail to use the GPU
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
# In a real scenario, load your PIL image here
# image = Image.open(...) 

messages = [
    {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": "Describe this image."}]}
]

text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# Note: You'll need the actual image object here
inputs = processor(text=[text], images=None, return_tensors="pt")

# 4. MOVE INPUTS TO DEVICE
# 'model.device' will point to the first device in the map (cuda:0)
inputs = {k: v.to(model.device) for k, v in inputs.items()}

# 5. GENERATE
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=100)
    print(processor.decode(output[0], skip_special_tokens=True))

Loading Qwen/Qwen3-VL-32B-Instruct...


`torch_dtype` is deprecated! Use `dtype` instead!


ValueError: FlashAttention2 has been toggled on, but it cannot be used due to the following error: Flash Attention 2 is not available on CPU. Please make sure torch can access a CUDA device.

In [5]:
# Data loading and preprocessing
dataset_path = Path("../imgs")  # relative to this notebook

print(f"Loading images from: {dataset_path}")
print(f"Dataset exists: {dataset_path.exists()}")

# Organize images by political viewpoint
# Structure: imgs/viewpoint/website_name/image.jpg
image_data = defaultdict(lambda: defaultdict(list))  # {viewpoint: {website_source: [paths]}}

if dataset_path.exists():
    for subdir in dataset_path.iterdir():
        if subdir.is_dir():
            viewpoint = subdir.name  # 'conservative', 'liberal', etc.
            # Iterate through website source directories
            for website_dir in subdir.iterdir():
                if website_dir.is_dir():
                    website_source = website_dir.name  # website name
                    # Collect all images in this website directory (recursively)
                    for img_file in website_dir.rglob("*"):
                        if img_file.suffix.lower() in ['.jpg', '.png', '.jpeg']:
                            image_data[viewpoint][website_source].append(img_file)

    print(f"\n=== Dataset Summary ===")
    if image_data:
        for viewpoint, sources in image_data.items():
            total_imgs = sum(len(paths) for paths in sources.values())
            print(f"{viewpoint.upper()}: {total_imgs} images across {len(sources)} sources")
            for source, paths in list(sources.items())[:5]:
                print(f"  - {source}: {len(paths)} images")
    else:
        print("⚠️ No images found under dataset_path. Check subdirectories and file extensions.")
else:
    print(f"⚠️ Dataset path not found: {dataset_path}")
    print("Please ensure vlm_political/imgs exists with conservative/ and liberal/ subdirectories")

Loading images from: ../imgs
Dataset exists: True

=== Dataset Summary ===
LIBERAL: 500 images across 20 sources
  - Welfare: 27 images
  - isis: 21 images
  - unemployment: 30 images
  - black lives matter: 18 images
  - racism: 27 images
CONSERVATIVE: 500 images across 20 sources
  - Welfare: 27 images
  - isis: 26 images
  - unemployment: 40 images
  - black lives matter: 24 images
  - racism: 29 images


In [8]:
def extract_image_embeddings(image_paths, batch_size=4, viewpoint_label=""):
    """
    Extract embeddings from images using the vision encoder.
    This version moves tensors to device safely and retries tokenization
    if the processor returns empty text tokens (common with some processors).
    """
    embeddings = []
    valid_paths = []

    for i in tqdm(range(0, len(image_paths), batch_size), desc=f"Extracting {viewpoint_label} embeddings"):
        batch_paths = image_paths[i:i+batch_size]
        batch_imgs = []

        for path in batch_paths:
            try:
                img = Image.open(path).convert("RGB")
                batch_imgs.append(img)
                valid_paths.append(path)
            except Exception as e:
                print(f"⚠️ Failed to load {path}: {e}")
                continue

        if not batch_imgs:
            continue

        # Include viewpoint context in prompts
        prompts = [f"This image shows a {viewpoint_label} viewpoint. Describe the content." for _ in batch_imgs]

        try:
            encoded = processor(
                text=prompts,
                images=batch_imgs,
                return_tensors="pt",
                padding=True,
            )

            # Move any tensor entries to device safely
            for k, v in list(encoded.items()):
                if isinstance(v, torch.Tensor):
                    encoded[k] = v.to(device)

            # Some processors may return empty text tokens when mixing images/text.
            # If so, tokenize prompts separately and insert input_ids/attention_mask.
            if ("input_ids" not in encoded) or (isinstance(encoded.get("input_ids"), torch.Tensor) and encoded.get("input_ids").numel() == 0):
                try:
                    tokenized = processor.tokenizer(prompts, return_tensors="pt", padding=True, truncation=True)
                    encoded["input_ids"] = tokenized["input_ids"].to(device)
                    encoded["attention_mask"] = tokenized["attention_mask"].to(device)
                except Exception as e:
                    print(f"⚠️ Failed to tokenize prompts separately: {e}")

            with torch.no_grad():
                outputs = model(
                    **encoded,
                    output_hidden_states=True,
                    return_dict=True,
                )

                if hasattr(outputs, "hidden_states") and outputs.hidden_states:
                    hidden = outputs.hidden_states[-1]
                    emb = hidden.mean(dim=1)
                else:
                    emb = outputs.last_hidden_state.mean(dim=1)

                embeddings.append(emb.cpu())
        except Exception as e:
            print(f"⚠️ Error processing batch: {e}")
            # Debug: print shapes/types of encoded items if available
            try:
                if 'encoded' in locals():
                    for k, v in encoded.items():
                        if isinstance(v, torch.Tensor):
                            print(f"  - {k}: {tuple(v.shape)}")
                        else:
                            print(f"  - {k}: {type(v)}")
            except Exception:
                pass
            continue

    if embeddings:
        embeddings = torch.cat(embeddings, dim=0)
        return embeddings, valid_paths
    else:
        return torch.tensor([]), []

print("✓ Image embedding extraction function defined")

✓ Image embedding extraction function defined


In [9]:
# Extract embeddings from conservative and liberal images
print("\n=== Extracting Image Embeddings ===")

conservative_paths = []
liberal_paths = []

for viewpoint, topics in image_data.items():
    paths = [p for topic_paths in topics.values() for p in topic_paths]
    if viewpoint.lower() == "conservative":
        conservative_paths.extend(paths)
    elif viewpoint.lower() == "liberal":
        liberal_paths.extend(paths)

print(f"Conservative images: {len(conservative_paths)}")
print(f"Liberal images: {len(liberal_paths)}")

if conservative_paths and liberal_paths:
    cons_emb, cons_paths = extract_image_embeddings(
        conservative_paths,
        batch_size=4,
        viewpoint_label="conservative"
    )
    lib_emb, lib_paths = extract_image_embeddings(
        liberal_paths,
        batch_size=4,
        viewpoint_label="liberal"
    )
    
    print(f"\nConservative embeddings: {cons_emb.shape}")
    print(f"Liberal embeddings: {lib_emb.shape}")
else:
    print("⚠️ Insufficient images to compute directions")


=== Extracting Image Embeddings ===
Conservative images: 500
Liberal images: 500


Extracting conservative embeddings:   2%|▏         | 3/125 [00:02<01:09,  1.74it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 12785
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (51140, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1497
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (5988, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1079
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (4316, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:   5%|▍         | 6/125 [00:02<00:31,  3.73it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1481
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (5924, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1516
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (6064, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1672
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (6688, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:   6%|▋         | 8/125 [00:02<00:24,  4.78it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1684
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (6736, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1671
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (6684, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:   8%|▊         | 10/125 [00:03<00:21,  5.41it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 3309
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (13236, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1796
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (7184, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:   9%|▉         | 11/125 [00:03<00:20,  5.70it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 517
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (2068, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1158
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (4632, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:  10%|█         | 13/125 [00:04<00:34,  3.28it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 7431
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (29724, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:  11%|█         | 14/125 [00:04<00:33,  3.32it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 865
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (3460, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1578
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (6312, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:  14%|█▍        | 18/125 [00:04<00:18,  5.84it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 2131
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (8524, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 871
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (3484, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1532
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (6128, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:  16%|█▌        | 20/125 [00:04<00:14,  7.09it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1042
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (4168, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1283
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (5132, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:  17%|█▋        | 21/125 [00:05<00:15,  6.54it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 2717
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (10868, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:  19%|█▉        | 24/125 [00:05<00:17,  5.71it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 3170
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (12680, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1584
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (6336, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1457
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (5828, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:  21%|██        | 26/125 [00:05<00:13,  7.45it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1190
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (4760, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 667
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (2668, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 638
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (2552, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:  22%|██▏       | 28/125 [00:06<00:11,  8.61it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 887
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (3548, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 920
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (3680, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:  26%|██▌       | 32/125 [00:06<00:10,  9.20it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 2022
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (8088, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1320
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (5280, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1036
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (4144, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:  27%|██▋       | 34/125 [00:06<00:09,  9.87it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1191
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (4764, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1001
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (4004, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:  29%|██▉       | 36/125 [00:07<00:10,  8.43it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 2220
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (8880, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1874
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (7496, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:  30%|███       | 38/125 [00:07<00:09,  9.22it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1147
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (4588, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 923
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (3692, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:  32%|███▏      | 40/125 [00:07<00:09,  9.24it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 2181
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (8724, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1222
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (4888, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 2160
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (8640, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:  35%|███▌      | 44/125 [00:07<00:09,  8.49it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1856
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (7424, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 956
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (3824, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1596
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (6384, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:  36%|███▌      | 45/125 [00:08<00:09,  8.47it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1696
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (6784, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:  37%|███▋      | 46/125 [00:08<00:12,  6.11it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1160
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (4640, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:  39%|███▉      | 49/125 [00:08<00:10,  7.47it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 2660
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (10640, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 854
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (3416, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1225
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (4900, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:  41%|████      | 51/125 [00:08<00:08,  8.46it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 871
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (3484, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1464
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (5856, 1536)
  - image_grid_thw: (4, 3)


Extracting conservative embeddings:  42%|████▏     | 53/125 [00:09<00:12,  5.64it/s]

⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 2275
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (9100, 1536)
  - image_grid_thw: (4, 3)
⚠️ Error processing batch: Image features and image tokens do not match: tokens: 0, features 1437
  - input_ids: (4, 11)
  - attention_mask: (4, 11)
  - pixel_values: (5748, 1536)
  - image_grid_thw: (4, 3)





KeyboardInterrupt: 

In [10]:
# Use NeuralController to compute/apply directions from image embeddings
# This cell attempts to reuse the project's `NeuralController` to compute directions
# from `cons_emb` and `lib_emb` and then generate controlled outputs.

from pathlib import Path
import numpy as np

try:
    from neural_controllers import NeuralController
except Exception as e:
    print(f"⚠️ Failed to import NeuralController: {e}")
    NeuralController = None

if NeuralController is None:
    print("NeuralController unavailable — ensure neural_controllers.py is on PYTHONPATH.")
else:
    # Ensure embeddings exist
    if 'cons_emb' not in globals() or 'lib_emb' not in globals():
        print("⚠️ cons_emb/lib_emb not found. Run the embedding extraction cells first.")
    else:
        # Prepare training data
        # Many of the toolkits expect 2D arrays [n_samples, dim]
        cons_X = cons_emb.numpy() if isinstance(cons_emb, torch.Tensor) else np.array(cons_emb)
        lib_X = lib_emb.numpy() if isinstance(lib_emb, torch.Tensor) else np.array(lib_emb)

        X = np.vstack([cons_X, lib_X])
        y = np.array([0]*len(cons_X) + [1]*len(lib_X))

        print(f"Prepared training data: X.shape={X.shape}, y.shape={y.shape}")

        # Instantiate NeuralController
        try:
            # NeuralController expects (model, tokenizer). For VLM we pass the model and processor.tokenizer
            tokenizer_like = getattr(processor, 'tokenizer', None)
            controller = NeuralController(model, tokenizer_like, control_method='logistic', batch_size=2)
            controller.name = 'qwen3-vl'
            print("NeuralController instantiated")

            # Try computing directions using the controller's compute_directions wrapper
            try:
                controller.compute_directions(X, y)
                print("✅ NeuralController computed directions")
            except Exception as e:
                print(f"⚠️ NeuralController.compute_directions failed: {e}")
                print("Attempting to call toolkit directly (logistic mean-diff fallback)")

                # Fallback: compute mean-diff direction manually and store in controller.directions
                cons_mean = cons_X.mean(axis=0, keepdims=True)
                lib_mean = lib_X.mean(axis=0, keepdims=True)
                direction = lib_mean - cons_mean
                direction = direction / (np.linalg.norm(direction) + 1e-10)
                # store as single-layer dummy direction to be used by hook utilities
                controller.directions = { -1: direction }
                print("✅ Fallback mean-diff direction computed and stored in controller.directions")

            # Quick controlled generation example using controller.generate
            try:
                topics = ["abortion rights", "climate change"]
                for topic in topics:
                    prompt = f"What is your view on {topic}?"
                    print(f"\n--- Controlled generate for: {topic} ---")
                    out = controller.generate(prompt, layers_to_control=[-1], control_coef=0.8, max_new_tokens=80)
                    print(out)
                print("\n✅ Controller generation attempted")
            except Exception as e:
                print(f"⚠️ Controlled generation failed: {e}")
                print("You may need to adapt `NeuralController` to the VLM model shape/API or use generation_utils hooks directly.")

        except Exception as e:
            print(f"⚠️ Failed to create NeuralController: {e}")


⚠️ Failed to import NeuralController: No module named 'neural_controllers'
NeuralController unavailable — ensure neural_controllers.py is on PYTHONPATH.


In [None]:
def compute_steering_directions(cons_emb, lib_emb, method="logistic"):
    """
    Compute steering directions from embeddings.
    """
    cons_emb = cons_emb.numpy() if isinstance(cons_emb, torch.Tensor) else cons_emb
    lib_emb = lib_emb.numpy() if isinstance(lib_emb, torch.Tensor) else lib_emb
    
    if method == "mean_diff":
        cons_mean = cons_emb.mean(axis=0, keepdims=True)
        lib_mean = lib_emb.mean(axis=0, keepdims=True)
        direction = lib_mean - cons_mean
        print(f"Method: Mean Difference")
    elif method == "logistic":
        X = np.vstack([cons_emb, lib_emb])
        y = np.array([0]*len(cons_emb) + [1]*len(lib_emb))
        clf = LogisticRegression(max_iter=200, random_state=42)
        clf.fit(X, y)
        direction = clf.coef_.reshape(1, -1)
        direction = direction / (np.linalg.norm(direction) + 1e-10)
        print(f"Method: Logistic Regression")
    
    return torch.tensor(direction, dtype=torch.float32)

if 'cons_emb' in locals() and 'lib_emb' in locals() and len(cons_emb) > 0 and len(lib_emb) > 0:
    print("\n=== Computing Steering Directions ===")
    steering_direction = compute_steering_directions(cons_emb, lib_emb, method="logistic")
    print(f"Steering direction shape: {steering_direction.shape}")

In [None]:
class SteeringHook:
    """Hook that modifies hidden states by adding a scaled direction vector"""
    def __init__(self, direction, coefficient=1.0):
        self.direction = direction.to(device)
        self.coefficient = coefficient
    
    def __call__(self, module, input, output):
        if isinstance(output, torch.Tensor):
            return output + self.coefficient * self.direction
        elif hasattr(output, 'last_hidden_state'):
            output.last_hidden_state = output.last_hidden_state + self.coefficient * self.direction
            return output
        return output

def apply_steering_to_model(model, steering_direction, coefficient=1.0, target_type="decoder"):
    """Register steering hooks on specified model layers."""
    hook = SteeringHook(steering_direction, coefficient)
    handles = []
    
    if target_type == "decoder":
        if hasattr(model, 'language_model'):
            decoder = model.language_model
        elif hasattr(model, 'model'):
            decoder = model.model
        else:
            decoder = model
        
        if hasattr(decoder, 'layers'):
            layer_list = decoder.layers
            num_layers = len(layer_list)
            target_indices = [num_layers // 2, num_layers - 1]
            
            for idx in target_indices:
                if idx < len(layer_list):
                    h = layer_list[idx].register_forward_hook(hook)
                    handles.append(h)
                    print(f"  Registered steering hook on decoder layer {idx}")
    
    return handles, hook

print("✓ Steering hook classes defined")

In [None]:
# Generate examples with and without steering
if 'steering_direction' in locals():
    print("\n=== Generation Examples ===")
    
    test_topics = ["abortion rights", "climate change"]
    
    for steering_type, coef in [("BASELINE", 0.0), ("LIBERAL", 1.0), ("CONSERVATIVE", -1.0)]:
        print(f"\n--- {steering_type} (coefficient={coef}) ---")
        
        if coef != 0.0:
            handles, _ = apply_steering_to_model(model, steering_direction, coefficient=coef)
        
        for topic in test_topics:
            prompt = f"What is your view on {topic}?"
            inputs = processor(text=prompt, return_tensors="pt", padding=True).to(device)
            
            with torch.no_grad():
                outputs = model.generate(**inputs, max_new_tokens=80)
            
            text = processor.decode(outputs[0], skip_special_tokens=True)
            print(f"\nTopic: {topic}")
            print(f"Response: {text[:200]}...")
        
        if coef != 0.0:
            for h in handles: h.remove()
    
    print("\n✅ Generation examples complete")

In [None]:
# Save directions to disk
import pickle
import json
from datetime import datetime

if 'steering_direction' in locals():
    print("\n=== Saving Steering Directions ===")
    
    directions_dir = Path("../directions")
    directions_dir.mkdir(parents=True, exist_ok=True)
    
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    
    direction_path = directions_dir / f"conservative_liberal_direction_{timestamp}.pkl"
    with open(direction_path, 'wb') as f:
        pickle.dump({
            'direction': steering_direction.cpu().numpy(),
            'direction_shape': steering_direction.shape,
            'computation_method': 'logistic_regression',
            'timestamp': timestamp,
            'num_conservative_images': len(cons_emb),
            'num_liberal_images': len(lib_emb),
        }, f)
    
    metadata = {
        'created_at': timestamp,
        'method': 'logistic_regression',
        'direction_file': str(direction_path),
        'model': 'Qwen/Qwen3-VL-32B',
        'political_axis': 'conservative -> liberal',
    }
    
    metadata_path = directions_dir / f"metadata_{timestamp}.json"
    with open(metadata_path, 'w') as f:
        json.dump(metadata, f, indent=2)
    
    print(f"✅ Direction saved to: {direction_path}")
    print(f"✅ Metadata saved to: {metadata_path}")