## **Step 0**: Log in to Hugging Face

In [1]:
try:
    from huggingface_hub import notebook_login
    notebook_login()
except ImportError:
    print("huggingface_hub not found. Please log in using 'huggingface-cli login' in your terminal.")

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svâ€¦

## **Step 1**: Install Dependencies

In [2]:
%pip install transformers torch scikit-learn accelerate tqdm pandas openpyxl numpy -q

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


## **Step 2**: Imports and Helper Classes

This cell defines all the necessary functions and classes: the model wrapper (for generation), the steering hook, and the vector calculation functions.

In [3]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from transformers import AutoTokenizer, AutoModelForCausalLM
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from tqdm import tqdm
import sys
import argparse

# --- Lightweight Model Wrapper (for Generation) ---
class ModelSteeringWrapper:
    def __init__(self, model_name: str):
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            device_map="auto"
        )
        self.device = self.model.device
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        self._layers_attr_path = self._find_layer_attr_path()
        self.num_layers = len(self._get_layers_list())
        print(f"[ModelSteeringWrapper] Model loaded. Path: {self._layers_attr_path}, Layers: {self.num_layers}")

    def _find_layer_attr_path(self):
        candidates = [["model", "layers"], ["transformer", "h"], ["model", "decoder", "layers"]]
        for path in candidates:
            cur = self.model
            valid = True
            for p in path:
                if hasattr(cur, p): cur = getattr(cur, p)
                else: valid = False; break
            if valid and isinstance(cur, (list, nn.ModuleList)): return path
        raise AttributeError("Could not find transformer layer list in model.")

    def _get_layers_list(self):
        cur = self.model
        for p in self._layers_attr_path: cur = getattr(cur, p)
        return list(cur)

    def generate(self, prompt: str, max_new_tokens: int = 150, **kwargs) -> str:
        tok = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        input_token_len = tok.input_ids.shape[1]
        out = self.model.generate(**tok, max_new_tokens=max_new_tokens, pad_token_id=self.tokenizer.pad_token_id, **kwargs)
        full_tokens = out[0]
        new_tokens = full_tokens[input_token_len:]
        generated_text = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
        return generated_text.strip()

# --- Style Vector Extraction Methods ---
def compute_mean_difference(pos: np.ndarray, neg: np.ndarray) -> np.ndarray:
    diff = (pos - neg).mean(axis=0)
    return diff / (np.linalg.norm(diff) + 1e-12)

def compute_logistic_regression(pos: np.ndarray, neg: np.ndarray) -> np.ndarray:
    X = np.vstack([pos, neg])
    y = np.concatenate([np.ones(len(pos)), np.zeros(len(neg))])
    clf = LogisticRegression(max_iter=1000).fit(X, y)
    w = clf.coef_.reshape(-1)
    return w / (np.linalg.norm(w) + 1e-12)

def compute_pca_vector(pos: np.ndarray, neg: np.ndarray) -> np.ndarray:
    diffs = pos - neg
    pca = PCA(n_components=1).fit(np.vstack([diffs, -diffs]))
    vec = pca.components_[0]
    return vec / (np.linalg.norm(vec) + 1e-12)

# --- Steering Hook Class ---
class SteeringHook:
    def __init__(self, model, layer_path, layer_idx, style_vector, multiplier):
        self.model, self.layer_path, self.layer_idx = model, layer_path, layer_idx
        self.style_vector_cpu = torch.from_numpy(style_vector).float() * multiplier
        self.handle = None
        self._register_hook()

    def _get_layer_module(self):
        cur = self.model
        for p in self.layer_path: cur = getattr(cur, p)
        idx = self.layer_idx if self.layer_idx >= 0 else len(cur) + self.layer_idx
        return cur[idx]

    def _hook(self, module, input, output):
        tensor_output = output[0] if isinstance(output, tuple) else output
        add_vec = self.style_vector_cpu.to(tensor_output.device, dtype=tensor_output.dtype)
        modified_tensor = tensor_output + add_vec.view(1, 1, -1)
        return (modified_tensor,) + output[1:] if isinstance(output, tuple) else modified_tensor

    def _register_hook(self):
        self.handle = self._get_layer_module().register_forward_hook(self._hook)

    def remove(self):
        if self.handle: self.handle.remove()

## **Step 3**: Load Activations, Compute Vectors, and Run Test

This is the main driver cell. It loads the saved activations, calculates the three vectors, and generates a steered response for the *first email* in the spreadsheet using each method.

In [4]:
def run_inference_test(model_name: str, layer_index: int, xlsx_path: str, activations_path: str):
    
    # --- 1. Load Activations and Compute Vectors ---
    try:
        data = np.load(activations_path)
        pos_arr = data['pos_acts']
        neg_arr = data['neg_acts']
        print(f"Successfully loaded activations from '{activations_path}'")
    except Exception as e:
        print(f"Error loading '{activations_path}'. Please run the activation extraction script first.")
        print(f"Error details: {e}")
        return

    print("Computing style vectors...")
    style_vectors = {
        "mean": compute_mean_difference(pos_arr, neg_arr),
        "logreg": compute_logistic_regression(pos_arr, neg_arr),
        "pca": compute_pca_vector(pos_arr, neg_arr)
    }
    print("All style vectors computed.")

    # --- 2. Load the First Row from Excel for the Test --- 
    try:
        df = pd.read_excel(xlsx_path, nrows=2)
        test_row = df.iloc[1]
    except Exception as e:
        print(f"Error reading Excel file '{xlsx_path}': {e}")
        return

   # --- Check for correct columns (case-sensitive) ---
    NEUTRAL_COL = 'response_Neutral' # Or 'resonse_neutral'?
    STYLED_COL = 'response_styled'
    FACT_COLS = ['date', 'Time', 'Venue', 'OccasionType', 'Host', 'Event']
    required_cols = FACT_COLS + [NEUTRAL_COL, STYLED_COL]
    
    if not all(col in df.columns for col in required_cols):
        print("Error: Missing one of the required columns in your Excel file.")
        print(f"Found: {df.columns.to_list()}")
        return

    # --- Dynamically build the prompt from the first row ---
    test_row = df.iloc[1]

    # Get facts from the row
    event = str(test_row.get('Event', 'N/A'))
    date = str(test_row.get('date', 'N/A'))
    time_ = str(test_row.get('Time', 'N/A'))
    venue = str(test_row.get('Venue', 'N/A'))
    host = str(test_row.get('Host', 'N/A'))

    # 1. Build the test_query in the exact format requested
    test_query = f"Draft an email invitation for the {event}, scheduled for {date}, at {time_} in the {venue}. The event is hosted and sent by {host}."

    # 2. Get the ideal response from the file
    ideal_response = str(test_row.get(STYLED_COL))

    # 3. Extract the subject line from the ideal response to build the final prompt
    try:
        subject_line = ideal_response.split('\n')[0]
    except Exception:
        subject_line = "Subject: Invitation" # Fallback

    prompt = f"{test_query}\n\n{subject_line}\n\n"

    # --- 3. Load Model --- 
    print("Loading Llama 2 model... (This may take a few minutes)")
    ae = ModelSteeringWrapper(model_name)

    # --- 4. Run Steering Demonstration --- 
    print("\n" + "="*50)
    print("Steering Demonstration for First Email")
    print("="*50)
    print(f"Test Query:\n{prompt}")
    print("\n--- Ideal Styled Response (from file) ---")
    print(ideal_response)

    # Use the multiplier you found was best (e.g., 3.0 or 0.5)
    # Or just a standard one to compare them (e.g., 2.0)
    MULTIPLIER = 3.0 

    for method, style_vec in style_vectors.items():
        print(f"\n--- Steered Generated Response (Live, Method: {method.upper()}) ---")
        
        hook = SteeringHook(ae.model, ae._layers_attr_path, layer_index, style_vec, MULTIPLIER)
        try:
            steered_out = ae.generate(prompt, temperature=0.7, do_sample=True, top_p=0.9)
            print(steered_out)
        finally:
            hook.remove()

if __name__ == "__main__":
    # This allows running from a notebook cell
    if 'ipykernel' in sys.modules: sys.argv = sys.argv[:1]

    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, default="meta-llama/Llama-2-7b-hf")
    parser.add_argument("--layer", type=int, default=-15)
    parser.add_argument("--xlsx_file", type=str, default="generated_email_responses (1).xlsx")
    parser.add_argument("--activations_file", type=str, default="activations.npz")
    args = parser.parse_args()

    print(f"Running inference: model={args.model}, layer={args.layer}")
    run_inference_test(args.model, args.layer, args.xlsx_file, args.activations_file)
    print("\nTest finished.")

Running inference: model=meta-llama/Llama-2-7b-hf, layer=-15
Successfully loaded activations from 'activations.npz'
Computing style vectors...
All style vectors computed.
Loading Llama 2 model... (This may take a few minutes)


`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.


[ModelSteeringWrapper] Model loaded. Path: ['model', 'layers'], Layers: 32

Steering Demonstration for First Email
Test Query:
Draft an email invitation for the Live SouJava- Microservice Patterns - Implemented by Eclipse Microprofile, scheduled for 2020-04-20 00:00:00, at 07:00:00 in the San-Francisco. The event is hosted and sent by Product School Bangalore.

Subject: You're Invited! Live SouJava: Microservice Patterns in San-Francisco!



--- Ideal Styled Response (from file) ---
Subject: You're Invited! Live SouJava: Microservice Patterns in San-Francisco!

Hi there,

We'd be delighted if you could join us for a special Product School Bangalore event!

Live SouJava: Microservice Patterns - Implemented by Eclipse Microprofile

It's happening on April 20th at 7:00 AM in San-Francisco. We think you'll find this blend of technology and business insights super valuable.

Hope to see you there!

Warmly,

The Product School Bangalore Team

--- Steered Generated Response (Live, Method: MEA