## **Step 0**: Log in to Hugging Face (Required)

In [3]:
!pip install huggingface_hub
!pip install ipywidgets
!pip install --upgrade transformers huggingface_hub accelerate

Collecting huggingface_hub
  Using cached huggingface_hub-1.1.2-py3-none-any.whl.metadata (13 kB)
Collecting hf-xet<2.0.0,>=1.2.0 (from huggingface_hub)
  Using cached hf_xet-1.2.0-cp37-abi3-win_amd64.whl.metadata (5.0 kB)
Collecting httpx<1,>=0.23.0 (from huggingface_hub)
  Using cached httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)
Collecting pyyaml>=5.1 (from huggingface_hub)
  Using cached pyyaml-6.0.3-cp311-cp311-win_amd64.whl.metadata (2.4 kB)
Collecting shellingham (from huggingface_hub)
  Using cached shellingham-1.5.4-py2.py3-none-any.whl.metadata (3.5 kB)
Collecting tqdm>=4.42.1 (from huggingface_hub)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting typer-slim (from huggingface_hub)
  Using cached typer_slim-0.20.0-py3-none-any.whl.metadata (16 kB)
Collecting anyio (from httpx<1,>=0.23.0->huggingface_hub)
  Using cached anyio-4.11.0-py3-none-any.whl.metadata (4.1 kB)
Collecting certifi (from httpx<1,>=0.23.0->huggingface_hub)
  Using cached certifi-202


[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting ipywidgets
  Using cached ipywidgets-8.1.8-py3-none-any.whl.metadata (2.4 kB)
Collecting widgetsnbextension~=4.0.14 (from ipywidgets)
  Using cached widgetsnbextension-4.0.15-py3-none-any.whl.metadata (1.6 kB)
Collecting jupyterlab_widgets~=3.0.15 (from ipywidgets)
  Using cached jupyterlab_widgets-3.0.16-py3-none-any.whl.metadata (20 kB)
Using cached ipywidgets-8.1.8-py3-none-any.whl (139 kB)
Using cached jupyterlab_widgets-3.0.16-py3-none-any.whl (914 kB)
Using cached widgetsnbextension-4.0.15-py3-none-any.whl (2.2 MB)
Installing collected packages: widgetsnbextension, jupyterlab_widgets, ipywidgets
Successfully installed ipywidgets-8.1.8 jupyterlab_widgets-3.0.16 widgetsnbextension-4.0.15



[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting transformers
  Using cached transformers-4.57.1-py3-none-any.whl.metadata (43 kB)
Collecting accelerate
  Using cached accelerate-1.11.0-py3-none-any.whl.metadata (19 kB)
Collecting huggingface_hub
  Using cached huggingface_hub-0.36.0-py3-none-any.whl.metadata (14 kB)
Collecting regex!=2019.12.17 (from transformers)
  Using cached regex-2025.11.3-cp311-cp311-win_amd64.whl.metadata (41 kB)
Collecting requests (from transformers)
  Using cached requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers)
  Using cached tokenizers-0.22.1-cp39-abi3-win_amd64.whl.metadata (6.9 kB)
Collecting safetensors>=0.4.3 (from transformers)
  Using cached safetensors-0.6.2-cp38-abi3-win_amd64.whl.metadata (4.1 kB)
Collecting charset_normalizer<4,>=2 (from requests->transformers)
  Using cached charset_normalizer-3.4.4-cp311-cp311-win_amd64.whl.metadata (38 kB)
Collecting urllib3<3,>=1.21.1 (from requests->transformers)
  Using cached urllib3


[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
# You must log in again in a new notebook
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

## **Step 1**: Install Dependencies

In [5]:
!pip install transformers torch scikit-learn accelerate tqdm pandas openpyxl rouge-score nltk matplotlib -q


[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


## **Step 2**: Importing libraries

In [6]:
import pandas as pd
import re
import os
import json
from typing import List, Dict, Tuple, Callable, Optional
from dataclasses import dataclass
import numpy as np
import torch
from torch import nn
from transformers import AutoTokenizer, AutoModelForCausalLM
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from tqdm import tqdm
import sys
import argparse

# --- Import Scoring Libraries ---
from rouge_score import rouge_scorer
from nltk.translate.meteor_score import meteor_score
import nltk

nltk.download('wordnet', quiet=True)
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)

True

## **Step 3**: Lightweight Model Wrapper (Generation Only)

In [7]:
class ModelGenerator:
    def __init__(self, model_name: str):
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            device_map="auto"
        )
        self.device = self.model.device
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        self._layers_attr_path = self._find_layer_attr_path()
        self.num_layers = len(self._get_layers_list())
        print(f"[ModelGenerator] Model loaded. Path: {self._layers_attr_path}, Layers: {self.num_layers}")

    def _find_layer_attr_path(self):
        candidates = [["model", "layers"], ["transformer", "h"], ["model", "decoder", "layers"]]
        for path in candidates:
            cur = self.model
            valid = True
            for p in path:
                if hasattr(cur, p): cur = getattr(cur, p)
                else: valid = False; break
            if valid and isinstance(cur, (list, nn.ModuleList)): return path
        raise AttributeError("Could not find transformer layer list in model.")

    def _get_layers_list(self):
        cur = self.model
        for p in self._layers_attr_path: cur = getattr(cur, p)
        return list(cur)

    def generate(self, prompt: str, max_new_tokens: int = 150, **kwargs) -> str:
        tok = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        out = self.model.generate(**tok, max_new_tokens=max_new_tokens, pad_token_id=self.tokenizer.pad_token_id, **kwargs)
        full_text = self.tokenizer.decode(out[0], skip_special_tokens=True)
        if full_text.startswith(prompt):
             return full_text[len(prompt):].strip()
        return full_text.strip()

## **Step 4**: Vector & Hook Definitions

In [8]:
def compute_mean_difference(pos: np.ndarray, neg: np.ndarray) -> np.ndarray:
    diff = (pos - neg).mean(axis=0)
    return diff / (np.linalg.norm(diff) + 1e-12)

def compute_logistic_regression(pos: np.ndarray, neg: np.ndarray) -> np.ndarray:
    X = np.vstack([pos, neg])
    y = np.concatenate([np.ones(len(pos)), np.zeros(len(neg))])
    clf = LogisticRegression(max_iter=1000).fit(X, y)
    w = clf.coef_.reshape(-1)
    return w / (np.linalg.norm(w) + 1e-12)

def compute_pca_vector(pos: np.ndarray, neg: np.ndarray) -> np.ndarray:
    diffs = pos - neg
    pca = PCA(n_components=1).fit(np.vstack([diffs, -diffs]))
    vec = pca.components_[0]
    return vec / (np.linalg.norm(vec) + 1e-12)

class SteeringHook:
    def __init__(self, model, layer_path, layer_idx, style_vector, multiplier):
        self.model, self.layer_path, self.layer_idx = model, layer_path, layer_idx
        self.style_vector_cpu = torch.from_numpy(style_vector).float() * multiplier
        self.handle = None
        self._register_hook()

    def _get_layer_module(self):
        cur = self.model
        for p in self.layer_path: cur = getattr(cur, p)
        idx = self.layer_idx if self.layer_idx >= 0 else len(cur) + self.layer_idx
        return cur[idx]

    def _hook(self, module, input, output):
        tensor_output = output[0] if isinstance(output, tuple) else output
        add_vec = self.style_vector_cpu.to(tensor_output.device, dtype=tensor_output.dtype)
        modified_tensor = tensor_output + add_vec.view(1, 1, -1)
        return (modified_tensor,) + output[1:] if isinstance(output, tuple) else modified_tensor

    def _register_hook(self):
        self.handle = self._get_layer_module().register_forward_hook(self._hook)

    def remove(self):
        if self.handle: self.handle.remove()

## **Step 5**: Data Loading Function

In [None]:
def load_data_for_training_and_testing(file_path: str):
    """
    Loads data from the XLSX file, creates dynamic prompts,
    and returns a training set (all 40) and a test set (first 20).
    """
    try:
        df = pd.read_excel(file_path)
    except Exception as e:
        print(f"An error occurred while reading the Excel file: {e}")
        return None, None

    # --- Check your column names here ---
    NEUTRAL_COL = 'response_Neutral'
    STYLED_COL = 'response_styled'
    # -----------------------------------

    required_cols = ['date', 'Time', 'Venue', 'OccasionType', 'Host', 'Event', NEUTRAL_COL, STYLED_COL]
    if not all(col in df.columns for col in required_cols):
        print(f"Error: Missing one of the required columns. Found: {df.columns}")
        return None, None

    print(f"INFO: Loaded {len(df)} examples from the file.")

    train_examples = []
    test_examples = []

    for idx, row in df.iterrows():
        date = row.get('date', 'N/A')
        time_ = row.get('Time', 'N/A')
        venue = row.get('Venue', 'N/A')
        occasion = row.get('OccasionType', 'N/A')
        host = row.get('Host', 'N/A')
        event = row.get('Event', 'N/A')
        
        neutral_email = row.get(NEUTRAL_COL)
        styled_email = row.get(STYLED_COL)

        if pd.isna(neutral_email) or pd.isna(styled_email):
            continue

        prompt = f"""
Write an email inviting participants to the following event.
Ensure the email tone matches the style instruction and stays under 100 words.

Event Details:
- Date: {date}
- Time: {time_}
- Venue: {venue}
- Occasion Type: {occasion}
- Host: {host}
- Event: {event}
"""
        # Add to training set (all examples)
        train_examples.append((prompt, styled_email, neutral_email))

        # Add to test set (first 20 examples)
        if idx < 20:
            test_examples.append((prompt, styled_email))

    print(f"Using {len(train_examples)} for training and {len(test_examples)} for testing.")
    
    train_hist = {"user_1": train_examples} 
    test_hist = {"user_1": test_examples} 

    return train_hist, test_hist

## **Step 6**: Run Evaluation

In [10]:
if __name__ == "__main__":
    if 'ipykernel' in sys.modules: sys.argv = sys.argv[:1]

    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, default="meta-llama/Llama-2-7b-hf")
    parser.add_argument("--layer", type=int, default=-15)
    parser.add_argument("--xlsx_file", type=str, default="generated_email_responses (1).xlsx")
    parser.add_argument("--input_file", type=str, default="activations.npz")
    args = parser.parse_args()

    print(f"Running Evaluation: model={args.model}, layer={args.layer}")

    # 1. Load test data (we only need the test set here)
    _, test_hist = load_data_for_training_and_testing(args.xlsx_file)
    if not test_hist:
        print("Halting execution due to data loading error.")
    else:
        # 2. Load the saved activations
        try:
            data = np.load(args.input_file)
            pos_arr = data['pos_acts']
            neg_arr = data['neg_acts']
            print(f"Activations loaded successfully from '{args.input_file}'")
        except FileNotFoundError:
            print(f"Error: The file '{args.input_file}' was not found.")
            print("Please run the '1_Extract_Activations.ipynb' script first.")
            sys.exit(1)
        except Exception as e:
            print(f"An error occurred loading activations: {e}")
            sys.exit(1)

        # 3. Compute Style Vectors
        user_style_vectors = {
            "mean": compute_mean_difference(pos_arr, neg_arr),
            "logreg": compute_logistic_regression(pos_arr, neg_arr),
            "pca": compute_pca_vector(pos_arr, neg_arr)
        }
        print("[Pipeline] Computed style vectors.")

        # 4. Load the model for generation
        ae_gen = ModelGenerator(args.model)

        # 5. Run Automated Evaluation
        print("\n" + "="*50)
        print(f"Automated Evaluation on {len(test_hist['user_1'])} Test Examples")
        print("="*50)
        
        scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
        scores = {
            "mean": {"rougeL": [], "meteor": []},
            "logreg": {"rougeL": [], "meteor": []},
            "pca": {"rougeL": [], "meteor": []}
        }
        
        test_examples = test_hist["user_1"]
        
        for (inp_prompt, ideal_user_email) in tqdm(test_examples, desc="Evaluating on test set"):
            
            ideal_email_cleaned = ideal_user_email.replace(inp_prompt, "").strip()
            ideal_tokens = nltk.word_tokenize(ideal_email_cleaned)
            if not ideal_tokens: ideal_tokens = [""]

            for method, style_vec in user_style_vectors.items():
                hook = SteeringHook(ae_gen.model, ae_gen._layers_attr_path, args.layer, style_vec, 2.0)
                try:
                    steered_out_full = ae_gen.generate(inp_prompt, temperature=0.7, do_sample=True, top_p=0.9)
                    steered_out_cleaned = steered_out_full.replace(inp_prompt, "").strip()

                    rouge_scores = scorer.score(ideal_user_email, steered_out_full)
                    scores[method]["rougeL"].append(rouge_scores['rougeL'].fmeasure)
                    
                    steered_tokens = nltk.word_tokenize(steered_out_cleaned)
                    if not steered_tokens: steered_tokens = [""]
                    meteor = meteor_score([ideal_tokens], steered_tokens)
                    scores[method]["meteor"].append(meteor)
                    
                finally:
                    hook.remove()

        # 6. Print Final Results
        print("\n" + "="*50)
        print("Evaluation Results (Average F1-Score)")
        print("="*50)
        print(f"{'Method':<10} | {'Avg. ROUGE-L':<15} | {'Avg. METEOR':<15}")
        print("-" * 42)
        
        for method in scores.keys():
            avg_rouge = np.mean(scores[method]['rougeL'])
            avg_meteor = np.mean(scores[method]['meteor'])
            print(f"{method.upper():<10} | {avg_rouge:<15.4f} | {avg_meteor:<15.4f}")

        print("\nPipeline finished successfully.")

Running Evaluation: model=meta-llama/Llama-2-7b-hf, layer=-15
INFO: Loaded 41 examples from the file.
Using 41 for training and 20 for testing.
Activations loaded successfully from 'activations.npz'
[Pipeline] Computed style vectors.


`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the disk and cpu.


[ModelGenerator] Model loaded. Path: ['model', 'layers'], Layers: 32

Automated Evaluation on 20 Test Examples


Evaluating on test set: 100%|██████████| 20/20 [2:48:59<00:00, 506.97s/it]  


Evaluation Results (Average F1-Score)
Method     | Avg. ROUGE-L    | Avg. METEOR    
------------------------------------------
MEAN       | 0.1635          | 0.1608         
LOGREG     | 0.1523          | 0.1609         
PCA        | 0.1795          | 0.1847         

Pipeline finished successfully.



