## **Step 0**: Log in to Hugging Face

In [None]:
# Authenticate this notebook session with the Hugging Face Hub; the call below
# renders an interactive login widget in the cell output.
# NOTE(review): presumably required so the default model used later
# (meta-llama/Llama-2-7b-hf) can be downloaded — confirm access requirements.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

## **Step 1**: Install Dependencies

In [2]:
!pip install transformers torch scikit-learn accelerate tqdm pandas openpyxl rouge-score nltk matplotlib -q

## **Step 2**: Imports and NLTK Download

In [3]:
import pandas as pd
import re
import os
import json
from typing import List, Dict, Tuple, Callable, Optional
from dataclasses import dataclass
import numpy as np
import torch
from torch import nn
from transformers import AutoTokenizer, AutoModelForCausalLM
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from tqdm import tqdm
import sys
import argparse
import nltk

# NLTK resources needed for METEOR, fetched quietly one after another.
for _nltk_resource in ('wordnet', 'punkt', 'punkt_tab'):
    nltk.download(_nltk_resource, quiet=True)

True

## **Step 3**: Model Wrapper (`ActivationExtractor`)

In [6]:
class ActivationExtractor:
    """Wrap a Hugging Face causal LM to read hidden-state activations at a chosen layer.

    On construction the model is loaded in float16 with ``device_map="auto"``,
    the tokenizer is loaded (using EOS as the pad token when none is defined),
    and the attribute path to the model's list of transformer blocks is
    discovered so that layer indices can be validated against its length.
    """

    def __init__(self, model_name: str):
        """Load model and tokenizer for ``model_name`` and locate its layer list.

        Raises:
            AttributeError: if none of the known attribute paths leads to a
                list / ``nn.ModuleList`` of layers.
        """
        # NOTE(review): recent transformers releases warn that `torch_dtype` is
        # deprecated in favour of `dtype`; the old keyword is kept so the cell
        # still runs on older installations.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            device_map="auto"
        )
        self.device = self.model.device
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        self._layers_attr_path = self._find_layer_attr_path()
        self.num_layers = len(self._get_layers_list())
        print(f"[ActivationExtractor] Model loaded. Path: {self._layers_attr_path}, Layers: {self.num_layers}")

    def _find_layer_attr_path(self):
        """Return the first candidate attribute path that resolves to the layer list.

        Each candidate is a chain of attribute names followed from ``self.model``;
        it is accepted when every attribute exists and the final object is a
        ``list`` or ``nn.ModuleList``.

        Raises:
            AttributeError: if no candidate path matches.
        """
        candidates = [["model", "layers"], ["transformer", "h"], ["model", "decoder", "layers"]]
        for path in candidates:
            cur = self.model
            for attr in path:
                if not hasattr(cur, attr):
                    break
                cur = getattr(cur, attr)
            else:
                # Loop finished without `break`: every attribute in the path exists.
                if isinstance(cur, (list, nn.ModuleList)):
                    return path
        raise AttributeError("Could not find transformer layer list in model.")

    def _get_layers_list(self):
        """Return the transformer blocks found at ``self._layers_attr_path`` as a plain list."""
        cur = self.model
        for attr in self._layers_attr_path:
            cur = getattr(cur, attr)
        return list(cur)

    def _resolve_layer_idx(self, idx: int):
        """Normalise a possibly negative layer index to the range ``[0, num_layers)``.

        Negative values count from the end, as with Python sequences.

        Raises:
            IndexError: if the index falls outside the model's layers. An
                explicit exception is used (rather than ``assert``) so the check
                survives ``python -O``, and the message reports the index the
                caller actually passed.
        """
        resolved = idx + self.num_layers if idx < 0 else idx
        if not 0 <= resolved < self.num_layers:
            raise IndexError(
                f"layer_index {idx} out of range for a model with {self.num_layers} layers"
            )
        return resolved

    @torch.no_grad()
    def get_activation_for_pair(self, input_text: str, output_text: str, layer_index: int) -> np.ndarray:
        """Return the hidden state of the last token of ``input_text + " " + output_text``.

        Both texts are stripped, joined with a single space, tokenized as one
        sequence and passed through the model once (no gradients).

        Args:
            input_text: prompt part of the pair.
            output_text: response part of the pair.
            layer_index: layer to read; negative values count from the end.

        Returns:
            The hidden-state vector at batch position 0, last token, of the
            selected layer, moved to CPU and converted to a NumPy array.

        Raises:
            IndexError: if ``layer_index`` is out of range.
        """
        idx = self._resolve_layer_idx(layer_index)
        concat = f"{input_text.strip()} {output_text.strip()}"
        tok = self.tokenizer(concat, return_tensors="pt").to(self.device)
        outputs = self.model(**tok, output_hidden_states=True, return_dict=True)
        # The +1 offset presumably skips the embedding output that Hugging Face
        # places at position 0 of `hidden_states` — confirm for other architectures.
        hs = outputs.hidden_states[idx + 1]
        return hs[0, -1, :].detach().cpu().numpy()

## **Step 4**: Data Loading Function

In [None]:
def load_data_for_training_and_testing(file_path: str, num_test_examples: int = 20):
    """Read the email spreadsheet and build prompt/response examples.

    Every row that has both a neutral and a styled email yields one training
    triple ``(prompt, styled_email, neutral_email)``. Rows located among the
    first ``num_test_examples`` rows of the sheet additionally yield a test pair
    ``(prompt, styled_email)``.

    NOTE: the test examples are a subset of the training examples, so any
    evaluation on them is in-sample.

    Args:
        file_path: path to the Excel workbook.
        num_test_examples: how many leading sheet rows are eligible for the
            test set (rows skipped for missing emails still count towards this
            window). Defaults to 20.

    Returns:
        ``(train_hist, test_hist)``, two dicts keyed by ``"user_1"``, or
        ``(None, None)`` when the file cannot be read or a required column is
        missing (a message is printed in both cases).
    """
    try:
        df = pd.read_excel(file_path)
    except Exception as e:
        # Best-effort loader: report the problem and let the caller decide.
        print(f"An error occurred while reading the Excel file: {e}")
        return None, None

    NEUTRAL_COL = 'response_Neutral'
    STYLED_COL = 'response_styled'
    DETAIL_COLS = ['date', 'Time', 'Venue', 'OccasionType', 'Host', 'Event']

    required_cols = DETAIL_COLS + [NEUTRAL_COL, STYLED_COL]
    if any(col not in df.columns for col in required_cols):
        print("Error: Missing one of the required columns.")
        print(f"Script needs: {required_cols}")
        print(f"Found in file: {df.columns.to_list()}")
        return None, None

    print(f"INFO: Loaded {len(df)} examples from the file.")

    train_examples = []
    test_examples = []

    for position, (_, row) in enumerate(df.iterrows()):
        neutral_email = row[NEUTRAL_COL]
        styled_email = row[STYLED_COL]

        # A pair is only usable when both responses are present.
        if pd.isna(neutral_email) or pd.isna(styled_email):
            continue

        # The columns are guaranteed to exist at this point, so a missing value
        # shows up as NaN/None in the cell; substitute 'N/A' so the prompt never
        # contains a literal "nan".
        date, time_, venue, occasion, host, event = (
            'N/A' if pd.isna(row[col]) else row[col] for col in DETAIL_COLS
        )

        prompt = f"""
Write an email inviting participants to the following event.
Ensure the email tone matches the style instruction and stays under 100 words.

Event Details:
- Date: {date}
- Time: {time_}
- Venue: {venue}
- Occasion Type: {occasion}
- Host: {host}
- Event: {event}
"""
        # Add to training set (all examples)
        train_examples.append((prompt, styled_email, neutral_email))

        # Add to test set (rows within the leading window of the sheet)
        if position < num_test_examples:
            test_examples.append((prompt, styled_email))

    print(f"Using {len(train_examples)} for training and {len(test_examples)} for testing.")

    train_hist = {"user_1": train_examples}
    test_hist = {"user_1": test_examples}

    return train_hist, test_hist

## **Step 5**: Run Extraction

In [None]:
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, default="meta-llama/Llama-2-7b-hf")
    parser.add_argument("--layer", type=int, default=-15)
    parser.add_argument("--xlsx_file", type=str, default="generated_email_responses (1).xlsx")
    parser.add_argument("--output_file", type=str, default="activations.npz")

    # Inside a Jupyter kernel, sys.argv may carry arguments that are not meant
    # for this parser. Parse an explicit empty list there (so every default
    # applies) instead of mutating the process-wide sys.argv.
    cli_args = [] if 'ipykernel' in sys.modules else None
    args = parser.parse_args(cli_args)

    print(f"Running Activation Extraction: model={args.model}, layer={args.layer}")

    # 1. Load data
    user_id = "user_1"
    train_hist, test_hist = load_data_for_training_and_testing(args.xlsx_file)
    if not train_hist:
        print("Halting execution due to data loading error.")
    elif not train_hist.get(user_id):
        # The file loaded but every row was skipped; np.vstack would fail on
        # an empty list, and loading the model would be wasted work.
        print("Halting execution: no usable examples were found in the file.")
    else:
        # 2. Load model
        ae = ActivationExtractor(args.model)

        # 3. Extract activations for every training example
        examples = train_hist[user_id]

        print(f"\n[Pipeline] Extracting activations for '{user_id}' with {len(examples)} examples...")
        pos_acts, neg_acts = [], []
        for (inp_prompt, user_out, neutral_out) in tqdm(examples, desc="Extracting training activations"):
            # Positive = prompt + styled email, negative = prompt + neutral email.
            pos_acts.append(ae.get_activation_for_pair(inp_prompt, user_out, args.layer))
            neg_acts.append(ae.get_activation_for_pair(inp_prompt, neutral_out, args.layer))

        # 4. Save the activations to a file (one row per example in each array)
        pos_arr = np.vstack(pos_acts)
        neg_arr = np.vstack(neg_acts)

        np.savez_compressed(args.output_file, pos_acts=pos_arr, neg_acts=neg_arr)

        print(f"\n[SUCCESS] Activations saved successfully to '{args.output_file}'")

Running Activation Extraction: model=meta-llama/Llama-2-7b-hf, layer=-15
INFO: Loaded 41 examples from the file.
Using 41 for training and 20 for testing.


`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the disk and cpu.


[ActivationExtractor] Model loaded. Path: ['model', 'layers'], Layers: 32

[Pipeline] Extracting activations for 'user_1' with 41 examples...


Extracting training activations: 100%|██████████| 41/41 [56:00<00:00, 81.96s/it] 


[SUCCESS] Activations saved successfully to 'activations.npz'
You can now run the '2_Evaluate_From_Saved.ipynb' notebook.



