In [1]:
import sys
import torch as t
from torch import Tensor
import torch.nn as nn
import torch.nn.functional as F
from pathlib import Path
import numpy as np
import einops
from jaxtyping import Int, Float
import functools
from tqdm import tqdm
from IPython.display import display
from transformer_lens.hook_points import HookPoint
from transformer_lens import (
    utils,
    HookedTransformer,
    HookedTransformerConfig,
    FactoredMatrix,
    ActivationCache,
)
import circuitsvis as cv

from optim_hunter.plotly_utils import imshow, hist, plot_comp_scores, plot_logit_attribution, plot_loss_difference

# Gradients are never needed in this notebook, so disable autograd globally
# to save computation.
t.set_grad_enabled(False)

# Use the first CUDA device when one is available, otherwise fall back to CPU.
if t.cuda.is_available():
    device = t.device("cuda:0")
else:
    device = t.device("cpu")

# True only when this code is executed as the top-level module.
MAIN = "__main__" == __name__

In [2]:
# Load directly from model path https://github.com/TransformerLensOrg/TransformerLens/issues/691
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer

# MODEL_TYPE is the name handed to HookedTransformer.from_pretrained; the weights
# themselves come from the local HuggingFace checkpoint at MODEL_PATH.
# NOTE(review): MODEL_PATH points at a "Llama-3.1" checkpoint while MODEL_TYPE names
# the Llama-3 model — confirm this pairing is intended.
MODEL_TYPE = "meta-llama/Meta-Llama-3-8B-Instruct"
MODEL_PATH = "/home/freiza/optim_hunter/.models/Llama-3.1-8B-Instruct/"

if MODEL_PATH:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    hf_model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, low_cpu_mem_usage=True,
                                                     #quantization_config=BitsAndBytesConfig(load_in_4bit=True), 
                                                     #torch_dtype = t.float32, 
                                                     #device_map = "cuda:0"
                                                     )

    # Pad on the left and reuse EOS as the pad token.
    tokenizer.padding_side = 'left'
    tokenizer.pad_token = tokenizer.eos_token

    # Wrap the already-loaded HF weights in a HookedTransformer, with every
    # weight-processing option switched off.
    model = HookedTransformer.from_pretrained(
        MODEL_TYPE,
        hf_model=hf_model,
        device="cuda",
        n_devices=2,
        fold_ln=False,
        fold_value_biases=False,
        # Was misspelled `enter_writing_weights`, which left this option unset.
        center_writing_weights=False,
        center_unembed=False,
        dtype=t.bfloat16,
        default_padding_side='left',
        tokenizer=tokenizer
    )

    #model = model.to("cuda" if t.cuda.is_available() else "cpu")
    #model.generate("The capital of Germany is", max_new_tokens=20, temperature=0)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]



Loaded pretrained model meta-llama/Meta-Llama-3-8B-Instruct into HookedTransformer


In [3]:
def prepare_prompt(x_train, y_train, x_test):
    """
    Prepare the prompt without using LangChain while maintaining exact same format.

    Every training row is rendered as one "<column>: <value>" line per feature
    followed by "<target>: <value>"; consecutive examples are separated by a blank
    line. The first row of ``x_test`` is rendered the same way but ends with a
    bare "<target>:" line left open for completion.

    Args:
        x_train: DataFrame of training features; its column order fixes the line order.
        y_train: Series of training targets; its ``name`` labels the target line.
        x_test: DataFrame whose first row is the query; it must contain every
            column of ``x_train``.

    Returns:
        str: instruction prefix, formatted examples and formatted test case.

    Raises:
        KeyError: if the first row of ``x_test`` lacks one of the training columns.
    """
    feature_names = x_train.columns.to_list()
    target_name = y_train.name

    def feature_lines(record):
        # Values are interpolated directly instead of going through str.format on a
        # template built from the column names, so names containing '.', '[' or
        # braces are emitted literally rather than parsed as format fields.
        return [f"{feature}: {record[feature]}" for feature in feature_names]

    # One text block per training example: feature lines, then the target line
    example_blocks = []
    for record, target in zip(x_train.to_dict('records'), y_train):
        lines = feature_lines(record)
        lines.append(f"{target_name}: {target}")
        example_blocks.append("\n".join(lines))
    examples_text = "\n\n".join(example_blocks)

    # The test case repeats the feature lines and leaves the target value open
    test_record = x_test.to_dict('records')[0]
    test_case = "\n".join(feature_lines(test_record) + [f"{target_name}:"])

    # Add instruction prefix
    prefix_instruction = 'The task is to provide your best estimate for "Output". Please provide that and only that, without any additional text.\n\n\n\n\n'

    # Combine everything
    return f"{prefix_instruction}{examples_text}\n\n{test_case}"

In [4]:
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

def linear_regression(x_train, x_test, y_train, y_test, random_state=1):
    """
    Linear Regression

    Fits a LinearRegression on (x_train, y_train) and returns its predictions
    for x_test. ``y_test`` and ``random_state`` are not used by this function;
    they are kept so the signature matches the other regressors in this file.
    """
    model = LinearRegression()
    model.fit(x_train, y_train)
    # The former `y_test.to_numpy()` assignment was dead code and has been removed,
    # so y_test no longer has to be a pandas object.
    return model.predict(x_test)

def mlp(x_train, x_test, y_train, y_test, random_state=1):
    """
    Multi-Layer Perceptron

    Fits an MLPRegressor (one hidden layer of 100 ReLU units, L-BFGS solver,
    seeded with ``random_state``) on (x_train, y_train) and returns its
    predictions for x_test. ``y_test`` is not used; it is kept so the signature
    matches the other regressors in this file.
    """
    model = MLPRegressor(hidden_layer_sizes=(100, ), activation='relu', solver='lbfgs', random_state=random_state)
    model.fit(x_train, y_train)
    # The former `y_test.to_numpy()` assignment was dead code and has been removed,
    # so y_test no longer has to be a pandas object.
    return model.predict(x_test)

def gradient_boosting(x_train, x_test, y_train, y_test, random_state=1):
    """
    Gradient Boosting Regressor

    Fits a GradientBoostingRegressor seeded with ``random_state`` on
    (x_train, y_train) and returns its predictions for x_test. ``y_test`` is not
    used; it is kept so the signature matches the other regressors in this file.
    """
    model = GradientBoostingRegressor(random_state=random_state)
    model.fit(x_train, y_train)
    # The former `y_test.to_numpy()` assignment was dead code and has been removed,
    # so y_test no longer has to be a pandas object.
    return model.predict(x_test)

def random_forest(x_train, x_test, y_train, y_test, random_state=1):
    """
    Random Forest Regressor

    Fits a RandomForestRegressor (max_depth=3, seeded with ``random_state``) on
    (x_train, y_train) and returns its predictions for x_test. ``y_test`` is not
    used; it is kept so the signature matches the other regressors in this file.
    """
    model = RandomForestRegressor(max_depth=3, random_state=random_state)
    model.fit(x_train, y_train)
    # The former `y_test.to_numpy()` assignment was dead code and has been removed,
    # so y_test no longer has to be a pandas object.
    return model.predict(x_test)

In [5]:
#############################
## Friedman #2 Dataset
##############################
# Here, we will use Friedman #2
from sklearn.datasets import make_friedman2
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

def get_dataset1(random_state=1):
    """
    Build a small Friedman #2 regression problem.

    Draws 51 noise-free samples, rounds features and target to two decimals and
    splits them into 50 training rows and 1 test row.

    Args:
        random_state: seed used both for sampling and for the train/test split.

    Returns:
        tuple: (x_train, y_train, x_test, y_test) as pandas objects.
    """
    # Raw samples from sklearn
    features, targets = make_friedman2(n_samples=51, noise=0, random_state=random_state)

    # One named column per feature, then the target column
    columns = {f'Feature {idx}': features[:, idx] for idx in range(features.shape[1])}
    columns['Output'] = targets
    frame = pd.DataFrame(columns)

    # Rounding is optional, but keeps prompts short: lower cost, and the same number
    # of examples still fits models with a smaller context (e.g., Yi, Llama, etc)
    x = np.round(frame.drop(['Output'], axis=1), 2)
    y = np.round(frame['Output'], 2)

    # Random split that holds out a single row for testing
    split = train_test_split(x, y, test_size=1, random_state=random_state)
    x_train, x_test, y_train, y_test = split

    return x_train.iloc[:50], y_train.iloc[:50], x_test.iloc[:1], y_test.iloc[:1]

In [6]:
def slice_dataset(x_train, y_train, x_test, y_test, n=10):
    """
    Keep only the leading n rows of each of the four dataset pieces.

    Positional slicing via ``.iloc`` is used, so every piece keeps its pandas
    type (DataFrame stays DataFrame, Series stays Series).

    Args:
        x_train (pd.DataFrame): Training features
        y_train (pd.Series): Training labels
        x_test (pd.DataFrame): Test features
        y_test (pd.Series): Test labels
        n (int): Number of items to keep

    Returns:
        tuple: (x_train_slice, y_train_slice, x_test_slice, y_test_slice)
    """
    pieces = (x_train, y_train, x_test, y_test)
    return tuple(piece.iloc[:n] for piece in pieces)

In [7]:
def generate_linreg_tokens(
    model: HookedTransformer,
    dataset,
    seq_len = 5,
    batch: int = 1
) -> Int[Tensor, "batch full_seq_len"]:
    '''
    Generates a sequence of linear regression ICL tokens

    Args:
        model: HookedTransformer whose tokenizer is used to encode the prompts.
        dataset: callable accepting ``random_state`` and returning
            (x_train, y_train, x_test, y_test). Non-callable values fall back
            to ``get_dataset1``, because earlier versions ignored this argument.
        seq_len: number of leading rows kept from the dataset (passed to
            ``slice_dataset`` as ``n``).
        batch: number of prompts to generate.

    Outputs are:
        linreg_tokens: [batch, 1+linreg]
    '''
    dataset_fn = dataset if callable(dataset) else get_dataset1

    # Create list to store tokens for each batch
    batch_tokens = []

    for i in range(batch):
        # TODO fix so that we can use random_state=i
        # NOTE(review): presumably prompts from different seeds can tokenize to
        # different lengths, which t.stack below cannot handle — confirm.
        x_train, y_train, x_test, y_test = dataset_fn(random_state=1)
        x_train, y_train, x_test, y_test = slice_dataset(x_train, y_train, x_test, y_test, seq_len)
        prompt = prepare_prompt(x_train, y_train, x_test)
        # BOS is added exactly once via the explicit prefix below, so tokenizing must
        # not prepend another one (to_tokens otherwise follows the model's default
        # prepend_bos setting, which would yield a doubled BOS).
        tokens = model.to_tokens(prompt, prepend_bos=False, truncate=True)
        batch_tokens.append(tokens[0])

    # Stack all batches together
    linreg_tokens = t.stack(batch_tokens).to(device)

    # Add the single BOS prefix to each batch row
    prefix = t.full((batch, 1), model.tokenizer.bos_token_id, dtype=t.long, device=device)
    linreg_tokens = t.cat([prefix, linreg_tokens], dim=-1).to(device)
    return linreg_tokens

def run_and_cache_model_linreg_tokens(model: HookedTransformer, seq_len: int, batch: int = 1) -> tuple[Tensor, Tensor, ActivationCache]:
    '''
    Builds linear regression ICL tokens with `generate_linreg_tokens` (using
    `get_dataset1` as the dataset) and runs the model on them with caching.

    Returns, in this order:
        linreg_tokens: [batch, 1+linreg]
        linreg_logits: [batch, 1+linreg, d_vocab]
        linreg_cache: The cache of the model run on linreg_tokens
    '''
    tokens = generate_linreg_tokens(model, get_dataset1, seq_len, batch)
    logits, cache = model.run_with_cache(tokens)
    return tokens, logits, cache

In [8]:
# seq_len: training rows kept per prompt; batch: number of prompts
seq_len, batch = 25, 1
linreg_tokens, linreg_logits, linreg_cache = run_and_cache_model_linreg_tokens(
    model, seq_len=seq_len, batch=batch
)

In [9]:
def visualize_pattern_hook(
    pattern: Float[Tensor, "batch head_index dest_pos source_pos"],
    hook: HookPoint,
):
    """
    Forward hook that renders one layer's attention pattern with circuitsvis.

    The pattern is averaged over the batch dimension, labelled with the string
    tokens of the first row of the global ``linreg_tokens``, written to
    ``../docs/attention_pattern_layer_<layer>.html`` and displayed inline.

    Args:
        pattern: attention pattern activation passed in by the hook point.
        hook: the hook point that fired; ``hook.layer()`` supplies the layer number.

    Returns:
        None, so the activation is passed on unchanged.
    """
    layer_num = hook.layer()
    print("Layer: ", layer_num)

    # Create the visualization
    attention_vis = cv.attention.attention_patterns(
        tokens=model.to_str_tokens(linreg_tokens[0]),
        attention=pattern.mean(0)
    )

    # Save as HTML file; create the target directory first so a missing ../docs
    # does not abort the forward pass, and fix the encoding so the output does not
    # depend on the platform default.
    output_path = Path(f"../docs/attention_pattern_layer_{layer_num}.html")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(attention_vis._repr_html_(), encoding="utf-8")

    # Still display in notebook for interactive viewing
    display(attention_vis)

# Register the visualization hook on every layer's attention pattern and run ONE
# forward pass, instead of one full forward pass per layer. Each hook still fires
# as the forward pass reaches its layer, so files and displays are produced in
# layer order as before.
pattern_hooks = [
    (utils.get_act_name("pattern", layer), visualize_pattern_hook)
    for layer in range(model.cfg.n_layers)
]
model.run_with_hooks(
    linreg_tokens,
    return_type=None, # For efficiency, we don't need to calculate the logits
    fwd_hooks=pattern_hooks,
)

Layer:  0


AttributeError: 'RenderedHTML' object has no attribute 'html'