In [1]:
from taker import Model
from datetime import datetime
import json
from os.path import exists

import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.decomposition import PCA

  from .autonotebook import tqdm as notebook_tqdm


## Create helper function

In [2]:
import sys, os
class HiddenPrints:
    """Context manager that silences ``print`` output inside a ``with`` block.

    On entry ``sys.stdout`` is replaced by a write handle on ``os.devnull``;
    on exit the stream that was active at entry is put back. Exceptions raised
    inside the block are not suppressed.
    """

    def __enter__(self):
        self._original_stdout = sys.stdout
        # Keep our own reference to the sink so __exit__ closes exactly this
        # handle, even if the body rebinds sys.stdout to something else.
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore first: whatever happens while closing the sink, the caller
        # gets a working stdout back.
        sys.stdout = self._original_stdout
        self._devnull.close()

## Load model

In [3]:
# Model identifier; it is also stored with every result record written later in this notebook.
model_name = "microsoft/phi-3-mini-4k-instruct"
# Load through taker's Model wrapper with dtype="int4" (the load log below reports "with int4").
m = Model(model_name, dtype="int4")

Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.88s/it]
We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
You are not running the flash-attention implementation, expect numerical differences.


Loaded model 'microsoft/phi-3-mini-4k-instruct' with int4:
- Added 512 hooks across 32 layers




In [4]:
# Print the loaded model's dimensions (n_layers, d_model, n_heads, d_head, d_mlp).
m.show_details()

 - n_layers : 32
 - d_model  : 3072
 - n_heads  : 32
 - d_head   : 96
 - d_mlp    : 8192


In [6]:
# Probe how the tokenizer splits the paragraph break that ends the story.
# Per the recorded output, a bare "." becomes '▁.' (id 869) while a "." that
# follows a word is '.' (id 29889); each newline is the byte token '<0x0A>'.
for probe_text in (".\n\n", "Sunday dinners.\n\n"):
    idlist = m.get_ids(probe_text).squeeze().tolist()
    print(idlist)
    print(m.tokenizer.convert_ids_to_tokens(idlist))

[869, 13, 13]
['▁.', '<0x0A>', '<0x0A>']
[16340, 270, 16697, 29889, 13, 13]
['▁Sunday', '▁d', 'inners', '.', '<0x0A>', '<0x0A>']


## Create prompts

In [8]:
# Instruction part of the prompt. The continuation lines start with a single
# space, which is part of the string.
prompt = """Write a short blog post about a recipe and the inspiration behind it.
 Do not include a title.
 Only reveal the dish after the story.
 Start with short story and then move to the recipe.
 To re-iterate, do not include a title."""


# Pre-written story that is appended to the instructions. It ends with an
# explicit "\n\n", so the combined prompt stops exactly at a paragraph break;
# the trailing tokens of the combined prompt are the ones selected for
# transfer further down in the notebook.
story = """
\n Once upon a time, in a quaint little village nestled between rolling hills and verdant fields, there lived an elderly woman named Agnes. Agnes was known for her warm smile and her legendary Sunday dinners that brought the entire neighborhood together. Her recipes were family heirlooms, passed down through generations, with each family adding their own touch to the final dish.

One crisp autumn evening, Agnes was reminiscing about her childhood, and how her grandmother used to gather everyone around the dinner table, sharing stories and laughter. These were the moments that shaped her, the memories that she passed on to her own children and grandchildren.

Inspired by her grandmother's legacy, Agnes decided to create a new dish that would encapsulate the essence of those cherished gatherings. She wanted something that was comforting and nourishing, a dish that could be prepared with love and shared with others. After days of experimentation, she finally created a recipe that she believed truly captured the spirit of her family's Sunday dinners.\n\n"""

# Full source prompt: instructions followed directly by the story.
prompt_original = prompt + story
print(prompt_original)

Write a short blog post about a recipe and the inspiration behind it.
 Do not include a title.
 Only reveal the dish after the story.
 Start with short story and then move to the recipe.
 To re-iterate, do not include a title.

 Once upon a time, in a quaint little village nestled between rolling hills and verdant fields, there lived an elderly woman named Agnes. Agnes was known for her warm smile and her legendary Sunday dinners that brought the entire neighborhood together. Her recipes were family heirlooms, passed down through generations, with each family adding their own touch to the final dish.

One crisp autumn evening, Agnes was reminiscing about her childhood, and how her grandmother used to gather everyone around the dinner table, sharing stories and laughter. These were the moments that shaped her, the memories that she passed on to her own children and grandchildren.

Inspired by her grandmother's legacy, Agnes decided to create a new dish that would encapsulate the essence

In [33]:
# Number of trailing tokens of the original prompt whose activations are
# carried over to the new prompt.
n_tokens_to_transfer = 2

idlist_original = m.get_ids(prompt_original).squeeze().tolist()
tokens_original = m.tokenizer.convert_ids_to_tokens(idlist_original)
n_tokens_original = len(tokens_original)

# A count of 0 would turn the slice below into `[-0:]`, i.e. the *whole*
# prompt, and a count above the prompt length would give negative indices
# in the map; fail loudly instead.
assert 0 < n_tokens_to_transfer <= n_tokens_original, (
    f"n_tokens_to_transfer must be in 1..{n_tokens_original}, got {n_tokens_to_transfer}"
)
tokens_to_transfer = tokens_original[-n_tokens_to_transfer:]
string_to_transfer = m.tokenizer.convert_tokens_to_string(tokens_to_transfer)

# # dummy prompt for testing
# prompt_new = '................'*400

# The new prompt is an optional prefix followed by the text of the
# transferred tokens, so both prompts end in the same tokens.
prefix = ""
prompt_new = prefix + string_to_transfer
print(f"{prompt_new=}")
idlist_new = m.get_ids(prompt_new).squeeze().tolist()
tokens_new = m.tokenizer.convert_ids_to_tokens(idlist_new)
n_tokens_new = len(tokens_new)

# Without this guard a too-short new prompt would produce negative target
# indices, which Python would silently wrap around.
assert n_tokens_new >= n_tokens_to_transfer, (
    f"new prompt has only {n_tokens_new} tokens, need at least {n_tokens_to_transfer}"
)

# Maps a token position in the original prompt to the position of the same
# token in the new prompt; both are counted from the end of their prompt.
token_index_map = {
    n_tokens_original - n_tokens_to_transfer + i: n_tokens_new - n_tokens_to_transfer + i
    for i in range(n_tokens_to_transfer)
}

# do sense check: every mapped pair must refer to the same token
for index_original, index_new in token_index_map.items():
    assert tokens_original[index_original] == tokens_new[index_new], (
        f"token mismatch: original[{index_original}]={tokens_original[index_original]!r} "
        f"vs new[{index_new}]={tokens_new[index_new]!r}"
    )

prompt_new='\n\n'


## Transfer activations

In [34]:
# Reset every neuron-replace hook before transplanting the next set of
# activations, then show the hook registry.
neuron_replace_hooks = m.hooks.neuron_replace
for replace_hook in neuron_replace_hooks.values():
    replace_hook.reset()

print(neuron_replace_hooks)

{'layer_0_attn_pre_out': NeuronReplace(), 'layer_0_mlp_pre_out': NeuronReplace(), 'layer_1_attn_pre_out': NeuronReplace(), 'layer_1_mlp_pre_out': NeuronReplace(), 'layer_2_attn_pre_out': NeuronReplace(), 'layer_2_mlp_pre_out': NeuronReplace(), 'layer_3_attn_pre_out': NeuronReplace(), 'layer_3_mlp_pre_out': NeuronReplace(), 'layer_4_attn_pre_out': NeuronReplace(), 'layer_4_mlp_pre_out': NeuronReplace(), 'layer_5_attn_pre_out': NeuronReplace(), 'layer_5_mlp_pre_out': NeuronReplace(), 'layer_6_attn_pre_out': NeuronReplace(), 'layer_6_mlp_pre_out': NeuronReplace(), 'layer_7_attn_pre_out': NeuronReplace(), 'layer_7_mlp_pre_out': NeuronReplace(), 'layer_8_attn_pre_out': NeuronReplace(), 'layer_8_mlp_pre_out': NeuronReplace(), 'layer_9_attn_pre_out': NeuronReplace(), 'layer_9_mlp_pre_out': NeuronReplace(), 'layer_10_attn_pre_out': NeuronReplace(), 'layer_10_mlp_pre_out': NeuronReplace(), 'layer_11_attn_pre_out': NeuronReplace(), 'layer_11_mlp_pre_out': NeuronReplace(), 'layer_12_attn_pre_out'

In [35]:
activations_original = m.get_midlayer_activations(prompt_original)

# Hook names have the form "layer_{layer_number}_{layer_type}_pre_out".
# Parse each name once up front instead of once per transferred token.
hook_targets = []
for name, hook in m.hooks.neuron_replace.items():
    _, layer_number, layer_type, _, _ = name.split("_")
    hook_targets.append((hook, layer_type, int(layer_number)))

# For every mapped token position, give each hook the activation recorded for
# the original prompt at that layer and position.
# NOTE(review): the leading 0 index is presumably the batch dimension — confirm.
for original_index, new_index in token_index_map.items():
    for hook, layer_type, layer_number in hook_targets:
        source_activation = activations_original[layer_type][0, layer_number, original_index]
        hook.add_token(new_index, source_activation)

## Create outputs

In [38]:
# One results file per run, named after the time this cell was executed.
current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
filename = f"../results/{current_time}_agnes_multi_token_transfer_LA.jsonl"

# Opening the file fails with FileNotFoundError when ../results is missing,
# so create the directory first (`os` is imported in the helper cell above).
os.makedirs(os.path.dirname(filename), exist_ok=True)

# Append mode creates the file when it is absent and never truncates an
# existing one, so no separate existence check is needed.
with open(filename, "a"):
    pass

In [39]:
max_new_tokens = 200
temperature = 0.2
n_samples = 20

# To smoke-test a single generation first, run:
#   output = m.generate(prompt_new, max_new_tokens, temperature=temperature)
#   print(output[1])

# Fields that are identical for every sample of this run.
run_metadata = {
    "temperature": temperature,
    "max_new_tokens": max_new_tokens,
    "model": model_name,
    # NOTE(review): the upper bound is n_layers + 1 — confirm the intended range convention.
    "transplant_layers": (0, m.cfg.n_layers + 1),
    "transferred_token_num": n_tokens_to_transfer,
    "orig_prompt": prompt_original,
    "transplant_prompt": prompt_new,
}

# Generation runs with stdout silenced; each sample is appended to the results
# file as soon as it is produced, one JSON object per line.
with HiddenPrints():
    for i in range(n_samples):
        output = m.generate(prompt_new, max_new_tokens, temperature=temperature)

        # "output" is added last so every record keeps the same key order.
        data = {**run_metadata, "output": output[1]}

        with open(filename, "a") as file:
            file.write(json.dumps(data) + "\n")