## Set up dependencies.

In [1]:
!pip install transformers torch torchinfo tqdm



In [2]:
import os
from itertools import islice
from pathlib import Path

from datasets import load_dataset, Dataset, IterableDataset
from tqdm import tqdm

## Set up a class to serve as a namespace for our exports.

In [1]:
class PhiProbeCommons:
    """
    Empty namespace for this notebook's exports.

    The class starts with no attributes of its own; utilities are attached
    to it as attributes by the cells that define them further down.
    """

## Load the Dolma v1.6 sample dataset (~16GB) in streaming mode

In [2]:
def _data():
    """
    Opens the 'train' split of the 'v1_6-sample' configuration of
    allenai/dolma in streaming mode and returns the resulting dataset.

    Every call opens the dataset afresh, so iteration restarts from the
    first example each time.
    """
    dataset = load_dataset(
        'allenai/dolma',
        name='v1_6-sample',
        split="train",
        streaming=True,
        trust_remote_code=True,
    )
    return dataset

# Export the loader on the shared namespace.
PhiProbeCommons.data = _data

## Load phi-2 and set up a method that gets the middle layer's activations.

In [4]:
import torch
import sys
from transformers import AutoModelForCausalLM, AutoTokenizer

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Make `device` the default location for newly created tensors.
torch.set_default_device(device)

# Load phi-2 with float16 weights, together with its tokenizer, and publish both on the
# shared namespace. trust_remote_code=True permits running code shipped in the model repository.
PhiProbeCommons.phi = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", torch_dtype=torch.float16, trust_remote_code=True)
PhiProbeCommons.tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)

# Suppress warnings about tokenising long passages.
# We're going to handle splitting the text ourselves.
PhiProbeCommons.tokenizer.model_max_length = sys.maxsize

2024-07-25 17:36:31.821871: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-25 17:36:31.859638: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [6]:
def _tokenize(text):
    """
    Runs the shared tokenizer over `text` and returns only the
    'input_ids' entry of its output, requested as PyTorch tensors
    and without an attention mask.
    """
    encoded = PhiProbeCommons.tokenizer(
        text,
        return_attention_mask=False,
        return_tensors="pt",
    )
    return encoded['input_ids']

# Export the tokenizing helper on the shared namespace.
PhiProbeCommons.tokenize = _tokenize

In [7]:
@torch.no_grad()
def _get_activations(tokens):
    """
    Runs phi-2 on a batch of token ids and returns the middle layer's
    activations.

    `tokens` has shape e.g. (B, T); the result has shape (B, T, 2560)
    and is converted to float32. Gradients are not tracked.
    """
    outputs = PhiProbeCommons.phi(tokens, return_dict=True, output_hidden_states=True)
    # Entry 16 of the hidden-state tuple is what this notebook calls
    # "hidden layer 15" (presumably because entry 0 holds the embedding
    # output -- confirm against the transformers documentation).
    return outputs.hidden_states[16].float()

# Export the activation helper on the shared namespace.
PhiProbeCommons.get_activations = _get_activations

In [8]:
def _get_token_blocks(text, block_size):
    """
    Given a text, tokenizes it and returns a tensor of shape 
    (N, BS) containing the text as a series of blocks of 
    tokens of size BS.
    N is the number of blocks contained within the text.   
    """    
    block_size = 100
    tokens = PhiProbeCommons.tokenize(text).view(-1)
    truncated_length = (len(tokens) // block_size) * block_size
    tokens = tokens[:truncated_length]
    block_tensor = tokens.view(-1, block_size)
    blocks = torch.split(block_tensor, 1, dim=0)    
    return [b.view(-1) for b in blocks]

In [9]:
def _all_blocks(split, block_size=100):    
    for i, example in enumerate(PhiProbeCommons.data()):
        is_test_example = i % 10 == 0
        if split == 'train' and is_test_example or split != 'train' and not is_test_example:
            continue
        for block in _get_token_blocks(example['text'], block_size=block_size):                        
            yield block

In [10]:
def _batch_iterable(iterable, batch_size):
    current_batch = []
    for item in iterable:
        current_batch.append(item)
        if len(current_batch) == batch_size:
            yield current_batch
            current_batch = []

def _all_activations(split, block_size=100, minibatch_size=5, start_block=0, parallelism=10, workahead_cache_size=50):
    """
    Generator yielding minibatches of training examples.
    Each example is returned as a tuple (tokens, activations).
     - tokens is a tensor of dimention (MB, BS) where MB 
       is the specified minibatch size, and BS is the block size.
     - activations is a tensor of dimension (MB, BS, 2560)
    """    
    global phi
    assert workahead_cache_size % parallelism == 0
    assert workahead_cache_size % minibatch_size == 0
    assert parallelism % minibatch_size == 0
    blocks = islice(_all_blocks(split, block_size=block_size), start_block, None)
    for block_megabatch in _batch_iterable(blocks, workahead_cache_size):  
        PhiProbeCommons.phi = PhiProbeCommons.phi.to("cuda")
        minibatches = []        
        for inputs in torch.stack(block_megabatch).split(parallelism):            
            activations = PhiProbeCommons.get_activations(inputs)      
            minibatch_inputs = inputs.split(minibatch_size)
            minibatch_activations = activations.split(minibatch_size)
            minibatches.extend(zip(minibatch_inputs, minibatch_activations))
        PhiProbeCommons.phi = PhiProbeCommons.phi.to("cpu")
        
        for minibatch_input, minibatch_activation in minibatches:
            yield minibatch_input, minibatch_activation

PhiProbeCommons.all_activations = _all_activations

## Load Our Pretrained Autoencoder (if available)

In [12]:
PhiProbeCommons._sae = None

def _load_sae():
    if PhiProbeCommons._sae is None and Path('sae.pt').exists():
        PhiProbeCommons._sae = torch.load('sae.pt').to(device)
    return PhiProbeCommons._sae

PhiProbeCommons.sae = _load_sae