In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from baa import get_llm_memory_usage, device_map, AccuracyBenchmark

In [None]:
# Checkpoint identifier handed to `from_pretrained` for both the model and the tokenizer.
model_name = "HuggingFaceTB/SmolLM-135M"

In [None]:
# Tokenizer and model both come from the checkpoint named by `model_name`.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=device_map,
)

# Evaluation data: the test split of the raw WikiText-2 configuration.
dataset = load_dataset(
    path="wikitext",
    name="wikitext-2-raw-v1",
    split="test",
)

# Bundle model, tokenizer and data into the benchmark object used later on.
benchmark = AccuracyBenchmark(model, tokenizer, dataset)

In [None]:
# Print the model's repr to inspect its layers (the hooks below target the nn.Linear ones).
print(model)

In [None]:
# Store for captured activations: maps the name of a **linear** layer to the
# list of outputs recorded for that layer.
hidden_states = {}

# Tag every linear layer with its qualified name. A forward hook only receives
# the module object, so the name has to travel with the module itself.
for name, module in model.named_modules():
    if not isinstance(module, nn.Linear):
        continue
    module.custom_name = name


def get_hidden_states_output(module, input, output):
    """Forward hook that records the output of a linear layer.

    The output is detached, moved to the CPU and appended to the list kept in
    the global ``hidden_states`` dict under the layer's ``custom_name``.
    Modules that are not ``nn.Linear`` are ignored.

    Args:
        module: The module whose forward pass just ran.
        input: Inputs the module was called with (unused).
        output: Value returned by the module's forward pass.
    """
    if not isinstance(module, nn.Linear):
        return
    # Fetch (or create) the per-layer list first, then store the detached copy.
    recorded = hidden_states.setdefault(module.custom_name, [])
    recorded.append(output.detach().cpu())


# Register the hook on every linear layer.
# Forward hooks accumulate on a module: executing this cell a second time would
# attach another copy of the hook to each layer, and every output would then be
# recorded more than once. Detach the handles left behind by a previous
# execution first so the cell can be re-run safely.
for stale_handle in globals().get("hook_handles", []):
    stale_handle.remove()

# `register_forward_hook` returns a removable handle; keep them so the hooks
# can be detached later (e.g. `for h in hook_handles: h.remove()`).
hook_handles = []
for name, module in model.named_modules():
    if isinstance(module, nn.Linear):
        hook_handles.append(module.register_forward_hook(get_hidden_states_output))

# Run the benchmark; any forward pass it performs through the hooked layers is
# recorded in `hidden_states`.
benchmark.evaluate(sample_size=100)

In [None]:
# Summarise how many outputs were captured for each hooked layer.
for layer_name in hidden_states:
    outputs = hidden_states[layer_name]
    print(f"Layer {layer_name} has {len(outputs)} outputs")