In [1]:
import torch
from model_analyzer import ModelAnalyzer
from activation_extraction import (
    ActivationRecord,
    save_activations,
    load_activations,
    compare_activations,
    get_activation_statistics
)
from intervention import (
    InterventionHandler,
    ActivationPatch,
    SteeringVector,
    create_steering_vector as _create_steering_vector
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Announce start-up, then prefer the GPU when CUDA is available and fall back
# to the CPU otherwise.
print("Initializing ModelAnalyzer...")
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Point the analyzer at the Llama-3.2-1B directory (path is relative to the
# notebook's working directory) and load the model onto the chosen device.
analyzer = ModelAnalyzer("../models/Llama-3.2-1B", device=device)
analyzer.load_model()

# Print the architecture overview (layer count, sizes, layer names) so later
# cells can refer to layers by index.
analyzer.print_architecture_summary()

Initializing ModelAnalyzer...
Initializing ModelAnalyzer for ..\models\Llama-3.2-1B
Device: cuda
[OK] ModelAnalyzer ready
Loading model from ..\models\Llama-3.2-1B...
[OK] Model loaded on cuda

MODEL ARCHITECTURE SUMMARY

Model Type: llama
Number of Layers: 16
Hidden Size: 2048
Attention Heads: 32
Vocabulary Size: 128256
Max Position Embeddings: 131072

Total Parameters: 1,235,814,400
Trainable Parameters: 1,235,814,400

Layer Names:
  0: model.layers.0
  1: model.layers.1
  2: model.layers.2
  3: model.layers.3
  4: model.layers.4
  5: model.layers.5
  6: model.layers.6
  7: model.layers.7
  8: model.layers.8
  9: model.layers.9
  10: model.layers.10
  11: model.layers.11
  12: model.layers.12
  13: model.layers.13
  14: model.layers.14
  15: model.layers.15



In [None]:
# Let the model continue the arithmetic prompt for at most 10 new tokens.
generated_text = analyzer.generate(prompt="352+946=", max_new_tokens=10)

# Options forwarded to extract_activations, spelled out explicitly so the
# call does not depend on the method's defaults.
# NOTE(review): layer_names / layer_indices are None -- presumably this selects
# every layer; confirm against ModelAnalyzer.extract_activations.
extraction_options = dict(
    layer_names=None,
    layer_indices=None,
    include_attention=True,
    return_logits=False,
    metadata=None,
)

# Run the generated text back through the model and keep the resulting record
# for the inspection cells that follow.
record = analyzer.extract_activations(text=generated_text, **extraction_options)


In [84]:
# Dump the main fields of the extracted record, one per line and in this order:
# prompt, tokens, token ids, metadata.
for field_name in ("prompt", "tokens", "token_ids", "metadata"):
    print(getattr(record, field_name))


352+16346=1, 349.99+16346=
['<|begin_of_text|>', '352', '+', '163', '46', '=', '1', ',', 'Ġ', '349', '.', '99', '+', '163', '46', '=']
[128000, 16482, 10, 9892, 2790, 28, 16, 11, 220, 18634, 13, 1484, 10, 9892, 2790, 28]
{'num_tokens': 16, 'num_layers': 17, 'model_name': 'llama'}


In [None]:
# Apply the logit lens to the extracted record at a single token position,
# asking for the 50 highest-ranked tokens per layer.
# NOTE(review): token_position=4 is hard-coded. When an operand is split into
# several tokens (e.g. '163', '46'), index 4 is an operand piece rather than
# '=' -- confirm which position is meant to be probed.
lens_view = analyzer.logit_lens_on_activation(
    activation=record, token_position=4, layer_indices=None, top_k=50, apply_ln=True
)

# For every layer in the result, print its index, then the top-k tokens,
# then the matching top-k probabilities.
for layer_idx, layer_view in lens_view['layers'].items():
    print(f"Layer {layer_idx}")
    print(layer_view['top_k_tokens'])
    print(layer_view['top_k_probs'])
    # print(layer_view['logits'])


Layer 0
['46', '47', '45', '44', '48', '43', '42', '49', '36', '41', '56', '54', '38', '37', '58', '51', '52', '53', '39', '34', '66', '57', '40', '55', '86', '50', '26', '046', '62', '59', '35', '33', '61', '96', '146', '466', '76', '74', '32', '68', '92', '69', '63', '462', '73', '64', '29', '346', '67', '60']
[1.5480285583180375e-05, 1.1995482964266557e-05, 1.1683249795169104e-05, 1.1416409506637137e-05, 1.1237934813834727e-05, 1.117999909183709e-05, 1.093542141461512e-05, 1.090335445042001e-05, 1.0827027836057823e-05, 1.0729344467108604e-05, 1.0611451216391288e-05, 1.0588569239189383e-05, 1.0540360563027207e-05, 1.0449082765262574e-05, 1.041555151459761e-05, 1.0394593118689954e-05, 1.0351340279157739e-05, 1.033456101140473e-05, 1.0328190910513513e-05, 1.0233124157821294e-05, 1.0208938874711748e-05, 1.0195968570769764e-05, 1.0158469194720965e-05, 1.0128045687451959e-05, 1.0087830560223665e-05, 1.007502851280151e-05, 1.006979618978221e-05, 1.005217472993536e-05, 1.0036068488261662e-0

In [18]:
# Read the previously saved activation records back from disk ("pt" format).
# NOTE(review): .pt files are presumably unpickled on load -- only open files
# from a trusted source; confirm how load_activations deserializes.
activations = load_activations("activations/activation_records.pt", format="pt")
record_count = len(activations)
print(f"Loaded {record_count} activation records.")

print(type(activations))

# List every record, showing only the part of its prompt up to the first '='
# (any text after it is dropped).
for i, act in enumerate(activations):
    question = act.prompt.partition('=')[0]
    print(f"Activation Record {i}:")
    print(f"  Prompt: {question}=")


Loaded 53 record(s) from activations\activation_records.pt (PyTorch format)
Loaded 53 activation records.
<class 'list'>
Activation Record 0:
  Prompt: 1+1=
Activation Record 1:
  Prompt: 1+2=
Activation Record 2:
  Prompt: 1+3=
Activation Record 3:
  Prompt: 1+4=
Activation Record 4:
  Prompt: 1+5=
Activation Record 5:
  Prompt: 2+1=
Activation Record 6:
  Prompt: 2+2=
Activation Record 7:
  Prompt: 2+3=
Activation Record 8:
  Prompt: 2+4=
Activation Record 9:
  Prompt: 2+5=
Activation Record 10:
  Prompt: 3+1=
Activation Record 11:
  Prompt: 3+2=
Activation Record 12:
  Prompt: 3+3=
Activation Record 13:
  Prompt: 3+4=
Activation Record 14:
  Prompt: 3+5=
Activation Record 15:
  Prompt: 4+1=
Activation Record 16:
  Prompt: 4+2=
Activation Record 17:
  Prompt: 4+3=
Activation Record 18:
  Prompt: 4+4=
Activation Record 19:
  Prompt: 4+5=
Activation Record 20:
  Prompt: 5+1=
Activation Record 21:
  Prompt: 5+2=
Activation Record 22:
  Prompt: 5+3=
Activation Record 23:
  Prompt: 5+4=
A

In [25]:
# Pick one stored record (index 50) for a closer look.
act = activations[50]

# Blank line, full prompt text, a separator, the token strings and the
# metadata -- each on its own line.
print("", act.prompt, "=====================", act.tokens, act.metadata, sep="\n")

# Shape of the stored activation tensor for every layer entry.
layer_shapes = [layer_tensor.shape for layer_tensor in act.layer_activations.values()]
print(layer_shapes)
# print([attn.shape for attn in act.attention_weights.values()])

# Apply the logit lens to this record at token index 4, asking for the
# 50 highest-ranked tokens per layer.
lens_view = analyzer.logit_lens_on_activation(
    activation=act, token_position=4, layer_indices=None, top_k=50, apply_ln=True
)

# For every layer in the result, print its index, then the top-k tokens,
# then the matching top-k probabilities.
for layer_idx, layer_view in lens_view['layers'].items():
    print(f"Layer {layer_idx}")
    print(layer_view['top_k_tokens'])
    print(layer_view['top_k_probs'])
    # print(layer_view['logits'])


336+639=1209 (mod 10)
n = 0:4
f = function(n)
   
['<|begin_of_text|>', '336', '+', '639', '=', '120', '9', 'Ġ(', 'mod', 'Ġ', '10', ')Ċ', 'n', 'Ġ=', 'Ġ', '0', ':', '4', 'Ċ', 'f', 'Ġ=', 'Ġfunction', '(n', ')Ċ', 'ĠĠĠ']
{'num_tokens': 25, 'num_layers': 17, 'model_name': 'llama'}
[torch.Size([1, 25, 2048]), torch.Size([1, 25, 2048]), torch.Size([1, 25, 2048]), torch.Size([1, 25, 2048]), torch.Size([1, 25, 2048]), torch.Size([1, 25, 2048]), torch.Size([1, 25, 2048]), torch.Size([1, 25, 2048]), torch.Size([1, 25, 2048]), torch.Size([1, 25, 2048]), torch.Size([1, 25, 2048]), torch.Size([1, 25, 2048]), torch.Size([1, 25, 2048]), torch.Size([1, 25, 2048]), torch.Size([1, 25, 2048]), torch.Size([1, 25, 2048]), torch.Size([1, 25, 2048])]
Layer 0
['=', ' =', '-', '(', '/', '[', '+', '>', '="', ' ', '<', '=-', ':', '==', '*', '=\\', '=.', '=(', '=\n', "='", ',', '}=', '_', '.', '|', '&', ' (', '=s', '\n', '\\', ')=', '1', '=[', '?', '={', ';', '=true', '@', '=$', '=p', '=m', ')', '!', '=new', '{',