# LLaMA Token Analyzer - Complete Example

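This notebook walks through the LLaMA Token Analyzer framework end to end: initializing the model and storage, analyzing and visualizing a single prompt, and running a batch analysis over a set of deliberately difficult prompts.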


In [3]:
import os

# Export the MPS allocator setting for this process before the analyzer
# package is imported in the next cell.
# NOTE(review): per the PyTorch MPS docs a ratio of 0.0 disables the
# allocator's high-watermark limit -- confirm for the installed torch version.
os.environ.update(PYTORCH_MPS_HIGH_WATERMARK_RATIO="0.0")

In [4]:
from returns.result import Success, Failure
from work.llama_token_analyzer.visualization.main import visualize
from work.llama_token_analyzer import (
    ModelManager,
    TokenAnalyzer,
    TokenAnalysisStorage
)

import logging

# Configure the root logger at INFO level so that INFO records emitted while
# the following cells run (e.g. model loading) are shown in the cell output.
logging.basicConfig(level=logging.INFO)


## Initialize Components


In [5]:
model_config = {
    "llm_id": "meta-llama/Llama-3.2-3B-Instruct",
    "device": "auto",
    "torch_dtype": "float16"
}

model_result = ModelManager.initialize(model_config)

# Initialize other components
storage = TokenAnalysisStorage(base_path="work/notebooks/output")

match model_result:
    case Success(manager):
        analyzer = TokenAnalyzer(manager)
        analyze = analyzer.create_analysis_pipeline(storage)
    case Failure(error):
        raise RuntimeError(f"Failed to load model: {error}")

INFO:llama_token_analyzer.core.model:Loading model meta-llama/Llama-3.2-3B-Instruct
INFO:llama_token_analyzer.core.model:Device map: mps


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## Single Prompt Analysis


In [None]:
prompt = "Write a story with a plottwist"

analysis_result = analyze(prompt)

match analysis_result:
    case Success(r):
        visualize(r, storage=storage)
    case Failure(error):
        raise RuntimeError(f"Analysis pipeline failed: {error}")


Calculating token association:  12%|█▏        | 81/689 [04:36<4:36:02, 27.24s/token]

## Batch Analysis


### Data

In [None]:
# Analyze multiple prompts
# Prompts that impose unusual constraints on the output. The note above each
# entry records what that prompt is intended to provoke.
difficult_prompts = [
    # Arithmetic statement built from comma-grouped digit runs
    "111,111,111 × 111,111,111 = 12,345,678,987,654,321",
    # Forces increasing token lengths
    "Write a tiny story where every word is longer than the previous one",
    # Tests character-level manipulation, creates unusual consonant clusters
    "Replace all vowels with 'z' in: 'The quick brown fox'",
    # Forces rare word choices like "noon", "deed", "madam"
    "Write a sentence using only words that are palindromes",
    # Forces constrained vocabulary with numbers: "five", "four", "three"...
    "Count down from 5 using only words starting with that number's first letter",
    # Forces character code progression
    "Create a 6-word story using ascending ASCII characters",
    # Creates unusual token repetitions
    "Make a sentence where each word starts with 'th'",
    # Tests handling of impossible constraints
    "Write words that rhyme with 'xyz'",
    # Forces specific initials in reverse
    "Spell HELLO using words that start with each letter backwards",
    # Forces unusual word patterns: "book", "teeth", "little"
    "Create a sentence where each word contains double letters",
    # Forces fixed token lengths
    "Write a question using only four-letter words",
]

### Processing

In [None]:
prompts = [
    *difficult_prompts,
]
# Analyze prompts in batch
analysis_result_batch = analyze(prompts)
match analysis_result_batch:
    case Success(all_results):
        [visualize(single_result, storage=storage) for single_result in all_results]
    case Failure(error):
        raise RuntimeError(f"Batch analysis failed: {error}")