# Model Emphasis and Analysis Demo

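This notebook demonstrates the model emphasis, analysis, and generation capabilities: it loads a model, generates a batch of baseline responses, and then regenerates the same prompts after applying an emphasis configuration to specific layers, attention heads, and neurons.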

In [1]:
import mlx.core as mx
import mlx.nn as nn

from mi_experiments.utils.loading import load

import re
from dataclasses import dataclass
from typing import Dict, Optional, Union

from mlx_lm.tokenizer_utils import TokenizerWrapper


  from .autonotebook import tqdm as notebook_tqdm


In [2]:

# Load the model and its tokenizer through the project-local `load` helper.
# The identifier names the checkpoint to load (a "4bit" Mistral-7B-Instruct v0.3
# variant, judging by its name); `load` returns a (model, tokenizer) pair.
model, tokenizer = load("mlx-community/Mistral-7B-Instruct-v0.3-4bit")

# Utility functions
def create_additive_causal_mask(N: int, offset: int = 0):
    """Build an additive causal mask with ``N`` rows and ``offset + N`` columns.

    Row ``i`` stands for position ``offset + i`` and column ``j`` for position
    ``j``. Entries whose column position lies strictly after the row position
    are ``-1e9``; every other entry is zero.

    Args:
        N: Number of rows (new positions) in the mask.
        offset: Number of earlier positions that precede the rows.

    Returns:
        The boolean "column is in the future" matrix multiplied by ``-1e9``.
    """
    column_positions = mx.arange(offset + N)
    if offset:
        row_positions = mx.arange(offset, offset + N)
    else:
        # Without an offset the rows cover exactly the same positions as the columns.
        row_positions = column_positions
    is_future = row_positions[:, None] < column_positions[None]
    return is_future * -1e9

@dataclass
class ModelArgs:
    """Container for the architecture hyperparameters of a model.

    Use :meth:`from_model` to populate it from an already-loaded model.
    """

    # Required fields (no default).
    model_type: str
    hidden_size: int
    num_hidden_layers: int
    intermediate_size: int
    num_attention_heads: int
    rms_norm_eps: float
    vocab_size: int

    # Optional fields; these keep the defaults below unless set explicitly.
    head_dim: Optional[int] = None
    max_position_embeddings: Optional[int] = None
    num_key_value_heads: Optional[int] = None
    attention_bias: bool = False
    mlp_bias: bool = False
    rope_theta: float = 10000
    rope_traditional: bool = False
    rope_scaling: Optional[Dict[str, Union[float, str]]] = None
    tie_word_embeddings: bool = True

    @classmethod
    def from_model(cls, model: nn.Module):
        """Create a ``ModelArgs`` from a loaded model.

        Reads ``model_type``, ``head_dim`` and ``n_kv_heads`` from the model
        itself and the remaining values from ``model.args``. Fields that are
        not read here (``max_position_embeddings``, ``attention_bias``,
        ``mlp_bias`` and the ``rope_*`` settings) keep their dataclass defaults.

        Args:
            model: The loaded model to inspect.

        Returns:
            A new ``ModelArgs`` instance.
        """
        values = {"model_type": model.model_type}
        # Fields copied one-to-one from the model's own argument object.
        for field_name in (
            "hidden_size",
            "num_hidden_layers",
            "intermediate_size",
            "num_attention_heads",
            "rms_norm_eps",
            "vocab_size",
        ):
            values[field_name] = getattr(model.args, field_name)
        # These two live on the model object and use different attribute names.
        values["head_dim"] = model.head_dim
        values["num_key_value_heads"] = model.n_kv_heads
        values["tie_word_embeddings"] = model.args.tie_word_embeddings
        return cls(**values)

# Capture the loaded model's hyperparameters in a ModelArgs instance.
model_args = ModelArgs.from_model(model)

# Make sure the tokenizer is a TokenizerWrapper; an already-wrapped tokenizer is kept as is.
tokenizer = tokenizer if isinstance(tokenizer, TokenizerWrapper) else TokenizerWrapper(tokenizer)

def format_prompt(prompt: str) -> str:
    """Wrap ``prompt`` between ``[INST]`` and ``[/INST]`` markers.

    Args:
        prompt: The raw user prompt.

    Returns:
        The prompt surrounded by the markers, separated by single spaces.
    """
    return "[INST] {} [/INST]".format(prompt)

def extract_generated_text(full_text: str) -> str:
    """Return the text that follows the first ``[/INST]`` marker.

    Args:
        full_text: Text that may contain an ``[/INST]`` marker.

    Returns:
        Everything after the first ``[/INST]`` (newlines included), with
        surrounding whitespace stripped, or ``""`` when no marker is present.
    """
    found = re.search(r'\[/INST\]\s*(.*)', full_text, re.DOTALL)
    if found is None:
        return ""
    return found.group(1).strip()

Fetching 7 files: 100%|██████████| 7/7 [00:00<00:00, 146070.29it/s]


In [3]:
import string
import random
from mi_experiments.inference.batch_manager import batch_generate

capital_letters = string.ascii_uppercase
# Every unordered pair of distinct capital letters: ("A", "B"), ("A", "C"), ..., ("Y", "Z").
distinct_pairs = [(a, b) for i, a in enumerate(capital_letters) for b in capital_letters[i + 1:]]

num_prompts = 10
# Sample letter pairs from a dedicated, seeded generator so that a fresh
# Restart & Run All builds the same prompts every time, without touching the
# global `random` state.
prompt_seed = 0
prompt_rng = random.Random(prompt_seed)

prompt_template = "Think of a real word containing both the letters {l1} and {l2}. Then, say 3 sentences which use the word."
prompts_raw = [prompt_template.format(l1=p[0], l2=p[1]) for p in prompt_rng.sample(distinct_pairs, num_prompts)]
prompt_template_2 = "Come up with a real English word containing both the letters {l1} and {l2}. No acronyms. Then, give 3 complete sentences which use the word."
prompts_raw_2 = [prompt_template_2.format(l1=p[0], l2=p[1]) for p in prompt_rng.sample(distinct_pairs, num_prompts)]

# Baseline generation. Both lists already hold exactly `num_prompts` entries,
# so they are passed whole rather than sliced with a hard-coded length.
response = batch_generate(model, tokenizer, prompts=prompts_raw + prompts_raw_2, max_tokens=20, verbose=True, temp=0.0)

Prompt: 276.779 tokens-per-second
Generation: 5926.328 tokens-per-second
Prompt: Think of a real word containing both the letters C and F. Then, say 3 sentences which use the word.
1. The cat chased the fox through the forest.
2. The chef cooked
Prompt: Think of a real word containing both the letters R and Y. Then, say 3 sentences which use the word.
Sure, here are three sentences using the word "Rhythm":

1. "The rhythm
Prompt: Think of a real word containing both the letters B and S. Then, say 3 sentences which use the word.
Sure, here are three sentences using the word "Basket":

1. "Basket
Prompt: Think of a real word containing both the letters N and V. Then, say 3 sentences which use the word.
Sure, here are three sentences using the word "November":

1. "November
Prompt: Think of a real word containing both the letters F and V. Then, say 3 sentences which use the word.
Sure, here are three sentences using the word "future":

1. "The future of
Prompt: Think of a real word contai

In [5]:
# Generate with an emphasis configuration applied to specific layers, heads, and neurons.
# NOTE(review): the meaning of the keys and factors is defined by
# model.set_emphasis_config, which is not shown here. The comments below assume
# keys are 0-based indices and values are scale factors where 1.0 leaves the
# component unchanged — TODO confirm.
emphasis_config = {
    'layers': {
        '0': 1.0,  # layer '0': factor 1.0 (presumably unchanged; use > 1.0 to boost)
        '1': 1.0,  # layer '1': factor 1.0 (presumably unchanged; use < 1.0 to reduce)
        '30': 1.0,  # layer '30': factor 1.0 (presumably unchanged; use 0.0 to zero out)
    },
    'heads': {
        '0': {'0': 1.0, '1': 0.5},  # layer '0': head '0' at 1.0, head '1' at 0.5
    },
    'neurons': {
        '1': {'10': 0.0, '20': 2.0}  # layer '1': neuron '10' at 0.0, neuron '20' at 2.0
    }
}

# Apply emphasis configuration
# NOTE(review): this is set on the shared `model` object, so it presumably also
# affects any generation run after this cell — confirm how to reset it.
model.set_emphasis_config(emphasis_config)

# Generate with modified model, using the same prompts and settings as the baseline run
response_emphasized = batch_generate(
    model, 
    tokenizer, 
    prompts=prompts_raw[:10]+prompts_raw_2[:10],
    max_tokens=20,
    verbose=True,
    temp=0.0
)


Prompt: 254.875 tokens-per-second
Generation: 5562.947 tokens-per-second
Prompt: Think of a real word containing both the letters C and F. Then, say 3 sentences which use the word.
1. The cat chased the fox through the forest.
2. The chef cooked
Prompt: Think of a real word containing both the letters R and Y. Then, say 3 sentences which use the word.
Sure, here are three sentences using the word "Rhythm":

1. "The rhythm
Prompt: Think of a real word containing both the letters B and S. Then, say 3 sentences which use the word.
Sure, here are three sentences using the word "Basket":

1. "John loves
Prompt: Think of a real word containing both the letters N and V. Then, say 3 sentences which use the word.
Sure, here are three sentences using the word "November":

1. "November
Prompt: Think of a real word containing both the letters F and V. Then, say 3 sentences which use the word.
Sure, here are three sentences using the word "future":

1. "The future of
Prompt: Think of a real word co