# IOI Circuit Analysis - Code Questions Solutions

This notebook contains solutions and auto-checks for the code-based exam questions.

## CQ1: Budget Constraint Verification and Component Analysis

**Task**: Write code to verify the budget constraint calculation. Given the final circuit composition (31 attention heads and 12 MLPs), compute the total number of dimensions written by the circuit and verify that it equals 11,200. Also calculate what percentage of the budget is used by attention heads and what percentage by MLPs.

In [None]:
# TODO: Calculate the budget for the IOI circuit
# NOTE: this cell is a fill-in template. Each `# YOUR CODE HERE` marker sits
# where an expression is required, so the cell raises SyntaxError until every
# marker has been replaced with code.
# Given information:
# - 31 attention heads in the circuit
# - 12 MLPs in the circuit
# - Each attention head writes: d_model / n_heads = 768 / 12 = 64 dimensions
#   (the 12 in this formula is a different quantity from the `n_heads = 31`
#   variable below, which counts the heads in the circuit)
# - Each MLP writes: d_model = 768 dimensions

# TODO: Calculate total dimensions written by attention heads
n_heads = 31
dims_per_head = 64
total_head_dims = # YOUR CODE HERE

# TODO: Calculate total dimensions written by MLPs
n_mlps = 12
dims_per_mlp = 768
total_mlp_dims = # YOUR CODE HERE

# TODO: Calculate total budget
total_budget = # YOUR CODE HERE

# TODO: Calculate percentages
# (each value is formatted with two decimals and a trailing "%" below, so it
# should be on a 0-100 scale)
head_percentage = # YOUR CODE HERE
mlp_percentage = # YOUR CODE HERE

# Print results
print(f"Attention heads: {total_head_dims} dimensions ({head_percentage:.2f}%)")
print(f"MLPs: {total_mlp_dims} dimensions ({mlp_percentage:.2f}%)")
print(f"Total budget: {total_budget} dimensions")
print(f"Budget constraint (≤11,200): {'PASS' if total_budget <= 11200 else 'FAIL'}")

In [None]:
# SOLUTION
# Circuit composition and the number of dimensions each component writes.
n_heads, dims_per_head = 31, 64
n_mlps, dims_per_mlp = 12, 768

# Dimensions written by each component family, and their combined total.
total_head_dims = dims_per_head * n_heads
total_mlp_dims = dims_per_mlp * n_mlps
total_budget = sum((total_head_dims, total_mlp_dims))

# Share of the total attributable to each family, on a 0-100 scale.
head_percentage = total_head_dims / total_budget * 100
mlp_percentage = total_mlp_dims / total_budget * 100

# Report the breakdown and whether the total stays within the 11,200 limit.
print(f"Attention heads: {total_head_dims} dimensions ({head_percentage:.2f}%)")
print(f"MLPs: {total_mlp_dims} dimensions ({mlp_percentage:.2f}%)")
print(f"Total budget: {total_budget} dimensions")
print(f"Budget constraint (≤11,200): {'PASS' if total_budget <= 11200 else 'FAIL'}")

In [None]:
# AUTO-CHECK for CQ1
# Validates the values left in the namespace by the CQ1 cell; the first failing
# assertion reports the expected value alongside the actual one.
tolerance = 0.01  # percentages are compared against two-decimal references

assert total_budget == 11200, (
    f"Expected total budget of 11,200, got {total_budget}"
)
assert abs(head_percentage - 17.71) < tolerance, (
    f"Expected head percentage ~17.71%, got {head_percentage:.2f}%"
)
assert abs(mlp_percentage - 82.29) < tolerance, (
    f"Expected MLP percentage ~82.29%, got {mlp_percentage:.2f}%"
)
assert total_head_dims == 1984, (
    f"Expected 1,984 dims from heads, got {total_head_dims}"
)
assert total_mlp_dims == 9216, (
    f"Expected 9,216 dims from MLPs, got {total_mlp_dims}"
)
print("✓ All checks passed for CQ1!")

## CQ2: Layer Distribution Analysis of Attention Heads

**Task**: Write code to analyze the layer distribution of the 31 attention heads in the circuit. Count how many heads are in each layer (0-11) and identify which layer has the most heads. Then determine whether there is a trend toward more heads in later layers by comparing the early layers (0-3) with the late layers (9-11).

In [None]:
# TODO: Analyze the layer distribution of attention heads in the circuit
# NOTE: this cell is a fill-in template. Each `# YOUR CODE HERE` marker sits
# where an expression is required, so the cell raises SyntaxError until every
# marker has been replaced with code.
# The circuit contains these attention heads (from the documentation):
# Names follow the pattern "a<layer>.h<head>"; one row per layer.
circuit_heads = [
    "a0.h1", "a0.h10", "a0.h5", "a0.h6",
    "a1.h11",
    "a3.h0", "a3.h6",
    "a6.h0",
    "a7.h3", "a7.h9",
    "a8.h10", "a8.h2", "a8.h3", "a8.h5", "a8.h6",
    "a9.h0", "a9.h2", "a9.h6", "a9.h7", "a9.h8", "a9.h9",
    "a10.h0", "a10.h1", "a10.h10", "a10.h2", "a10.h3", "a10.h6", "a10.h7",
    "a11.h10", "a11.h6", "a11.h8"
]

# TODO: Count heads per layer
# layer_counts maps a layer number to the number of circuit heads in that layer;
# layers with no circuit heads never get a key.
layer_counts = {}
for head in circuit_heads:
    # Extract layer number from head name (e.g., "a0.h1" -> layer 0)
    # The result should be an int: the printing loop below sorts the keys, and
    # integer keys sort numerically (string keys would put "10" before "9").
    layer = # YOUR CODE HERE
    if layer not in layer_counts:
        layer_counts[layer] = 0
    layer_counts[layer] += 1

# TODO: Find the layer with most heads
max_layer = # YOUR CODE HERE
max_count = # YOUR CODE HERE

# TODO: Calculate early vs late layer distribution
# Early layers: 0-3, Late layers: 9-11
# (layers 4-8 are counted in neither group)
early_layers_count = # YOUR CODE HERE
late_layers_count = # YOUR CODE HERE

# Print results
print("Layer distribution:")
for layer in sorted(layer_counts.keys()):
    print(f"  Layer {layer}: {layer_counts[layer]} heads")
print(f"\nLayer with most heads: Layer {max_layer} ({max_count} heads)")
print(f"\nEarly layers (0-3): {early_layers_count} heads")
print(f"Late layers (9-11): {late_layers_count} heads")
# A tie between the two groups is reported as 'More heads in early layers'.
print(f"Trend: {'More heads in later layers' if late_layers_count > early_layers_count else 'More heads in early layers'}")

In [None]:
# SOLUTION
# Attention heads in the circuit, named "a<layer>.h<head>"; one row per layer.
circuit_heads = [
    "a0.h1", "a0.h10", "a0.h5", "a0.h6",
    "a1.h11",
    "a3.h0", "a3.h6",
    "a6.h0",
    "a7.h3", "a7.h9",
    "a8.h10", "a8.h2", "a8.h3", "a8.h5", "a8.h6",
    "a9.h0", "a9.h2", "a9.h6", "a9.h7", "a9.h8", "a9.h9",
    "a10.h0", "a10.h1", "a10.h10", "a10.h2", "a10.h3", "a10.h6", "a10.h7",
    "a11.h10", "a11.h6", "a11.h8"
]

# Tally heads per layer. The layer index is the integer between the leading
# 'a' and the first '.' (e.g., "a10.h3" -> 10).
layer_counts = {}
for head_name in circuit_heads:
    layer_idx = int(head_name.partition('.')[0][1:])
    layer_counts[layer_idx] = layer_counts.get(layer_idx, 0) + 1

# Busiest layer: the (layer, count) pair with the largest count.
max_layer, max_count = max(layer_counts.items(), key=lambda pair: pair[1])

# Heads in the early group (layers 0-3) and in the late group (layers 9-11).
early_layers_count = sum(n for lyr, n in layer_counts.items() if 0 <= lyr <= 3)
late_layers_count = sum(n for lyr, n in layer_counts.items() if 9 <= lyr <= 11)

print("Layer distribution:")
for layer in sorted(layer_counts):
    print(f"  Layer {layer}: {layer_counts[layer]} heads")
print(f"\nLayer with most heads: Layer {max_layer} ({max_count} heads)")
print(f"\nEarly layers (0-3): {early_layers_count} heads")
print(f"Late layers (9-11): {late_layers_count} heads")
print(f"Trend: {'More heads in later layers' if late_layers_count > early_layers_count else 'More heads in early layers'}")

In [None]:
# AUTO-CHECK for CQ2
# Validates the values left in the namespace by the CQ2 cell. Per-layer counts
# are read with .get(layer, 0) so that a layer missing from `layer_counts`
# fails the assertion with its descriptive message instead of raising a bare
# KeyError before the message can be shown.
assert max_layer == 10, f"Expected layer 10 to have most heads, got layer {max_layer}"
assert max_count == 7, f"Expected max count of 7 heads, got {max_count}"
assert early_layers_count == 7, f"Expected 7 heads in early layers (0-3), got {early_layers_count}"
assert late_layers_count == 16, f"Expected 16 heads in late layers (9-11), got {late_layers_count}"
assert layer_counts.get(0, 0) == 4, f"Expected 4 heads in layer 0, got {layer_counts.get(0, 0)}"
assert layer_counts.get(9, 0) == 6, f"Expected 6 heads in layer 9, got {layer_counts.get(9, 0)}"
assert layer_counts.get(11, 0) == 3, f"Expected 3 heads in layer 11, got {layer_counts.get(11, 0)}"
print("✓ All checks passed for CQ2!")

## CQ3: Minimal Circuit Budget Calculation

**Task**: Write code to simulate a minimal IOI circuit that uses only the top-1 head from each functional category (Duplicate Token: a3.h0, S-Inhibition: a8.h6, Name-Mover: a9.h9) plus all 12 MLPs. Calculate the total budget used, how much of the 11,200-dimension budget limit remains unused, and how much budget is saved compared to the full circuit.

In [None]:
# TODO: Calculate budget for a minimal IOI circuit
# NOTE: this cell is a fill-in template. Each `# YOUR CODE HERE` marker sits
# where an expression is required, so the cell raises SyntaxError until every
# marker has been replaced with code.
# Minimal circuit composition:
# - Top Duplicate Token Head: a3.h0
# - Top S-Inhibition Head: a8.h6
# - Top Name-Mover Head: a9.h9
# - All 12 MLPs (m0 through m11)

# Constants
DIMS_PER_HEAD = 64
DIMS_PER_MLP = 768
BUDGET_LIMIT = 11200
FULL_CIRCUIT_BUDGET = 11200  # From the documentation

# TODO: Calculate minimal circuit budget
minimal_heads = 3  # One from each category
minimal_mlps = 12
minimal_budget = # YOUR CODE HERE

# TODO: Calculate remaining budget
# (headroom left under BUDGET_LIMIT)
budget_remaining = # YOUR CODE HERE

# TODO: Calculate savings compared to full circuit
# (difference from FULL_CIRCUIT_BUDGET)
budget_saved = # YOUR CODE HERE

# Print results
print(f"Minimal Circuit Composition:")
print(f"  - Attention heads: {minimal_heads} heads × {DIMS_PER_HEAD} dims = {minimal_heads * DIMS_PER_HEAD} dims")
print(f"  - MLPs: {minimal_mlps} MLPs × {DIMS_PER_MLP} dims = {minimal_mlps * DIMS_PER_MLP} dims")
print(f"\nMinimal circuit budget: {minimal_budget} dimensions")
print(f"Budget remaining: {budget_remaining} dimensions")
print(f"Budget saved vs full circuit: {budget_saved} dimensions")
print(f"\nBudget constraint (≤11,200): {'PASS' if minimal_budget <= BUDGET_LIMIT else 'FAIL'}")

In [None]:
# SOLUTION
# Dimensions written per component, plus the two reference budgets.
DIMS_PER_HEAD, DIMS_PER_MLP = 64, 768
BUDGET_LIMIT = FULL_CIRCUIT_BUDGET = 11200

# Minimal circuit: one head per functional category plus all MLPs.
minimal_heads, minimal_mlps = 3, 12
minimal_budget = minimal_heads * DIMS_PER_HEAD + minimal_mlps * DIMS_PER_MLP

# Headroom under the limit, and savings relative to the full circuit.
budget_remaining, budget_saved = (
    reference - minimal_budget
    for reference in (BUDGET_LIMIT, FULL_CIRCUIT_BUDGET)
)

print(f"Minimal Circuit Composition:")
print(f"  - Attention heads: {minimal_heads} heads × {DIMS_PER_HEAD} dims = {minimal_heads * DIMS_PER_HEAD} dims")
print(f"  - MLPs: {minimal_mlps} MLPs × {DIMS_PER_MLP} dims = {minimal_mlps * DIMS_PER_MLP} dims")
print(f"\nMinimal circuit budget: {minimal_budget} dimensions")
print(f"Budget remaining: {budget_remaining} dimensions")
print(f"Budget saved vs full circuit: {budget_saved} dimensions")
print(f"\nBudget constraint (≤11,200): {'PASS' if minimal_budget <= BUDGET_LIMIT else 'FAIL'}")

In [None]:
# AUTO-CHECK for CQ3
# Validates the values left in the namespace by the CQ3 cell; the first failing
# assertion reports the expected value alongside the actual one.
assert minimal_budget == 9408, (
    f"Expected minimal budget of 9,408 dims, got {minimal_budget}"
)
assert budget_remaining == 1792, (
    f"Expected 1,792 dims remaining, got {budget_remaining}"
)
assert budget_saved == 1792, (
    f"Expected 1,792 dims saved, got {budget_saved}"
)
assert minimal_budget <= BUDGET_LIMIT, (
    "Minimal circuit exceeds budget limit!"
)
print("✓ All checks passed for CQ3!")