# IOI Circuit Analysis - Code Question Solutions

This notebook contains the code questions with their solutions and auto-check cells.

**Instructions**: Run each cell from top to bottom. The auto-check cells validate the expected outputs.

---

## CQ1: Write code to verify the write budget of the IOI circuit. Given the circuit nodes list, count the attention heads and MLPs, then calculate the total budget used and utilization percentage.

**Reference**: Section 4. Results - Budget Verification

### Student Stub

In [None]:
# CQ1: Circuit Budget Verification
# Your task is to verify the write budget of the identified IOI circuit.
#
# Replace each `None` placeholder below with your own code. Until you do,
# the summary at the bottom still runs and reports the unfinished values
# instead of raising an error.

# Given circuit data: the "input" node, attention heads (names starting
# with 'a') and MLPs (names starting with 'm').
circuit_nodes = [
    "input",
    "a0.h1", "a0.h10", "a0.h5", "a0.h6",
    "a1.h11",
    "a10.h0", "a10.h1", "a10.h10", "a10.h2", "a10.h3", "a10.h6", "a10.h7",
    "a11.h10", "a11.h6", "a11.h8",
    "a3.h0", "a3.h6",
    "a6.h0",
    "a7.h3", "a7.h9",
    "a8.h10", "a8.h2", "a8.h3", "a8.h5", "a8.h6",
    "a9.h0", "a9.h2", "a9.h6", "a9.h7", "a9.h8", "a9.h9",
    "m0", "m1", "m2", "m3", "m4", "m5", "m6", "m7", "m8", "m9", "m10", "m11"
]

# Model configuration
D_HEAD = 64  # dimensions per attention head
D_MODEL = 768  # dimensions per MLP
BUDGET_LIMIT = 11200  # maximum allowed dimensions

# TODO: Count the number of attention heads (nodes starting with 'a')
n_attention_heads = None  # Your code here

# TODO: Count the number of MLPs (nodes starting with 'm')
n_mlps = None  # Your code here

# TODO: Calculate total write budget used
total_budget = None  # Your code here

# TODO: Calculate budget utilization percentage
utilization_percentage = None  # Your code here

# Print results
print(f"Attention heads: {n_attention_heads}")
print(f"MLPs: {n_mlps}")
print(f"Total budget used: {total_budget}")
# A numeric format spec such as ":.1f" raises TypeError when applied to None,
# so the percentage is only formatted once the TODO above has been completed.
if utilization_percentage is None:
    print("Budget utilization: None (not yet implemented)")
else:
    print(f"Budget utilization: {utilization_percentage:.1f}%")


### SOLUTION

In [None]:
# CQ1: Circuit Budget Verification - SOLUTION
#
# Tallies the attention-head and MLP nodes of the circuit, converts the
# tallies into written dimensions, and compares the sum with the budget limit.

# Given circuit data:
circuit_nodes = [
    "input",
    "a0.h1", "a0.h10", "a0.h5", "a0.h6",
    "a1.h11",
    "a10.h0", "a10.h1", "a10.h10", "a10.h2", "a10.h3", "a10.h6", "a10.h7",
    "a11.h10", "a11.h6", "a11.h8",
    "a3.h0", "a3.h6",
    "a6.h0",
    "a7.h3", "a7.h9",
    "a8.h10", "a8.h2", "a8.h3", "a8.h5", "a8.h6",
    "a9.h0", "a9.h2", "a9.h6", "a9.h7", "a9.h8", "a9.h9",
    "m0", "m1", "m2", "m3", "m4", "m5", "m6", "m7", "m8", "m9", "m10", "m11",
]

# Model configuration
D_HEAD = 64  # dimensions per attention head
D_MODEL = 768  # dimensions per MLP
BUDGET_LIMIT = 11200  # maximum allowed dimensions

# Attention heads are the nodes whose name begins with 'a'.
head_nodes = [name for name in circuit_nodes if name.startswith('a')]
n_attention_heads = len(head_nodes)

# MLPs are the nodes whose name begins with 'm'.
mlp_nodes = [name for name in circuit_nodes if name.startswith('m')]
n_mlps = len(mlp_nodes)

# Each head contributes D_HEAD dimensions and each MLP contributes D_MODEL.
head_dims = n_attention_heads * D_HEAD
mlp_dims = n_mlps * D_MODEL
total_budget = head_dims + mlp_dims

# Share of the allowed budget that the circuit consumes, in percent.
utilization_percentage = (total_budget / BUDGET_LIMIT) * 100

# Print results
for summary_line in (
    f"Attention heads: {n_attention_heads}",
    f"MLPs: {n_mlps}",
    f"Total budget used: {total_budget}",
    f"Budget utilization: {utilization_percentage:.1f}%",
):
    print(summary_line)


---

## CQ2: Write code to analyze the layer distribution of attention heads in the circuit. Parse each head name to extract its layer, count heads per layer, find the layer with most heads, and count heads in early (0-3) and late (9-11) layers.

**Reference**: Section 4. Results - Layer Distribution and Section 5. Analysis - Key Observations

### Student Stub

In [None]:
# CQ2: Layer Distribution Analysis
# Goal: work out how the circuit's attention heads are spread across layers.
# Replace every placeholder marked "Your code here" with your own code.

# Attention head nodes taken from the circuit; names follow a{layer}.h{head}.
attention_heads = [
    "a0.h1", "a0.h10", "a0.h5", "a0.h6",
    "a1.h11",
    "a10.h0", "a10.h1", "a10.h10", "a10.h2", "a10.h3", "a10.h6", "a10.h7",
    "a11.h10", "a11.h6", "a11.h8",
    "a3.h0", "a3.h6",
    "a6.h0",
    "a7.h3", "a7.h9",
    "a8.h10", "a8.h2", "a8.h3", "a8.h5", "a8.h6",
    "a9.h0", "a9.h2", "a9.h6", "a9.h7", "a9.h8", "a9.h9",
]

# TODO: Extract the layer number from every head name and build a
# dictionary mapping layer -> number of heads in that layer.
layer_counts = {}  # Your code here

# TODO: Identify the layer holding the most heads, and how many it holds.
max_layer = None  # Your code here
max_count = None  # Your code here

# TODO: Total the heads that sit in the early layers (0-3).
early_layer_count = None  # Your code here

# TODO: Total the heads that sit in the late layers (9-11).
late_layer_count = None  # Your code here

# Print results (the per-layer dictionary is shown in ascending layer order)
counts_by_layer = dict(sorted(layer_counts.items()))
print(f"Heads per layer: {counts_by_layer}")
print(f"Layer with most heads: {max_layer} ({max_count} heads)")
print(f"Early layer heads (0-3): {early_layer_count}")
print(f"Late layer heads (9-11): {late_layer_count}")


### SOLUTION

In [None]:
# CQ2: Layer Distribution Analysis - SOLUTION
#
# Groups the circuit's attention heads by layer, then reports the busiest
# layer and the totals for the early (0-3) and late (9-11) layer ranges.

# Given attention head nodes (extracted from circuit):
attention_heads = [
    "a0.h1", "a0.h10", "a0.h5", "a0.h6",
    "a1.h11",
    "a10.h0", "a10.h1", "a10.h10", "a10.h2", "a10.h3", "a10.h6", "a10.h7",
    "a11.h10", "a11.h6", "a11.h8",
    "a3.h0", "a3.h6",
    "a6.h0",
    "a7.h3", "a7.h9",
    "a8.h10", "a8.h2", "a8.h3", "a8.h5", "a8.h6",
    "a9.h0", "a9.h2", "a9.h6", "a9.h7", "a9.h8", "a9.h9",
]

# Head names look like a{layer}.h{head}: the text before the dot, minus its
# leading 'a', is the layer number.
layer_counts = {}
for head_name in attention_heads:
    layer_token = head_name.partition('.')[0]
    layer_idx = int(layer_token[1:])
    layer_counts.setdefault(layer_idx, 0)
    layer_counts[layer_idx] += 1

# Busiest layer: the (layer, count) pair with the largest count.
max_layer, max_count = max(layer_counts.items(), key=lambda pair: pair[1])

# Heads located in the early layers (0-3).
early_layer_count = sum(
    count for layer_idx, count in layer_counts.items() if 0 <= layer_idx <= 3
)

# Heads located in the late layers (9-11).
late_layer_count = sum(
    count for layer_idx, count in layer_counts.items() if 9 <= layer_idx <= 11
)

# Print results
counts_by_layer = dict(sorted(layer_counts.items()))
print(f"Heads per layer: {counts_by_layer}")
print(f"Layer with most heads: {max_layer} ({max_count} heads)")
print(f"Early layer heads (0-3): {early_layer_count}")
print(f"Late layer heads (9-11): {late_layer_count}")


---

## CQ3: Write code to analyze the attention scores across the three head types. Calculate mean scores for each type, identify which type has the highest mean, count heads with high selectivity (>0.5), and find the overall top head.

**Reference**: Section 3.3 Analysis Pipeline - Attention Pattern Analysis

### Student Stub

In [None]:
# CQ3: Attention Score Ranking Analysis
# Your task is to analyze and compare attention scores across the three head types.
#
# Replace each `None` placeholder below with your own code. Until you do,
# the summary at the bottom still runs and reports the unfinished values
# instead of raising an error.

import numpy as np

# Top attention scores from the documentation
duplicate_token_scores = {
    "a3.h0": 0.7191,
    "a1.h11": 0.6613,
    "a0.h5": 0.6080,
    "a0.h1": 0.5152,
    "a0.h10": 0.2359
}

s_inhibition_scores = {
    "a8.h6": 0.7441,
    "a7.h9": 0.5079,
    "a8.h10": 0.3037,
    "a8.h5": 0.2852,
    "a9.h7": 0.2557
}

name_mover_scores = {
    "a9.h9": 0.7998,
    "a10.h7": 0.7829,
    "a9.h6": 0.7412,
    "a11.h10": 0.6369,
    "a10.h0": 0.3877
}

# TODO: Calculate the mean attention score for each head type
mean_duplicate = None  # Your code here
mean_s_inhibition = None  # Your code here
mean_name_mover = None  # Your code here

# TODO: Determine which head type has the highest mean attention score
highest_mean_type = None  # Your code here

# TODO: Count how many heads in each category have attention > 0.5 (high selectivity)
high_selectivity_duplicate = None  # Your code here
high_selectivity_s_inhibition = None  # Your code here
high_selectivity_name_mover = None  # Your code here

# TODO: Find the overall top head across all categories
all_heads = {**duplicate_token_scores, **s_inhibition_scores, **name_mover_scores}
top_head = None  # Your code here
top_score = None  # Your code here


def _format_score(value):
    """Return *value* with four decimals, or "None" while it is still unset.

    A numeric format spec such as ":.4f" raises TypeError when applied to
    None, which would stop this cell before any TODO has been completed.
    """
    if value is None:
        return "None"
    return f"{value:.4f}"


# Print results
print(f"Mean duplicate token score: {_format_score(mean_duplicate)}")
print(f"Mean S-inhibition score: {_format_score(mean_s_inhibition)}")
print(f"Mean name mover score: {_format_score(mean_name_mover)}")
print(f"Highest mean type: {highest_mean_type}")
print(f"High selectivity counts (>0.5): Duplicate={high_selectivity_duplicate}, S-Inhibition={high_selectivity_s_inhibition}, Name-Mover={high_selectivity_name_mover}")
print(f"Top head overall: {top_head} ({_format_score(top_score)})")


### SOLUTION

In [None]:
# CQ3: Attention Score Ranking Analysis - SOLUTION
#
# Compares the three head types by mean attention score, counts the heads
# scoring above 0.5 in each type, and picks the single highest-scoring head.

import numpy as np

# Top attention scores from the documentation
duplicate_token_scores = {
    "a3.h0": 0.7191,
    "a1.h11": 0.6613,
    "a0.h5": 0.6080,
    "a0.h1": 0.5152,
    "a0.h10": 0.2359,
}

s_inhibition_scores = {
    "a8.h6": 0.7441,
    "a7.h9": 0.5079,
    "a8.h10": 0.3037,
    "a8.h5": 0.2852,
    "a9.h7": 0.2557,
}

name_mover_scores = {
    "a9.h9": 0.7998,
    "a10.h7": 0.7829,
    "a9.h6": 0.7412,
    "a11.h10": 0.6369,
    "a10.h0": 0.3877,
}

# Mean attention score per head type, keyed by the type's display label.
scores_by_type = {
    "Duplicate Token": duplicate_token_scores,
    "S-Inhibition": s_inhibition_scores,
    "Name Mover": name_mover_scores,
}
means = {
    label: np.mean(list(scores.values()))
    for label, scores in scores_by_type.items()
}
mean_duplicate = means["Duplicate Token"]
mean_s_inhibition = means["S-Inhibition"]
mean_name_mover = means["Name Mover"]

# Head type whose mean is the largest.
highest_mean_type = max(means, key=means.get)

# High selectivity: heads whose score is strictly greater than 0.5.
high_selectivity_duplicate = len(
    [score for score in duplicate_token_scores.values() if score > 0.5]
)
high_selectivity_s_inhibition = len(
    [score for score in s_inhibition_scores.values() if score > 0.5]
)
high_selectivity_name_mover = len(
    [score for score in name_mover_scores.values() if score > 0.5]
)

# Best head overall: merge the three tables and take the top-scoring entry.
all_heads = {**duplicate_token_scores, **s_inhibition_scores, **name_mover_scores}
top_head, top_score = max(all_heads.items(), key=lambda entry: entry[1])

# Print results
print(f"Mean duplicate token score: {mean_duplicate:.4f}")
print(f"Mean S-inhibition score: {mean_s_inhibition:.4f}")
print(f"Mean name mover score: {mean_name_mover:.4f}")
print(f"Highest mean type: {highest_mean_type}")
print(f"High selectivity counts (>0.5): Duplicate={high_selectivity_duplicate}, S-Inhibition={high_selectivity_s_inhibition}, Name-Mover={high_selectivity_name_mover}")
print(f"Top head overall: {top_head} ({top_score:.4f})")


---

## Auto-Check Validation

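The following cells validate the expected outputs from each code question. Each cell is self-contained: it recomputes the answer from its own copy of the input data and asserts the result against the expected values.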

In [None]:
# Auto-check for CQ1: Circuit Budget Verification
# Recomputes the budget figures from the circuit node list and asserts that
# each one equals its expected value.
print("=== CQ1 Validation ===")

# Expected values
expected_heads = 31
expected_mlps = 12
expected_budget = 11200
expected_utilization = 100.0

# Calculate actuals
circuit_nodes = [
    "input",
    "a0.h1", "a0.h10", "a0.h5", "a0.h6",
    "a1.h11",
    "a10.h0", "a10.h1", "a10.h10", "a10.h2", "a10.h3", "a10.h6", "a10.h7",
    "a11.h10", "a11.h6", "a11.h8",
    "a3.h0", "a3.h6",
    "a6.h0",
    "a7.h3", "a7.h9",
    "a8.h10", "a8.h2", "a8.h3", "a8.h5", "a8.h6",
    "a9.h0", "a9.h2", "a9.h6", "a9.h7", "a9.h8", "a9.h9",
    "m0", "m1", "m2", "m3", "m4", "m5", "m6", "m7", "m8", "m9", "m10", "m11",
]

# Heads are the 'a…' nodes and MLPs are the 'm…' nodes.
actual_heads = len([name for name in circuit_nodes if name.startswith('a')])
actual_mlps = len([name for name in circuit_nodes if name.startswith('m')])

# 64 dimensions per head plus 768 per MLP, measured against the 11200 limit.
head_dims = actual_heads * 64
mlp_dims = actual_mlps * 768
actual_budget = head_dims + mlp_dims
actual_utilization = (actual_budget / 11200) * 100

# Validate
assert actual_heads == expected_heads, f"Heads mismatch: {actual_heads} != {expected_heads}"
assert actual_mlps == expected_mlps, f"MLPs mismatch: {actual_mlps} != {expected_mlps}"
assert actual_budget == expected_budget, f"Budget mismatch: {actual_budget} != {expected_budget}"
utilization_error = abs(actual_utilization - expected_utilization)
assert utilization_error < 0.1, f"Utilization mismatch: {actual_utilization} != {expected_utilization}"

print("✓ All CQ1 checks passed!")


In [None]:
# Auto-check for CQ2: Layer Distribution Analysis
# Recomputes the layer distribution from the head list and asserts every
# quantity CQ2 asks for: the per-layer counts, the busiest layer, and the
# early (0-3) and late (9-11) totals.
print("=== CQ2 Validation ===")

# Expected values
expected_layer_counts = {0: 4, 1: 1, 3: 2, 6: 1, 7: 2, 8: 5, 9: 6, 10: 7, 11: 3}
expected_max_layer = 10
expected_max_count = 7
expected_early_count = 7
expected_late_count = 16

# Calculate actuals
attention_heads = [
    "a0.h1", "a0.h10", "a0.h5", "a0.h6",
    "a1.h11",
    "a10.h0", "a10.h1", "a10.h10", "a10.h2", "a10.h3", "a10.h6", "a10.h7",
    "a11.h10", "a11.h6", "a11.h8",
    "a3.h0", "a3.h6",
    "a6.h0",
    "a7.h3", "a7.h9",
    "a8.h10", "a8.h2", "a8.h3", "a8.h5", "a8.h6",
    "a9.h0", "a9.h2", "a9.h6", "a9.h7", "a9.h8", "a9.h9"
]

# Head names follow a{layer}.h{head}; the layer is the integer after the 'a'.
layer_counts = {}
for head in attention_heads:
    layer = int(head.split('.')[0][1:])
    layer_counts[layer] = layer_counts.get(layer, 0) + 1

actual_max_layer = max(layer_counts, key=layer_counts.get)
actual_max_count = layer_counts[actual_max_layer]
actual_early_count = sum(layer_counts.get(i, 0) for i in range(4))
actual_late_count = sum(layer_counts.get(i, 0) for i in range(9, 12))

# Validate
# Dict equality ignores insertion order, so only the layer -> count mapping
# itself has to match.
assert layer_counts == expected_layer_counts, f"Layer counts mismatch: {dict(sorted(layer_counts.items()))} != {expected_layer_counts}"
assert actual_max_layer == expected_max_layer, f"Max layer mismatch: {actual_max_layer} != {expected_max_layer}"
assert actual_max_count == expected_max_count, f"Max count mismatch: {actual_max_count} != {expected_max_count}"
assert actual_early_count == expected_early_count, f"Early count mismatch: {actual_early_count} != {expected_early_count}"
assert actual_late_count == expected_late_count, f"Late count mismatch: {actual_late_count} != {expected_late_count}"

print("✓ All CQ2 checks passed!")


In [None]:
# Auto-check for CQ3: Attention Score Ranking Analysis
# Recomputes the score statistics from the three score tables and asserts
# every quantity CQ3 asks for: the per-type means, the type with the highest
# mean, the high-selectivity (>0.5) counts, and the overall top head.
import numpy as np
print("=== CQ3 Validation ===")

# Expected values (with tolerance)
expected_mean_duplicate = 0.5479
expected_mean_s_inhibition = 0.4193
expected_mean_name_mover = 0.6697
expected_highest_type = "Name Mover"
expected_high_selectivity_duplicate = 4
expected_high_selectivity_s_inhibition = 2
expected_high_selectivity_name_mover = 4
expected_top_head = "a9.h9"
expected_top_score = 0.7998

# Calculate actuals
duplicate_token_scores = {"a3.h0": 0.7191, "a1.h11": 0.6613, "a0.h5": 0.6080, "a0.h1": 0.5152, "a0.h10": 0.2359}
s_inhibition_scores = {"a8.h6": 0.7441, "a7.h9": 0.5079, "a8.h10": 0.3037, "a8.h5": 0.2852, "a9.h7": 0.2557}
name_mover_scores = {"a9.h9": 0.7998, "a10.h7": 0.7829, "a9.h6": 0.7412, "a11.h10": 0.6369, "a10.h0": 0.3877}

actual_mean_duplicate = np.mean(list(duplicate_token_scores.values()))
actual_mean_s_inhibition = np.mean(list(s_inhibition_scores.values()))
actual_mean_name_mover = np.mean(list(name_mover_scores.values()))

means = {"Duplicate Token": actual_mean_duplicate, "S-Inhibition": actual_mean_s_inhibition, "Name Mover": actual_mean_name_mover}
actual_highest_type = max(means, key=means.get)

# High selectivity means a score strictly greater than 0.5.
actual_high_selectivity_duplicate = sum(1 for v in duplicate_token_scores.values() if v > 0.5)
actual_high_selectivity_s_inhibition = sum(1 for v in s_inhibition_scores.values() if v > 0.5)
actual_high_selectivity_name_mover = sum(1 for v in name_mover_scores.values() if v > 0.5)

all_heads = {**duplicate_token_scores, **s_inhibition_scores, **name_mover_scores}
actual_top_head = max(all_heads, key=all_heads.get)
actual_top_score = all_heads[actual_top_head]

# Validate (with tolerance for floating point)
# Every message carries the offending values so a failure can be diagnosed
# from the assertion text alone.
tolerance = 0.001
assert abs(actual_mean_duplicate - expected_mean_duplicate) < tolerance, f"Duplicate mean mismatch: {actual_mean_duplicate} != {expected_mean_duplicate}"
assert abs(actual_mean_s_inhibition - expected_mean_s_inhibition) < tolerance, f"S-inhibition mean mismatch: {actual_mean_s_inhibition} != {expected_mean_s_inhibition}"
assert abs(actual_mean_name_mover - expected_mean_name_mover) < tolerance, f"Name mover mean mismatch: {actual_mean_name_mover} != {expected_mean_name_mover}"
assert actual_highest_type == expected_highest_type, f"Highest type mismatch: {actual_highest_type} != {expected_highest_type}"
assert actual_high_selectivity_duplicate == expected_high_selectivity_duplicate, f"Duplicate high-selectivity mismatch: {actual_high_selectivity_duplicate} != {expected_high_selectivity_duplicate}"
assert actual_high_selectivity_s_inhibition == expected_high_selectivity_s_inhibition, f"S-inhibition high-selectivity mismatch: {actual_high_selectivity_s_inhibition} != {expected_high_selectivity_s_inhibition}"
assert actual_high_selectivity_name_mover == expected_high_selectivity_name_mover, f"Name mover high-selectivity mismatch: {actual_high_selectivity_name_mover} != {expected_high_selectivity_name_mover}"
assert actual_top_head == expected_top_head, f"Top head mismatch: {actual_top_head} != {expected_top_head}"
assert abs(actual_top_score - expected_top_score) < tolerance, f"Top score mismatch: {actual_top_score} != {expected_top_score}"

print("✓ All CQ3 checks passed!")
