In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

import torch
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
from typing import Dict, List, Tuple, Optional

In [2]:
def get_device():
    """Return the torch device to run on: CUDA when it is available, CPU otherwise."""
    # Apple-silicon (MPS) selection is kept here but left commented out:
    # if torch.backends.mps.is_available():
    #     return torch.device("mps")
    return torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
def load_model(model_name="deepseek-ai/DeepSeek-R1-Distill-Llama-8B"):
    """Load a causal LM and its tokenizer onto the device chosen by ``get_device``.

    Args:
        model_name: Hugging Face model id (or local path) to load. Defaults to
            the DeepSeek-R1 distilled Llama-8B checkpoint used in this notebook.

    Returns:
        ``(tokenizer, model)``; the model is loaded in bfloat16 and switched to
        eval mode.
    """
    device = get_device()
    # Use one identifier for both calls so tokenizer and weights cannot drift apart.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        device_map=device,
    )
    model.eval()
    return tokenizer, model

# Load the checkpoint once; later cells in this notebook reuse these two globals.
tokenizer, model = load_model()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
def prepare_prompt(prompt, tokenizer):
    """Render ``prompt`` as a single user turn through the tokenizer's chat template.

    The template is applied with ``tokenize=False`` and
    ``add_generation_prompt=True``; whatever ``apply_chat_template`` returns
    for those settings (the formatted prompt text) is returned unchanged.
    """
    conversation = [{"role": "user", "content": prompt}]
    return tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

In [41]:
def generate_text(model, tokenizer, inputs, max_length=10000):
    """Sample one continuation for already-tokenized ``inputs`` and decode it.

    Args:
        model: Model exposing ``generate``.
        tokenizer: Tokenizer providing ``eos_token_id`` and ``decode``.
        inputs: Mapping with ``'input_ids'`` and ``'attention_mask'`` tensors.
        max_length: Forwarded to ``generate`` as ``max_length``.

    Returns:
        The decoded text of the first returned sequence (prompt included),
        with special tokens stripped.
    """
    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_length=max_length,
            num_return_sequences=1,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            return_dict_in_generate=True,
            # output_scores is deliberately not requested: only the token ids
            # are read below, and keeping per-step scores for up to
            # `max_length` steps would hold a vocabulary-sized tensor per step.
            return_legacy_cache=True
        )

    # Only one sequence is requested, so take the first row.
    generated_ids = output.sequences[0]
    return tokenizer.decode(generated_ids, skip_special_tokens=True)

In [46]:


# Simplified version for easier debugging
def simple_patching_experiment(clean_prompt, corrupted_prompt, layer_to_patch=None,
                               model=None, tokenizer=None):
    """
    Simplified function to patch a single layer for debugging.

    Generates a baseline from both prompts and prints them. If
    ``layer_to_patch`` is given, it records that layer's output on the clean
    prompt, then regenerates from the corrupted prompt with the leading
    positions of that layer's output overwritten by the clean activations.

    Args:
        clean_prompt: Raw prompt text whose activations are recorded.
        corrupted_prompt: Raw prompt text that is generated from while patched.
        layer_to_patch: Index into ``model.model.layers``; ``None`` runs the
            baselines only.
        model, tokenizer: Optional already-loaded pair. When either is missing
            both are loaded with ``load_model()`` (the original behaviour).

    Returns:
        None; all results are printed.

    NOTE(review): this calls ``generate_text(model, tokenizer, inputs)`` with
    tokenized inputs and prints its return value. The notebook defines
    ``generate_text`` twice with different signatures; this function needs the
    variant that accepts tokenized inputs to be the one in scope.
    """
    if model is None or tokenizer is None:
        print("Loading model...")
        tokenizer, model = load_model()
    device = model.device

    def encode(raw_prompt):
        # prepare_prompt returns formatted *text*, so it still has to be
        # tokenized before it can be fed to the model.
        formatted = prepare_prompt(raw_prompt, tokenizer)
        # The rendered template already begins with the begin-of-sentence
        # marker (visible in the prompt printed elsewhere in this notebook),
        # so the tokenizer must not prepend special tokens a second time.
        encoded = tokenizer(formatted, return_tensors="pt", add_special_tokens=False)
        return {k: v.to(device) for k, v in encoded.items()}

    print("Preparing prompts...")
    clean_inputs = encode(clean_prompt)
    corrupted_inputs = encode(corrupted_prompt)

    print("Generating from clean prompt (baseline)...")
    clean_generation = generate_text(model, tokenizer, clean_inputs)

    print("Generating from corrupted prompt (baseline)...")
    corrupted_generation = generate_text(model, tokenizer, corrupted_inputs)

    print("\n----- RESULTS -----")
    print("CLEAN PROMPT GENERATION:")
    print(clean_generation)
    print("\nCORRUPTED PROMPT GENERATION:")
    print(corrupted_generation)

    # If no layer to patch, we're done with baseline
    if layer_to_patch is None:
        return

    print(f"\nPatching layer {layer_to_patch}...")
    layer = model.model.layers[layer_to_patch]

    def hidden_of(output):
        # The layer may return a tuple (hidden state first) or a bare tensor.
        return output[0] if isinstance(output, tuple) else output

    captured = {}

    def save_clean_activation(module, input, output):
        captured["clean"] = hidden_of(output).detach().clone()

    print("Collecting clean activations...")
    clean_hook = layer.register_forward_hook(save_clean_activation)
    try:
        with torch.no_grad():
            model(**clean_inputs)
    finally:
        # Always detach the hook, even if the forward pass is interrupted.
        clean_hook.remove()
    clean_activation = captured["clean"]

    prompt_len = corrupted_inputs["input_ids"].shape[1]

    def patch_hook(module, input, output):
        hidden_state = hidden_of(output)
        # Only passes that contain the whole prompt start at position 0.
        # Incremental decoding steps carry just the newest token(s); patching
        # those would overwrite every generated token with the clean
        # activation of the first prompt position.
        if hidden_state.size(1) < prompt_len:
            return None
        min_len = min(clean_activation.size(1), hidden_state.size(1))
        patched_hidden = hidden_state.clone()
        patched_hidden[:, :min_len, :] = clean_activation[:, :min_len, :]
        if isinstance(output, tuple):
            return (patched_hidden,) + output[1:]
        return patched_hidden

    print("Setting up patching hook...")
    patch_hook_handle = layer.register_forward_hook(patch_hook)
    try:
        print("Generating with patching...")
        patched_generation = generate_text(model, tokenizer, corrupted_inputs)
    finally:
        # A long generation is easy to interrupt; never leave the model patched.
        patch_hook_handle.remove()

    print(f"\nPATCHED GENERATION (LAYER {layer_to_patch}):")
    print(patched_generation)

In [47]:
# Example usage
# The captured output below shows this guard passing when the cell is run.
if __name__ == "__main__":
    # The "clean" prompt carries the worked hint; the "corrupted" prompt is the
    # same problem with the hint removed.
    clean_prompt = "Problem: X+Y=10, X-Y=4. Hint: Add equations to get 2X=14, X=7, then Y=3. Answer:"
    corrupted_prompt = "Problem: X+Y=10, X-Y=4. Answer:"
    
    # Run baseline only (no patching)
    # simple_patching_experiment(clean_prompt, corrupted_prompt, None)
    
    # Run with patching layer 20
    simple_patching_experiment(clean_prompt, corrupted_prompt, 20)

Loading model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Preparing prompts...
Generating from clean prompt (baseline)...
Generating from corrupted prompt (baseline)...

----- RESULTS -----
CLEAN PROMPT GENERATION:
<｜User｜>Problem: X+Y=10, X-Y=4. Hint: Add equations to get 2X=14, X=7, then Y=3. Answer: X=7, Y=3.

But what if instead, I tried to subtract the equations: X+Y=10, (X-Y)=4. If I subtract (X-Y) from (X+Y), I get 2Y=6, so Y=3, then X=10-3=7.

Wait, so both methods give the same answer. Hmm. So, maybe subtracting the equations isn't really different from adding them. Is that always true?

Wait, let me test another example. Let's say I have two equations: 3A + 2B = 15 and 4A - B = 10. If I subtract the second equation from the first, I get (3A + 2B) - (4A - B) = 15 -10, which is -A + 3B =5. But if I add them, I get 7A + 3B =25. So, subtracting gives a different result than adding. So, in that case, subtracting equations can lead to a different equation. So, why in the first problem, subtracting gave the same answer as adding?

In the o

KeyboardInterrupt: 

In [13]:
def get_residual_stream(model, tokenizer, prompt, layer_idx, token_position=None):
    """
    Extract the hidden states entering one layer for a prompt.

    A forward hook on ``model.model.layers[layer_idx]`` records the first
    positional input of that layer during a single no-grad forward pass.

    Args:
        model: The language model; must expose ``model.layers`` and ``device``.
        tokenizer: The tokenizer for the model.
        prompt: The input text prompt (tokenized here with ``return_tensors="pt"``).
        layer_idx: The layer whose input is captured.
        token_position: Index along the sequence axis to keep. If None, the
            states for all positions are returned.

    Returns:
        A detached copy of the captured tensor (with the sequence axis indexed
        away when ``token_position`` is given), or None if the hook never fired.
    """
    residual_stream = None

    def hook_fn(module, input, output):
        nonlocal residual_stream
        # A forward hook receives the layer's positional arguments; the hidden
        # states entering the layer are the first of them.
        hidden_states = input[0] if isinstance(input, tuple) else input

        if token_position is not None:
            # Keep only the requested position along the sequence axis.
            residual_stream = hidden_states[:, token_position, :].detach().clone()
        else:
            residual_stream = hidden_states.detach().clone()

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    hook = model.model.layers[layer_idx].register_forward_hook(hook_fn)
    try:
        with torch.no_grad():
            model(**inputs)
    finally:
        # The model is shared across cells: never leave the hook attached,
        # even when the forward pass raises or is interrupted.
        hook.remove()

    return residual_stream


In [38]:
# Example usage:


# The raw text is rebound to its chat-formatted version before being printed.
prompt = "Problem: X+Y=10, X-Y=4. Answers"
prompt = prepare_prompt(prompt, tokenizer)
print(prompt)

residuals = get_residual_stream(model=model, tokenizer=tokenizer, prompt=prompt, layer_idx=15, token_position=-2)  # token_position=-2 selects the second-to-last position, not the last


<｜begin▁of▁sentence｜><｜User｜>Problem: X+Y=10, X-Y=4. Answers<｜Assistant｜><think>



In [5]:
import torch
import matplotlib.pyplot as plt
import numpy as np
from typing import Dict, List, Any, Tuple, Optional

# Module-level device handle. The functions visible in this cell read
# `model.device` instead, so nothing shown here depends on this name.
device = get_device()

def activation_patching(
    clean_prompt: str,
    corrupted_prompt: str,
    model,
    tokenizer,
    max_new_tokens: int = 100,
    layers_to_patch: Optional[List[int]] = None,
):
    """
    Run a layer-by-layer patching sweep between two prompts.

    Both prompts are chat-formatted (when the tokenizer has a chat template),
    an unpatched generation is produced for each, and the per-layer inputs are
    captured for each. Then, for every requested layer, text is generated from
    the clean prompt via ``patch_and_generate`` and scored against both
    unpatched generations with ``calculate_similarity``.

    Args:
        clean_prompt: Raw text of the prompt that is generated from while patched.
        corrupted_prompt: Raw text of the prompt whose captured states are
            handed to ``patch_and_generate`` as the patch source.
        model: The language model to analyze (must expose ``model.layers``).
        tokenizer: The tokenizer for the model.
        max_new_tokens: Maximum tokens to generate in every run.
        layers_to_patch: Layer indices to sweep; None means every layer.

    Returns:
        Dict with keys 'clean_prompt', 'corrupted_prompt', 'clean_output',
        'corrupted_output', 'patching_results' (one entry per layer with
        'layer', 'patched_output', 'clean_similarity', 'corrupted_similarity')
        and 'num_layers'.
    """
    # Chat-format both prompts when a template is available, else use them raw.
    has_template = hasattr(tokenizer, 'apply_chat_template')
    formatted_clean = prepare_prompt(clean_prompt, tokenizer) if has_template else clean_prompt
    formatted_corrupted = (
        prepare_prompt(corrupted_prompt, tokenizer) if has_template else corrupted_prompt
    )

    print(f"Clean prompt: {formatted_clean}")
    print(f"Corrupted prompt: {formatted_corrupted}")

    # Unpatched reference generations.
    print("Generating outputs for both prompts...")
    clean_output = generate_text(model, tokenizer, formatted_clean, max_new_tokens)
    corrupted_output = generate_text(model, tokenizer, formatted_corrupted, max_new_tokens)

    print(f"Clean output: {clean_output['generated_text']}")
    print(f"Corrupted output: {corrupted_output['generated_text']}")

    # Per-layer captured states for each prompt.
    print("Extracting residual streams...")
    clean_residuals = extract_all_residual_streams(model, tokenizer, formatted_clean)
    corrupted_residuals = extract_all_residual_streams(model, tokenizer, formatted_corrupted)

    num_layers = len(model.model.layers)
    sweep = list(range(num_layers)) if layers_to_patch is None else layers_to_patch

    def run_layer(layer_idx):
        # One patched generation plus its similarity to each reference output.
        print(f"Patching at layer {layer_idx}...")
        patched_output = patch_and_generate(
            model,
            tokenizer,
            formatted_clean,
            layer_idx,
            clean_residuals,
            corrupted_residuals,
            max_new_tokens
        )
        patched_text = patched_output['generated_text']
        return {
            'layer': layer_idx,
            'patched_output': patched_output,
            'clean_similarity': calculate_similarity(patched_text, clean_output['generated_text']),
            'corrupted_similarity': calculate_similarity(patched_text, corrupted_output['generated_text']),
        }

    print("Running patching experiments...")
    patching_results = [run_layer(layer_idx) for layer_idx in sweep]

    return {
        'clean_prompt': clean_prompt,
        'corrupted_prompt': corrupted_prompt,
        'clean_output': clean_output,
        'corrupted_output': corrupted_output,
        'patching_results': patching_results,
        'num_layers': num_layers
    }


def extract_all_residual_streams(model, tokenizer, prompt):
    """
    Capture the hidden states entering every layer for a given prompt.

    A forward hook on each entry of ``model.model.layers`` records that
    layer's first positional input during one no-grad forward pass.

    Args:
        model: The language model; must expose ``model.layers`` and ``device``.
        tokenizer: The tokenizer for the model.
        prompt: Text to tokenize (with ``return_tensors="pt"``) and run.

    Returns:
        Dictionary mapping layer indices to detached copies of the captured
        tensors.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    residual_streams = {}

    def create_hook(layer_idx):
        # Bind layer_idx per hook so each layer writes to its own key.
        def hook_fn(module, input_tensors, output):
            # A forward hook receives the layer's positional arguments; the
            # incoming hidden states are the first of them.
            if isinstance(input_tensors, tuple):
                hidden_states = input_tensors[0]
            else:
                hidden_states = input_tensors
            residual_streams[layer_idx] = hidden_states.detach().clone()

        return hook_fn

    hooks = []
    try:
        # Registration happens inside the try so a failure part-way through
        # still removes the hooks that were already attached.
        for idx, layer in enumerate(model.model.layers):
            hooks.append(layer.register_forward_hook(create_hook(idx)))

        with torch.no_grad():
            model(**inputs)
    finally:
        # The model is shared across cells: always detach every hook.
        for hook in hooks:
            hook.remove()

    return residual_streams


def patch_and_generate(
    model, 
    tokenizer, 
    prompt, 
    layer_idx, 
    clean_residuals, 
    corrupted_residuals, 
    max_new_tokens=100
):
    """
    Greedily generate from ``prompt`` while the input of one layer is patched
    with states captured from the corrupted prompt.

    The generation loop below re-runs the *entire* sequence on every step and
    never feeds a KV cache back in, so the prompt positions are recomputed each
    time. The patch is therefore applied on every forward pass and the hook is
    removed only once generation ends; removing it after the first pass would
    leave every token after the first generated from unpatched states.

    Args:
        model: The language model; must expose ``model.layers`` and ``device``.
        tokenizer: The tokenizer for the model.
        prompt: Text to tokenize and generate from.
        layer_idx: Layer whose input is patched.
        clean_residuals: Accepted for interface compatibility; not read here.
        corrupted_residuals: Mapping from layer index to the tensor patched in.
        max_new_tokens: Upper bound on generated tokens (stops early on EOS).

    Returns:
        Dict with 'full_output' (decoded prompt + generation),
        'generated_text' (decoded generation only) and 'tokens' (all ids).
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    input_ids = inputs['input_ids']
    prompt_length = input_ids.shape[1]

    generated_ids = input_ids.clone()
    corrupted_residual = corrupted_residuals[layer_idx]

    def patching_hook(module, input_tensors):
        # A forward pre-hook receives the layer's positional arguments; the
        # incoming hidden states are the first of them.
        if isinstance(input_tensors, tuple):
            hidden_states = input_tensors[0]
        else:
            hidden_states = input_tensors

        patched_states = hidden_states.clone()

        # Never patch past the prompt, the stored states, or the current input.
        min_seq_len = min(
            prompt_length,
            corrupted_residual.shape[1],
            patched_states.shape[1]
        )
        patched_states[:, :min_seq_len, :] = corrupted_residual[:, :min_seq_len, :]

        if isinstance(input_tensors, tuple):
            return (patched_states,) + input_tensors[1:]
        return patched_states

    hook = model.model.layers[layer_idx].register_forward_pre_hook(patching_hook)

    try:
        with torch.no_grad():
            for _ in range(max_new_tokens):
                # All positions are real tokens, so the mask is all ones.
                attention_mask = torch.ones_like(generated_ids)

                outputs = model(
                    input_ids=generated_ids,
                    attention_mask=attention_mask
                )

                # Greedy decoding: take the most likely next token.
                next_token = outputs.logits[0, -1, :].argmax(dim=-1).reshape(1, 1)
                generated_ids = torch.cat([generated_ids, next_token], dim=1)

                if next_token.item() == tokenizer.eos_token_id:
                    break
    finally:
        # The model is shared across cells: never leave it patched.
        hook.remove()

    full_output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    # Everything after the prompt is newly generated.
    generated_tokens = generated_ids[0, prompt_length:]
    generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    return {
        'full_output': full_output,
        'generated_text': generated_text,
        'tokens': generated_ids[0].tolist()
    }


def generate_text(model, tokenizer, prompt, max_new_tokens=100, do_sample=False):
    """Generate text without patching.

    Args:
        model: Model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer for the model.
        prompt: Text to tokenize (with ``return_tensors="pt"``) and continue.
        max_new_tokens: Maximum number of tokens to generate.
        do_sample: Forwarded to ``generate``. Defaults to False (greedy) so
            this baseline uses the same decoding rule as the argmax loop in
            ``patch_and_generate``; otherwise the decoding strategy would come
            from the checkpoint's own generation defaults, which may sample.

    Returns:
        Dict with 'full_output' (decoded prompt + generation),
        'generated_text' (decoded generation only) and 'tokens' (all ids).
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=do_sample,
        )

    sequence = outputs[0]
    full_output = tokenizer.decode(sequence, skip_special_tokens=True)

    # Everything after the prompt is newly generated.
    input_length = inputs['input_ids'].shape[1]
    generated_text = tokenizer.decode(sequence[input_length:], skip_special_tokens=True)

    return {
        'full_output': full_output,
        'generated_text': generated_text,
        'tokens': sequence.tolist()
    }


def calculate_similarity(text1, text2):
    """Jaccard similarity of the two texts' lower-cased, whitespace-split word sets.

    Returns 0.0 when neither text contains any word.
    """
    vocab_a = set(text1.lower().split())
    vocab_b = set(text2.lower().split())

    combined = vocab_a | vocab_b
    if not combined:
        return 0.0

    shared = vocab_a & vocab_b
    return len(shared) / len(combined)


def visualize_patching_results(results, save_path=None):
    """
    Plot per-layer similarity curves for a patching sweep and print a summary.

    Args:
        results: The results dictionary from activation_patching
        save_path: Optional path to save the figure
    """
    entries = results['patching_results']
    layers = [entry['layer'] for entry in entries]
    clean_sim = [entry['clean_similarity'] for entry in entries]
    corrupted_sim = [entry['corrupted_similarity'] for entry in entries]

    # Similarity curves
    plt.figure(figsize=(12, 6))
    plt.plot(layers, clean_sim, 'b-', label='Similarity to Clean Output', marker='o')
    plt.plot(layers, corrupted_sim, 'r-', label='Similarity to Corrupted Output', marker='x')

    plt.title('Effect of Residual Stream Patching Across Model Layers')
    plt.xlabel('Layer Index')
    plt.ylabel('Output Similarity')
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.xticks(np.arange(0, max(layers)+1, 2))
    plt.ylim(0, 1.05)

    # Step-to-step changes in corrupted similarity larger than 0.1 count as
    # significant; they are collected once and used for both the plot
    # annotations and the printed summary.
    jumps = []
    for pos in range(1, len(corrupted_sim)):
        delta = corrupted_sim[pos] - corrupted_sim[pos-1]
        if abs(delta) > 0.1:
            jumps.append((pos, delta))

    for pos, delta in jumps:
        plt.annotate(
            f'Δ={delta:.2f}',
            xy=(layers[pos], corrupted_sim[pos]),
            xytext=(layers[pos], corrupted_sim[pos] + 0.05),
            arrowprops=dict(arrowstyle='->', lw=1.5),
            ha='center'
        )

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path)

    plt.show()

    # Text summary
    print("\nPatching Analysis Summary:")
    print(f"Clean prompt: {results['clean_prompt']}")
    print(f"Clean output: {results['clean_output']['generated_text']}")
    print(f"Corrupted prompt: {results['corrupted_prompt']}")
    print(f"Corrupted output: {results['corrupted_output']['generated_text']}")

    # Entry whose patched output is most similar to the corrupted output
    peak = entries[np.argmax(corrupted_sim)]
    max_layer = peak['layer']
    max_output = peak['patched_output']['generated_text']
    print(f"\nLayer with maximum effect: {max_layer}")
    print(f"Output at maximum effect layer: {max_output}")

    if jumps:
        print("\nLayers with significant changes in output:")
        for pos, delta in jumps:
            if delta > 0:
                direction = "toward corrupted output"
            else:
                direction = "toward clean output"
            print(f"Layer {layers[pos]}: Change of {delta:.2f} {direction}")

    # Entry where the two similarity scores are closest to each other
    gaps = [abs(c_sim - k_sim) for c_sim, k_sim in zip(clean_sim, corrupted_sim)]
    balance_idx = np.argmin(gaps)
    flip_layer = layers[balance_idx]
    flip_output = entries[balance_idx]['patched_output']['generated_text']
    print(f"\nLayer where output is most balanced between clean/corrupted: {flip_layer}")
    print(f"Output at balance point: {flip_output}")


def prepare_prompt(prompt, tokenizer):
    """
    Render the prompt as a single user turn via the tokenizer's chat template.

    Args:
        prompt: The raw prompt text
        tokenizer: The tokenizer for the model

    Returns:
        The result of ``apply_chat_template`` (called with ``tokenize=False``
        and ``add_generation_prompt=True``), or the raw prompt unchanged when
        the tokenizer has no ``apply_chat_template`` attribute.
    """
    # Tokenizers without chat-template support get the prompt back as-is.
    if not hasattr(tokenizer, 'apply_chat_template'):
        return prompt

    conversation = [{"role": "user", "content": prompt}]
    return tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )





# # Additional utility function for easy patching across multiple examples
# def patch_multiple_examples(examples, model, tokenizer, max_new_tokens=100, save_dir="results"):
#     """
#     Run activation patching on multiple example pairs
    
#     Args:
#         examples: List of (clean_prompt, corrupted_prompt) tuples
#         model: The model to analyze
#         tokenizer: The tokenizer for the model
#         max_new_tokens: Maximum tokens to generate
#         save_dir: Directory to save visualization results
    
#     Returns:
#         List of results dictionaries
#     """
#     import os
#     os.makedirs(save_dir, exist_ok=True)
    
#     all_results = []
    
#     for i, (clean_prompt, corrupted_prompt) in enumerate(examples):
#         print(f"\n\nExample {i+1}/{len(examples)}")
#         print("=" * 80)
        
#         # Run activation patching
#         results = activation_patching(
#             clean_prompt=clean_prompt,
#             corrupted_prompt=corrupted_prompt,
#             model=model,
#             tokenizer=tokenizer,
#             max_new_tokens=max_new_tokens
#         )
        
#         # Save visualization
#         save_path = os.path.join(save_dir, f"example_{i+1}_patching.png")
#         visualize_patching_results(results, save_path=save_path)
        
#         all_results.append(results)
    
#     return all_results


# if __name__ == "__main__":
#     example_patching_analysis()

In [8]:
# Example usage
def example_patching_analysis(model, tokenizer):
    """Run the patching sweep on one quadratic-equation prompt pair and plot it.

    Returns the results dictionary produced by ``activation_patching``.
    """
    # The two prompts differ only by the trailing " \n Answer:" cue.
    base_question = "Solve the quadratic equation: 3x^2 - 12x + 9 = 0"
    cued_question = "Solve the quadratic equation: 3x^2 - 12x + 9 = 0 \n Answer:"

    results = activation_patching(
        clean_prompt=base_question,
        corrupted_prompt=cued_question,
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=200,
        layers_to_patch=[21],  # only layer 21 is patched in this demo
    )

    # Plot the sweep and also write the figure to disk.
    visualize_patching_results(results, save_path="patching_results.png")

    return results


In [9]:
# Run the demo on the globally loaded model and tokenizer; the call is the
# cell's last expression, so its return value is the cell's result.
example_patching_analysis(model, tokenizer)

Clean prompt: <｜begin▁of▁sentence｜><｜User｜>Solve the quadratic equation: 3x^2 - 12x + 9 = 0<｜Assistant｜><think>

Corrupted prompt: <｜begin▁of▁sentence｜><｜User｜>Solve the quadratic equation: 3x^2 - 12x + 9 = 0 
 Answer:<｜Assistant｜><think>

Generating outputs for both prompts...
Clean output: I start by examining the quadratic equation \(3x^2 - 12x + 9 = 0\). I notice that all the coefficients are divisible by 3, so I simplify the equation by dividing each term by 3, which gives me \(x^2 - 4x + 3 = 0\).

Next, I identify the coefficients: \(a = 1\), \(b = -4\), and \(c = 3\). I calculate the discriminant using the formula \(\Delta = b^2 - 4ac\), which equals \((-4)^2 - 4(1)(3) = 16 - 12 = 4\).

Since the discriminant is a perfect square, I know there are two real solutions. I apply the quadratic formula: \(x = \frac{-b \pm \sqrt{\Delta}}{2a}\). Substituting the values, I get \(x = \frac{
Corrupted output: To solve the quadratic equation \(3x^2 - 12x + 9 = 0\), I'll start by identifying 

KeyboardInterrupt: 