In [None]:
# === Environment Setup ===
import os, sys, math, time, random, json, textwrap, warnings, logging
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from IPython.display import Image
# line_profiler is a powerful tool for performance analysis.
# It needs to be installed: pip install line_profiler ipdb
%load_ext line_profiler

# --- Configuration ---
# Plot appearance: white-grid style with notebook-friendly font and figure sizes.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({'font.size': 12, 'figure.figsize': (8, 5), 'figure.dpi': 130})
# Array printing: no scientific notation, wide lines, four decimals.
np.set_printoptions(suppress=True, linewidth=120, precision=4)
# Configure basic logging to print to the console.
# force=True replaces any handlers already attached to the root logger. Without it,
# basicConfig() is a silent no-op whenever a handler exists (for example when this
# cell is re-run), and the INFO messages used later would never show up.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

# --- Utility Functions ---
def note(msg, **kwargs):
    """Print `msg` as a wrapped note prefixed by a blank line and a memo emoji.

    The text is wrapped at 100 columns with continuation lines indented by
    three spaces. Extra keyword arguments are forwarded to `print`.
    """
    wrapped = textwrap.fill(msg, width=100, subsequent_indent='   ')
    print("\n📝 " + wrapped, **kwargs)
def sec(title):
    """Print an upper-cased section banner framed by two 100-character rules."""
    rule = "=" * 100
    banner_lines = ["", rule, "| " + title.upper() + " |", rule]
    print("\n".join(banner_lines))

# Confirm that the setup cell ran to completion.
note("Environment initialized.")

# Part 1: Foundations
## Chapter 1.16: A Guide to Effective Debugging and Profiling

### Introduction: Debugging as a Scientific Process

An unspoken truth of computational work is that you will spend far more time debugging your code than writing it. A bug is simply a difference between what you *think* your code is doing and what it is *actually* doing. Therefore, **debugging is the process of closing that gap in understanding.** It is not a haphazard process of randomly changing things; it is a systematic, scientific investigation into the behavior of a program.

This chapter provides a guide to the tools and, more importantly, the mindset required for effective debugging. We will frame this process as a direct application of the scientific method:
1.  **Observe & Characterize:** The program fails. What is the exact failure? (Reading the traceback).
2.  **Isolate & Reproduce:** Create a minimal, reproducible example (MRE) that triggers the bug reliably.
3.  **Formulate a Hypothesis:** Based on the evidence, what do you think is causing the bug?
4.  **Experiment & Inspect:** Test your hypothesis by inspecting the program's state with a debugger or logging.
5.  **Fix & Verify:** Implement a fix and, crucially, write a test to confirm the fix and prevent regressions.

### The Case Study: A Buggy Asset Pricing Simulation

Throughout this chapter, we will investigate a function designed to simulate the price path of a simple binomial asset pricing model. The function is seeded with several bugs that we will uncover and fix.

In [None]:
def simulate_asset_price_buggy(p0, mu, sigma, T, dt, seed=None):
    """Simulates an asset price path using a simple binomial model (deliberately buggy).
    
    Args:
        p0 (float): Initial price.
        mu (float): Expected return (drift).
        sigma (float): Volatility.
        T (int or float): Total time horizon; the path holds int(T / dt) entries.
        dt (float): Time step size.
        seed (int, optional): Random seed for reproducibility.
    """
    rng = np.random.default_rng(seed)
    n_steps = int(T / dt)
    price_path = np.zeros(n_steps)
    price_path[0] = p0
    
    # Bug 1: Off-by-one error in loop range (the last pass writes index n_steps)
    for t in range(n_steps):
        # Bug 2: Incorrect random sampling (should be -1 or 1); this draws a length-1 array
        z = rng.standard_normal(1) 
        # Bug 3: Incorrect formula for price update (investigated later in the chapter)
        price_path[t+1] = price_path[t] * (mu*dt + sigma*z*np.sqrt(dt))
        
    return price_path

### Step 1: Observe and Characterize the Failure (The Traceback)
The first step is to run the code and observe the failure. The primary tool for this is the **traceback**.

A traceback is not an error message to be feared; it is the single most important piece of evidence you have. It tells you the *type* of error (`IndexError`), *where* it happened (line 22), and the sequence of function calls that led to it. **Always read a traceback from the bottom up.**

In [None]:
sec("Observation 1: The First Crash")
note("Calling the function with standard parameters...")
try:
    simulate_asset_price_buggy(
        p0=100, mu=0.05, sigma=0.2, T=1, dt=0.1, seed=42
    )
except IndexError as err:
    # The crash is the expected outcome here: report it, then interpret the traceback.
    print("  --> Caught expected error: " + str(err))
    note(
        "The traceback points to `price_path[t+1] = ...`. This is an `IndexError`, "
        "meaning we tried to access an index that doesn't exist. "
        "This suggests the loop runs one time too many."
    )

### Step 2: Formulate a Hypothesis and Experiment

- **Hypothesis 1:** The loop `for t in range(n_steps)` runs from `t = 0` to `t = n_steps - 1`. On the last iteration, `t+1` becomes `n_steps`, which is an invalid index for an array of size `n_steps` (valid indices are `0` to `n_steps - 1`).
- **Experiment:** The loop should only run up to the second-to-last element. We can fix this by changing the loop range to `range(n_steps - 1)`.
- **Tool:** Simple code modification.

In [None]:
def simulate_asset_price_v2(p0, mu, sigma, T, dt, seed=None):
    """Version 2: Fixed the loop range.

    This is still a work-in-progress teaching version: the price-update formula
    is intentionally left wrong so that the "silent bug" can be investigated next.

    Args:
        p0 (float): Initial price, stored as the first entry of the path.
        mu (float): Expected return (drift).
        sigma (float): Volatility.
        T (int or float): Total time horizon.
        dt (float): Time step size.
        seed (int, optional): Random seed for reproducibility.

    Returns:
        numpy.ndarray: 1-D array of length int(T / dt) holding the simulated path.
    """
    rng = np.random.default_rng(seed)
    n_steps = int(T / dt)
    price_path = np.zeros(n_steps)
    price_path[0] = p0

    # Fix 1: Corrected loop range
    for t in range(n_steps - 1):
        # Draw a scalar shock. standard_normal(1) returns a length-1 array, and storing
        # an array in a single element relies on an array-to-scalar conversion that
        # NumPy has deprecated. The random stream (and so the values) is unchanged.
        z = rng.standard_normal()
        # The formula is still wrong!
        price_path[t+1] = price_path[t] * (mu*dt + sigma*z*np.sqrt(dt))

    return price_path

# Re-run the call that crashed the buggy version, now using the corrected loop range.
sec("Experiment 1: Testing the Fix")
path_v2 = simulate_asset_price_v2(p0=100, mu=0.05, sigma=0.2, T=1, dt=0.1, seed=42)
note("Function now runs without crashing. Let's inspect the output.")
# Print the raw path so the values can be checked by eye.
print(path_v2)

### Step 3: Observe a New Failure (The Silent Bug)
The code no longer crashes, but is the result correct? A key part of debugging is verifying the output. The formula for a simple geometric Brownian motion price update is $P_{t+1} = P_t \cdot e^{(\mu - \frac{1}{2}\sigma^2)\Delta t + \sigma \sqrt{\Delta t} Z_t}$, where $Z_t \sim N(0,1)$. A simpler approximation is $P_{t+1} = P_t (1 + \mu \Delta t + \sigma \sqrt{\Delta t} Z_t)$. Our current formula is `price_path[t] * (mu*dt + sigma*z*np.sqrt(dt))`, which is missing the `1 + ...` term, causing the price to plummet towards zero.

- **Observation:** The prices are nonsensical, quickly becoming negative or near-zero. This is a "silent bug"—the code runs but produces the wrong answer.

### Step 4: Deeper Investigation (The Interactive Debugger)
When a result is incorrect, we need to inspect the program's internal state. The best tool for this is an **interactive debugger**.

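- **Tool:** The `breakpoint()` function (Python 3.7+) pauses execution and drops you into a debugger session. In a plain Python process this is `pdb`; inside Jupyter/IPython it typically opens IPython's enhanced debugger, which shows the `ipdb>` prompt. To get the same experience in standalone scripts, install `ipdb` (`pip install ipdb`) and set the environment variable `PYTHONBREAKPOINT=ipdb.set_trace`; `breakpoint()` does not switch to `ipdb` merely because it is installed.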
- **Hypothesis 2:** The term `(mu*dt + sigma*z*np.sqrt(dt))` is a small number, and repeatedly multiplying by it drives the price down.
- **Experiment:** Place `breakpoint()` inside the loop and inspect the variables.

**Essential Debugger Commands:**
- `p <expression>`: **Print** the value of an expression (e.g., `p price_path[t]`).
- `n`: Execute the **next** line.
- `s`: **Step** into a function call.
- `c`: **Continue** execution until the next breakpoint or the end.
- `l`: **List** source code around the current line.
- `q`: **Quit** the debugger.

To use the debugger, you would add `breakpoint()` to the code and run the cell. The execution will pause, and an `ipdb>` prompt will appear. You can then type commands to investigate. After confirming the formula is wrong, we can fix it.

In [None]:
def simulate_asset_price_v3(p0, mu, sigma, T, dt, seed=None):
    """Version 3: Corrected the price update formula.

    Each step applies P[t+1] = P[t] * (1 + mu*dt + sigma*sqrt(dt)*Z) with Z ~ N(0, 1).

    Args:
        p0 (float): Initial price, stored as the first entry of the path.
        mu (float): Expected return (drift).
        sigma (float): Volatility.
        T (int or float): Total time horizon.
        dt (float): Time step size.
        seed (int, optional): Random seed for reproducibility.

    Returns:
        numpy.ndarray: 1-D array of length int(T / dt) holding the simulated path.
    """
    rng = np.random.default_rng(seed)
    n_steps = int(T / dt)
    price_path = np.zeros(n_steps)
    price_path[0] = p0

    for t in range(n_steps - 1):
        # Draw a scalar shock. standard_normal(1) returns a length-1 array, and storing
        # an array in a single element relies on an array-to-scalar conversion that
        # NumPy has deprecated. The random stream (and so the values) is unchanged.
        z = rng.standard_normal()
        # Fix 2: Corrected the formula to include the '1 + ...' term.
        price_path[t+1] = price_path[t] * (1 + mu*dt + sigma*z*np.sqrt(dt))

    return price_path

# Simulate with a finer time step (dt=0.01) and the corrected formula, then plot the path.
sec("Experiment 2: Testing the Corrected Formula")
path_v3 = simulate_asset_price_v3(p0=100, mu=0.05, sigma=0.2, T=1, dt=0.01, seed=42)
note("The path now looks much more reasonable.")
# A wide, short figure suits a single time series; the x-axis is the step index.
plt.figure(figsize=(10, 4))
plt.plot(path_v3)
plt.title("Simulated Asset Price Path")
plt.xlabel("Time Steps")
plt.ylabel("Price")
plt.show()

### Step 5: Proactive Debugging: Assertions and Logging
After fixing the bugs, we should add defensive measures to catch future errors.

##### Assertions for Invariants
**Assertions** declare conditions (**invariants**) you believe must be true. If an assertion fails, it raises an `AssertionError`, stopping the program immediately. This is invaluable for catching bugs early and documenting your code's assumptions.

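> **When to Assert:** Use assertions to check for programmer errors (e.g., a volatility parameter that is negative) or to validate internal state. Use `try/except` blocks to handle expected runtime errors (e.g., a file not found). Keep in mind that assertions are removed when Python runs with the `-O` flag, so checks that must always run (e.g., validating user-supplied input in production code) should raise an explicit exception such as `ValueError` instead.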

##### Logging for Diagnostics
The `logging` module is the professional standard for diagnostics. It allows you to log messages at different severity levels (`DEBUG`, `INFO`, `WARNING`, `ERROR`), control verbosity, and direct output to files.

In [None]:
def simulate_asset_price_final(p0, mu, sigma, T, dt, seed=None):
    """Final, robust version with assertions and logging.

    Each step applies P[t+1] = P[t] * (1 + mu*dt + sigma*sqrt(dt)*Z) with Z ~ N(0, 1).

    Args:
        p0 (float): Initial price; must be positive.
        mu (float): Expected return (drift).
        sigma (float): Volatility; must be non-negative.
        T (int or float): Total time horizon; must be positive.
        dt (float): Time step size; must be positive.
        seed (int, optional): Random seed for reproducibility.

    Returns:
        numpy.ndarray: 1-D array of length n_steps + 1 (the initial price followed by
        one entry per step). If a step produces a non-positive price, that entry is
        set to 0, the simulation stops, and all later entries remain 0.

    Raises:
        AssertionError: If p0 <= 0, sigma < 0, T <= 0 or dt <= 0.
    """
    logging.info(f"Starting simulation with p0={p0}, mu={mu}, sigma={sigma}")

    # Pre-condition assertions for valid economic parameters
    assert p0 > 0, "Initial price must be positive."
    assert sigma >= 0, "Volatility cannot be negative."
    assert T > 0 and dt > 0, "Time horizon and step must be positive."

    rng = np.random.default_rng(seed)
    # T / dt can land just below an integer (0.3 / 0.1 == 2.9999999999999996), and a
    # bare int() would then silently drop a whole step. The tiny offset absorbs that
    # rounding error while still truncating genuinely fractional step counts.
    n_steps = int(T / dt + 1e-9)
    price_path = np.zeros(n_steps + 1) # Correct size to hold n_steps + initial price
    price_path[0] = p0
    sqrt_dt = np.sqrt(dt)  # loop-invariant, so compute it once

    for t in range(n_steps):
        z = rng.standard_normal()
        price_path[t+1] = price_path[t] * (1 + mu*dt + sigma*z*sqrt_dt)
        # Post-condition: check for non-physical results like negative prices
        if price_path[t+1] <= 0:
            logging.warning(f"Price became non-positive at step {t+1}. Clamping to zero.")
            price_path[t+1] = 0
            break # Stop simulation if price hits zero

    logging.info("Simulation finished.")
    return price_path

sec("Final Verification")
final_path = simulate_asset_price_final(
    p0=100, mu=0.05, sigma=0.2, T=5, dt=0.01, seed=42
)
note("Testing with invalid data (will raise AssertionError):")
try:
    # Negative volatility violates one of the simulator's pre-conditions.
    simulate_asset_price_final(p0=100, mu=0.05, sigma=-0.1, T=1, dt=0.1)
except AssertionError as err:
    print("  Caught expected error: " + str(err))

### 6. Profiling for Performance
Once your code is correct, you may need to make it fast. **Profiling** is the process of measuring the execution time of different parts of your program to identify performance bottlenecks.

> **Donald Knuth's Dictum**: "Premature optimization is the root of all evil." Do not try to optimize code before you know it is correct and you have identified, through profiling, where the actual bottlenecks are.

**Tools for Profiling in Jupyter:**
- **`%timeit`**: A line magic for quickly measuring the execution time of a single statement.
- **`%prun`**: A line magic that runs code with the standard Python profiler (`cProfile`) and shows a detailed report of function calls, execution times, and number of calls.
- **`%lprun` (from `line_profiler`)**: A more granular profiler that measures the time spent on each individual line of code within a function. This is the best tool for pinpointing the exact line that is causing a bottleneck.


In [None]:
# Section banner for the profiling demonstration.
sec("Profiling a Vectorized vs. Loop-based Simulation")

def simulate_vectorized(p0, mu, sigma, T, dt, n_paths, seed=None):
    """A fully vectorized simulation, no Python loops.

    Args:
        p0 (float): Initial price shared by every path.
        mu (float): Expected return (drift).
        sigma (float): Volatility.
        T (int or float): Total time horizon.
        dt (float): Time step size.
        n_paths (int): Number of independent paths to simulate.
        seed (int, optional): Random seed for reproducibility.

    Returns:
        numpy.ndarray: Array of shape (n_steps + 1, n_paths); row 0 is the initial
        price and each column is one simulated path.
    """
    rng = np.random.default_rng(seed)
    # T / dt can land just below an integer (0.3 / 0.1 == 2.9999999999999996), and a
    # bare int() would then silently drop a whole step. The tiny offset absorbs that
    # rounding error while still truncating genuinely fractional step counts.
    n_steps = int(T / dt + 1e-9)
    # Generate all random shocks at once
    z = rng.standard_normal((n_steps, n_paths))
    # Calculate all returns at once
    returns = 1 + mu*dt + sigma*z*np.sqrt(dt)
    # Cumulatively multiply to get the path; the row of ones is the starting point
    price_paths = p0 * np.vstack([np.ones(n_paths), np.cumprod(returns, axis=0)])
    return price_paths

# %timeit runs the call repeatedly and reports an aggregate timing.
note("Profiling the vectorized implementation:")
%timeit simulate_vectorized(p0=100, mu=0.05, sigma=0.2, T=1, dt=0.01, n_paths=1000)

# %lprun comes from the line_profiler extension loaded in the setup cell;
# -f selects the function whose lines are timed.
note("\nUsing %lprun to see the line-by-line cost:")
%lprun -f simulate_vectorized simulate_vectorized(p0=100, mu=0.05, sigma=0.2, T=1, dt=0.01, n_paths=1000)

### 7. Exercises

1.  **Read a Traceback:** Analyze the following code and the resulting traceback. What is the error type? On which line did it occur? What is the likely cause, and how would you fix it?
    ```python
    def calculate_portfolio_return(prices, weights):
        # prices is a (T x N) matrix, weights is a (N,) vector
        returns = prices.pct_change().dropna()
        port_return = returns @ weights
        return port_return
    # Traceback: ...
    # ValueError: shapes (99,5) and (99,5) not aligned: 5 (dim 1) != 99 (dim 0)
    ```

2.  **`ipdb` Practice:** Take the original `simulate_asset_price_buggy` function. Place a `breakpoint()` inside the loop. Call the function with `T=1, dt=0.5` (so `n_steps=2`). Use the debugger commands (`p t`, `p price_path`, `n`, `c`) to step through the function's two iterations and observe how the `price_path` array is updated, and confirm exactly when the `IndexError` occurs.

3.  **Find the Bug:** The following function is supposed to solve a household's consumption-savings problem for two periods, but it contains a logical bug. The utility should be higher in the second scenario. Use debugging techniques to find and fix the bug.
    ```python
    def solve_c_s(y1, y2, r, beta):
        # Solves for optimal c1, c2 given income y1, y2
        # Utility is log(c1) + beta*log(c2)
        # Bug is in this formula!
        c1 = (y1 + y2/r) / (1 + beta)
        c2 = (y1 + y2/r) * (1+r) / (1+beta) * beta
        return np.log(c1) + beta * np.log(c2)

    # Scenario 1: Smooth income
    u1 = solve_c_s(y1=100, y2=100, r=0.05, beta=0.95)
    # Scenario 2: Volatile income (with a slightly higher total PV)
    u2 = solve_c_s(y1=50, y2=155, r=0.05, beta=0.95)
    print(f"Utility 1: {u1:.2f}, Utility 2: {u2:.2f}") # u2 should be higher!
    ```

4.  **Profiling:** Create two versions of a function that calculates the standard deviation of a large array: one that implements the formula $\sigma = \sqrt{\frac{1}{N}\sum_{i=1}^N (x_i - \mu)^2}$ manually using Python loops, and one that just calls `np.std()`. Use `%timeit` to show that the NumPy version is orders of magnitude faster. Then use `%lprun` on your manual version to see how much time is spent on each line.

### 8. Professional Code, Part II: Testing with Pytest

#### 8.1 Why Test Economic Models?

In computational economics, the correctness of our code is paramount. A small bug in an optimization routine, a misspecified boundary condition in a dynamic model, or a non-reproducible stochastic simulation can lead to incorrect economic conclusions, flawed policy recommendations, and non-replicable research. 

**Automated testing** is the practice of writing code that checks the correctness of other code. It provides a safety net that allows us to:
- **Verify Correctness:** Ensure our algorithms are implemented correctly according to theory.
- **Prevent Regressions:** Confidently refactor and improve code, knowing that our tests will catch any new bugs we might introduce.
- **Improve Code Design:** Writing testable code often leads to better, more modular designs.
- **Facilitate Collaboration and Replication:** Tests serve as a form of documentation and a guarantee to others that the code works as expected.

For a PhD student, a robust test suite is a crucial component of a reproducible research compendium. It provides strong evidence that the computational results of a paper are reliable.

#### 8.2 The Testing Mindset: Arrange, Act, Assert

A good test is simple, isolated, and follows the **Arrange-Act-Assert** pattern:

1.  **Arrange:** Set up the necessary preconditions and inputs. This might involve creating mock data, defining model parameters, or instantiating an object.
2.  **Act:** Execute the specific piece of code you want to test. This is typically a single function call or method invocation.
3.  **Assert:** Check that the outcome of the action is what you expected. If the actual outcome does not match the expected outcome, the test fails.

#### 8.3 Introduction to `pytest`

`pytest` is a mature, feature-rich, and easy-to-use testing framework for Python. It has become the de facto standard for Python testing due to its simple syntax and powerful features.

##### Installation and Basic Usage

Install `pytest` via pip:
```bash
pip install pytest
```

**Key Conventions:**
- Test files should be named `test_*.py` or `*_test.py`.
- Test functions inside these files must be named `test_*`.

`pytest` will automatically discover and run these tests. To run your tests, simply navigate to your project's root directory in the terminal and run the command:
```bash
pytest
```

#### 8.4 Practical Example 1: Testing a Simple Function

In [None]:
import pytest

def production_function(K, L, alpha=0.33):
    """Return Cobb-Douglas output Y = K^alpha * L^(1-alpha).

    Args:
        K: Capital input; must be non-negative.
        L: Labor input; must be non-negative.
        alpha (float): Output elasticity of capital.

    Raises:
        ValueError: If either input is negative.
    """
    # Guard clause: reject the call as soon as one input is negative.
    if any(factor < 0 for factor in (K, L)):
        raise ValueError("Capital and Labor inputs must be non-negative.")
    capital_term = K**alpha
    labor_term = L**(1-alpha)
    return capital_term * labor_term

def test_production_function_normal_inputs():
    """Check typical inputs against values worked out by hand.

    The expected numbers are derived independently of the implementation
    (rather than by re-typing the formula under test), so a wrong exponent
    or a swapped argument would make the test fail.
    """
    # Arrange: with alpha = 0.5, Y = sqrt(K * L) = sqrt(4 * 9) = 6.
    K, L, alpha = 4, 9, 0.5
    expected_output = 6.0
    # Act
    actual_output = production_function(K, L, alpha)
    # Assert
    assert np.isclose(actual_output, expected_output)
    # Constant returns to scale: K^a * K^(1-a) = K for any alpha (default used here).
    assert np.isclose(production_function(100, 100), 100)

def test_production_function_zero_input():
    """Output must be zero when either capital or labor is zero."""
    # Arrange, Act, Assert
    without_capital = production_function(0, 100)
    assert without_capital == 0
    without_labor = production_function(100, 0)
    assert without_labor == 0

def test_production_function_negative_input():
    """A negative capital or labor input must raise ValueError."""
    # Arrange, Act, Assert
    for capital, labor in ((-1, 100), (100, -1)):
        with pytest.raises(ValueError):
            production_function(capital, labor)

# Simulate running the tests
# Calling the test functions directly stands in for the pytest runner inside the
# notebook; a failing assert raises before the success message is printed.
test_production_function_normal_inputs()
test_production_function_zero_input()
test_production_function_negative_input()
print("Cobb-Douglas tests passed!")

#### 8.5 Practical Example 2: Testing Numerical Accuracy in an Optimizer

In [None]:
def simple_optimizer(objective_grad, initial_x, learning_rate=0.1, tol=1e-6, max_iter=1000):
    """Minimize a function by plain gradient descent.

    Args:
        objective_grad (callable): Returns the gradient at a given point.
        initial_x: Starting point.
        learning_rate (float): Multiplier applied to the gradient at each update.
        tol (float): Stop once the absolute gradient falls below this value.
        max_iter (int): Upper bound on the number of gradient evaluations.

    Returns:
        The last iterate: the first point whose absolute gradient is below
        `tol`, or the point reached after `max_iter` updates.
    """
    current = initial_x
    for _step in range(max_iter):
        slope = objective_grad(current)
        # Converged: the gradient is numerically zero at this point.
        if abs(slope) < tol:
            return current
        current = current - learning_rate * slope
    return current

def objective_grad(x):
    """Return the gradient of f(x) = (x-5)^2, i.e. 2*(x-5)."""
    distance_from_minimum = x - 5
    return 2 * distance_from_minimum

def test_optimizer_finds_minimum():
    """Gradient descent started at 0 should reach the minimizer of (x-5)^2."""
    # Arrange
    start, target = 0.0, 5.0

    # Act
    result = simple_optimizer(objective_grad, start)

    # Assert
    # pytest.approx absorbs floating-point error; the optimizer stops near, not at, 5.
    assert result == pytest.approx(target, abs=1e-5)

# Simulate running the test
# A direct call stands in for the pytest runner inside the notebook; a failing
# assert raises before the success message is printed.
test_optimizer_finds_minimum()
print("Optimizer test passed!")