In [None]:
# === Environment Setup ===
import os, sys, math, time, random, json, textwrap, warnings, timeit
import numpy as np
import numpy.linalg as linalg
from numpy.lib.stride_tricks import as_strided
import matplotlib.pyplot as plt
from scipy import stats

# --- Configuration ---
# Plot styling: whitegrid theme, figures rendered at 130 dpi.
plt.style.use("seaborn-v0_8-whitegrid")
plt.rcParams.update({"figure.dpi": 130})
# Array printing: no scientific notation, 4 decimals, lines up to 120 characters.
np.set_printoptions(precision=4, linewidth=120, suppress=True)

# --- Utility Functions ---
def note(msg, **kwargs):
    """Wrap ``msg`` at 100 columns and print it after a 📝 marker.

    Continuation lines are indented by three spaces. Any keyword arguments are
    passed straight through to ``print``.
    """
    wrapper = textwrap.TextWrapper(width=100, subsequent_indent='   ')
    wrapped = wrapper.fill(msg)
    print(f"\n📝 {wrapped}", **kwargs)
def sec(title):
    """Print ``title`` in upper case, framed by two 100-character '=' rules."""
    rule = "=" * 100
    banner = "\n".join(["", rule, f"| {title.upper()} |", rule])
    print(banner)

# Confirm that setup ran and record which NumPy version is in use.
note(f"Environment initialized. NumPy version: {np.__version__}")

# Part 1: Foundations
## Chapter 1.12: NumPy: High-Performance Array Computing

### Introduction: The Bedrock of the Scientific Python Ecosystem

NumPy (Numerical Python) is the fundamental package for scientific computing in Python. It provides a high-performance, multidimensional array object (`ndarray`) and a vast library of tools for working with these arrays. It forms the core of the **Python Scientific Stack**, a collection of essential libraries for data analysis and modeling that also includes `SciPy` (for common scientific operations), `pandas` (for data manipulation), and `Matplotlib` (for plotting).

The reason for NumPy's centrality is its stunning performance. Operations on NumPy arrays are orders of magnitude faster than equivalent operations on native Python lists. This is not a minor optimization; it is a fundamental capability that makes Python viable for computationally intensive work. This performance stems from its core data structure, the `ndarray`, which allows NumPy to execute highly optimized, pre-compiled C and Fortran code under the hood—a concept known as **vectorization**.

In [None]:
sec("Performance: NumPy Array vs. Python List")

# Hold the same one million random floats twice: as a Python list and as an ndarray.
N = 1_000_000
py_list = [random.random() for _ in range(N)]
np_array = np.array(py_list)

note("Timing the summation of one million random floats:")
# Each figure is the TOTAL time for 100 repetitions, not a per-call time.
# `globals=globals()` lets the timed statement look up py_list, np_array and np.
py_time = timeit.timeit('sum(py_list)', globals=globals(), number=100)
np_time = timeit.timeit('np.sum(np_array)', globals=globals(), number=100)
print(f"Python list sum: {py_time:.6f}s")
print(f"NumPy array sum: {np_time:.6f}s")
note(f"NumPy is approximately {py_time / np_time:.1f}x faster.")

### 1. The `ndarray`: Data Types, Memory, and Strides
A NumPy `ndarray` is a sophisticated object that consists of two main parts:
1.  A pointer to a contiguous block of memory holding the raw data values (e.g., 8-byte C doubles).
2.  Metadata describing how to interpret the data, including its `dtype` (data type), `shape` (dimensions), and **`strides`** (the number of bytes to step in each dimension to move to the next element).

This is fundamentally different from a Python `list`, which is a collection of pointers to individual Python objects scattered throughout memory. This dense, typed, strided layout is the key to NumPy's performance and flexibility.

#### 1.1 Data Types (`dtype`)
Unlike Python's flexible integer and float types, NumPy requires data to have a specific, fixed-size type. This is inherited from its C implementation and is crucial for performance. You can specify the `dtype` when creating an array. Choosing the right `dtype` can significantly reduce memory consumption, e.g., using `float32` instead of the default `float64` for large machine learning models.

**Common Data Types:**
- `int8`, `uint8`: 8-bit signed/unsigned integers (-128 to 127 / 0 to 255)
- `int32`, `uint32`: 32-bit signed/unsigned integers
- `float32`: 32-bit single-precision float
- `float64`: 64-bit double-precision float (the Python default)
- `bool`: Boolean type storing `True` or `False`

#### 1.2 Integer Overflow: A Critical Pitfall
Because NumPy integers have a fixed size, they can **overflow**. For example, an 8-bit unsigned integer (`uint8`) can only hold values from 0 to 255. Adding 1 to 255 will wrap around to 0 without raising an error. This is a critical pitfall to be aware of when working with fixed-precision integers, especially in cumulative simulations.

In [None]:
sec("Integer Overflow Example")
x = np.array([255], dtype='uint8')
print(f"Original value: {x[0]}")
# Add in place on the ARRAY rather than on the scalar x[0]: array arithmetic
# wraps modulo 256 silently, whereas NumPy scalar arithmetic emits a
# RuntimeWarning on overflow, which would muddy the "silent" demonstration.
x += 1  # 255 + 1 wraps around to 0
print(f"Value after adding 1: {x[0]}  <-- Overflow! No error was raised.")

#### 1.3 Strides: The Key to Efficient Views
The `strides` attribute of an array is a tuple indicating the number of bytes to step in each dimension to advance to the next element. Understanding strides is the key to understanding why many NumPy operations (like slicing and transposing) are incredibly fast and memory-efficient.

These operations often do not copy the underlying data buffer. Instead, they create a new `ndarray` object (a **view**) with a different shape or strides metadata that points to the *same* memory block. This means creating a slice or a transpose is nearly instantaneous, regardless of the array's size.

In [None]:
sec("Understanding Strides")
# Twelve consecutive int32 values arranged as 3 rows x 4 columns.
x = np.arange(12, dtype=np.int32).reshape(3, 4)
print(f"Array x:\n{x}\n")
note(f"The data type is 'int32', which means each element takes 4 bytes.")
print(f"Shape of x: {x.shape}")
print(f"Strides of x: {x.strides}")
note("To move to the next row, we must step 16 bytes (4 elements * 4 bytes/element). To move to the next column, we must step 4 bytes (1 element * 4 bytes/element).")

note("Creating a transpose of x creates a VIEW with swapped strides:")
# .T copies nothing: y is a new array object over x's buffer (checked below).
y = x.T
print(f"Array y = x.T:\n{y}\n")
print(f"Shape of y: {y.shape}")
print(f"Strides of y: {y.strides} <-- The strides are swapped!")
print(f"x and y share the same memory buffer: {np.shares_memory(x, y)}")

note("Modifying the view y will also modify the original array x:")
# y[0, 0] and x[0, 0] are the same memory location, so x changes too.
y[0, 0] = 99
print(f"Array x after modifying y:\n{x}")

#### 1.4 Views vs. Copies: A Critical Distinction
Knowing whether an operation returns a view or a copy is crucial for avoiding bugs.
- **Operations that create views (share memory):** Slicing, transposing (`.T`), reshaping (`.reshape`, whenever the memory layout allows it; otherwise it silently returns a copy), `np.asarray()` on a NumPy array.
- **Operations that create copies (own memory):** Indexing with integer arrays (fancy indexing) or boolean arrays, arithmetic operations (`+`, `*`, etc.), most ufuncs, `.copy()`, `np.array()`.

When in doubt, you can check with `np.shares_memory(arr1, arr2)`. If you explicitly need a copy, always use the `.copy()` method.

### 2. Array Creation and Manipulation

#### 2.1 Array Creation Functions
NumPy provides several functions for creating arrays from scratch. It's also important to distinguish between `np.array` (which always makes a copy of its input) and `np.asarray` (which avoids a copy if the input is already a NumPy array of the correct dtype).

In [None]:
sec("Array Creation Functions")
a = np.zeros(5)          # Array of five zeros
b = np.ones((2, 3))      # 2x3 array of ones
c = np.full((2, 4), 7.0) # 2x4 array filled with the number 7.0
d = np.eye(4)            # 4x4 identity matrix
e = np.arange(0, 10, 2)  # Start, stop, step
f = np.linspace(0, 1, 6) # 6 points evenly spaced between 0 and 1 (inclusive)

# Only a, b, e and f are printed; c and d are created for illustration only.
print(f"np.zeros(5):\n{a}")
print(f"\nnp.ones((2, 3)):\n{b}")
print(f"\nnp.arange(0, 10, 2):\n{e}")
print(f"\nnp.linspace(0, 1, 6):\n{f}")

#### 2.2 Modern Random Number Generation
For any simulation or Monte Carlo work, robust random number generation is essential. The modern approach (NumPy 1.17+) is to use a dedicated **Generator** instance, created via `np.random.default_rng()`.

This is preferred over the legacy functions (`np.random.rand()`, `np.random.randn()`) because it provides better statistical properties and makes it much easier to manage reproducibility, especially in parallel computations. You create a generator and then call methods on it.

In [None]:
sec("Modern Random Number Generation")
rng = np.random.default_rng(seed=42) # Create a generator with a specific seed for reproducibility

# The three draws below consume the same generator in sequence, so reordering
# them would change every printed value.
note("Draw 5 standard normal random numbers:")
print(rng.standard_normal(5))

note("Draw a 2x3 array of integers between 10 and 20 (exclusive of 20):")
print(rng.integers(low=10, high=20, size=(2, 3)))

note("Draw 3x2 samples from a multivariate normal distribution:")
mean = [0, 0]
cov = [[1, 0.5], [0.5, 1]] # Covariance matrix
# size=3 with a length-2 mean gives three 2-D samples, i.e. shape (3, 2).
print(rng.multivariate_normal(mean, cov, size=3))

### 3. Advanced Indexing
Beyond simple slicing, NumPy offers powerful ways to select subsets of data. These methods, known as **advanced indexing**, always return a *copy* of the data, not a view.

#### 3.1 Boolean Indexing
You can use a boolean array of the same shape as the original array to select elements where the boolean array is `True`. This is extremely powerful for filtering data based on a condition.

In [None]:
sec("Boolean Indexing for Economic Data")
# Columns: [Country_ID, GDP_Growth, Inflation]; one row per country.
# The table is hard-coded, so this cell needs no random generator.
data = np.array([
    [1, 2.5, 1.8],
    [2, -0.5, 0.5],
    [3, 3.1, 2.5],
    [4, 5.5, 4.0],
    [5, 1.0, 3.5]
])

note("Find all countries experiencing a recession (negative GDP growth):")
gdp_growth = data[:, 1]
# Comparing the column with a scalar gives one boolean per row.
is_recession = gdp_growth < 0
print(f"Boolean mask for recession: {is_recession}")
# Indexing with the mask keeps only the rows where it is True.
recession_countries = data[is_recession]
print(f"Recessionary countries:\n{recession_countries}")

# No leading "\n" here: note() already prints a blank line first, and the
# text wrapper would turn an embedded newline into a stray leading space.
note("Find all countries with inflation above 3%:")
high_inflation_mask = data[:, 2] > 3.0
high_inflation_countries = data[high_inflation_mask]
print(f"High inflation countries:\n{high_inflation_countries}")

#### 3.2 Integer Array Indexing ("Fancy" Indexing)
You can use arrays of integers to select specific rows, columns, or elements. This allows for highly flexible data selection.

In [None]:
sec("Fancy Indexing")
x = np.arange(10, 20)
print(f"Original array: {x}")
# A list of integer positions picks exactly those elements, in the listed order.
indices = [0, 2, 8]
print(f"Selecting elements at indices {indices}: {x[indices]}")

X = np.arange(12).reshape(4, 3)
print(f"\nOriginal 2D array:\n{X}")
# One index list on a 2-D array selects whole rows.
row_indices = [0, 2, 3]
print(f"Selecting rows {row_indices}:\n{X[row_indices]}")

# np.ix_ pairs every listed row with every listed column, giving a 3x2 sub-matrix.
col_indices = [0, 2]
print(f"\nSelecting rows {row_indices} and columns {col_indices} (produces a sub-matrix):\n{X[np.ix_(row_indices, col_indices)]}")

### 4. Broadcasting and Universal Functions (Ufuncs)

#### 4.1 Broadcasting
Broadcasting describes how NumPy treats arrays with different shapes during arithmetic operations. It's a powerful mechanism that avoids making explicit copies of data.

**Broadcasting Rules:** Two dimensions are compatible if they are equal, or if one of them is 1. NumPy compares shapes element-wise from the right, and if a dimension is missing, it's treated as 1. If the shapes are compatible, the smaller array is virtually 'stretched' to match the larger one for the operation.

![Broadcasting Visualization](../images/png/1.12-broadcasting.png)

In [None]:
sec("Broadcasting to Standardize Data")
# Standardization: (X - mean) / std_dev
rng = np.random.default_rng(42)
X = rng.integers(0, 100, size=(5, 3)) # 5 observations, 3 features
print(f"Original data matrix (5x3):\n{X}\n")

# Calculate column means and std deviations
# axis=0 collapses the row axis, leaving one statistic per column (shape (3,)).
col_means = X.mean(axis=0)
col_stds = X.std(axis=0)
print(f"Column means (shape {col_means.shape}):\n{col_means}\n")

# To subtract the 1D `col_means` array from the 2D `X` array,
# NumPy broadcasts `col_means` across all 5 rows.
# The division by `col_stds` broadcasts in the same way.
X_standardized = (X - col_means) / col_stds

print(f"Standardized data matrix:\n{X_standardized}\n")
note("The new column means are ~0 and std deviations are ~1.")
print(f"New means: {X_standardized.mean(axis=0)}")
print(f"New stds:  {X_standardized.std(axis=0)}")

#### 4.2 Universal Functions (Ufuncs)
A ufunc is a function that operates on `ndarray`s in an element-by-element fashion. They are essentially fast, vectorized wrappers for simple functions. Examples include `np.sqrt`, `np.exp`, `np.add`, etc. Ufuncs support broadcasting and other advanced features like aggregation.

While ufuncs are fast, sometimes you have a more complex Python function that you want to apply to an array. `np.vectorize` can be used for this, but it is essentially a convenience wrapper for a Python `for` loop and does *not* provide the performance of a true ufunc written in C.

In [None]:
# Banner that opens this cell's output.
sec("np.vectorize for Convenience, Not Speed")

def policy_rule(income):
    """Return the tax rate for one scalar income.

    Incomes below 20,000 are taxed at 10%, incomes below 80,000 at 20%, and
    everything else at 35%.
    """
    # (exclusive upper bound, rate) pairs, checked from the lowest bracket up.
    brackets = ((20000, 0.10), (80000, 0.20))
    for upper_bound, rate in brackets:
        if income < upper_bound:
            return rate
    return 0.35

# Create a vectorized version of the Python function.
# Declaring `otypes` fixes the output dtype up front: without it, np.vectorize
# makes an extra probing call on the first element to infer the dtype and
# raises ValueError when given an empty array.
vectorized_policy = np.vectorize(policy_rule, otypes=[float])

incomes = np.array([15000, 50000, 120000])
tax_rates = vectorized_policy(incomes)

print(f"Incomes: {incomes}")
print(f"Tax Rates: {tax_rates}")

### 5. Linear Algebra with `numpy.linalg`
NumPy's `linalg` submodule provides essential linear algebra functionality, including matrix decompositions, eigenvalue calculation, determinants, and equation solvers.

#### 5.1 Application: Understanding OLS with Linear Algebra

A classic application of linear algebra in econometrics is solving for the coefficients of an Ordinary Least Squares (OLS) regression. The linear model is defined as $y = X\beta + \epsilon$, and the OLS estimator $\hat{\beta}$ that minimizes the sum of squared residuals is given by the solution to the famous normal equations:
$$ \hat{\beta} = (X^T X)^{-1} X^T y $$

This formula is a cornerstone of econometrics. In this section, we will use NumPy's linear algebra tools to implement this formula from scratch. This is a valuable exercise for understanding the mechanics of OLS. However, it's crucial to distinguish this educational implementation from production-level tools.

**Educational Implementation vs. Production Tools**

- **Direct Implementation:** We can translate the formula directly into NumPy code using `linalg.inv()` for the matrix inverse and `@` for matrix multiplication. While straightforward, this approach can be **numerically unstable** if the matrix $X^T X$ is ill-conditioned (i.e., close to being singular).

- **Robust Solvers:** A much better approach is to use dedicated linear equation solvers like `numpy.linalg.lstsq()`. This function solves the system $X\beta = y$ in the least-squares sense. It does not explicitly compute the inverse but uses more stable matrix decompositions (like QR or SVD), which are robust to multicollinearity and other numerical issues.

For any serious econometric analysis, one should use specialized libraries like `statsmodels` or `scikit-learn`. These libraries are built on top of NumPy but provide a comprehensive suite of tools for statistical inference, hypothesis testing, and diagnostics that go far beyond a simple coefficient calculation.

In this spirit, we will first build the solver from scratch to see how it works, then use the more robust `lstsq` function to demonstrate best practices within NumPy.

In [None]:
# pandas builds the results table at the end of this cell. It is imported here
# because the setup cell does not import it.
import pandas as pd

sec("Solving for OLS Coefficients: Naive vs. Robust Methods")
rng = np.random.default_rng(seed=123)
n_obs, n_features = 100, 3

# Generate some synthetic data
X = rng.standard_normal((n_obs, n_features))
X = np.c_[np.ones(n_obs), X] # Add a constant (intercept) column
true_beta = np.array([5, 2.5, -1.0, 0.5])
y = X @ true_beta + rng.standard_normal(n_obs) * 2 # Add some noise

# (X'X)^-1 is needed twice, for the naive coefficients and for their
# covariance matrix, so invert once and reuse it.
XtX_inv = linalg.inv(X.T @ X)

# --- Method 1: The Naive, Direct Formula (for education) ---
note("Method 1: Direct formula (X'X)^-1 X'y using linalg.inv()")
beta_naive = XtX_inv @ X.T @ y
print(f"Naive Beta Estimates: {beta_naive}")

# --- Method 2: The Robust, Recommended NumPy Solver ---
note("Method 2: Numerically stable `linalg.lstsq()`")
beta_robust, residuals, rank, s = linalg.lstsq(X, y, rcond=None)
print(f"Robust Beta Estimates: {beta_robust}")

note("For well-behaved data, the results are identical. However, `lstsq` is far more reliable in the presence of multicollinearity.")

# --- Full Statistical Inference (requires a library like SciPy/Statsmodels) ---
note("Calculating Standard Errors and t-statistics requires more steps")
# 1. Calculate residual variance
# lstsq reports the sum of squared residuals as a length-1 array here, because
# X has full column rank and more rows than columns.
ssr = residuals[0]
df = n_obs - (n_features + 1) # Degrees of freedom
sigma_sq_hat = ssr / df

# 2. Calculate variance-covariance matrix of estimators
var_beta_hat = sigma_sq_hat * XtX_inv

# 3. Standard errors are the sqrt of the diagonal elements
se_beta_hat = np.sqrt(np.diag(var_beta_hat))

# 4. t-statistics
t_stats = beta_robust / se_beta_hat

# 5. p-values (using SciPy's t-distribution); two-sided, hence the factor 2
p_values = stats.t.sf(np.abs(t_stats), df=df) * 2

results = pd.DataFrame({
    'True Beta': true_beta,
    'Estimate': beta_robust,
    'Std. Error': se_beta_hat,
    't-statistic': t_stats,
    'p-value': p_values
})
print("\n--- Full OLS Results Table ---")
print(results.round(4))
# The public entry point is `statsmodels.api.OLS`; there is no top-level `statsmodels.OLS`.
note("This full analysis is for demonstration. In practice, you would use `statsmodels.api.OLS(y, X).fit()` to get all of this instantly and robustly.")

#### 5.2 Application: Portfolio Variance
A common task in finance is to calculate the variance of a portfolio of assets. The formula is:
$$ \sigma_p^2 = w^T \Sigma w $$
where $w$ is a column vector of asset weights and $\Sigma$ is the covariance matrix of the asset returns. NumPy's matrix multiplication operator (`@`) makes this calculation trivial.

In [None]:
sec("Calculating Portfolio Variance")
# Assume 3 assets
rng = np.random.default_rng(42)
returns = rng.standard_normal((100, 3)) # 100 days of returns for 3 assets

# Calculate the covariance matrix
Sigma = np.cov(returns, rowvar=False) # rowvar=False means columns are variables
print(f"Covariance Matrix (Sigma):\n{Sigma}\n")

# Define asset weights (must sum to 1)
w = np.array([0.5, 0.3, 0.2])
print(f"Asset weights (w): {w}\n")

# Calculate portfolio variance using the formula
# w is 1-D, so `.T` is a no-op kept only to mirror the formula; the chained
# products reduce to a single scalar.
port_variance = w.T @ Sigma @ w
print(f"Portfolio Variance: {port_variance:.4f}")

### 6. Advanced Techniques

#### 6.1 `einsum`: The Language of Tensors
`np.einsum` (Einstein summation) is a powerful and expressive function for performing complex array operations like transposing, multiplying, and summing over axes. It uses a string-based mini-language to define the operation. For example, a matrix-vector product `C = A @ B` can be written as `np.einsum('ij,j->i', A, B)`. This reads as "Matrix A has indices `i` and `j`, vector B has index `j`. The common index `j` is summed over, leaving index `i` for the output."

While it has a learning curve, `einsum` can often replace complex sequences of other NumPy functions with a single, clear, and highly optimized operation.

In [None]:
sec("Expressive Power of `einsum`")
# A is 2x3 and B is 3x2, so A @ B is 2x2 and has a well-defined trace.
A = np.arange(6).reshape(2, 3)
B = np.arange(6).reshape(3, 2)

note("Example: Trace of a matrix product, tr(A @ B)")
trace_manual = np.trace(A @ B)
# 'ij,ji->' -> multiply A and B, sum over both i and j to get a scalar (the trace)
# The empty output spec after '->' is what requests the full reduction.
trace_einsum = np.einsum('ij,ji->', A, B)

print(f"Manual trace: {trace_manual}")
print(f"einsum trace: {trace_einsum}")

#### 6.2 Sliding Window Views with `as_strided`
A common task in time series analysis is to create "sliding windows" over a sequence. A naive approach would involve a loop that creates slices, copying data at each step. A much more advanced and memory-efficient technique is to manually manipulate the strides to create a new *view* of the data that represents the sliding windows without copying any memory.

The `numpy.lib.stride_tricks.as_strided` function is the tool for this. It is a powerful but dangerous function that can easily corrupt memory if used incorrectly. It should be used with extreme care, but it demonstrates the full power of NumPy's strided memory model.

In [None]:
# Banner that opens this cell's output.
sec("Sliding Window View via Stride Manipulation")

def rolling_window(a, window_size):
    """Return a read-only 2-D view of every length-``window_size`` window of ``a``.

    Row ``i`` of the result is ``a[i:i + window_size]``. No data is copied: both
    axes of the view walk the same buffer using the element stride of ``a``.

    Args:
        a: One-dimensional array. Anything ``np.asarray`` accepts works; an
            existing ndarray is used as-is, so the view shares its memory.
        window_size: Number of elements per window, between 1 and ``len(a)``.

    Returns:
        Read-only array of shape ``(len(a) - window_size + 1, window_size)``.

    Raises:
        ValueError: If ``a`` is not one-dimensional or ``window_size`` is out
            of range.
    """
    a = np.asarray(a)
    if a.ndim != 1:
        raise ValueError(f"rolling_window expects a 1-D array, got {a.ndim} dimensions")
    if not 1 <= window_size <= a.shape[0]:
        raise ValueError(
            f"window_size must be between 1 and {a.shape[0]}, got {window_size}"
        )
    n_windows = a.shape[0] - window_size + 1
    step = a.strides[0]
    # The windows overlap in memory, so a write through the view would show up
    # in several windows at once. Return a read-only view to rule that out.
    return as_strided(
        a,
        shape=(n_windows, window_size),
        strides=(step, step),
        writeable=False,
    )

x = np.arange(10)
print(f"Original array: {x}")

note("Creating a rolling window of size 4:")
# 10 elements with a window of 4 give 10 - 4 + 1 = 7 overlapping rows.
x_rolled = rolling_window(x, 4)
print(x_rolled)

note("The new array is a view and shares memory with the original:")
print(f"np.shares_memory(x, x_rolled) -> {np.shares_memory(x, x_rolled)}")

note("We can now perform vectorized operations, like a rolling mean, very efficiently:")
# Averaging along axis=1 reduces each window (row) to one number.
rolling_means = x_rolled.mean(axis=1)
print(rolling_means)

### 7. Exercises

1.  **Views vs. Copies:** Create a 10x10 array of zeros. Create a view `A` of the inner 8x8 block and a copy `B` of the same block. Set all elements of `A` to 1 and all elements of `B` to 2. Print the original array. What do you observe and why?

2.  **Broadcasting for Standardization:** Write a function `standardize(X)` that takes a 2D NumPy array `X` and returns a new array where each column has been standardized (subtract the column mean, divide by the column standard deviation). Do this using broadcasting, without any explicit loops. (Hint: `X.mean(axis=0)` will give you the column means as a 1D array, which will broadcast correctly).

3.  **Filtering with Boolean and Fancy Indexing:** Using the economic `data` array from section 3.1, perform the following in a single line of code: Select the `Country_ID` and `Inflation` columns for all countries that are **not** in a recession (i.e., GDP growth is non-negative).

4.  **OLS from Scratch vs. Solver:** Using the OLS data from section 5.1, calculate the $\hat{\beta}$ vector manually using `linalg.inv`. Now, use `linalg.solve(X.T @ X, X.T @ y)` to solve the same system. Compare the results. `linalg.solve` is generally preferred over `linalg.inv` for solving systems of linear equations of the form $Ax=b$. Why might this be the case?

5.  **Random Walk Simulation:** Use the modern `np.random.default_rng()` generator to simulate and plot 20 distinct random walks, each of 1000 steps. 
    - Create a 2D array of shape `(1000, 20)` with random steps (e.g., from a standard normal distribution).
    - Compute the cumulative sum along the `axis=0` to get the paths of the 20 walks.
    - Use Matplotlib's `plt.plot()` to plot all 20 walks on a single graph.

6.  **`einsum` for Quadratic Form:** A quadratic form is a common expression in economics, written as $x^T A x$. 
    - **Task:** Create a random 5x5 matrix `A` and a random vector `x` of size 5. Compute the quadratic form in two ways: first using `@` for matrix multiplication, and second using `np.einsum`. Verify that the results are identical. What is the `einsum` string for this operation?

### 8. Profiling and Performance: Making Code Fast

Writing code that is correct is the first priority. Writing code that is *fast* is often a close second, especially in computational economics where simulations can take hours or even days. **Profiling** is the process of systematically measuring the resource usage of your code—identifying where it spends the most time and consumes the most memory. It is the essential first step to any optimization effort.

A common mistake is to engage in **premature optimization**—guessing where the bottlenecks are and making code more complex in an attempt to speed it up, without any data. The cardinal rule of optimization is: **Measure, don't guess.** Profiling provides the data to guide your optimization efforts effectively.

This section introduces the three main tools for profiling Python code:
1.  **`timeit`:** A standard library module for quickly measuring the execution time of a single line or small snippet of code.
2.  **`cProfile`:** A built-in Python module for detailed, function-level profiling. It tells you how many times each function was called and how much time was spent in it.
3.  **`line_profiler`:** An external library for line-by-line profiling. It tells you how much time was spent on each individual line of code within a function.

#### 8.1 Quick Benchmarking with `timeit`

`timeit` is the perfect tool for micro-benchmarking. It runs a piece of code many times in a loop to get a precise and stable estimate of its execution time. This is ideal for comparing the performance of two different ways of doing the same thing.

In [None]:
sec("Comparing NumPy vs. Pure Python with `timeit`")
# The same 10,000 integers as a Python list and as an ndarray.
x_list = list(range(10000))
x_numpy = np.arange(10000)

# timeit returns the TOTAL seconds for `number` runs; the prints convert that
# total to milliseconds. py_time / np_time rebind the names used in the first
# benchmark cell.
py_time = timeit.timeit('sum(x_list)', globals=globals(), number=1000)
np_time = timeit.timeit('np.sum(x_numpy)', globals=globals(), number=1000)
print(f"Pure Python sum: {py_time*1000:.4f} ms")
print(f"NumPy sum:       {np_time*1000:.4f} ms")

#### 8.2 Function-Level Profiling with `cProfile`

For finding bottlenecks in a larger script or program, `timeit` is not enough. `cProfile` provides a detailed report of the entire execution, breaking down the time spent in each function.

The output can be verbose, but the key columns are:
- `ncalls`: The number of times the function was called.
- `tottime`: The total time spent *in* the function, excluding time spent in functions it called.
- `cumtime`: The cumulative time spent in the function *and* all functions it called.

A function with a high `tottime` is a prime candidate for optimization.

In [None]:
# Profiling tools from the standard library, imported in the cell that uses them.
import cProfile, pstats
sec("Using cProfile to Analyze a Simulation")

def monte_carlo_pi(n_samples):
    """Estimate pi by sampling ``n_samples`` points in the unit square.

    The fraction of points that land strictly inside the quarter circle of
    radius 1 approximates pi / 4. The loop is deliberately plain Python so the
    profiler has something slow to measure.
    """
    hits = 0
    draw = random.random
    for _ in range(n_samples):
        px = draw()
        py = draw()
        # A bool adds as 0 or 1, so this counts the points inside the circle.
        hits += px**2 + py**2 < 1.0
    return 4 * hits / n_samples

def run_simulation():
    """Profiling target: run one 100,000-sample pi estimate and discard the result."""
    n_draws = 100000
    monte_carlo_pi(n_draws)

note("Running cProfile on our simulation. We sort the results by 'tottime' to see the biggest bottlenecks.")
profiler = cProfile.Profile()
profiler.enable()
try:
    run_simulation()
finally:
    # Always stop collecting, even if the simulation raises.
    profiler.disable()
# Named `profile_stats`, not `stats`: the setup cell binds `stats` to
# scipy.stats, and rebinding it here would break `stats.t.sf` in the OLS cell
# if that cell is re-run afterwards.
profile_stats = pstats.Stats(profiler).sort_stats('tottime')
profile_stats.print_stats(10) # Print the top 10 bottlenecks

#### 8.3 Line-by-Line Profiling with `line_profiler`

`cProfile` tells you *which functions* are slow. `line_profiler` tells you *which lines within* a slow function are the problem. This is the most granular level of profiling and is invaluable for targeted optimization.

To use it, you must first install it (`pip install line-profiler`) and then use the `%lprun` magic command in Jupyter. You must explicitly tell it which functions to profile using the `-f` flag.

In [None]:
sec("Using line_profiler for Granular Analysis")

# `%load_ext` is an IPython magic, so this cell only runs inside IPython/Jupyter.
# NOTE(review): presumably a missing line_profiler package surfaces here as
# ImportError; confirm against the IPython version in use.
try:
    %load_ext line_profiler
    LINE_PROFILER_AVAILABLE = True
except ImportError:
    LINE_PROFILER_AVAILABLE = False

def monte_carlo_pi_vectorized(n_samples, rng=None):
    """Estimate pi from ``n_samples`` uniform points using array operations.

    Args:
        n_samples: Number of points to draw in the unit square.
        rng: Optional seed or ``numpy.random.Generator``. With the default
            ``None`` a fresh, unseeded generator is created, so results vary
            from call to call. Pass a seed or a generator for reproducibility;
            ``np.random.seed`` has no effect on this function.

    Returns:
        Four times the fraction of points strictly inside the unit quarter
        circle.
    """
    # default_rng returns an existing Generator unchanged and builds one from
    # None or a seed, so every accepted form of `rng` goes through one call.
    rng = np.random.default_rng(rng)
    xy = rng.random((n_samples, 2))
    in_circle = np.sum(xy[:, 0]**2 + xy[:, 1]**2 < 1.0)
    return 4 * in_circle / n_samples

# Profile only when the extension loaded; otherwise tell the reader how to install it.
if not LINE_PROFILER_AVAILABLE:
    note("line_profiler is not installed. Skipping this example. Run `pip install line-profiler`.")
else:
    note("Running line_profiler. The '% Time' column shows where the code is spending its time.")
    # -f names the function to instrument; the trailing expression is the call that is run.
    %lprun -f monte_carlo_pi_vectorized monte_carlo_pi_vectorized(100000)