In [None]:
# Initialize Otter
# `grader` is the handle used by every `grader.check(...)` cell and by the
# final `grader.export(...)` cell, so this cell must run first.
import otter
grader = otter.Notebook("ws6.ipynb")

In [None]:
# Shared seed for the whole worksheet: each exercise function passes it to
# np.random.default_rng(...), and the preview cells pass it to np.random.seed(...).
rng_seed = 88

In [None]:
#imports
import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
#below line allows matplotlib plots to appear in cell output
%matplotlib inline

## **Question 1**: Statistical Distributions - Sampling and Visualization

In this question, you'll work with different statistical distributions by sampling from them and visualizing the results with histograms overlaid with their exact probability density functions (PDFs) or probability mass functions (PMFs).

### Part a: Normal Distribution

Write a function `plot_normal_distribution(n_samples, mean, std, bins, show_plot=False)` that:
1. Generates `n_samples` random samples from a normal (Gaussian) distribution with specified mean and standard deviation
2. Creates a histogram of the samples 
3. Overlays the exact probability density function (PDF)
4. Adds a vertical line at the theoretical mean

**Requirements:**
- Use the seeded generator from the starter code, `rng.normal(mean, std, n_samples)`, to generate samples (calling the global `np.random.normal` would bypass `rng`)
- Create histogram using `ax.hist()` with:
  - `bins=bins` parameter
  - `density=True` to normalize the histogram (so it matches the PDF scale)
  - `alpha=0.7` for transparency
  - `color='skyblue'`
  - `edgecolor='black'`
- For the PDF overlay:
  - Create x-values using `np.linspace(mean - 4*std, mean + 4*std, 1000)`
  - Calculate PDF using `sp.stats.norm.pdf(x, loc=mean, scale=std)` (SciPy is imported as `sp` in the imports cell)
  - Plot as a red line with `linewidth=2`
- Add a vertical line at the mean using `ax.axvline(mean, color='green', linestyle='--', linewidth=2, label='Mean')`
- Set labels and formatting:
  - X-axis: "Value"
  - Y-axis: "Density"
  - Title: "Normal Distribution: mu={mean:.1f}, sigma={std:.1f}" (use f-string formatting)
  - Add legend with `ax.legend()`
  - Add grid with `alpha=0.3`
- Return the matplotlib figure object

**Parameters:**
- `n_samples`: int, number of random samples to generate
- `mean`: float, mean of the normal distribution
- `std`: float, standard deviation of the normal distribution  
- `bins`: int, number of bins for the histogram
- `show_plot`: bool, default False. If True, call `plt.show()`

**Returns:**
- `fig`: matplotlib figure object

In [None]:
def plot_normal_distribution(n_samples, mean, std, bins, show_plot=False):
    """Starter scaffold: histogram of normal samples with the exact PDF overlaid.

    The body is intentionally incomplete. ``rng.normal(...)`` is a placeholder
    call whose arguments still have to be supplied, and the histogram, PDF
    overlay, mean line and labels described in the Part a requirements must be
    added under the matching marker comments below.

    Parameters
    ----------
    n_samples : int
        Number of random samples to generate.
    mean : float
        Mean of the normal distribution.
    std : float
        Standard deviation of the normal distribution.
    bins : int
        Number of bins for the histogram.
    show_plot : bool, default False
        If True, ``plt.show()`` is called before returning.

    Returns
    -------
    matplotlib.figure.Figure
        The figure created by ``plt.subplots(figsize=(10, 6))``.
    """
    # Generate samples and create plot
    # A new generator is built from rng_seed on every call, so repeated calls
    # draw the same sequence (predictable testing).
    rng = np.random.default_rng(rng_seed)
    samples = rng.normal(...)
    
    fig, ax = plt.subplots(figsize=(10, 6))
    
    # Create histogram
    
    # Overlay PDF
    
    # Add mean line
    
    # Labels and formatting
    
    if show_plot:
        plt.show()
    
    return fig

In [None]:
# Cell to view plot
# np.random.seed only seeds NumPy's legacy global RNG. The starter code in
# plot_normal_distribution draws from its own np.random.default_rng(rng_seed)
# generator, which is what makes the plot reproducible.
np.random.seed(rng_seed)  # Seeds the legacy global RNG (affects np.random.* calls only)
plot_normal_distribution(1000, 2.0, 1.5, 30, show_plot=True);

In [None]:
# Run the Otter tests for question "q1a".
grader.check("q1a")

### Part b: Uniform Distribution

Write a function `plot_uniform_distribution(n_samples, a, b, bins, show_plot=False)` that:
1. Generates `n_samples` random samples from a uniform distribution on the interval [a, b]
2. Creates a histogram of the samples 
3. Overlays the exact probability density function (PDF)
4. Adds a vertical line at the theoretical mean

**Requirements:**
- Use the seeded generator from the starter code, `rng.uniform(a, b, n_samples)`, to generate samples (calling the global `np.random.uniform` would bypass `rng`)
- Create histogram using `ax.hist()` with:
  - `bins=bins` parameter
  - `density=True` to normalize the histogram
  - `alpha=0.7` for transparency
  - `color='lightcoral'`
  - `edgecolor='black'`
- For the PDF overlay:
  - Create x-values using `np.linspace(a - 0.5*(b-a), b + 0.5*(b-a), 1000)`
  - Calculate PDF using `sp.stats.uniform.pdf(x, loc=a, scale=b-a)` (SciPy is imported as `sp` in the imports cell)
  - Plot as a blue line with `linewidth=2`
- Calculate the theoretical mean: `mean = (a + b) / 2`
- Add a vertical line at the mean using `ax.axvline(mean, color='green', linestyle='--', linewidth=2, label='Mean')`
- Set labels and formatting:
  - X-axis: "Value"
  - Y-axis: "Density"  
  - Title: "Uniform Distribution: [a={a:.1f}, b={b:.1f}]" (use f-string formatting)
  - Add legend with `ax.legend()`
  - Add grid with `alpha=0.3`
- Return the matplotlib figure object

**Parameters:**
- `n_samples`: int, number of random samples to generate
- `a`: float, lower bound of the uniform distribution
- `b`: float, upper bound of the uniform distribution
- `bins`: int, number of bins for the histogram
- `show_plot`: bool, default False. If True, call `plt.show()`

**Returns:**
- `fig`: matplotlib figure object

In [None]:
def plot_uniform_distribution(n_samples, a, b, bins, show_plot=False):
    """Starter scaffold: histogram of uniform samples with the exact PDF overlaid.

    The body is intentionally incomplete. ``rng.uniform(...)`` is a
    placeholder call whose arguments still have to be supplied, and the
    histogram, PDF overlay, mean line and labels described in the Part b
    requirements must be added under the matching marker comments below.

    Parameters
    ----------
    n_samples : int
        Number of random samples to generate.
    a : float
        Lower bound of the uniform distribution.
    b : float
        Upper bound of the uniform distribution.
    bins : int
        Number of bins for the histogram.
    show_plot : bool, default False
        If True, ``plt.show()`` is called before returning.

    Returns
    -------
    matplotlib.figure.Figure
        The figure created by ``plt.subplots(figsize=(10, 6))``.
    """
    # Generate samples and create plot
    # A new generator is built from rng_seed on every call, so repeated calls
    # draw the same sequence.
    rng = np.random.default_rng(rng_seed)
    samples = rng.uniform(...)
    
    fig, ax = plt.subplots(figsize=(10, 6))
    
    # Create histogram
    
    # Overlay PDF
    
    # Calculate mean and add mean line
    
    # Labels and formatting
    
    if show_plot:
        plt.show()
    
    return fig

In [None]:
# Cell to view plot
# np.random.seed only seeds NumPy's legacy global RNG. The starter code in
# plot_uniform_distribution draws from its own np.random.default_rng(rng_seed)
# generator, which is what makes the plot reproducible.
np.random.seed(rng_seed)  # Seeds the legacy global RNG (affects np.random.* calls only)
plot_uniform_distribution(1000, -2.0, 4.0, 25, show_plot=True);

In [None]:
# Run the Otter tests for question "q1b".
grader.check("q1b")

### Part c: Poisson Distribution

Write a function `plot_poisson_distribution(n_samples, lam, bins, show_plot=False)` that:
1. Generates `n_samples` random samples from a Poisson distribution with parameter λ (lambda)
2. Creates a histogram of the samples 
3. Overlays the exact probability mass function (PMF)
4. Adds a vertical line at the theoretical mean

**Requirements:**
- Use the seeded generator from the starter code, `rng.poisson(lam, n_samples)`, to generate samples (calling the global `np.random.poisson` would bypass `rng`)
- Create histogram using `ax.hist()` with:
  - `bins=bins` parameter
  - `density=True` to normalize the histogram
  - `alpha=0.7` for transparency
  - `color='lightgreen'`
  - `edgecolor='black'`
- For the PMF overlay:
  - Create integer x-values using `np.arange(0, int(lam + 5*np.sqrt(lam)) + 1)` (this covers the relevant range)
  - Calculate PMF using `sp.stats.poisson.pmf(x, mu=lam)` (SciPy is imported as `sp` in the imports cell)
  - Plot as orange points connected by lines using `ax.plot(x, pmf, 'o-', color='orange', linewidth=2, markersize=4, label='PMF')`
- For Poisson distribution, the theoretical mean equals λ
- Add a vertical line at the mean using `ax.axvline(lam, color='green', linestyle='--', linewidth=2, label='Mean')`
- Set labels and formatting:
  - X-axis: "Value"
  - Y-axis: "Density"  
  - Title: "Poisson Distribution: lambda={lam:.1f}" (use f-string formatting)
  - Add legend with `ax.legend()`
  - Add grid with `alpha=0.3`
- Return the matplotlib figure object

**Parameters:**
- `n_samples`: int, number of random samples to generate
- `lam`: float, rate parameter λ of the Poisson distribution
- `bins`: int, number of bins for the histogram
- `show_plot`: bool, default False. If True, call `plt.show()`

**Returns:**
- `fig`: matplotlib figure object

**Note:** Since Poisson is a discrete distribution, we plot the PMF as points connected by lines rather than a continuous curve.

In [None]:
def plot_poisson_distribution(n_samples, lam, bins, show_plot=False):
    """Starter scaffold: histogram of Poisson samples with the exact PMF overlaid.

    The body is intentionally incomplete. ``rng.poisson(...)`` is a
    placeholder call whose arguments still have to be supplied, and the
    histogram, PMF overlay, mean line and labels described in the Part c
    requirements must be added under the matching marker comments below.

    Parameters
    ----------
    n_samples : int
        Number of random samples to generate.
    lam : float
        Rate parameter (lambda) of the Poisson distribution.
    bins : int
        Number of bins for the histogram.
    show_plot : bool, default False
        If True, ``plt.show()`` is called before returning.

    Returns
    -------
    matplotlib.figure.Figure
        The figure created by ``plt.subplots(figsize=(10, 6))``.
    """
    # Generate samples and create plot
    # A new generator is built from rng_seed on every call, so repeated calls
    # draw the same sequence.
    rng = np.random.default_rng(rng_seed)
    samples = rng.poisson(...)
    
    fig, ax = plt.subplots(figsize=(10, 6))
    
    # Create histogram
    
    # Overlay PMF
    
    # Add mean line (mean = λ for Poisson)
    
    # Labels and formatting
    
    if show_plot:
        plt.show()
    
    return fig

In [None]:
# Cell to view plot
# np.random.seed only seeds NumPy's legacy global RNG. The starter code in
# plot_poisson_distribution draws from its own np.random.default_rng(rng_seed)
# generator, which is what makes the plot reproducible.
np.random.seed(rng_seed)  # Seeds the legacy global RNG (affects np.random.* calls only)
plot_poisson_distribution(1000, 3.5, 12, show_plot=True);

In [None]:
# Run the Otter tests for question "q1c".
grader.check("q1c")

## **Question 2**: Monte Carlo Estimation of π

In this question, you'll use the Monte Carlo method to estimate the value of π by randomly scattering points in a unit square and determining which ones fall inside a quarter circle.

### Part a: Monte Carlo π Estimator

Write a function `estimate_pi_monte_carlo(n_points)` that estimates the value of π using the Monte Carlo method.

**Method:**
The approach is based on the ratio of areas:
- A unit square has area 1
- A quarter circle with radius 1 inscribed in the unit square has area π/4
- Therefore, the ratio of points inside the quarter circle to total points should approximate π/4

**Algorithm:**
1. Generate `n_points` random points (x, y) uniformly distributed in the unit square [0,1] × [0,1]
2. For each point, check if it falls inside the quarter circle: x² + y² ≤ 1
3. Count how many points fall inside the quarter circle
4. Estimate π using: π ≈ 4 × (points_inside_circle / total_points)

**Requirements:**
- Use the seeded random number generator: `rng = np.random.default_rng(rng_seed)`
- Generate random points using `rng.uniform(0, 1, (n_points, 2))`
- Calculate distances from origin using vectorized operations
- Count points inside the quarter circle (distance ≤ 1)
- Return the estimated value of π

**Parameters:**
- `n_points`: int, number of random points to generate

**Returns:**
- `pi_estimate`: float, estimated value of π

**Mathematical Background:**
For a quarter circle of radius 1:
- Area of quarter circle = π/4
- Area of unit square = 1
- Probability that a random point falls in quarter circle = (π/4) / 1 = π/4
- Therefore: π = 4 × P(point in circle) ≈ 4 × (count_inside / total_points)

In [None]:
def estimate_pi_monte_carlo(n_points):
    """Starter scaffold: Monte Carlo estimate of pi from points in the unit square.

    The body is intentionally incomplete: ``pi_estimate`` is returned but not
    yet assigned, so the point generation and the ``4 * inside / total``
    estimate described in the Part a algorithm must be added below.

    Parameters
    ----------
    n_points : int
        Number of random points to generate.

    Returns
    -------
    float
        The estimated value of pi (once implemented).
    """
    # Use seeded random number generator
    # Rebuilt from rng_seed on every call, so each call starts from the same
    # random stream regardless of n_points.
    rng = np.random.default_rng(rng_seed)
    
    # Generate random points and estimate π
    
    return pi_estimate

In [None]:
# Test the function with a few values
# For each sample size, print the estimate and its absolute deviation from
# np.pi, followed by the reference value for comparison.
print("Monte Carlo π estimates:")
for n in [100, 1000, 10000, 100000]:
    pi_est = estimate_pi_monte_carlo(n)
    error = abs(pi_est - np.pi)  # absolute error of this estimate
    print(f"N = {n:6d}: π ≈ {pi_est:.6f}, error = {error:.6f}")
print(f"True π = {np.pi:.6f}")

In [None]:
# Run the Otter tests for question "q2a".
grader.check("q2a")

### Part b: Convergence Analysis Plot

Write a function `plot_pi_convergence(n_values, show_plot=False)` that creates a plot showing how the Monte Carlo estimate of π converges as the number of sample points increases.

This function should use the `estimate_pi_monte_carlo()` function from Part a to compute π estimates for different values of N and visualize the convergence behavior.

**Requirements:**
- Use the `estimate_pi_monte_carlo()` function from Part a to compute π estimates for each N in `n_values`
- Create a plot with:
  - X-axis: Number of points (log scale) - use `ax.set_xscale('log')`
  - Y-axis: Estimated value of π
  - Plot the estimates as blue circles connected by lines: `ax.plot(n_values, pi_estimates, 'bo-', linewidth=2, markersize=6)`
- Add a horizontal reference line for the true value of π:
  - Use `ax.axhline(np.pi, color='red', linestyle='--', linewidth=2, label='True π')`
- Set labels and formatting:
  - X-axis: "Number of Points"
  - Y-axis: "Estimated π"
  - Title: "Monte Carlo Convergence to π"
  - Add legend with `ax.legend()`
  - Add grid with `alpha=0.3`
- Set y-axis limits to show the convergence clearly: `ax.set_ylim([2.8, 3.5])`
- Return the matplotlib figure object

**Parameters:**
- `n_values`: array-like, list of N values to test (should be integers)
- `show_plot`: bool, default False. If True, call `plt.show()`

**Returns:**
- `fig`: matplotlib figure object

**Note:** The log scale on the x-axis helps visualize how the estimate improves with increasing sample size. You should expect to see the estimates generally converging toward the true value of π ≈ 3.14159 as N increases.

In [None]:
def plot_pi_convergence(n_values, show_plot=False):
    """Starter scaffold: plot Monte Carlo pi estimates against sample size.

    The body is intentionally incomplete. The estimates (one call to
    ``estimate_pi_monte_carlo`` per entry of ``n_values``), the log-scaled
    plot, the reference line at ``np.pi`` and the labels described in the
    Part b requirements must be added under the marker comments below.

    Parameters
    ----------
    n_values : array-like of int
        Sample sizes N to evaluate.
    show_plot : bool, default False
        If True, ``plt.show()`` is called before returning.

    Returns
    -------
    matplotlib.figure.Figure
        The figure created by ``plt.subplots(figsize=(10, 6))``.
    """
    # Calculate π estimates for different N values
    # NOTE: this seeds NumPy's legacy global RNG only. The starter code of
    # estimate_pi_monte_carlo builds its own np.random.default_rng(rng_seed)
    # generator, which is separate from this global state.
    np.random.seed(rng_seed)
    
    # Create plot with log scale x-axis
    fig, ax = plt.subplots(figsize=(10, 6))
    
    # Plot estimates and true π line
    
    # Labels and formatting
    
    if show_plot:
        plt.show()
    
    return fig

In [None]:
# Cell to view plot
# Seeds the legacy global RNG; plot_pi_convergence also does this itself.
np.random.seed(rng_seed)
# Sample sizes for the convergence plot (drawn on a log-scaled x-axis).
n_test_values = [100, 300, 1000, 3000, 10000, 30000, 100000]
plot_pi_convergence(n_test_values, show_plot=True);

In [None]:
# Run the Otter tests for question "q2b".
grader.check("q2b")

## **Question 3**: Biased Coin Simulation

In this question, you'll simulate a biased coin and analyze how the observed probability of heads converges to the theoretical value as the number of flips increases.

### Part a: Biased Coin Flip Simulator

Write a function `simulate_coin_flips(p, n_flips)` that simulates flipping a biased coin.

**Method:**
A biased coin has probability `p` of landing heads and probability `(1-p)` of landing tails. This follows a Bernoulli distribution where:
- Success (heads) occurs with probability `p`
- Failure (tails) occurs with probability `1-p`

**Requirements:**
- Use the seeded random number generator: `rng = np.random.default_rng(rng_seed)`
- Generate random samples from the Bernoulli distribution using `rng.binomial(...)`
- Return a list or array where 1 represents heads and 0 represents tails
- The function should work for any probability `p` between 0 and 1

**Parameters:**
- `p`: float, probability of heads (between 0 and 1)
- `n_flips`: int, number of coin flips to simulate

**Returns:**
- `flips`: numpy array or list, sequence of 1s (heads) and 0s (tails)

In [None]:
def simulate_coin_flips(p, n_flips):
    """Starter scaffold: simulate ``n_flips`` flips of a coin with P(heads) = ``p``.

    The body is intentionally incomplete: ``flips`` is returned but not yet
    assigned, so the Bernoulli sampling via ``rng.binomial(...)`` described
    in the Part a requirements must be added below.

    Parameters
    ----------
    p : float
        Probability of heads, between 0 and 1.
    n_flips : int
        Number of coin flips to simulate.

    Returns
    -------
    numpy.ndarray or list
        Sequence of 1s (heads) and 0s (tails), once implemented.
    """
    # Use seeded random number generator
    # Rebuilt from rng_seed on every call, so each call starts from the same
    # random stream regardless of p or n_flips.
    rng = np.random.default_rng(rng_seed)
    
    # Generate coin flips from Bernoulli distribution
    
    return flips

In [None]:
# Test the function with different probabilities
# Seeds the legacy global RNG only; the starter code of simulate_coin_flips
# builds its own np.random.default_rng(rng_seed) generator.
np.random.seed(rng_seed)
print("Biased coin simulation tests:")
for p in [0.3, 0.5, 0.7]:
    flips = simulate_coin_flips(p, 1000)
    observed_p = np.mean(flips)  # fraction of heads (1s) among the flips
    error = abs(observed_p - p)  # absolute deviation from the true p
    print(f"p = {p:.1f}: observed = {observed_p:.3f}, error = {error:.3f}")
    print(f"  First 20 flips: {flips[:20]}")

In [None]:
# Run the Otter tests for question "q3a".
grader.check("q3a")

### Part b: Coin Flip Convergence Analysis

Write a function `plot_coin_convergence(p, n_values, show_plot=False)` that creates a plot showing how the observed proportion of heads converges to the theoretical probability as the number of flips increases.

This function should use the `simulate_coin_flips()` function from Part a to simulate coin flips for different numbers of flips and visualize the convergence behavior.

**Requirements:**
- Use the `simulate_coin_flips()` function from Part a to simulate coin flips for each N in `n_values`
- For each N, calculate the observed proportion of heads: `proportion = np.mean(flips)`
- Create a plot with:
  - X-axis: Number of flips (log scale) - use `ax.set_xscale('log')`
  - Y-axis: Observed proportion of heads
  - Plot the proportions as green circles connected by lines: `ax.plot(n_values, proportions, 'go-', linewidth=2, markersize=6)`
- Add a horizontal reference line for the theoretical probability:
  - Use `ax.axhline(p, color='red', linestyle='--', linewidth=2, label=f'Theoretical p = {p:.2f}')`
- Set labels and formatting:
  - X-axis: "Number of Flips"
  - Y-axis: "Observed Proportion of Heads"
  - Title: f"Coin Flip Convergence (p = {p:.2f})"
  - Add legend with `ax.legend()`
  - Add grid with `alpha=0.3`
- Set y-axis limits based on the probability: `ax.set_ylim([max(0, p-0.3), min(1, p+0.3)])`
- Return the matplotlib figure object

**Parameters:**
- `p`: float, true probability of heads for the biased coin
- `n_values`: array-like, list of N values to test (number of flips)
- `show_plot`: bool, default False. If True, call `plt.show()`

**Returns:**
- `fig`: matplotlib figure object

**Note:** This demonstrates the Law of Large Numbers - as the number of trials increases, the observed frequency approaches the theoretical probability.

In [None]:
def plot_coin_convergence(p, n_values, show_plot=False):
    """Starter scaffold: plot the observed proportion of heads against flip count.

    The body is intentionally incomplete. The proportions (one call to
    ``simulate_coin_flips`` per entry of ``n_values``), the log-scaled plot,
    the reference line at ``p`` and the labels described in the Part b
    requirements must be added under the marker comments below.

    Parameters
    ----------
    p : float
        True probability of heads for the biased coin.
    n_values : array-like of int
        Numbers of flips N to evaluate.
    show_plot : bool, default False
        If True, ``plt.show()`` is called before returning.

    Returns
    -------
    matplotlib.figure.Figure
        The figure created by ``plt.subplots(figsize=(10, 6))``.
    """
    # Calculate observed proportions for different N values
    # NOTE: this seeds NumPy's legacy global RNG only. The starter code of
    # simulate_coin_flips builds its own np.random.default_rng(rng_seed)
    # generator, which is separate from this global state.
    np.random.seed(rng_seed)
    
    # Create plot with log scale x-axis
    fig, ax = plt.subplots(figsize=(10, 6))
    
    # Plot proportions and theoretical line
    
    # Labels and formatting
    
    if show_plot:
        plt.show()
    
    return fig

In [None]:
# Cell to view plot
# Seeds the legacy global RNG; plot_coin_convergence also does this itself.
np.random.seed(rng_seed)
# Flip counts for the convergence plot (drawn on a log-scaled x-axis).
n_flip_values = [50, 200, 500, 2000, 5000, 20000, 50000]
plot_coin_convergence(0.3, n_flip_values, show_plot=True);

In [None]:
# Run the Otter tests for question "q3b".
grader.check("q3b")

## Required disclosure of use of AI technology

Please indicate whether you used AI to complete this homework. If you did, explain how you used it in the Python cell below, inside the triple-quoted string.

In [None]:
# Placeholder for the AI-use disclosure: write your answer inside the string below.
"""
# write ai disclosure here:

"""

## Submission

Make sure you have run all cells in your notebook in order before running the cell below, so that all images/graphs appear in the output. The cell below will generate a zip file for you to submit.

Upload the .zip file to Gradescope!

In [None]:
# Build the submission zip: save the notebook first (force_save=True), skip
# the PDF (pdf=False), and run the tests as part of the export (run_tests=True).
grader.export(pdf=False, force_save=True, run_tests=True)