# Topic 8: Applications of Bayesian Methods

## Learning Objectives
- Apply Bayesian methods to real-world problems
- Integrate multiple data sources and prior knowledge
- Communicate Bayesian results effectively
- Understand when and why to use Bayesian approaches

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import pymc as pm
import arviz as az
from scipy import stats
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Apply the 'seaborn-v0_8' Matplotlib style sheet to every figure created below
plt.style.use('seaborn-v0_8')
# Seed NumPy's global random generator so the simulated data are reproducible
np.random.seed(42)

## 1. Case Study: Clinical Trial Analysis

### Scenario:
A pharmaceutical company is testing a new drug. They want to:
- Incorporate prior knowledge from similar drugs
- Make interim decisions during the trial
- Quantify probability of success
- Plan future studies

In [None]:
# Clinical trial simulation
def simulate_clinical_trial():
    """
    Simulate a two-arm clinical trial with Bayesian interim analyses.

    Binary outcomes for both arms are drawn once for the maximum planned
    enrolment. Every interim look then analyses the first ``n_per_arm``
    patients of each arm, so a later look always contains the data seen at
    the earlier looks (as in a real sequential trial).

    Returns
    -------
    results : list of dict
        One entry per interim look holding the observed success counts, the
        observed effect, the posterior mean and 95% equal-tailed interval of
        the treatment effect, and the posterior probabilities of a positive
        effect, of an effect above 0.10, and of treatment superiority.
    trace
        The object returned by ``pm.sample`` for the final interim look.
    """
    # True response rates (unknown to investigators)
    true_control_rate = 0.3
    true_treatment_rate = 0.45  # 15 percentage-point absolute improvement

    # Prior information from similar drugs:
    # Normal(0.15, 0.05) puts roughly 95% of its mass on effects in 0.05-0.25
    prior_mean_effect = 0.15
    prior_sd_effect = 0.05

    # Trial design
    max_n_per_arm = 200
    interim_analyses = [50, 100, 150, 200]  # Sample sizes for interim looks

    # Enrol the full cohort once; interim looks are prefixes of these arrays
    control_all = np.random.binomial(1, true_control_rate, max_n_per_arm)
    treatment_all = np.random.binomial(1, true_treatment_rate, max_n_per_arm)

    results = []
    trace = None

    for n_per_arm in interim_analyses:
        # Data accumulated up to the current look
        control_outcomes = control_all[:n_per_arm]
        treatment_outcomes = treatment_all[:n_per_arm]
        control_successes = np.sum(control_outcomes)
        treatment_successes = np.sum(treatment_outcomes)

        # Bayesian analysis
        with pm.Model() as trial_model:
            # Non-informative prior for the control response rate
            p_control = pm.Beta('p_control', 1, 1)

            # Informative prior for the absolute treatment effect
            treatment_effect = pm.Normal('treatment_effect',
                                         prior_mean_effect, prior_sd_effect)

            # Treatment rate = control rate + effect, clipped to stay a
            # valid probability
            p_treatment = pm.Deterministic(
                'p_treatment',
                pm.math.clip(p_control + treatment_effect, 0, 1))

            # Binomial likelihoods on the per-arm success counts
            control_obs = pm.Binomial('control_obs', n=n_per_arm, p=p_control,
                                      observed=control_successes)
            treatment_obs = pm.Binomial('treatment_obs', n=n_per_arm,
                                        p=p_treatment,
                                        observed=treatment_successes)

            # Sample
            trace = pm.sample(2000, return_inferencedata=True, random_seed=42)

        # Pool all chains and draws into flat sample vectors
        effect_samples = trace.posterior['treatment_effect'].values.flatten()
        p_control_samples = trace.posterior['p_control'].values.flatten()
        p_treatment_samples = trace.posterior['p_treatment'].values.flatten()

        # Key metrics
        prob_positive_effect = np.mean(effect_samples > 0)
        prob_clinically_meaningful = np.mean(effect_samples > 0.1)  # 10% threshold
        prob_superiority = np.mean(p_treatment_samples > p_control_samples)

        effect_mean = np.mean(effect_samples)
        effect_ci = np.percentile(effect_samples, [2.5, 97.5])

        results.append({
            'n_per_arm': n_per_arm,
            'control_successes': control_successes,
            'treatment_successes': treatment_successes,
            'observed_effect': treatment_successes/n_per_arm - control_successes/n_per_arm,
            'posterior_effect_mean': effect_mean,
            'effect_ci_lower': effect_ci[0],
            'effect_ci_upper': effect_ci[1],
            'prob_positive': prob_positive_effect,
            'prob_meaningful': prob_clinically_meaningful,
            'prob_superiority': prob_superiority
        })

    return results, trace

# Execute the simulated trial; keep the per-look summaries and the final trace
trial_results, final_trace = simulate_clinical_trial()

# Tab-separated table: one row per interim look
print("Clinical Trial Interim Analyses:")
print("N/arm\tControl\tTreatment\tObs Effect\tPost Effect\t95% CI\t\tP(+)\tP(>10%)\tP(Superior)")
print("-" * 100)

for result in trial_results:
    # The placeholders are the keys of each summary dict
    print(
        "{n_per_arm}\t{control_successes}/{n_per_arm}\t"
        "{treatment_successes}/{n_per_arm}\t\t{observed_effect:.3f}\t\t"
        "{posterior_effect_mean:.3f}\t\t[{effect_ci_lower:.3f}, {effect_ci_upper:.3f}]\t"
        "{prob_positive:.3f}\t{prob_meaningful:.3f}\t{prob_superiority:.3f}".format(**result)
    )

# Visualization: 2x2 dashboard summarising the interim analyses
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
ax_effect, ax_probs = axes[0, 0], axes[0, 1]
ax_posterior, ax_economics = axes[1, 0], axes[1, 1]

# --- Top-left: how the effect estimate evolves across looks ---
n_values = [look['n_per_arm'] for look in trial_results]
effect_means = [look['posterior_effect_mean'] for look in trial_results]
effect_lowers = [look['effect_ci_lower'] for look in trial_results]
effect_uppers = [look['effect_ci_upper'] for look in trial_results]
observed_effects = [look['observed_effect'] for look in trial_results]

ax_effect.plot(n_values, effect_means, 'bo-', linewidth=2, label='Posterior Mean')
ax_effect.fill_between(n_values, effect_lowers, effect_uppers, alpha=0.3, label='95% CI')
ax_effect.plot(n_values, observed_effects, 'rs--', label='Observed Effect')
ax_effect.axhline(0.15, color='green', linestyle=':', label='True Effect')
ax_effect.axhline(0.1, color='red', linestyle='--', label='Clinically Meaningful')
ax_effect.set_xlabel('Sample Size per Arm')
ax_effect.set_ylabel('Treatment Effect')
ax_effect.set_title('Evolution of Treatment Effect Estimate')
ax_effect.legend()
ax_effect.grid(True, alpha=0.3)

# --- Top-right: posterior probabilities at each look ---
prob_positive = [look['prob_positive'] for look in trial_results]
prob_meaningful = [look['prob_meaningful'] for look in trial_results]
prob_superiority = [look['prob_superiority'] for look in trial_results]

ax_probs.plot(n_values, prob_positive, 'b-', linewidth=2, label='P(Effect > 0)')
ax_probs.plot(n_values, prob_meaningful, 'r-', linewidth=2, label='P(Effect > 10%)')
ax_probs.plot(n_values, prob_superiority, 'g-', linewidth=2, label='P(Treatment Superior)')
ax_probs.axhline(0.95, color='black', linestyle='--', alpha=0.5, label='95% Threshold')
ax_probs.set_xlabel('Sample Size per Arm')
ax_probs.set_ylabel('Probability')
ax_probs.set_title('Evolution of Key Probabilities')
ax_probs.legend()
ax_probs.grid(True, alpha=0.3)

# --- Bottom-left: posterior of the effect at the final look ---
effect_samples = final_trace.posterior['treatment_effect'].values.flatten()
posterior_mean_effect = np.mean(effect_samples)
ax_posterior.hist(effect_samples, bins=50, density=True, alpha=0.7, label='Posterior')
ax_posterior.axvline(0.15, color='green', linestyle=':', linewidth=2, label='True Effect')
ax_posterior.axvline(0.1, color='red', linestyle='--', linewidth=2, label='Clinical Threshold')
ax_posterior.axvline(posterior_mean_effect, color='blue', linestyle='-', linewidth=2, label='Posterior Mean')
ax_posterior.set_xlabel('Treatment Effect')
ax_posterior.set_ylabel('Density')
ax_posterior.set_title('Final Posterior Distribution')
ax_posterior.legend()
ax_posterior.grid(True, alpha=0.3)

# --- Bottom-right: cost-benefit analysis on the final posterior ---
cost_per_patient = 1000  # Cost of treatment
benefit_per_success = 5000  # Benefit of successful treatment

p_treatment_samples = final_trace.posterior['p_treatment'].values.flatten()
p_control_samples = final_trace.posterior['p_control'].values.flatten()

# Net benefit = (value of treating, minus its cost) - (value of not treating)
treated_value = p_treatment_samples * benefit_per_success - cost_per_patient
untreated_value = p_control_samples * benefit_per_success
net_benefit_samples = treated_value - untreated_value

prob_profitable = np.mean(net_benefit_samples > 0)
expected_net_benefit = np.mean(net_benefit_samples)

ax_economics.hist(net_benefit_samples, bins=50, density=True, alpha=0.7)
ax_economics.axvline(0, color='red', linestyle='--', linewidth=2, label='Break-even')
ax_economics.axvline(expected_net_benefit, color='blue', linestyle='-',
                     linewidth=2, label='Expected Net Benefit')
ax_economics.set_xlabel('Net Benefit per Patient ($)')
ax_economics.set_ylabel('Density')
ax_economics.set_title(f'Economic Analysis\nP(Profitable) = {prob_profitable:.3f}')
ax_economics.legend()
ax_economics.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Turn the final-look posterior summaries into a go / no-go recommendation
final_result = trial_results[-1]
print("\n".join([
    f"\nFinal Decision Analysis:",
    f"Probability of positive effect: {final_result['prob_positive']:.3f}",
    f"Probability of clinically meaningful effect (>10%): {final_result['prob_meaningful']:.3f}",
    f"Expected net benefit: ${np.mean(net_benefit_samples):.0f} per patient",
    f"Probability of profitability: {prob_profitable:.3f}",
]))

# Strongest case first: clinically meaningful AND economically attractive
meaningful_and_profitable = (
    final_result['prob_meaningful'] > 0.8 and prob_profitable > 0.7
)
if meaningful_and_profitable:
    decision = "PROCEED to Phase III"
else:
    # Otherwise a near-certain positive effect still justifies a larger study
    if final_result['prob_positive'] > 0.9:
        decision = "CONSIDER proceeding with larger study"
    else:
        decision = "DO NOT PROCEED - insufficient evidence"

print(f"\nRecommendation: {decision}")

## 2. Case Study: Marketing Mix Modeling

### Scenario:
A company wants to optimize their marketing spend across different channels:
- TV, Digital, Print advertising
- Account for saturation effects
- Handle correlated media channels
- Make budget allocation decisions

In [None]:
# Synthetic weekly marketing data (seeded for reproducibility)
np.random.seed(42)
n_weeks = 104  # 2 years of weekly data

# Gamma-distributed weekly spend per channel; draw order is TV, digital, print
tv_spend = np.random.gamma(shape=2, scale=50, size=n_weeks)
digital_spend = np.random.gamma(shape=1.5, scale=30, size=n_weeks)
print_spend = np.random.gamma(shape=1, scale=20, size=n_weeks)

# Seasonality: a 52-week sine wave plus a half-amplitude wave at twice the frequency
weeks = np.arange(n_weeks)
annual_phase = 2 * np.pi * weeks / 52
seasonality = 10 * np.sin(annual_phase) + 5 * np.sin(annual_phase * 2)

# Adstock transformation (carryover effects)
def adstock_transform(x, decay_rate):
    """
    Apply a geometric adstock (carry-over) transformation.

    Each output value is the current input plus ``decay_rate`` times the
    previous output: ``out[i] = x[i] + decay_rate * out[i-1]`` with
    ``out[0] = x[0]``.

    Parameters
    ----------
    x : array-like
        Spend series, ordered in time along the first axis.
    decay_rate : float
        Fraction of the previous period's adstock carried into the next.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``x``; empty if ``x`` is empty.
    """
    # Work in float: zeros_like on an integer input would otherwise
    # truncate the fractional carry-over on assignment
    x = np.asarray(x, dtype=float)
    adstocked = np.zeros_like(x)
    if len(x) == 0:
        return adstocked

    adstocked[0] = x[0]
    for i in range(1, len(x)):
        adstocked[i] = x[i] + decay_rate * adstocked[i - 1]
    return adstocked

# Saturation transformation (diminishing returns)
def saturation_transform(x, alpha, gamma):
    """Return alpha * x**gamma / (x**gamma + 1)."""
    powered = x ** gamma
    return alpha * powered / (powered + 1)

# Ground-truth parameters used to simulate sales (unknown to modeler)
true_params = dict(
    base=100,
    tv_coef=0.8,
    digital_coef=1.2,
    print_coef=0.4,
    tv_decay=0.7,
    digital_decay=0.3,
    print_decay=0.5,
    tv_alpha=50,
    tv_gamma=0.5,
    digital_alpha=30,
    digital_gamma=0.7,
    print_alpha=20,
    print_gamma=0.6,
)

# Per channel: carry-over (adstock) first, then diminishing returns (saturation)
tv_adstocked = adstock_transform(tv_spend, true_params['tv_decay'])
tv_saturated = saturation_transform(tv_adstocked, true_params['tv_alpha'], true_params['tv_gamma'])

digital_adstocked = adstock_transform(digital_spend, true_params['digital_decay'])
digital_saturated = saturation_transform(digital_adstocked, true_params['digital_alpha'], true_params['digital_gamma'])

print_adstocked = adstock_transform(print_spend, true_params['print_decay'])
print_saturated = saturation_transform(print_adstocked, true_params['print_alpha'], true_params['print_gamma'])

# Sales = base + seasonality + weighted media effects + Gaussian noise (sd 10)
weekly_noise = np.random.normal(0, 10, n_weeks)
sales = (true_params['base'] + seasonality
         + true_params['tv_coef'] * tv_saturated
         + true_params['digital_coef'] * digital_saturated
         + true_params['print_coef'] * print_saturated
         + weekly_noise)

# Collect the observable series in one table
mmm_data = pd.DataFrame(dict(
    week=weeks,
    sales=sales,
    tv_spend=tv_spend,
    digital_spend=digital_spend,
    print_spend=print_spend,
    seasonality=seasonality,
))

print("Marketing Mix Model Data:")
print(mmm_data.describe())

# Bayesian Marketing Mix Model
# Fits a linear model of weekly sales on raw (untransformed) channel spend.
# NOTE: although the data were generated with adstock, saturation and
# seasonality, the mean `mu` below includes none of them.
with pm.Model() as mmm_model:
    # Prior for baseline sales (no seasonality term is defined in this model)
    base = pm.Normal('base', 100, 20)
    
    # Priors for media coefficients (HalfNormal keeps them non-negative)
    tv_coef = pm.HalfNormal('tv_coef', 2)
    digital_coef = pm.HalfNormal('digital_coef', 2)
    print_coef = pm.HalfNormal('print_coef', 2)
    
    # Priors for adstock decay rates
    # NOTE: declared but never referenced in `mu`, so they are not connected
    # to the likelihood
    tv_decay = pm.Beta('tv_decay', 2, 2)
    digital_decay = pm.Beta('digital_decay', 2, 2)
    print_decay = pm.Beta('print_decay', 2, 2)
    
    # Priors for saturation parameters
    # NOTE: likewise declared but not used in `mu`
    tv_alpha = pm.HalfNormal('tv_alpha', 50)
    tv_gamma = pm.Beta('tv_gamma', 2, 2)
    digital_alpha = pm.HalfNormal('digital_alpha', 30)
    digital_gamma = pm.Beta('digital_gamma', 2, 2)
    print_alpha = pm.HalfNormal('print_alpha', 20)
    print_gamma = pm.Beta('print_gamma', 2, 2)
    
    # Error term: standard deviation of the Normal likelihood
    sigma = pm.HalfNormal('sigma', 20)
    
    # Media transformations (simplified for PyMC)
    # Note: In practice, you'd implement custom functions for adstock and saturation
    
    # Simplified model (linear approximation): sales mean is linear in raw spend
    mu = (base + 
          tv_coef * tv_spend +
          digital_coef * digital_spend +
          print_coef * print_spend)
    
    # Likelihood of the observed weekly sales
    sales_obs = pm.Normal('sales_obs', mu=mu, sigma=sigma, observed=sales)
    
    # Sample (1000 draws requested, fixed seed)
    trace_mmm = pm.sample(1000, return_inferencedata=True, random_seed=42)

# Summarise only the parameters that enter `mu`
print("\nMarketing Mix Model Results:")
print(az.summary(trace_mmm, var_names=['base', 'tv_coef', 'digital_coef', 'print_coef']))

# Media contribution analysis: pool chains and draws into flat sample vectors
mmm_posterior = trace_mmm.posterior
tv_coef_samples = mmm_posterior['tv_coef'].values.flatten()
digital_coef_samples = mmm_posterior['digital_coef'].values.flatten()
print_coef_samples = mmm_posterior['print_coef'].values.flatten()

# Average weekly spend per channel
mean_tv_spend = np.mean(tv_spend)
mean_digital_spend = np.mean(digital_spend)
mean_print_spend = np.mean(print_spend)

# Contribution = posterior-mean coefficient x average weekly spend
tv_contribution = np.mean(tv_coef_samples) * mean_tv_spend
digital_contribution = np.mean(digital_coef_samples) * mean_digital_spend
print_contribution = np.mean(print_coef_samples) * mean_print_spend
total_media_contribution = tv_contribution + digital_contribution + print_contribution

# ROI = contribution per unit of average spend
tv_roi = tv_contribution / mean_tv_spend
digital_roi = digital_contribution / mean_digital_spend
print_roi = print_contribution / mean_print_spend

print("\n".join([
    f"\nMedia Contribution Analysis:",
    f"TV: {tv_contribution:.1f} sales units (ROI: {tv_roi:.2f})",
    f"Digital: {digital_contribution:.1f} sales units (ROI: {digital_roi:.2f})",
    f"Print: {print_contribution:.1f} sales units (ROI: {print_roi:.2f})",
]))

# Visualization: 2x3 dashboard for the marketing mix model
fig, axes = plt.subplots(2, 3, figsize=(18, 10))

# Posterior-mean baseline, reused by the fitted line, budget curve and waterfall
base_sales = np.mean(trace_mmm.posterior['base'].values)

# --- Time series: actual sales vs fitted values at posterior means ---
axes[0,0].plot(mmm_data['week'], mmm_data['sales'], 'b-', alpha=0.7, label='Actual Sales')
# Fitted values (simplified: point estimate, no posterior uncertainty)
fitted = (base_sales +
          np.mean(tv_coef_samples) * tv_spend +
          np.mean(digital_coef_samples) * digital_spend +
          np.mean(print_coef_samples) * print_spend)
axes[0,0].plot(mmm_data['week'], fitted, 'r--', alpha=0.7, label='Fitted')
axes[0,0].set_xlabel('Week')
axes[0,0].set_ylabel('Sales')
axes[0,0].set_title('Sales vs Fitted Values')
axes[0,0].legend()
axes[0,0].grid(True, alpha=0.3)

# --- Media spend correlation heatmap ---
spend_corr = mmm_data[['tv_spend', 'digital_spend', 'print_spend']].corr()
sns.heatmap(spend_corr, annot=True, cmap='coolwarm', center=0, ax=axes[0,1])
axes[0,1].set_title('Media Spend Correlations')

# --- ROI comparison ---
channels = ['TV', 'Digital', 'Print']
rois = [tv_roi, digital_roi, print_roi]
colors = ['blue', 'green', 'orange']

bars = axes[0,2].bar(channels, rois, color=colors, alpha=0.7)
axes[0,2].set_ylabel('ROI (Sales per $ Spent)')
axes[0,2].set_title('Return on Investment by Channel')
axes[0,2].grid(True, alpha=0.3)

# Add ROI values on bars
for bar, roi in zip(bars, rois):
    height = bar.get_height()
    axes[0,2].text(bar.get_x() + bar.get_width()/2., height + 0.01,
                  f'{roi:.2f}', ha='center', va='bottom')

# --- Posterior distributions of coefficients ---
# density=True so the y-axis really is a density, matching its label
axes[1,0].hist(tv_coef_samples, bins=30, density=True, alpha=0.7, label='TV', color='blue')
axes[1,0].hist(digital_coef_samples, bins=30, density=True, alpha=0.7, label='Digital', color='green')
axes[1,0].hist(print_coef_samples, bins=30, density=True, alpha=0.7, label='Print', color='orange')
axes[1,0].set_xlabel('Coefficient Value')
axes[1,0].set_ylabel('Density')
axes[1,0].set_title('Posterior Distributions of Media Coefficients')
axes[1,0].legend()
axes[1,0].grid(True, alpha=0.3)

# --- Budget optimization simulation ---
total_budget = np.mean(tv_spend + digital_spend + print_spend)
budget_scenarios = np.linspace(0.5, 2.0, 20)  # 50% to 200% of current budget

# Allocation rule (simplified): split the budget proportionally to ROI.
# The ROI total does not depend on the scenario, so compute it once.
total_roi = tv_roi + digital_roi + print_roi

expected_sales = []
for budget_mult in budget_scenarios:
    new_budget = total_budget * budget_mult

    # Allocate proportionally to ROI
    tv_allocation = new_budget * (tv_roi / total_roi)
    digital_allocation = new_budget * (digital_roi / total_roi)
    print_allocation = new_budget * (print_roi / total_roi)

    # Expected sales (linear approximation at posterior means)
    expected_sales_value = (base_sales +
                           np.mean(tv_coef_samples) * tv_allocation +
                           np.mean(digital_coef_samples) * digital_allocation +
                           np.mean(print_coef_samples) * print_allocation)
    expected_sales.append(expected_sales_value)

axes[1,1].plot(budget_scenarios, expected_sales, 'b-', linewidth=2)
axes[1,1].axvline(1.0, color='red', linestyle='--', label='Current Budget')
axes[1,1].set_xlabel('Budget Multiplier')
axes[1,1].set_ylabel('Expected Sales')
axes[1,1].set_title('Budget Optimization')
axes[1,1].legend()
axes[1,1].grid(True, alpha=0.3)

# --- Contribution chart: base, each channel, and their total ---
contributions = [tv_contribution, digital_contribution, print_contribution]

x_pos = range(len(channels) + 2)
values = [base_sales] + contributions + [base_sales + sum(contributions)]
labels = ['Base'] + channels + ['Total']

colors_waterfall = ['gray'] + colors + ['black']
bars = axes[1,2].bar(x_pos, values, color=colors_waterfall, alpha=0.7)

# Add value labels
for bar, value in zip(bars, values):
    height = bar.get_height()
    axes[1,2].text(bar.get_x() + bar.get_width()/2., height + 5,
                  f'{value:.0f}', ha='center', va='bottom')

axes[1,2].set_xticks(x_pos)
axes[1,2].set_xticklabels(labels)
axes[1,2].set_ylabel('Sales Contribution')
axes[1,2].set_title('Sales Contribution Waterfall')
axes[1,2].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Strategic recommendations
# Pick the best and worst channel from the estimated ROIs instead of
# assuming which channel wins, so the text always matches the fitted model
roi_by_channel = {'TV': tv_roi, 'Digital': digital_roi, 'Print': print_roi}
best_channel = max(roi_by_channel, key=roi_by_channel.get)
worst_channel = min(roi_by_channel, key=roi_by_channel.get)

print(f"\nStrategic Recommendations:")
print(f"1. {best_channel} has highest ROI ({roi_by_channel[best_channel]:.2f}) - consider increasing allocation")
print(f"2. {worst_channel} has lowest ROI ({roi_by_channel[worst_channel]:.2f}) - consider reducing allocation")
print(f"3. Current total media contribution: {total_media_contribution:.0f} sales units")
print(f"4. Media accounts for {total_media_contribution/np.mean(sales)*100:.1f}% of total sales")

## 3. Best Practices for Bayesian Applications

### When to Use Bayesian Methods:
1. **Prior information available**: Expert knowledge, historical data
2. **Small sample sizes**: Priors stabilize estimates, and the posterior reports the remaining uncertainty explicitly
3. **Sequential decision making**: Clinical trials, A/B testing
4. **Complex hierarchical structure**: Multi-level data
5. **Missing data**: Natural imputation framework
6. **Uncertainty quantification**: Need full posterior distributions

### Communication Guidelines:
1. **Focus on probabilities**: "95% probability that..."
2. **Use credible intervals**: They directly state the probability that the parameter lies in the interval, which is more intuitive than a confidence interval
3. **Visualize uncertainty**: Show full distributions when possible
4. **Explain prior assumptions**: Be transparent about assumptions
5. **Sensitivity analysis**: Show robustness to prior choices

In [None]:
# Example: Communicating Bayesian Results
def create_bayesian_report(trace, parameter_name, true_value=None, 
                          threshold=None, context="parameter"):
    """
    Print and plot a summary report for one parameter's posterior samples.

    Parameters
    ----------
    trace
        Object exposing ``trace.posterior[parameter_name].values``; all
        values are flattened into a single sample vector.
    parameter_name : str
        Name of the variable to look up in ``trace.posterior``.
    true_value : float, optional
        Known true value. When given, the report states whether the 95%
        interval covers it and how far it is from the posterior mean, and
        marks it on the histogram.
    threshold : float, optional
        Decision threshold. When given, the report prints the posterior
        probabilities above and below it and marks it on both plots.
    context : str
        Free-text description printed in the report header.

    Returns
    -------
    dict
        Keys ``'mean'``, ``'median'``, ``'std'``, ``'ci_95'``, ``'ci_90'``
        and ``'ci_50'``; the intervals are equal-tailed percentile pairs.
    """
    samples = trace.posterior[parameter_name].values.flatten()
    
    # Summary statistics
    mean_val = np.mean(samples)
    median_val = np.median(samples)
    std_val = np.std(samples)
    ci_95 = np.percentile(samples, [2.5, 97.5])
    ci_90 = np.percentile(samples, [5, 95])
    ci_50 = np.percentile(samples, [25, 75])
    
    print(f"\n=== Bayesian Analysis Report: {parameter_name} ===")
    print(f"Context: {context}")
    print(f"\nPosterior Summary:")
    print(f"  Mean: {mean_val:.4f}")
    print(f"  Median: {median_val:.4f}")
    print(f"  Standard Deviation: {std_val:.4f}")
    print(f"\nCredible Intervals:")
    print(f"  50%: [{ci_50[0]:.4f}, {ci_50[1]:.4f}]")
    print(f"  90%: [{ci_90[0]:.4f}, {ci_90[1]:.4f}]")
    print(f"  95%: [{ci_95[0]:.4f}, {ci_95[1]:.4f}]")
    
    # Probability statements: share of samples on each side of the threshold
    if threshold is not None:
        prob_above = np.mean(samples > threshold)
        prob_below = np.mean(samples < threshold)
        print(f"\nProbability Statements:")
        print(f"  P({parameter_name} > {threshold}) = {prob_above:.3f}")
        print(f"  P({parameter_name} < {threshold}) = {prob_below:.3f}")
    
    # Comparison with true value if available
    if true_value is not None:
        prob_covers = (ci_95[0] <= true_value <= ci_95[1])  # boolean, not a probability
        distance_from_mean = abs(mean_val - true_value)
        print(f"\nValidation (True value = {true_value}):")
        print(f"  95% CI covers true value: {prob_covers}")
        print(f"  Distance from posterior mean: {distance_from_mean:.4f}")
    
    # Visualization: histogram on the left, empirical CDF on the right
    fig, axes = plt.subplots(1, 2, figsize=(12, 4))
    
    # Posterior distribution
    axes[0].hist(samples, bins=50, density=True, alpha=0.7, color='skyblue')
    axes[0].axvline(mean_val, color='red', linestyle='-', linewidth=2, label=f'Mean = {mean_val:.3f}')
    axes[0].axvline(median_val, color='green', linestyle='--', linewidth=2, label=f'Median = {median_val:.3f}')
    
    # Credible intervals
    axes[0].axvspan(ci_95[0], ci_95[1], alpha=0.2, color='red', label='95% CI')
    axes[0].axvspan(ci_50[0], ci_50[1], alpha=0.3, color='orange', label='50% CI')
    
    if true_value is not None:
        axes[0].axvline(true_value, color='black', linestyle=':', linewidth=2, label=f'True = {true_value}')
    
    if threshold is not None:
        axes[0].axvline(threshold, color='purple', linestyle='-.', linewidth=2, label=f'Threshold = {threshold}')
    
    axes[0].set_xlabel(parameter_name)
    axes[0].set_ylabel('Density')
    axes[0].set_title('Posterior Distribution')
    axes[0].legend()
    axes[0].grid(True, alpha=0.3)
    
    # Cumulative distribution: sorted samples against their rank fraction
    sorted_samples = np.sort(samples)
    cumulative_prob = np.arange(1, len(sorted_samples) + 1) / len(sorted_samples)
    
    axes[1].plot(sorted_samples, cumulative_prob, 'b-', linewidth=2)
    axes[1].axhline(0.5, color='green', linestyle='--', alpha=0.7, label='Median')
    axes[1].axhline(0.025, color='red', linestyle=':', alpha=0.7, label='2.5%')
    axes[1].axhline(0.975, color='red', linestyle=':', alpha=0.7, label='97.5%')
    
    if threshold is not None:
        prob_at_threshold = np.mean(samples <= threshold)
        axes[1].axvline(threshold, color='purple', linestyle='-.', linewidth=2)
        axes[1].axhline(prob_at_threshold, color='purple', linestyle='-.', alpha=0.7)
        # "\u2264" is the less-than-or-equal sign, written as an escape so the
        # annotation survives any re-encoding of the source file
        axes[1].text(threshold, prob_at_threshold + 0.05, 
                    f'P \u2264 {threshold} = {prob_at_threshold:.3f}', 
                    ha='center', va='bottom')
    
    axes[1].set_xlabel(parameter_name)
    axes[1].set_ylabel('Cumulative Probability')
    axes[1].set_title('Cumulative Distribution Function')
    axes[1].legend()
    axes[1].grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()
    
    return {
        'mean': mean_val,
        'median': median_val,
        'std': std_val,
        'ci_95': ci_95,
        'ci_90': ci_90,
        'ci_50': ci_50
    }

# Report on the treatment effect from the final clinical-trial trace,
# checked against the simulated true effect (0.15) and the 0.10 threshold
treatment_effect_report = create_bayesian_report(
    trace=final_trace,
    parameter_name='treatment_effect',
    true_value=0.15,
    threshold=0.1,
    context="Treatment effect in clinical trial (absolute difference in response rates)",
)

## Course Summary

### What We've Learned:

1. **Foundations**: Bayes' theorem, prior/posterior concepts
2. **Prior Selection**: Conjugate, non-informative, elicitation
3. **Decision Theory**: Loss functions, point estimation, intervals
4. **Model Comparison**: Bayes factors, model averaging
5. **Regression**: Linear, logistic, Poisson models
6. **Hierarchical Models**: Partial pooling, missing data
7. **MCMC**: Sampling algorithms, diagnostics
8. **Applications**: Real-world case studies

### Key Advantages of Bayesian Approach:
- **Intuitive probability interpretation**
- **Incorporates prior knowledge**
- **Quantifies uncertainty naturally**
- **Handles complex models flexibly**
- **Provides decision-theoretic framework**

### Next Steps:
1. **Practice with real data**: Apply methods to your domain
2. **Learn advanced topics**: Gaussian processes, variational inference
3. **Explore specialized packages**: Stan, JAGS, TensorFlow Probability
4. **Study computational methods**: Variational Bayes, approximate methods
5. **Read current literature**: Bayesian workflow, model checking

### Resources for Continued Learning:
- **Books**: Gelman et al. "Bayesian Data Analysis", McElreath "Statistical Rethinking"
- **Software**: PyMC, Stan, ArviZ
- **Communities**: PyMC Discourse, Stan Forums
- **Courses**: Advanced Bayesian methods, computational statistics

**Remember**: Bayesian statistics is as much about thinking probabilistically as it is about computation. The key is to start simple, build intuition, and gradually tackle more complex problems.