# Parameterized Capacity Planning Report

**Template for Papermill Execution**

This notebook is designed to be executed with different parameters using papermill.

## Usage
```bash
papermill template_capacity_report.ipynb output_report.ipynb -p TIME_HORIZON_DAYS 60 -p N_SIMULATIONS 10000
```

## Parameters Cell (Tagged for Papermill)

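The cell below is tagged with `parameters`. When the notebook is run through papermill, a new cell containing the values passed with `-p` is injected directly after it, overriding the defaults defined here.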

In [None]:
# --- Papermill Parameters ---
# Defaults for a manual run. When the notebook is executed via papermill,
# values passed with `-p NAME VALUE` override the assignments below.
# Keep this cell to plain literal assignments.

# Simulation Parameters
TIME_HORIZON_DAYS = 30          # Number of days simulated in each run
N_SIMULATIONS = 5000            # Number of independent Monte Carlo runs
RANDOM_SEED = 42                # Passed to np.random.seed for reproducibility

# Machine Configuration
BASE_THROUGHPUT = 100           # Wafers per hour base rate (scaled by 24 hours per simulated day)
EFFICIENCY_MEAN = 0.90          # Mean of the normally distributed daily efficiency
EFFICIENCY_STD = 0.03           # Standard deviation of the daily efficiency
DOWNTIME_PROB = 0.03            # Probability that a given day produces no output

# Report Configuration
REPORT_TITLE = "Capacity Planning Report"   # Shown in the printed header and the figure title
FABRICATION_LINE = "Line-A"                 # Line label; also used in the output file names
TARGET_OUTPUT = 50000           # Target wafers for the whole simulated period

# Output Paths
OUTPUT_DIR = "../reports"       # Folder for plots and exports; created if it does not exist
SAVE_PLOTS = True               # When True, the summary figure is written to OUTPUT_DIR as a PNG

## 1. Setup and Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import os
import json

# Seed NumPy's global generator so every execution draws the same random numbers
np.random.seed(RANDOM_SEED)

# Shared look for all figures in the report
sns.set_style("whitegrid")
plt.rcParams.update({
    'figure.figsize': (12, 6),
    'font.size': 10,
})

# Make sure the folder for plots and exports is available
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Report header
print(
    f"Report: {REPORT_TITLE}",
    f"Fabrication Line: {FABRICATION_LINE}",
    f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
    f"Parameters: {TIME_HORIZON_DAYS} days, {N_SIMULATIONS:,} simulations",
    sep="\n",
)

## 2. Monte Carlo Simulation

In [None]:
def run_simulation(days, n_sims, base_throughput, eff_mean, eff_std, downtime_prob,
                   hours_per_day=24, eff_bounds=(0.3, 1.0), output_noise_std=0.02,
                   rng=None):
    """
    Run Monte Carlo simulation for capacity planning.

    For every simulated day one uniform draw decides whether the machine is
    down (output 0). Otherwise a normally distributed efficiency is drawn,
    clipped to ``eff_bounds``, and the day's output is
    ``base_throughput * efficiency * hours_per_day`` multiplied by a normal
    noise factor centred on 1.0.

    Args:
        days: Number of days per simulation (>= 0).
        n_sims: Number of simulations (>= 0).
        base_throughput: Output per hour at 100% efficiency.
        eff_mean: Mean of the efficiency distribution.
        eff_std: Standard deviation of the efficiency distribution.
        downtime_prob: Probability in [0, 1] that a day has no output.
        hours_per_day: Productive hours per day.
        eff_bounds: ``(low, high)`` limits the drawn efficiency is clipped to.
        output_noise_std: Standard deviation of the multiplicative output noise.
        rng: Optional random source exposing ``random()`` and
            ``normal(loc, scale)`` (e.g. ``np.random.default_rng(seed)``).
            When omitted, NumPy's global generator is used, so results are
            controlled by ``np.random.seed``.

    Returns:
        results: 1D float array of total output for each simulation, shape (n_sims,)
        daily_outputs: 2D float array of daily outputs, shape (n_sims, days)

    Raises:
        ValueError: If ``days`` or ``n_sims`` is negative, or ``downtime_prob``
            lies outside [0, 1].
    """
    if days < 0 or n_sims < 0:
        raise ValueError("days and n_sims must be non-negative")
    if not 0.0 <= downtime_prob <= 1.0:
        raise ValueError("downtime_prob must be between 0 and 1")

    eff_low, eff_high = eff_bounds
    source = np.random if rng is None else rng

    # Pre-allocated so the shape is (n_sims, days) even when either is zero;
    # down days simply keep their initial 0.0.
    daily_outputs = np.zeros((n_sims, days), dtype=float)
    results = np.zeros(n_sims, dtype=float)

    for sim in range(n_sims):
        total = 0.0
        for day in range(days):
            # Draw order per day (downtime, efficiency, noise) is fixed so a
            # given seed always yields the same sequence of outcomes.
            if source.random() < downtime_prob:
                continue

            efficiency = np.clip(source.normal(eff_mean, eff_std), eff_low, eff_high)

            # Daily output: throughput * efficiency * hours * variation
            day_output = base_throughput * efficiency * hours_per_day
            day_output *= source.normal(1.0, output_noise_std)

            daily_outputs[sim, day] = day_output
            total += day_output

        results[sim] = total

    return results, daily_outputs

# Execute the Monte Carlo run with the notebook parameters
print(f"Running {N_SIMULATIONS:,} simulations...")
results, daily_data = run_simulation(
    days=TIME_HORIZON_DAYS,
    n_sims=N_SIMULATIONS,
    base_throughput=BASE_THROUGHPUT,
    eff_mean=EFFICIENCY_MEAN,
    eff_std=EFFICIENCY_STD,
    downtime_prob=DOWNTIME_PROB,
)
print("✓ Simulation complete")

## 3. Statistical Analysis

In [None]:
# Summary statistics of the total output per simulation (wafers over the horizon)
stats = {
    'mean': np.mean(results),
    'median': np.median(results),
    'std': np.std(results),
    'min': np.min(results),
    'max': np.max(results),
    'p5': np.percentile(results, 5),
    'p10': np.percentile(results, 10),
    'p90': np.percentile(results, 90),
    'p95': np.percentile(results, 95),
    'p99': np.percentile(results, 99)
}

# Share of simulations (in percent) whose total output reaches the target
prob_meeting_target = (results >= TARGET_OUTPUT).mean() * 100

# One row per metric. Passing a dict of equal-length lists gives the intended
# two-column table; a list holding a single dict would put each whole list
# into one cell.
stats_df = pd.DataFrame({
    'Metric': list(stats.keys()),
    'Value (wafers)': [f"{v:,.0f}" for v in stats.values()]
})

print("\n=== Statistical Summary ===")
print(stats_df.to_string(index=False))

print(f"\n=== Target Analysis ===")
print(f"Target Output: {TARGET_OUTPUT:,} wafers")
print(f"Probability of Meeting Target: {prob_meeting_target:.1f}%")

# Mean gap to target over the simulations that miss it (negative = below target).
# Guarded because the mean of an empty selection is NaN and emits a RuntimeWarning.
below_target = results[results < TARGET_OUTPUT]
if below_target.size:
    expected_shortfall = np.mean(below_target) - TARGET_OUTPUT
    print(f"Expected Shortfall (if below target): {expected_shortfall:,.0f} wafers")
else:
    print("Expected Shortfall (if below target): n/a (no simulation fell below target)")

## 4. Visualizations

In [None]:
# 2x2 summary figure: histogram, box plot, cumulative distribution and daily trend
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Histogram with percentiles
ax1 = axes[0, 0]
ax1.hist(results, bins=50, color='steelblue', edgecolor='white', alpha=0.8)
ax1.axvline(stats['mean'], color='red', linestyle='--', linewidth=2, label=f"Mean: {stats['mean']:,.0f}")
ax1.axvline(stats['p5'], color='orange', linestyle=':', linewidth=2, label=f"P5: {stats['p5']:,.0f}")
ax1.axvline(stats['p95'], color='orange', linestyle=':', linewidth=2, label=f"P95: {stats['p95']:,.0f}")
ax1.axvline(TARGET_OUTPUT, color='green', linestyle='-', linewidth=2, label=f"Target: {TARGET_OUTPUT:,}")
ax1.set_xlabel('Total Throughput (wafers)')
ax1.set_ylabel('Frequency')
ax1.set_title(f'Throughput Distribution ({N_SIMULATIONS:,} simulations)')
ax1.legend()

# Box plot (vertical orientation is the default)
ax2 = axes[0, 1]
bp = ax2.boxplot(results, patch_artist=True,
                 boxprops=dict(facecolor='lightblue', alpha=0.7))
ax2.set_ylabel('Throughput (wafers)')
ax2.set_title('Distribution Summary')
ax2.set_xticklabels([FABRICATION_LINE])

# CDF
ax3 = axes[1, 0]
sorted_results = np.sort(results)
cdf = np.arange(1, len(sorted_results) + 1) / len(sorted_results)
# The CDF gives P(output <= x), so at the target its height is the share of
# simulations that miss the target, not the share that meet it.
risk_pct = 100 - prob_meeting_target
# Start the shaded band at the smallest simulated value (or the target, if lower)
# so the x-axis is not stretched down to zero.
shade_left = min(sorted_results[0], TARGET_OUTPUT)
ax3.plot(sorted_results, cdf * 100, color='purple', linewidth=2)
ax3.axvline(TARGET_OUTPUT, color='green', linestyle='--', label=f'Target: {TARGET_OUTPUT:,}')
ax3.axhline(risk_pct, color='green', linestyle=':', alpha=0.5)
ax3.fill_betweenx([0, risk_pct], shade_left, TARGET_OUTPUT, alpha=0.2, color='red', label=f'Risk: {risk_pct:.1f}%')
ax3.set_xlabel('Throughput (wafers)')
ax3.set_ylabel('Cumulative Probability (%)')
ax3.set_title('Cumulative Distribution')
ax3.legend()
ax3.grid(True, alpha=0.3)

# Daily variability: mean and spread across simulations for each day
ax4 = axes[1, 1]
daily_means = np.mean(daily_data, axis=0)
daily_std = np.std(daily_data, axis=0)
days = range(1, TIME_HORIZON_DAYS + 1)
ax4.plot(days, daily_means, color='blue', label='Mean Daily Output')
ax4.fill_between(days, daily_means - daily_std, daily_means + daily_std, alpha=0.3, label='±1 Std Dev')
ax4.set_xlabel('Day')
ax4.set_ylabel('Daily Output (wafers)')
ax4.set_title('Daily Output Trend')
ax4.legend()

plt.suptitle(f'{REPORT_TITLE} - {FABRICATION_LINE}', fontsize=14, fontweight='bold')
plt.tight_layout()

# Save before plt.show() so the written file contains the rendered figure
if SAVE_PLOTS:
    plot_path = os.path.join(OUTPUT_DIR, f"capacity_report_{FABRICATION_LINE.lower().replace(' ', '_')}_{datetime.now().strftime('%Y%m%d')}.png")
    plt.savefig(plot_path, dpi=150, bbox_inches='tight')
    print(f"Plot saved: {plot_path}")

plt.show()

## 5. Risk Analysis

In [None]:
# Downside risk, measured as distance below the mean outcome
tail_outcomes = results[results <= stats['p5']]      # worst 5% of simulations
var_95 = stats['mean'] - stats['p5']                 # Value at Risk at 95% confidence
cvar_95 = stats['mean'] - np.mean(tail_outcomes)     # Conditional VaR: mean of the tail

print("=== Risk Metrics ===")
print(f"Value at Risk (95%): {var_95:,.0f} wafers")
print(f"Conditional VaR (95%): {cvar_95:,.0f} wafers")
print(f"Coefficient of Variation: {(stats['std']/stats['mean'])*100:.1f}%")

# Three reference outcomes compared against the target
scenarios = {
    'Conservative (P10)': stats['p10'],
    'Expected (Mean)': stats['mean'],
    'Optimistic (P90)': stats['p90']
}

print("\n=== Scenario Analysis ===")
for label, level in scenarios.items():
    delta = level - TARGET_OUTPUT
    if delta >= 0:
        verdict = "✓ Met"
    else:
        verdict = "✗ Miss"
    print(f"{label}: {level:,.0f} wafers ({delta:+,.0f} vs target) {verdict}")

## 6. Export Results

In [None]:
# Take one timestamp and build one file-name stem so the JSON metadata and
# both export files always agree, even if the cell runs across midnight.
export_time = datetime.now()
file_stem = f"{FABRICATION_LINE.lower().replace(' ', '_')}_{export_time.strftime('%Y%m%d')}"

# Save detailed results
report_data = {
    'metadata': {
        'title': REPORT_TITLE,
        'fabrication_line': FABRICATION_LINE,
        'generated_at': export_time.isoformat(),
        # Every input of the simulation is recorded so the run can be reproduced
        'parameters': {
            'time_horizon_days': TIME_HORIZON_DAYS,
            'n_simulations': N_SIMULATIONS,
            'random_seed': RANDOM_SEED,
            'base_throughput': BASE_THROUGHPUT,
            'efficiency_mean': EFFICIENCY_MEAN,
            'efficiency_std': EFFICIENCY_STD,
            'downtime_prob': DOWNTIME_PROB,
            'target_output': TARGET_OUTPUT
        }
    },
    # NumPy scalars are converted to plain floats so json can serialise them
    'statistics': {k: float(v) for k, v in stats.items()},
    'risk_metrics': {
        'prob_meeting_target': float(prob_meeting_target),
        'var_95': float(var_95),
        'cvar_95': float(cvar_95)
    }
}

json_path = os.path.join(OUTPUT_DIR, f"capacity_report_{file_stem}.json")
with open(json_path, 'w', encoding='utf-8') as f:
    json.dump(report_data, f, indent=2)

print(f"Report data saved: {json_path}")

# Summary CSV: one row per simulation with its total output
summary_csv = pd.DataFrame({
    'simulation_id': range(1, len(results) + 1),
    'total_output': results
})
csv_path = os.path.join(OUTPUT_DIR, f"simulation_results_{file_stem}.csv")
summary_csv.to_csv(csv_path, index=False)
print(f"Simulation results saved: {csv_path}")

---

*Report generated by parameterized notebook template*

**Next Steps:**
1. Convert this notebook to HTML: `jupyter nbconvert --to html output_report.ipynb`
2. Schedule automated runs with different parameters
3. Compare multiple scenarios side-by-side