# 🔮 PROPHET AUTOMATED TRAINING PIPELINE
## Comprehensive Multi-Resolution Forecasting with Auto-Benchmark

### 🚀 AUTO-RUN CONFIGURATION:
**Just click "Run All" and the notebook will automatically:**
1. Train Prophet models for ALL resolutions (1min, 5min, 15min)
2. Forecast BOTH targets (request_count, total_bytes)
3. Perform hyperparameter tuning for each configuration
4. Generate comprehensive benchmarks
5. Save all predictions and parameters
6. Create comparative visualizations

### 📊 Total Configurations:
- **3 resolutions** × **2 targets** = **6 model trainings**
- Each with hyperparameter tuning
- Expected runtime: **20-40 minutes** (CPU)

### 📁 Output Structure:
```
RESULTS_PROPHET/
├── 1min_request_count/
│   ├── predictions.csv
│   ├── metrics.csv
│   ├── best_parameters.csv
│   └── evaluation.png
├── 1min_total_bytes/
├── 5min_request_count/
├── ... (6 configurations total)
└── FINAL_BENCHMARK/
    ├── comprehensive_comparison.csv
    ├── final_report.txt
    └── benchmark_visualizations.png
```

In [None]:
# ===========================
# CELL 1: MOUNT GOOGLE DRIVE
# ===========================

# Attach Google Drive to this runtime; the data and results paths used by
# the rest of the notebook live under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

print("‚úì Google Drive mounted successfully")

In [None]:
# ===========================
# CELL 2: SETUP & INSTALLATIONS
# ===========================

!pip install prophet scikit-learn -q

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
warnings.filterwarnings('ignore')
import os
from datetime import datetime
import time
from typing import Dict, List

# Prophet
from prophet import Prophet

# Fix NumPy's global random seed so NumPy-based randomness is repeatable.
np.random.seed(42)

# Plot defaults shared by every figure drawn later in the notebook.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("husl")
plt.rcParams.update({'figure.figsize': (18, 6)})

# Start-up banner with the launch timestamp.
_banner_rule = "=" * 70
_started_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(_banner_rule)
print("PROPHET AUTOMATED TRAINING PIPELINE")
print(_banner_rule)
print(f"  Start time: {_started_at}")
print(_banner_rule)

In [None]:
# ===========================
# CELL 3: GLOBAL CONFIGURATION
# ===========================

# Google Drive folders: preprocessed inputs, and the root for everything
# this notebook writes.
DATA_DIR = '/content/drive/MyDrive/AUTOSCALING ANALYSIS/PROCESSED_DATAFINAL'
RESULTS_BASE_DIR = '/content/drive/MyDrive/AUTOSCALING ANALYSIS/RESULTS_PROPHET'

# Make sure the results root exists before any cell tries to write into it.
os.makedirs(RESULTS_BASE_DIR, exist_ok=True)

# Every (resolution, target) pair is trained as its own configuration.
RESOLUTIONS = ['1min', '5min', '15min']
TARGETS = ['request_count', 'total_bytes']

# Grid searched by the tuning step (full cartesian product of the lists).
PARAM_GRID = dict(
    changepoint_prior_scale=[0.5, 1, 5, 10],
    seasonality_prior_scale=[1, 10, 30, 50],
    seasonality_mode=['additive', 'multiplicative'],
)

# Storm/outage window passed to Prophet as a "holiday" (adjust dates if needed).
# NOTE(review): rows are generated hourly; Prophet appears to match holidays
# by calendar date, so daily rows may be sufficient — confirm.
_storm_hours = pd.date_range(
    start='1995-08-01 14:52:01',
    end='1995-08-03 04:36:13',
    freq='h',
)
STORM_HOLIDAY = pd.DataFrame({
    'holiday': 'storm_outage',
    'ds': _storm_hours,
    'lower_window': 0,
    'upper_window': 0,
})

# Sizes reported in the configuration summary below.
_n_configurations = len(RESOLUTIONS) * len(TARGETS)
_n_param_combos = 1
for _candidates in PARAM_GRID.values():
    _n_param_combos *= len(_candidates)

print("\nüìã CONFIGURATION LOADED:")
print(f"  Resolutions: {RESOLUTIONS}")
print(f"  Targets: {TARGETS}")
print(f"  Total configurations: {_n_configurations}")
print(f"  Hyperparameter combinations: {_n_param_combos}")
print(f"\n  Data directory: {DATA_DIR}")
print(f"  Results directory: {RESULTS_BASE_DIR}")

In [None]:
# ===========================
# CELL 4: UTILITY FUNCTIONS
# ===========================

def prepare_prophet_data(df, target_col):
    """
    Build the frame Prophet is fitted on from a time-indexed DataFrame.

    The result keeps ``df``'s index and has the columns ``ds`` (the index
    values), ``y`` (``df[target_col]``) and three calendar regressors
    derived from ``ds``: ``hour``, ``day_of_week`` and ``is_weekend``.
    The ``.dt`` accessor is used on ``ds``, so the index must be datetime-like.
    """
    frame = pd.DataFrame({'ds': df.index, 'y': df[target_col]})

    # Calendar features are a pure function of the timestamp, so they are
    # available for future rows as well as historical ones.
    stamp = frame['ds'].dt
    frame = frame.assign(hour=stamp.hour, day_of_week=stamp.dayofweek)

    # pandas numbers Monday as 0, so 5 and 6 are Saturday and Sunday.
    weekend_flag = frame['day_of_week'] >= 5
    frame['is_weekend'] = weekend_flag.astype(int)

    return frame


def calculate_metrics(y_true, y_pred, model_name="Model"):
    """
    Compute point-forecast error metrics for one model.

    Args:
        y_true: Observed values (any 1-D array-like: list, ndarray, Series).
        y_pred: Predicted values, same length as ``y_true``. Values are
            paired by position, not by index label.
        model_name: Label stored under the ``'Model'`` key.

    Returns:
        dict with the keys ``'Model'``, ``'MAE'``, ``'MSE'``, ``'RMSE'``,
        ``'MAPE (%)'`` and the R-squared key, in that order.
        MAPE is computed only over positions where ``y_true`` is non-zero
        and is 0 when there are none. R-squared is ``nan`` when ``y_true``
        has zero variance, because the score is undefined there.

    Raises:
        ValueError: if the inputs are empty, differ in length, or contain
            NaN/inf.
    """
    # Coerce once so lists work and pandas objects cannot be misaligned by
    # index labels when the MAPE mask is applied.
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same length, "
            f"got {y_true.size} and {y_pred.size}"
        )
    if y_true.size == 0:
        raise ValueError("y_true and y_pred must not be empty")
    if not (np.isfinite(y_true).all() and np.isfinite(y_pred).all()):
        raise ValueError("y_true and y_pred must not contain NaN or infinity")

    # Every metric below is derived from the same residual vector.
    residuals = y_true - y_pred
    mae = float(np.mean(np.abs(residuals)))
    mse = float(np.mean(residuals ** 2))
    rmse = float(np.sqrt(mse))

    # MAPE (avoid division by zero by skipping zero-valued actuals)
    nonzero = y_true != 0
    if nonzero.any():
        mape = float(np.mean(np.abs(residuals[nonzero] / y_true[nonzero])) * 100)
    else:
        mape = 0

    # R-squared; a constant target makes the denominator zero, which would
    # otherwise yield -inf/nan and distort any later averaging.
    ss_res = float(np.sum(residuals ** 2))
    ss_tot = float(np.sum((y_true - y_true.mean()) ** 2))
    r2 = 1 - ss_res / ss_tot if ss_tot > 0 else float('nan')

    metrics = {
        'Model': model_name,
        'MAE': mae,
        'MSE': mse,
        'RMSE': rmse,
        'MAPE (%)': mape,
        'R¬≤': r2
    }

    return metrics


def tune_prophet_hyperparameters(train_data, param_grid, verbose=False):
    """
    Grid-search Prophet hyperparameters on a chronological hold-out split.

    The last 20% of ``train_data`` rows is held out for validation and the
    model is fitted on the rows before it. Every combination of
    ``changepoint_prior_scale`` x ``seasonality_prior_scale`` x
    ``seasonality_mode`` from ``param_grid`` is scored by validation MAE.

    Args:
        train_data: Frame with ``ds``, ``y``, ``hour``, ``day_of_week`` and
            ``is_weekend`` columns, in chronological order.
        param_grid: dict with the three keys named above, each a list.
        verbose: When True, print the number of combinations and the error
            of any combination that fails.

    Returns:
        ``(best_params, best_mae, results)`` where ``results`` is a
        DataFrame with one row per combination that fitted successfully.
        If every combination fails, ``best_params`` is ``None``,
        ``best_mae`` is ``inf`` and ``results`` is empty.

    Raises:
        ValueError: if ``train_data`` has too few rows to hold out at least
            one validation row (fewer than 5).

    NOTE(review): the models fitted here do not include the
    resolution-specific custom seasonalities that
    ``train_single_configuration`` adds to the final model, so the selected
    parameters are tuned on a slightly different model — confirm intended.
    """
    from itertools import product

    # Use the tail of the training data for validation.
    val_size = int(len(train_data) * 0.2)
    if val_size < 1:
        # With val_size == 0, [:-0] would give an EMPTY training set and
        # [-0:] would validate on the whole frame.
        raise ValueError(
            f"train_data has {len(train_data)} rows; at least 5 are needed "
            f"to hold out a validation split"
        )
    train_subset = train_data.iloc[:-val_size]
    val_subset = train_data.iloc[-val_size:]

    regressor_cols = ['hour', 'day_of_week', 'is_weekend']

    combinations = list(product(
        param_grid['changepoint_prior_scale'],
        param_grid['seasonality_prior_scale'],
        param_grid['seasonality_mode'],
    ))
    total_combinations = len(combinations)

    if verbose:
        print(f"    Testing {total_combinations} parameter combinations...")

    best_params = None
    best_mae = float('inf')
    results = []

    for counter, (cp_scale, s_scale, s_mode) in enumerate(combinations, start=1):
        params = {
            'changepoint_prior_scale': cp_scale,
            'seasonality_prior_scale': s_scale,
            'seasonality_mode': s_mode
        }

        # Best effort: a combination that fails to fit or predict is skipped
        # so that one bad setting does not abort the whole search.
        try:
            model = Prophet(
                daily_seasonality=True,
                weekly_seasonality=True,
                yearly_seasonality=False,
                holidays=STORM_HOLIDAY,
                **params
            )
            for col in regressor_cols:
                model.add_regressor(col)

            model.fit(train_subset)

            forecast = model.predict(val_subset[['ds'] + regressor_cols])
            mae = mean_absolute_error(val_subset['y'], forecast['yhat'])
        except Exception as e:
            if verbose:
                print(f"      Error with params {counter}/{total_combinations}: {str(e)}")
            continue

        results.append({**params, 'mae': mae})

        if mae < best_mae:
            best_mae = mae
            best_params = params

    return best_params, best_mae, pd.DataFrame(results)


print("‚úì Utility functions defined")

In [None]:
# ===========================
# CELL 5: MAIN TRAINING FUNCTION
# ===========================

def train_single_configuration(resolution, target, verbose=True, interval_width=0.95):
    """
    Tune, train, evaluate and persist one Prophet model.

    Steps: load ``train_<resolution>.csv`` / ``test_<resolution>.csv`` from
    ``DATA_DIR``, build the Prophet frames, grid-search hyperparameters,
    fit the final model on the full training set, predict the test set and
    write predictions, metrics, best parameters, tuning results and the
    full forecast as CSV files under ``RESULTS_BASE_DIR/<resolution>_<target>``.

    Args:
        resolution: One of ``'1min'``, ``'5min'`` or anything else (treated
            as 15min) — selects the input files and the extra seasonalities.
        target: Column of the input CSVs to forecast.
        verbose: Print progress for each step.
        interval_width: Width of Prophet's uncertainty interval for the
            final model. Defaults to 0.95 because the benchmark plot and
            the report in this notebook describe the intervals as 95%;
            Prophet's own default is 0.80.

    Returns:
        dict of metrics, chosen hyperparameters, timing and output directory
        for the benchmark table, or ``None`` if any step raised (the error
        and traceback are printed).
    """
    if verbose:
        print(f"\n{'='*70}")
        print(f"TRAINING: Prophet | {resolution} | {target}")
        print(f"{'='*70}")

    start_time = time.time()

    # Create results directory
    results_dir = f"{RESULTS_BASE_DIR}/{resolution}_{target}"
    os.makedirs(results_dir, exist_ok=True)

    regressor_cols = ['hour', 'day_of_week', 'is_weekend']

    try:
        # Load data
        if verbose:
            print(f"\n[1/5] Loading data...")
        train_df = pd.read_csv(f"{DATA_DIR}/train_{resolution}.csv", index_col=0, parse_dates=True)
        test_df = pd.read_csv(f"{DATA_DIR}/test_{resolution}.csv", index_col=0, parse_dates=True)

        # Prepare Prophet data
        if verbose:
            print(f"[2/5] Preparing Prophet data...")
        prophet_train = prepare_prophet_data(train_df, target)
        prophet_test = prepare_prophet_data(test_df, target)

        if verbose:
            print(f"    Train: {len(prophet_train):,} rows, Test: {len(prophet_test):,} rows")

        # Hyperparameter tuning
        if verbose:
            print(f"[3/5] Tuning hyperparameters...")
        best_params, best_mae, tuning_results = tune_prophet_hyperparameters(
            prophet_train, PARAM_GRID, verbose=verbose
        )

        # The tuner returns None when every combination failed; fail here
        # with a clear message instead of a TypeError on best_params[...].
        if best_params is None:
            raise RuntimeError(
                f"Hyperparameter tuning produced no usable parameters for "
                f"{resolution}/{target}: every combination failed"
            )

        if verbose:
            print(f"    Best MAE: {best_mae:.2f}")
            print(f"    Best params: cp={best_params['changepoint_prior_scale']}, "
                  f"s={best_params['seasonality_prior_scale']}, mode={best_params['seasonality_mode']}")

        # Save tuning results
        tuning_results.to_csv(f"{results_dir}/hyperparameter_tuning.csv", index=False)

        # Train final model with best parameters
        if verbose:
            print(f"[4/5] Training final model...")

        model = Prophet(
            daily_seasonality=True,
            weekly_seasonality=True,
            yearly_seasonality=False,
            changepoint_prior_scale=best_params['changepoint_prior_scale'],
            seasonality_prior_scale=best_params['seasonality_prior_scale'],
            seasonality_mode=best_params['seasonality_mode'],
            holidays=STORM_HOLIDAY,
            # Must match the coverage level the benchmark compares against.
            interval_width=interval_width
        )

        # Add custom seasonalities based on resolution (period is in days).
        # NOTE(review): the period=1 / period=7 terms sit on top of the
        # built-in daily/weekly seasonalities enabled above — confirm the
        # overlap is intended.
        if resolution == '1min':
            model.add_seasonality(name='hourly_pattern', period=1/24, fourier_order=10)
        elif resolution == '5min':
            model.add_seasonality(name='daily_high_freq', period=1, fourier_order=50)
            model.add_seasonality(name='weekly_high_freq', period=7, fourier_order=20)
        else:  # 15min
            model.add_seasonality(name='daily_pattern', period=1, fourier_order=25)

        for col in regressor_cols:
            model.add_regressor(col)

        model.fit(prophet_train)

        # Predict
        if verbose:
            print(f"[5/5] Evaluating...")

        future = prophet_test[['ds'] + regressor_cols].copy()
        forecast = model.predict(future)

        # Calculate metrics
        y_true = prophet_test['y'].values
        y_pred = forecast['yhat'].values

        metrics = calculate_metrics(y_true, y_pred, "Prophet")

        if verbose:
            print(f"    MAE: {metrics['MAE']:.2f}, RMSE: {metrics['RMSE']:.2f}, MAPE: {metrics['MAPE (%)']:.2f}%")

        # Prediction interval coverage: share of actuals inside [lower, upper]
        lower = forecast['yhat_lower'].values
        upper = forecast['yhat_upper'].values
        within_interval = np.sum((y_true >= lower) & (y_true <= upper))
        coverage = (within_interval / len(y_true)) * 100

        # Save results
        # Predictions
        predictions_df = pd.DataFrame({
            'timestamp': prophet_test['ds'],
            'actual': y_true,
            'predicted': y_pred,
            'lower_bound': lower,
            'upper_bound': upper,
            'residual': y_true - y_pred
        })
        predictions_df.to_csv(f"{results_dir}/predictions.csv", index=False)

        # Metrics
        metrics_df = pd.DataFrame([metrics])
        metrics_df.to_csv(f"{results_dir}/metrics.csv", index=False)

        # Best parameters (interval_width is recorded so the model can be
        # rebuilt with the same interval definition)
        params_df = pd.DataFrame([{
            'changepoint_prior_scale': best_params['changepoint_prior_scale'],
            'seasonality_prior_scale': best_params['seasonality_prior_scale'],
            'seasonality_mode': best_params['seasonality_mode'],
            'daily_seasonality': True,
            'weekly_seasonality': True,
            'yearly_seasonality': False,
            'interval_coverage': coverage,
            'interval_width': interval_width
        }])
        params_df.to_csv(f"{results_dir}/best_parameters.csv", index=False)

        # Full forecast
        forecast.to_csv(f"{results_dir}/forecast_full.csv", index=False)

        # Training time (covers loading, tuning, fitting and saving)
        elapsed_time = time.time() - start_time

        if verbose:
            print(f"\n‚úì Completed in {elapsed_time:.1f} seconds")
            print(f"  Results saved to: {results_dir}")

        # Return results for benchmark
        return {
            'resolution': resolution,
            'target': target,
            'mae': metrics['MAE'],
            'mse': metrics['MSE'],
            'rmse': metrics['RMSE'],
            'mape': metrics['MAPE (%)'],
            'r2': metrics['R¬≤'],
            'interval_coverage': coverage,
            'changepoint_prior_scale': best_params['changepoint_prior_scale'],
            'seasonality_prior_scale': best_params['seasonality_prior_scale'],
            'seasonality_mode': best_params['seasonality_mode'],
            'training_time_sec': elapsed_time,
            'results_dir': results_dir
        }

    except Exception as e:
        # Deliberate catch-all: one failed configuration must not stop the
        # remaining ones; the caller treats None as "failed".
        print(f"\n‚ùå ERROR: {str(e)}")
        import traceback
        traceback.print_exc()
        return None


print("‚úì Training function defined")

In [None]:
# ===========================
# CELL 6: RUN ALL CONFIGURATIONS
# ===========================

# Banner for the start of the training sweep.
_sweep_rule = "=" * 70
print("\n" + _sweep_rule)
print("STARTING AUTOMATED TRAINING PIPELINE")
print(_sweep_rule)

all_results = []
total_configs = len(RESOLUTIONS) * len(TARGETS)
current_config = 0

pipeline_start_time = time.time()

# Resolutions vary slowest, targets fastest.
_config_pairs = [(res, tgt) for res in RESOLUTIONS for tgt in TARGETS]

for current_config, (resolution, target) in enumerate(_config_pairs, start=1):
    print(f"\n\n{'#'*70}")
    print(f"CONFIGURATION {current_config}/{total_configs}")
    print(f"{'#'*70}")

    result = train_single_configuration(resolution, target, verbose=True)

    # A None result marks a failed configuration; only successes are kept.
    if result is None:
        print(f"\n‚ùå Configuration {current_config}/{total_configs} failed")
    else:
        all_results.append(result)
        print(f"\n‚úÖ Configuration {current_config}/{total_configs} completed successfully")

    # Progress update: remaining time is extrapolated from the mean
    # duration of the configurations processed so far.
    elapsed = time.time() - pipeline_start_time
    avg_time = elapsed / current_config
    remaining = (total_configs - current_config) * avg_time

    print(f"\nüìä Progress: {current_config}/{total_configs} ({current_config/total_configs*100:.1f}%)")
    print(f"   Elapsed: {elapsed/60:.1f} min | Est. remaining: {remaining/60:.1f} min")

total_elapsed = time.time() - pipeline_start_time

# Sweep summary.
print("\n" + _sweep_rule)
print("ALL CONFIGURATIONS COMPLETED")
print(_sweep_rule)
print(f"  Total time: {total_elapsed/60:.1f} minutes")
print(f"  Successful: {len(all_results)}/{total_configs}")
print(f"  Failed: {total_configs - len(all_results)}")

In [None]:
# ===========================
# CELL 7: CREATE COMPREHENSIVE BENCHMARK
# ===========================

print("\n" + "="*70)
print("GENERATING COMPREHENSIVE BENCHMARK")
print("="*70)

# Create benchmark directory
benchmark_dir = f"{RESULTS_BASE_DIR}/FINAL_BENCHMARK"
os.makedirs(benchmark_dir, exist_ok=True)

# Convert results to DataFrame (one row per successful configuration)
benchmark_df = pd.DataFrame(all_results)

# If every configuration failed the frame has no columns, and everything
# below would die with an opaque KeyError after writing an empty CSV.
if benchmark_df.empty:
    raise RuntimeError(
        "No configuration completed successfully; nothing to benchmark. "
        "Check the training errors reported above."
    )

# Save comprehensive comparison
benchmark_file = f"{benchmark_dir}/comprehensive_comparison.csv"
benchmark_df.to_csv(benchmark_file, index=False)
print(f"\n‚úì Benchmark saved: {benchmark_file}")

print("\nüìä BENCHMARK RESULTS:\n")
# Reversed red-yellow-green colormap applied to the error columns.
display(benchmark_df.style.background_gradient(cmap='RdYlGn_r', subset=['mae', 'rmse', 'mape']).format({
    'mae': '{:.2f}',
    'rmse': '{:.2f}',
    'mape': '{:.2f}%',
    'r2': '{:.4f}',
    'interval_coverage': '{:.1f}%',
    'training_time_sec': '{:.1f}s'
}))

# Find best configuration
print("\n" + "="*70)
print("BEST CONFIGURATIONS")
print("="*70)

# NOTE(review): the "best by resolution" and "overall best" rankings compare
# MAE across targets. MAE is scale-dependent, so if request_count and
# total_bytes are on different scales (presumably counts vs bytes) the
# smaller-scale target always wins — confirm whether MAPE should be used.

# By resolution
print("\nBest by Resolution:")
for resolution in RESOLUTIONS:
    subset = benchmark_df[benchmark_df['resolution'] == resolution]
    if len(subset) > 0:
        best_idx = subset['mae'].idxmin()
        best = subset.loc[best_idx]
        print(f"  {resolution}: {best['target']} (MAE: {best['mae']:.2f}, MAPE: {best['mape']:.2f}%)")

# By target
print("\nBest by Target:")
for target in TARGETS:
    subset = benchmark_df[benchmark_df['target'] == target]
    if len(subset) > 0:
        best_idx = subset['mae'].idxmin()
        best = subset.loc[best_idx]
        print(f"  {target}: {best['resolution']} (MAE: {best['mae']:.2f}, MAPE: {best['mape']:.2f}%)")

# Overall best (lowest MAE); reused by the report and summary cells
overall_best_idx = benchmark_df['mae'].idxmin()
overall_best = benchmark_df.loc[overall_best_idx]
print(f"\nOverall Best: {overall_best['resolution']} | {overall_best['target']}")
print(f"  MAE: {overall_best['mae']:.2f}")
print(f"  RMSE: {overall_best['rmse']:.2f}")
print(f"  MAPE: {overall_best['mape']:.2f}%")
print(f"  R¬≤: {overall_best['r2']:.4f}")
print(f"  Parameters: cp={overall_best['changepoint_prior_scale']}, s={overall_best['seasonality_prior_scale']}, mode={overall_best['seasonality_mode']}")

In [None]:
# ===========================
# CELL 8: BENCHMARK VISUALIZATIONS
# ===========================

print("\n[CREATING BENCHMARK VISUALIZATIONS]\n")

fig = plt.figure(figsize=(20, 10))
gs = fig.add_gridspec(2, 3, hspace=0.3, wspace=0.3)

# One colour per target column of each pivot table.
_bar_colors = ['#2a9d8f', '#e76f51']

# Panel specs: (grid slot, metric column, title, title size, y label,
# extra bar-plot kwargs). The MAE panel spans the whole top row.
_panels = [
    (gs[0, :], 'mae', 'MAE Comparison Across All Configurations', 14, 'MAE', {'width': 0.6}),
    (gs[1, 0], 'mape', 'MAPE by Configuration', 12, 'MAPE (%)', {}),
    (gs[1, 1], 'r2', 'R¬≤ Score by Configuration', 12, 'R¬≤', {}),
    (gs[1, 2], 'interval_coverage', 'Prediction Interval Coverage', 12, 'Coverage (%)', {}),
]

for _slot, _metric, _title, _title_size, _y_label, _bar_kwargs in _panels:
    _ax = fig.add_subplot(_slot)
    _is_coverage = _metric == 'interval_coverage'

    # Resolutions on the x axis, one bar per target.
    _pivot = benchmark_df.pivot_table(values=_metric, index='resolution', columns='target')
    _pivot.plot(kind='bar', ax=_ax, color=_bar_colors, alpha=0.8, **_bar_kwargs)

    if _is_coverage:
        # Reference line for the nominal coverage level.
        # NOTE(review): assumes the models use 95% intervals — confirm the
        # interval_width the Prophet models were fitted with.
        _ax.axhline(y=95, color='red', linestyle='--', label='Expected 95%')

    _ax.set_title(_title, fontweight='bold', fontsize=_title_size)
    _ax.set_ylabel(_y_label)
    _ax.set_xlabel('Resolution')
    if _is_coverage:
        _ax.legend()
    else:
        _ax.legend(title='Target')
    _ax.grid(True, alpha=0.3, axis='y')
    plt.setp(_ax.xaxis.get_majorticklabels(), rotation=0)

plt.suptitle('Prophet: Comprehensive Benchmark Analysis',
            fontsize=16, fontweight='bold', y=0.995)

# Save before show() so the file is written from the populated figure.
viz_file = f"{benchmark_dir}/benchmark_visualizations.png"
plt.savefig(viz_file, dpi=150, bbox_inches='tight')
plt.show()

print(f"‚úì Visualizations saved: {viz_file}")

In [None]:
# ===========================
# CELL 9: GENERATE FINAL REPORT
# ===========================

# Announce the report-generation step.
print("\n[GENERATING FINAL REPORT]\n")

# First part of the plain-text report: run summary, search space, aggregate
# metrics over all successful configurations (mean and standard deviation
# from benchmark_df) and the lowest-MAE configuration (overall_best).
# All placeholders are evaluated immediately from values produced by the
# earlier cells; later statements append further sections to `report`.
report = f"""
{'='*80}
PROPHET: COMPREHENSIVE BENCHMARK REPORT
{'='*80}

Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
Total Pipeline Time: {total_elapsed/60:.1f} minutes

{'='*80}
CONFIGURATIONS TESTED
{'='*80}

Resolutions: {', '.join(RESOLUTIONS)}
Target Variables: {', '.join(TARGETS)}

Total Configurations: {total_configs}
Successful: {len(all_results)}
Failed: {total_configs - len(all_results)}

Hyperparameter Search Space:
  changepoint_prior_scale: {PARAM_GRID['changepoint_prior_scale']}
  seasonality_prior_scale: {PARAM_GRID['seasonality_prior_scale']}
  seasonality_mode: {PARAM_GRID['seasonality_mode']}

{'='*80}
OVERALL PERFORMANCE
{'='*80}

Average Metrics:
  MAE:  {benchmark_df['mae'].mean():.2f} (¬±{benchmark_df['mae'].std():.2f})
  RMSE: {benchmark_df['rmse'].mean():.2f} (¬±{benchmark_df['rmse'].std():.2f})
  MAPE: {benchmark_df['mape'].mean():.2f}% (¬±{benchmark_df['mape'].std():.2f}%)
  R¬≤:   {benchmark_df['r2'].mean():.4f} (¬±{benchmark_df['r2'].std():.4f})
  Interval Coverage: {benchmark_df['interval_coverage'].mean():.1f}% (¬±{benchmark_df['interval_coverage'].std():.1f}%)
  Avg Training Time: {benchmark_df['training_time_sec'].mean():.1f}s

{'='*80}
BEST OVERALL CONFIGURATION
{'='*80}

Resolution: {overall_best['resolution']}
Target: {overall_best['target']}

Performance:
  MAE:  {overall_best['mae']:.2f}
  RMSE: {overall_best['rmse']:.2f}
  MAPE: {overall_best['mape']:.2f}%
  R¬≤:   {overall_best['r2']:.4f}
  Interval Coverage: {overall_best['interval_coverage']:.1f}%

Best Hyperparameters:
  changepoint_prior_scale: {overall_best['changepoint_prior_scale']}
  seasonality_prior_scale: {overall_best['seasonality_prior_scale']}
  seasonality_mode: {overall_best['seasonality_mode']}

{'='*80}
PERFORMANCE BY RESOLUTION
{'='*80}

"""

# Per-resolution summary: averages over that resolution's rows plus the
# target with the lowest MAE. Resolutions with no successful run are skipped.
for resolution in RESOLUTIONS:
    subset = benchmark_df[benchmark_df['resolution'] == resolution]
    if subset.empty:
        continue
    _mae = subset['mae']
    _best_target = subset.loc[_mae.idxmin(), 'target']
    report += (
        f"\n{resolution}:\n"
        f"  Average MAE:  {_mae.mean():.2f}\n"
        f"  Average MAPE: {subset['mape'].mean():.2f}%\n"
        f"  Average R¬≤:   {subset['r2'].mean():.4f}\n"
        f"  Best config: {_best_target} (MAE: {_mae.min():.2f})\n"
    )

# Section heading for the per-target breakdown.
_section_rule = '=' * 80
report += "\n" + _section_rule + "\nPERFORMANCE BY TARGET\n" + _section_rule + "\n\n"

# Per-target summary, mirroring the per-resolution one: the best entry is
# the resolution with the lowest MAE for that target.
for target in TARGETS:
    subset = benchmark_df[benchmark_df['target'] == target]
    if subset.empty:
        continue
    _mae = subset['mae']
    _best_resolution = subset.loc[_mae.idxmin(), 'resolution']
    report += (
        f"\n{target}:\n"
        f"  Average MAE:  {_mae.mean():.2f}\n"
        f"  Average MAPE: {subset['mape'].mean():.2f}%\n"
        f"  Average R¬≤:   {subset['r2'].mean():.4f}\n"
        f"  Best config: {_best_resolution} (MAE: {_mae.min():.2f})\n"
    )

# Final part of the report: key findings derived from benchmark_df
# (group-wise mean MAE, mean interval coverage, most frequent best
# hyperparameters via mode()), fixed recommendation text and the list of
# generated files.
# The coverage verdict is 'Good' when the mean coverage lies in [90, 98].
# NOTE(review): the "95% confidence intervals" wording assumes the models
# were fitted with interval_width=0.95 — confirm against the Prophet
# constructor call in the training function.
report += f"""
{'='*80}
KEY FINDINGS
{'='*80}

1. Best Resolution:
   - {benchmark_df.groupby('resolution')['mae'].mean().idxmin()} has lowest average MAE ({benchmark_df.groupby('resolution')['mae'].mean().min():.2f})

2. Best Target:
   - {benchmark_df.groupby('target')['mae'].mean().idxmin()} is easier to predict (MAE: {benchmark_df.groupby('target')['mae'].mean().min():.2f})

3. Interval Coverage:
   - Average: {benchmark_df['interval_coverage'].mean():.1f}%
   - {'Good' if 90 <= benchmark_df['interval_coverage'].mean() <= 98 else 'Needs adjustment'}

4. Most Common Best Parameters:
   - changepoint_prior_scale: {benchmark_df['changepoint_prior_scale'].mode()[0]}
   - seasonality_prior_scale: {benchmark_df['seasonality_prior_scale'].mode()[0]}
   - seasonality_mode: {benchmark_df['seasonality_mode'].mode()[0]}

{'='*80}
RECOMMENDATIONS FOR AUTOSCALING
{'='*80}

1. Use {overall_best['resolution']} resolution for {overall_best['target']}
   - Achieves best accuracy (MAE: {overall_best['mae']:.2f})
   - 95% confidence intervals cover {overall_best['interval_coverage']:.1f}% of actuals

2. Safety Margins:
   - Use upper bound of prediction interval for conservative scaling
   - Or use forecast + 2œÉ where œÉ is residual std deviation

3. Retraining Strategy:
   - Retrain weekly to capture evolving patterns
   - Monitor forecast accuracy continuously
   - Retrain immediately if MAE increases by >20%

4. Production Deployment:
   - Load best model parameters from best_parameters.csv
   - Use same holiday definitions and regressors
   - Validate predictions before scaling actions

{'='*80}
FILES GENERATED
{'='*80}

Benchmark Files:
  ‚Ä¢ comprehensive_comparison.csv - All metrics for all configurations
  ‚Ä¢ benchmark_visualizations.png - Visual comparison
  ‚Ä¢ final_report.txt - This report

Individual Configuration Results (for each resolution√ótarget):
  ‚Ä¢ predictions.csv - Test predictions with confidence intervals
  ‚Ä¢ metrics.csv - Performance metrics
  ‚Ä¢ best_parameters.csv - Optimal hyperparameters
  ‚Ä¢ hyperparameter_tuning.csv - Full tuning results
  ‚Ä¢ forecast_full.csv - Complete forecast with components

{'='*80}
END OF REPORT
{'='*80}
"""

# Save report
# The report text contains non-ASCII characters, so the encoding is pinned
# instead of relying on the platform default (which can raise
# UnicodeEncodeError or produce a file other tools misread).
report_file = f"{benchmark_dir}/final_report.txt"
with open(report_file, 'w', encoding='utf-8') as f:
    f.write(report)

print(report)

print(f"\n‚úì Final report saved: {report_file}")

In [None]:
# ===========================
# CELL 10: SUMMARY & NEXT STEPS
# ===========================

# Closing banner.
_summary_rule = "=" * 80
print("\n" + _summary_rule)
print("üéâ AUTOMATED PIPELINE COMPLETED SUCCESSFULLY!")
print(_summary_rule)

# Run totals; the per-configuration average divides the wall-clock time of
# the whole sweep by the number of successful configurations.
_n_trained = len(all_results)
print("\nüìä SUMMARY:")
print(f"  Total configurations trained: {_n_trained}")
print(f"  Total time: {total_elapsed/60:.1f} minutes")
print(f"  Average time per config: {total_elapsed/_n_trained:.1f} seconds")

print("\nüìÅ RESULTS LOCATION:")
print(f"  Main directory: {RESULTS_BASE_DIR}")
print(f"  Benchmark: {benchmark_dir}")

print("\nüèÜ BEST CONFIGURATION:")
print(f"  {overall_best['resolution']} | {overall_best['target']}")
print(f"  MAE: {overall_best['mae']:.2f}, MAPE: {overall_best['mape']:.2f}%, R¬≤: {overall_best['r2']:.4f}")

# Three lowest-MAE rows of the benchmark table.
print("\nüìà TOP 3 PERFORMERS (by MAE):")
top_3 = benchmark_df.nsmallest(3, 'mae')[['resolution', 'target', 'mae', 'rmse', 'mape']]
for _, row in top_3.iterrows():
    print(f"  {row['resolution']:5s} | {row['target']:15s} | MAE: {row['mae']:6.2f} | MAPE: {row['mape']:5.2f}%")

print("\nüí° NEXT STEPS:")
print(f"  1. Review the final_report.txt in {benchmark_dir}")
print("  2. Check benchmark_visualizations.png for visual analysis")
print("  3. Use best_parameters.csv from best configuration for production")
print("  4. Implement Prophet forecasting in autoscaling system")
print("  5. Set up weekly retraining schedule")

print("\n" + _summary_rule)
print("All results have been saved to Google Drive!")
print(_summary_rule)