In [4]:
# Prophet Stock Price Forecasting & Hyperparameter Optimization
# Ready-to-run notebook for accurate stock price predictions

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from prophet import Prophet
from sklearn.metrics import mean_squared_error, mean_absolute_error

# --- I/O locations -----------------------------------------------------------
# Input: one "<stock>_prophet.csv" per stock; output: plots, forecasts, summary.
input_dir = "../Data/Prophet_Preprocessed"
output_dir = "Prophet_Results_Optimized"
os.makedirs(output_dir, exist_ok=True)  # no error if the folder already exists

# --- Grid-search space (every combination is tried for every stock) ----------
changepoint_scales = [
    0.05,
    0.1,
    0.2,
    0.5,
]
seasonality_scales = [
    5,
    10,
    20,
]
seasonality_modes = [
    "additive",
    "multiplicative",
]

# One dict of metrics + winning hyperparameters is appended per stock.
summary = list()

# Loop through each stock file.
# For every "<stock>_prophet.csv" in input_dir this:
#   1. loads the series and splits it chronologically 80/20,
#   2. grid-searches Prophet hyperparameters, scoring each fit by test RMSE,
#   3. saves forecast/diagnostic plots and a forecast CSV to output_dir,
#   4. appends the metrics and winning hyperparameters to `summary`.
# sorted() keeps the processing order (and the summary row order) deterministic;
# os.listdir returns entries in arbitrary order.
for file in sorted(os.listdir(input_dir)):
    if not file.endswith("_prophet.csv"):
        continue
    stock = file.replace("_prophet.csv", "")
    print(f"\n🔄 Processing {stock}...")

    df = pd.read_csv(os.path.join(input_dir, file))
    df['ds'] = pd.to_datetime(df['ds'])
    # The split below is positional, so the rows must be in chronological order.
    # A stable sort keeps the relative order of any duplicate timestamps.
    df = (
        df.dropna(subset=['y'])
        .sort_values('ds', kind='mergesort')
        .reset_index(drop=True)
    )

    # Train-test split (80/20). Explicit copies so later column work never
    # writes into a view of `df`.
    split_idx = int(len(df) * 0.8)
    train_df = df.iloc[:split_idx].copy()
    test_df = df.iloc[split_idx:].copy()
    n_train = len(train_df)
    if n_train < 2 or test_df.empty:
        print(f"⚠️ Skipping {stock}: not enough rows ({len(df)}) for a train/test split")
        continue

    # Optional: log-transform for high variance (max/min ratio above 10).
    # train_df/test_df always stay in price scale; only the frame handed to
    # Prophet is transformed, and predictions are mapped back with exp().
    use_log = bool(df['y'].min() > 0 and df['y'].max() / df['y'].min() > 10)
    fit_df = train_df.copy()
    if use_log:
        fit_df['y'] = np.log(fit_df['y'])

    # Predict on the dates that actually occur in the data (train + test)
    # instead of synthetic business days, so forecast row i corresponds to
    # df row i even when the series has holidays or other gaps.
    future = df[['ds']]
    test_actual = test_df['y'].to_numpy()

    # Grid search for best hyperparameters.
    # NOTE(review): the winner is picked by RMSE on the same test split that is
    # reported below, so the reported error is optimistic. A separate
    # validation window would give an unbiased estimate.
    best_rmse = float('inf')
    best_params = None
    best_forecast = None
    best_model = None
    best_yhat = None  # price-scale predictions for all rows of `future`

    for cps in changepoint_scales:
        for sps in seasonality_scales:
            for smode in seasonality_modes:
                model = Prophet(
                    yearly_seasonality=True,
                    weekly_seasonality=True,
                    daily_seasonality=False,
                    seasonality_mode=smode,
                    changepoint_prior_scale=cps,
                    seasonality_prior_scale=sps
                )
                # Monthly seasonality is always added on top of yearly/weekly.
                model.add_seasonality(name='monthly', period=30.5, fourier_order=5)

                try:
                    model.fit(fit_df)
                except Exception as e:
                    print(f"Model failed for params cps={cps}, sps={sps}, smode={smode}: {e}")
                    continue

                forecast = model.predict(future)
                yhat = forecast['yhat'].to_numpy()
                if use_log:
                    yhat = np.exp(yhat)  # back to price scale

                rmse = np.sqrt(mean_squared_error(test_actual, yhat[n_train:]))

                if rmse < best_rmse:
                    best_rmse = rmse
                    best_params = (cps, sps, smode)
                    best_forecast = forecast.copy()
                    best_model = model
                    best_yhat = yhat

    # Every combination failed to fit: nothing to report for this stock.
    if best_params is None:
        print(f"⚠️ Skipping {stock}: no hyperparameter combination could be fitted")
        continue

    # Use best model and forecast
    forecast = best_forecast
    model = best_model
    cps, sps, smode = best_params
    fitted_train = best_yhat[:n_train]   # in-sample fit, price scale
    forecast_test = best_yhat[n_train:]  # out-of-sample forecast, price scale

    rmse = best_rmse
    mae = mean_absolute_error(test_actual, forecast_test)

    print(f"Best Params for {stock}: changepoint_prior_scale={cps}, seasonality_prior_scale={sps}, mode={smode}")
    print(f"Test RMSE: {rmse:.4f} | MAE: {mae:.4f}")
    summary.append({'Stock': stock, 'RMSE': rmse, 'MAE': mae,
                    'changepoint_prior_scale': cps, 'seasonality_prior_scale': sps, 'seasonality_mode': smode})

    # Visualization: Full train and test period
    plt.figure(figsize=(12, 6))
    plt.plot(train_df['ds'], train_df['y'], label="Train", color="blue", linewidth=2)
    plt.plot(test_df['ds'], test_actual, label="Test Actual", color="green", linewidth=2)
    plt.plot(test_df['ds'], forecast_test, label="Test Forecast", color="red", linestyle='--', linewidth=2)
    plt.title(f"{stock} Prophet Forecast (Optimized)")
    plt.xlabel("Date")
    plt.ylabel("Price")
    plt.legend()
    plt.tight_layout()
    plt.savefig(f"{output_dir}/{stock}_prophet_forecast.png")
    plt.close()

    # Model diagnostics: plot components.
    # When use_log is True the components are in the model's log space.
    fig2 = model.plot_components(forecast)
    fig2.savefig(f"{output_dir}/{stock}_prophet_components.png")
    plt.close(fig2)

    # Training fit plot (actual and fitted are both in price scale)
    plt.figure(figsize=(12, 6))
    plt.plot(train_df['ds'], train_df['y'], label="Train Actual", color="blue", linewidth=2)
    plt.plot(train_df['ds'], fitted_train, label="Train Fitted", color="orange", linestyle='--', linewidth=2)
    plt.title(f"{stock} Prophet Training Fit (Optimized)")
    plt.xlabel("Date")
    plt.ylabel("Price")
    plt.legend()
    plt.tight_layout()
    plt.savefig(f"{output_dir}/{stock}_prophet_train_fit.png")
    plt.close()

    # Save forecast results
    out_df = pd.DataFrame({
        "Date": test_df['ds'].to_numpy(),
        "Actual": test_actual,
        "Forecast": forecast_test
    })
    out_df.to_csv(f"{output_dir}/{stock}_prophet_forecast.csv", index=False)
    print(f"✅ Saved optimized results for {stock}")

# Collect the per-stock metrics into one table, write it to CSV, and echo it
summary_df = pd.DataFrame(summary)
summary_csv_path = f"{output_dir}/prophet_rmse_mae_summary.csv"
summary_df.to_csv(summary_csv_path, index=False)

print("\n📊 RMSE/MAE summary for all stocks (Optimized):")
print(summary_df)

print("\n✅ All stocks processed. Optimized Prophet results and diagnostics saved in Prophet_Results_Optimized/")


🔄 Processing Apple...


14:45:02 - cmdstanpy - INFO - Chain [1] start processing
14:45:04 - cmdstanpy - INFO - Chain [1] done processing


Test RMSE: 220.5416 | MAE: 173.1168
✅ Saved results for Apple

🔄 Processing GeneralElectric...


14:45:07 - cmdstanpy - INFO - Chain [1] start processing
14:45:11 - cmdstanpy - INFO - Chain [1] done processing


Test RMSE: 87.9133 | MAE: 63.9492
✅ Saved results for GeneralElectric

🔄 Processing IBM...


14:45:15 - cmdstanpy - INFO - Chain [1] start processing
14:45:18 - cmdstanpy - INFO - Chain [1] done processing


Test RMSE: 71.6479 | MAE: 52.2737
✅ Saved results for IBM

🔄 Processing Johnson&Johnson...


14:45:21 - cmdstanpy - INFO - Chain [1] start processing
14:45:23 - cmdstanpy - INFO - Chain [1] done processing


Test RMSE: 22.8844 | MAE: 18.5075
✅ Saved results for Johnson&Johnson

🔄 Processing Microsoft...


14:45:27 - cmdstanpy - INFO - Chain [1] start processing
14:45:29 - cmdstanpy - INFO - Chain [1] done processing


Test RMSE: 272.1464 | MAE: 223.9676
✅ Saved results for Microsoft

📊 RMSE/MAE summary for all stocks:
             Stock        RMSE         MAE
0            Apple  220.541571  173.116783
1  GeneralElectric   87.913347   63.949244
2              IBM   71.647932   52.273698
3  Johnson&Johnson   22.884402   18.507506
4        Microsoft  272.146429  223.967611

✅ All stocks processed. Prophet results and diagnostics saved in Prophet_Results/


In [6]:
# Re-write the metrics table to CSV.
# Relies on `summary` and `output_dir` still being defined by the cell above.
summary_csv_path = f"{output_dir}/prophet_rmse_mae_summary.csv"
summary_df = pd.DataFrame(summary)
summary_df.to_csv(summary_csv_path, index=False)
print("\n📊 RMSE/MAE summary for all stocks saved")


📊 RMSE/MAE summary for all stocks saved
