In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from prophet import Prophet
from sklearn.metrics import mean_squared_error, mean_absolute_error
from joblib import dump
import warnings
warnings.filterwarnings("ignore")

# --- Configuration -----------------------------------------------------------
# input_dir is scanned for "*.csv" files by the processing loop; every
# artefact the script writes (models, forecasts, plots, summary) goes to
# output_dir.
input_dir = "Prophet_data"
output_dir = "prophet_deploy"

# Hyperparameter grid: each changepoint_prior_scale value is tried with each
# seasonality mode.
cp_grid = [0.05, 0.1, 0.5]
seasonality_modes = ["additive", "multiplicative"]

# --- One-time side effects ---------------------------------------------------
sns.set(style="whitegrid")
os.makedirs(output_dir, exist_ok=True)  # no error if the folder already exists

# Evaluate model on test data
def evaluate_model(model, df_test):
    """Score a fitted model on held-out rows.

    Args:
        model: Object exposing ``predict(df)`` that returns a frame with a
            ``yhat`` column (the processing loop passes a fitted Prophet model).
        df_test: Frame with a ``ds`` column (fed to ``predict``) and a ``y``
            column holding the observed values.

    Returns:
        Tuple ``(rmse, mae, forecast)`` where ``forecast`` is the frame
        returned by ``model.predict``.
    """
    predictions = model.predict(df_test[['ds']])

    observed = df_test['y'].values
    predicted = predictions['yhat'].values

    squared_error = mean_squared_error(observed, predicted)
    absolute_error = mean_absolute_error(observed, predicted)

    return np.sqrt(squared_error), absolute_error, predictions

# Plotting function
def plot_forecast_results(stock, df_train, df_test, forecast_test, full_forecast, output_dir):
    """Render a two-panel forecast figure and save it as a PNG.

    The left panel compares the test-period prediction with the actual test
    values; the right panel shows the full forecast. Both panels also draw the
    training and test series for context.

    Args:
        stock: Label used in the panel titles and the output file name.
        df_train: Frame with ``ds`` and ``y`` columns (training rows).
        df_test: Frame with ``ds`` and ``y`` columns (held-out rows).
        forecast_test: Frame with ``ds``, ``yhat``, ``yhat_lower`` and
            ``yhat_upper`` columns covering the test period.
        full_forecast: Frame with the same columns covering the full horizon.
        output_dir: Directory that receives ``{stock}_forecast_visuals.png``.

    Returns:
        None. The figure is written to disk and then closed.
    """
    fig, (ax_test, ax_future) = plt.subplots(1, 2, figsize=(16, 6))

    # Left panel: test performance
    ax_test.plot(df_train['ds'], df_train['y'], label='Train', color='gray', linestyle='--')
    ax_test.plot(df_test['ds'], df_test['y'], label='Actual Test', color='black')
    ax_test.plot(forecast_test['ds'], forecast_test['yhat'], label='Predicted Test', color='orange')
    ax_test.fill_between(
        forecast_test['ds'],
        forecast_test['yhat_lower'],
        forecast_test['yhat_upper'],
        color='orange',
        alpha=0.3,
        label='Confidence Interval',
    )
    ax_test.set_title(f'{stock} - Test Forecast vs Actual')
    ax_test.set_xlabel('Date')
    ax_test.set_ylabel('Stock Price')
    ax_test.legend()

    # Right panel: future forecast
    ax_future.plot(df_train['ds'], df_train['y'], label='Train', color='gray', linestyle='--')
    ax_future.plot(df_test['ds'], df_test['y'], label='Test Actual', color='black')
    ax_future.plot(full_forecast['ds'], full_forecast['yhat'], label='Full Forecast', color='blue')
    ax_future.fill_between(
        full_forecast['ds'],
        full_forecast['yhat_lower'],
        full_forecast['yhat_upper'],
        color='blue',
        alpha=0.3,
        label='Confidence Interval',
    )
    ax_future.set_title(f'{stock} - Forecast until Aug 2025')
    ax_future.set_xlabel('Date')
    ax_future.set_ylabel('Stock Price')
    ax_future.legend()

    fig.tight_layout()
    image_path = os.path.join(output_dir, f"{stock}_forecast_visuals.png")
    fig.savefig(image_path)
    plt.close(fig)  # release the figure so repeated calls do not accumulate open figures

# Processing loop
# For every "<stock>.csv" in input_dir:
#   1. split the rows 80/20 in date order,
#   2. grid-search changepoint_prior_scale x seasonality_mode, keeping the
#      configuration with the lowest test RMSE,
#   3. refit that configuration on all rows, save the model and its forecast,
#   4. plot the results and record one summary row.
# NOTE(review): hyperparameters are selected on the same hold-out whose
# RMSE/MAE are reported, so the reported errors are optimistic.
summary_rows = []

# sorted() makes the processing order (and the summary row order) reproducible;
# os.listdir returns entries in arbitrary order.
for file in sorted(os.listdir(input_dir)):
    if not file.endswith(".csv"):
        continue

    # splitext removes only the trailing extension, whereas str.replace would
    # also strip ".csv" appearing elsewhere in the file name.
    stock = os.path.splitext(file)[0]
    df = pd.read_csv(os.path.join(input_dir, file))
    df['ds'] = pd.to_datetime(df['ds'])
    # The positional split below is only a chronological hold-out when the
    # rows are in date order, so enforce that explicitly.
    df = df.sort_values('ds').reset_index(drop=True)

    # Split train/test
    train_size = int(len(df) * 0.8)
    df_train, df_test = df[:train_size], df[train_size:]

    # Reset every "best_*" tracker per stock so nothing leaks from the
    # previous file when all configurations fail for this one.
    best_rmse = float("inf")
    best_mae = None
    best_model = None
    best_cp = None
    best_mode = None
    best_forecast = None

    # Grid search
    for cp in cp_grid:
        for mode in seasonality_modes:
            # Fitting is inside the try as well: a configuration that cannot
            # be fitted is reported and skipped instead of aborting the run.
            try:
                model = Prophet(
                    changepoint_prior_scale=cp,
                    seasonality_mode=mode,
                    daily_seasonality=False,
                    weekly_seasonality=True,
                    yearly_seasonality=True
                )
                model.fit(df_train)
                rmse, mae, forecast = evaluate_model(model, df_test)
            except Exception as e:
                print(f"Model failed for {stock} with cp={cp}, mode={mode}: {e}")
                continue

            if rmse < best_rmse:
                best_rmse = rmse
                best_mae = mae
                best_model = model
                best_cp = cp
                best_mode = mode
                best_forecast = forecast

    # Without a successful configuration there is nothing to retrain, plot or
    # summarise; passing None hyperparameters to Prophet would only crash.
    if best_cp is None:
        print(f"Skipping {stock}: no grid configuration produced a valid model.")
        continue

    # Retrain best model on full dataset
    final_model = Prophet(
        changepoint_prior_scale=best_cp,
        seasonality_mode=best_mode,
        daily_seasonality=False,
        weekly_seasonality=True,
        yearly_seasonality=True
    )
    final_model.fit(df)

    # Save final model trained on full data
    dump(final_model, os.path.join(output_dir, f"{stock}_model.pkl"), compress=3)

    # Generate full forecast till August 2025
    last_date = df['ds'].max()
    target_date = pd.Timestamp("2025-08-31")
    extra_days = (target_date - last_date).days

    # Clamp at 0: data that already reaches past the target date requests no
    # additional future rows.
    future_dates = final_model.make_future_dataframe(periods=max(extra_days, 0))
    future_forecast = final_model.predict(future_dates)

    # Save forecast CSV
    future_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].to_csv(
        os.path.join(output_dir, f"{stock}_future_forecast.csv"), index=False
    )

    # Plot results
    plot_forecast_results(stock, df_train, df_test, best_forecast, future_forecast, output_dir)

    # Save summary
    summary_rows.append({
        "Stock": stock,
        "Best_CP": best_cp,
        "Seasonality_Mode": best_mode,
        "RMSE_Test": round(best_rmse, 4),
        "MAE_Test": round(best_mae, 4)
    })

# Save all model summaries
summary_df = pd.DataFrame(summary_rows)
summary_df.to_csv(os.path.join(output_dir, "prophet_model_summary.csv"), index=False)

print("✅ All models trained, saved, and visualized.")


  from .autonotebook import tqdm as notebook_tqdm
18:37:35 - cmdstanpy - INFO - Chain [1] start processing
18:37:36 - cmdstanpy - INFO - Chain [1] done processing
18:37:37 - cmdstanpy - INFO - Chain [1] start processing
18:37:40 - cmdstanpy - INFO - Chain [1] done processing
18:37:41 - cmdstanpy - INFO - Chain [1] start processing
18:37:42 - cmdstanpy - INFO - Chain [1] done processing
18:37:43 - cmdstanpy - INFO - Chain [1] start processing
18:37:45 - cmdstanpy - INFO - Chain [1] done processing
18:37:45 - cmdstanpy - INFO - Chain [1] start processing
18:37:47 - cmdstanpy - INFO - Chain [1] done processing
18:37:47 - cmdstanpy - INFO - Chain [1] start processing
18:37:49 - cmdstanpy - INFO - Chain [1] done processing
18:37:50 - cmdstanpy - INFO - Chain [1] start processing
18:37:51 - cmdstanpy - INFO - Chain [1] done processing
18:37:54 - cmdstanpy - INFO - Chain [1] start processing
18:37:55 - cmdstanpy - INFO - Chain [1] done processing
18:37:56 - cmdstanpy - INFO - Chain [1] start 

✅ All models trained, saved, and visualized.
