In [None]:
# 02 – Stock Forecasting (Prophet & ARIMA)
#
# Steps:
# - Load cleaned stock prices
# - Select a single stock symbol
# - Prepare time-series data
# - Train Prophet and ARIMA models
# - Visualise forecasts
# - Export results to results/forecast_results.csv

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error

from src.forecasting_utils import (
    prepare_time_series,
    fit_prophet_model,
    forecast_with_prophet,
    fit_arima_model,
    forecast_with_arima,
)

# ===================================================================
# Paths
# ===================================================================
# Both folders are resolved relative to the parent of the working
# directory; the results folder is created up front so the export at
# the end of the notebook cannot fail on a missing directory.
DATA_DIR = os.path.join("..", "data")
RESULTS_DIR = os.path.join("..", "results")
os.makedirs(RESULTS_DIR, exist_ok=True)

# ===================================================================
# Load Cleaned Stock Data
# ===================================================================
# The "date" column is parsed to datetimes at read time so it can be
# sorted and used as a time index further down.
clean_stocks_path = os.path.join(DATA_DIR, "cleaned_stocks.csv")
stocks_clean = pd.read_csv(clean_stocks_path, parse_dates=["date"])

print("Cleaned stocks shape:", stocks_clean.shape)
display(stocks_clean.head())

# ===================================================================
# Select Symbol
# ===================================================================
available_symbols = stocks_clean["symbol"].unique()
print("\nAvailable symbols:", available_symbols)

# Default to the first symbol found in the file; change the index (or
# assign a ticker directly) to forecast a different stock.
symbol = available_symbols[0]
print("Using symbol:", symbol)

# Keep only the chosen symbol's rows, as an independent copy, in
# chronological order.
symbol_df = (
    stocks_clean.loc[stocks_clean["symbol"] == symbol]
    .copy()
    .sort_values("date")
)
display(symbol_df.tail())

# ===================================================================
# Prepare Time Series for Prophet
# ===================================================================
# Project helper; the cells below read "ds" and "y" columns from its
# result (presumably the Prophet date/value naming - verify against
# src.forecasting_utils).
ts_prophet = prepare_time_series(symbol_df, date_col="date", value_col="close")

print("\nProphet-ready time series shape:", ts_prophet.shape)
display(ts_prophet.tail())

# ===================================================================
# Train/Test Split (time-based)
# ===================================================================
# Hold out the most recent `horizon` rows as the test window and train
# on everything before them. The split is positional (row count), so
# the window spans `horizon` observations of ts_prophet.
horizon = 30
train_ts = ts_prophet.head(-horizon)
test_ts = ts_prophet.tail(horizon)

print("\nTrain size:", train_ts.shape)
print("Test size:", test_ts.shape)
display(train_ts.tail())
display(test_ts.head())

# ===================================================================
# Prophet Model
# ===================================================================
# Fit on the training window only. The seasonality switches and the
# changepoint prior are passed straight through to the project helper.
prophet_model = fit_prophet_model(
    train_ts,
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    changepoint_prior_scale=0.05,
)

# Request `horizon` periods to cover the hold-out window plus 30 more
# daily periods beyond it.
prophet_forecast = forecast_with_prophet(prophet_model, periods=horizon + 30, freq="D")

print("\nProphet forecast tail:")
display(prophet_forecast.tail())

# Overlay the training data, the held-out data (dashed) and the
# forecast on a single chart. Lines are drawn in this fixed order.
plt.figure(figsize=(10, 5))
for line_x, line_y, line_kwargs in (
    (train_ts["ds"], train_ts["y"], {"label": "Train"}),
    (test_ts["ds"], test_ts["y"], {"label": "Test", "linestyle": "--"}),
    (prophet_forecast["ds"], prophet_forecast["yhat"], {"label": "Prophet forecast"}),
):
    plt.plot(line_x, line_y, **line_kwargs)
plt.xlabel("Date")
plt.ylabel("Price")
plt.title(f"{symbol} – Prophet Forecast")
plt.legend()
plt.tight_layout()
plt.show()

# -------------------------------------------------------------------
# ARIMA Model
# -------------------------------------------------------------------
# Put the closes on a gap-free daily calendar so the series handed to
# ARIMA is evenly spaced; days without a price are filled by
# interpolate() (linear by default).
ts_arima = symbol_df.set_index("date")["close"].asfreq("D").interpolate()

# Split on the same cutoff DATE as the Prophet split rather than on the
# last `horizon` rows of the daily series. After asfreq("D") one row is
# one calendar day, so a positional split would hold out a shorter
# window than test_ts whenever the source data skips days (weekends,
# holidays). ARIMA would then be trained on dates that belong to the
# test window and be scored against the wrong dates.
train_end = train_ts["ds"].iloc[-1]
test_end = test_ts["ds"].iloc[-1]
train_series = ts_arima.loc[:train_end]
test_series = ts_arima.loc[ts_arima.index > train_end]

arima_model = fit_arima_model(train_series, order=(1, 1, 1))

# Forecast every calendar day through the end of the test window, plus
# 30 further days beyond it.
days_to_test_end = (test_end - train_end).days
arima_daily_forecast = forecast_with_arima(arima_model, steps=days_to_test_end + 30)

# Keep the forecasts for exactly the test_ts dates first, followed by the
# out-of-sample days. The first `horizon` entries of arima_forecast then
# line up row-for-row with test_ts, which is what the evaluation step
# relies on when it takes arima_forecast.iloc[:horizon].
# NOTE(review): assumes forecast_with_arima returns a date-indexed pandas
# object (the plot below already depends on that) - confirm in
# src.forecasting_utils.
arima_forecast = pd.concat([
    arima_daily_forecast.reindex(pd.DatetimeIndex(test_ts["ds"])),
    arima_daily_forecast.loc[arima_daily_forecast.index > test_end],
])

print("\nARIMA forecast tail:")
display(arima_forecast.tail())

plt.figure(figsize=(10, 5))
plt.plot(train_series.index, train_series.values, label="Train")
plt.plot(test_series.index, test_series.values, label="Test", linestyle="--")
plt.plot(arima_forecast.index, arima_forecast.values, label="ARIMA forecast")
plt.xlabel("Date")
plt.ylabel("Price")
plt.title(f"{symbol} – ARIMA Forecast")
plt.legend()
plt.tight_layout()
plt.show()

# -------------------------------------------------------------------
# Evaluation – MAPE & RMSE
# -------------------------------------------------------------------
def mape(y_true, y_pred):
    """Return the mean absolute percentage error, expressed in percent.

    Both inputs are converted to float NumPy arrays first, so plain lists
    work and pandas objects are compared by position rather than by index
    label.

    Args:
        y_true: Actual values (array-like).
        y_pred: Predicted values (array-like, broadcastable to ``y_true``).

    Returns:
        The MAPE as a Python float. Observations whose actual value is 0
        are skipped, because their percentage error is undefined; if no
        observation remains, NaN is returned.
    """
    actual, predicted = np.broadcast_arrays(
        np.asarray(y_true, dtype=float),
        np.asarray(y_pred, dtype=float),
    )
    # A zero actual would turn the ratio into inf/nan and poison the mean.
    usable = actual != 0
    if not usable.any():
        return float("nan")
    ratios = np.abs((actual[usable] - predicted[usable]) / actual[usable])
    return float(np.mean(ratios) * 100)

def rmse(y_true, y_pred):
    """Return the root mean squared error of ``y_pred`` against ``y_true``.

    The mean squared error comes from scikit-learn's
    ``mean_squared_error``; its square root is returned as a Python float.
    """
    squared_error_mean = mean_squared_error(y_true, y_pred)
    root = np.sqrt(squared_error_mean)
    return float(root)

# Pick the Prophet predictions for exactly the held-out dates, and the
# first `horizon` ARIMA predictions.
prophet_test = prophet_forecast.set_index("ds").loc[test_ts["ds"], "yhat"]
arima_test = arima_forecast.head(horizon)

# Work on plain arrays from here on so the comparisons are positional.
y_true = test_ts["y"].values
y_pred_prophet = prophet_test.values
y_pred_arima = arima_test.values

metrics = {
    "prophet_mape": mape(y_true, y_pred_prophet),
    "prophet_rmse": rmse(y_true, y_pred_prophet),
    "arima_mape": mape(y_true, y_pred_arima),
    "arima_rmse": rmse(y_true, y_pred_arima),
}

print("\nForecast metrics:")
for metric_name, metric_value in metrics.items():
    print(f"{metric_name}: {metric_value:.4f}")

# ===================================================================
# Export Forecast Results
# ===================================================================
# One row per held-out date: the actual price, both model predictions
# and each model's absolute error.
results_df = pd.DataFrame({
    "date": test_ts["ds"].values,
    "symbol": symbol,
    "actual_price": y_true,
    "prophet_forecast": y_pred_prophet,
    "arima_forecast": y_pred_arima,
}).assign(
    abs_error_prophet=lambda df: (df["actual_price"] - df["prophet_forecast"]).abs(),
    abs_error_arima=lambda df: (df["actual_price"] - df["arima_forecast"]).abs(),
)

forecast_results_path = os.path.join(RESULTS_DIR, "forecast_results.csv")
results_df.to_csv(forecast_results_path, index=False)

print("\nSaved forecast results to:", forecast_results_path)
display(results_df.head())

print("\n=== 02_stock_forecasting completed ===")
