In [19]:
from prophet import Prophet
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error
import os, glob, warnings, datetime as dt
warnings.filterwarnings("ignore")


# List of 50 stock symbols
stocks = [
    "AAPL","MSFT","GOOGL","AMZN","TSLA","META","NVDA","JPM","V","UNH",
    "HD","PG","DIS","MA","BAC","NFLX","ADBE","PYPL","CMCSA","XOM",
    "PFE","KO","INTC","CSCO","NKE","MRK","PEP","ABBV","ABT","CRM",
    "ORCL","T","VZ","CVX","MCD","WMT","DHR","ACN","LLY","AVGO",
    "QCOM","TXN","NEE","COST","TMUS","IBM","SBUX","MDT","HON","AMD"
]

# Folders
processed_folder = "../data/processed"
predictions_folder = "../data/prophet_prediction"
plots_folder = "../plots/prophet_plot"
os.makedirs(predictions_folder, exist_ok=True)
os.makedirs(plots_folder, exist_ok=True)

processed_stocks = []
skipped_stocks = []

for symbol in stocks:
    print(f"\n--- Processing {symbol} ---")
    # Find latest processed CSV for the symbol
    files = glob.glob(f"{processed_folder}/{symbol}_processed*.csv")
    if files:
        processed_file = sorted(files)[-1]  # pick the latest file
    else:
        print(f"Processed file for {symbol} not found. Skipping.")
        skipped_stocks.append(symbol)
        continue
    
    SELECTED_REGRESSORS = [
    'Return_lag1', 
    'Distance_from_MA_20',
    'RSI_14',
    'High_Low_Range',
    'OBV_Divergence'
    ]
    

    # Load data
    data  = pd.read_csv(processed_file, parse_dates=['Price'])
    data = data[['Price', 'Close']].rename(columns={'Price': 'ds', 'Close': 'y'})
    
    # Filter data to include ds, y, and all valid regressors
    valid_regressors = [reg for reg in SELECTED_REGRESSORS if reg in data.columns]
    prophet_data = data[['ds', 'y'] + valid_regressors]
    prophet_data = prophet_data.dropna()
    
    temp = int(len(prophet_data) * 0.8)
    train_data = prophet_data[:temp]
    test_data = prophet_data[temp:]

    train_data['y_scaled'] = train_data['y'] / 100
    test_data['y_scaled'] = test_data['y'] / 100
    
    
    # Create Model
    model_prophet = Prophet(daily_seasonality=True, yearly_seasonality=True,
                            weekly_seasonality=True, interval_width=0.95, changepoint_prior_scale=0.5)
    for reg in valid_regressors:
        model_prophet.add_regressor(reg, prior_scale=0.5, mode='multiplicative')

    model_prophet.fit(train_data)

    # Predict first using test data
    test_future = test_data[['ds']]
    test_predictions = model_prophet.predict(test_future)

    y_true = test_data['y'].values
    y_pred = test_predictions['yhat'].values

    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    print("\nMetrics:")
    print("Close >> RMSE: $", "%.4f" % rmse, "R2:", "%.4f" % r2, "MAE: $", "%.4f" % mae)

    # Future pred
    future = model_prophet.make_future_dataframe(periods=30)
    predictions = model_prophet.predict(future)
    
    fig = model_prophet.plot(predictions)
    # Add a marker for the split point
    plt.title(f"Prophet Forecast for {symbol}")
    plot_file = os.path.join(plots_folder, f"{symbol}_prophet_plot.png")
    fig.savefig(plot_file)
    plt.close(fig)
    
    # Store data
    predictions_file = os.path.join(predictions_folder, f"{symbol}_predictions.csv")
    predictions.to_csv(predictions_file)



print("\nProphet processing complete.")
print("Processed stocks:", processed_stocks)
print("Skipped stocks:", skipped_stocks)

22:02:25 - cmdstanpy - INFO - Chain [1] start processing



--- Processing AAPL ---


22:02:26 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 47.6359 R2: -1.9704 MAE: $ 40.0941


22:02:27 - cmdstanpy - INFO - Chain [1] start processing



--- Processing MSFT ---


22:02:28 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 181.3970 R2: -7.6393 MAE: $ 162.0729


22:02:28 - cmdstanpy - INFO - Chain [1] start processing



--- Processing GOOGL ---


22:02:30 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 86.9773 R2: -9.2944 MAE: $ 75.6355


22:02:30 - cmdstanpy - INFO - Chain [1] start processing



--- Processing AMZN ---


22:02:31 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 133.3862 R2: -12.6553 MAE: $ 114.6292


22:02:32 - cmdstanpy - INFO - Chain [1] start processing



--- Processing TSLA ---


22:02:32 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 69.1541 R2: -0.4406 MAE: $ 50.3197


22:02:33 - cmdstanpy - INFO - Chain [1] start processing



--- Processing META ---


22:02:33 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 552.0557 R2: -15.1677 MAE: $ 495.7840


22:02:34 - cmdstanpy - INFO - Chain [1] start processing



--- Processing NVDA ---


22:02:35 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 78.4760 R2: -2.6765 MAE: $ 64.2627


22:02:35 - cmdstanpy - INFO - Chain [1] start processing



--- Processing JPM ---


22:02:36 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 111.7572 R2: -8.8533 MAE: $ 98.4022


22:02:37 - cmdstanpy - INFO - Chain [1] start processing



--- Processing V ---


22:02:37 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 93.0932 R2: -10.7728 MAE: $ 84.5803


22:02:38 - cmdstanpy - INFO - Chain [1] start processing



--- Processing UNH ---


22:02:39 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 126.6406 R2: -8.3013 MAE: $ 120.6680


22:02:39 - cmdstanpy - INFO - Chain [1] start processing



--- Processing HD ---


22:02:40 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 117.3925 R2: -6.9796 MAE: $ 100.0758


22:02:40 - cmdstanpy - INFO - Chain [1] start processing



--- Processing PG ---


22:02:41 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 18.7611 R2: -1.5063 MAE: $ 15.3367


22:02:42 - cmdstanpy - INFO - Chain [1] start processing



--- Processing DIS ---


22:02:43 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 95.7312 R2: -87.7052 MAE: $ 85.6456


22:02:44 - cmdstanpy - INFO - Chain [1] start processing



--- Processing MA ---


22:02:44 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 144.8079 R2: -6.8365 MAE: $ 128.3970


22:02:45 - cmdstanpy - INFO - Chain [1] start processing



--- Processing BAC ---


22:02:46 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 14.6556 R2: -5.1061 MAE: $ 11.0167


22:02:46 - cmdstanpy - INFO - Chain [1] start processing



--- Processing NFLX ---


22:02:48 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 813.7201 R2: -23.5289 MAE: $ 739.9745


22:02:48 - cmdstanpy - INFO - Chain [1] start processing



--- Processing ADBE ---


22:02:49 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 497.8930 R2: -37.6633 MAE: $ 463.3293


22:02:50 - cmdstanpy - INFO - Chain [1] start processing



--- Processing PYPL ---


22:02:50 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 202.0794 R2: -504.3851 MAE: $ 187.0548


22:02:51 - cmdstanpy - INFO - Chain [1] start processing



--- Processing CMCSA ---


22:02:52 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 28.1182 R2: -120.8671 MAE: $ 26.2060


22:02:52 - cmdstanpy - INFO - Chain [1] start processing



--- Processing XOM ---


22:02:53 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 36.0097 R2: -24.5487 MAE: $ 32.0348


22:02:54 - cmdstanpy - INFO - Chain [1] start processing



--- Processing PFE ---


22:02:54 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 20.0793 R2: -20.5492 MAE: $ 19.0200


22:02:55 - cmdstanpy - INFO - Chain [1] start processing



--- Processing KO ---


22:02:56 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 8.0316 R2: -2.3070 MAE: $ 7.3496


22:02:56 - cmdstanpy - INFO - Chain [1] start processing



--- Processing INTC ---


22:02:57 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 25.7380 R2: -10.5044 MAE: $ 23.1892


22:02:58 - cmdstanpy - INFO - Chain [1] start processing



--- Processing CSCO ---


22:02:59 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 18.7231 R2: -24.5424 MAE: $ 16.9945


22:03:00 - cmdstanpy - INFO - Chain [1] start processing



--- Processing NKE ---


22:03:01 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 57.1708 R2: -13.7716 MAE: $ 55.1259


22:03:01 - cmdstanpy - INFO - Chain [1] start processing



--- Processing MRK ---


22:03:02 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 12.4242 R2: -0.7130 MAE: $ 9.4377


22:03:03 - cmdstanpy - INFO - Chain [1] start processing



--- Processing PEP ---


22:03:03 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 24.9152 R2: -11.8807 MAE: $ 20.9184


22:03:04 - cmdstanpy - INFO - Chain [1] start processing



--- Processing ABBV ---


22:03:05 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 31.3082 R2: -1.5680 MAE: $ 29.1506


22:03:06 - cmdstanpy - INFO - Chain [1] start processing



--- Processing ABT ---


22:03:06 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 23.6469 R2: -10.6926 MAE: $ 20.1331


22:03:07 - cmdstanpy - INFO - Chain [1] start processing



--- Processing CRM ---


22:03:08 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 234.1376 R2: -22.5921 MAE: $ 212.7403

--- Processing ORCL ---


22:03:09 - cmdstanpy - INFO - Chain [1] start processing
22:03:10 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 73.9052 R2: -6.2267 MAE: $ 66.1627


22:03:10 - cmdstanpy - INFO - Chain [1] start processing



--- Processing T ---


22:03:11 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 3.3381 R2: -0.5615 MAE: $ 2.4164


22:03:12 - cmdstanpy - INFO - Chain [1] start processing



--- Processing VZ ---


22:03:13 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 11.8072 R2: -8.4553 MAE: $ 8.9761


22:03:13 - cmdstanpy - INFO - Chain [1] start processing



--- Processing CVX ---


22:03:14 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 81.8978 R2: -140.5290 MAE: $ 75.9693


22:03:14 - cmdstanpy - INFO - Chain [1] start processing



--- Processing MCD ---


22:03:15 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 17.1883 R2: -0.1947 MAE: $ 14.3231


22:03:16 - cmdstanpy - INFO - Chain [1] start processing



--- Processing WMT ---


22:03:17 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 22.2011 R2: -1.8520 MAE: $ 17.3796


22:03:17 - cmdstanpy - INFO - Chain [1] start processing



--- Processing DHR ---


22:03:18 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 49.4234 R2: -4.3448 MAE: $ 38.0168


22:03:19 - cmdstanpy - INFO - Chain [1] start processing



--- Processing ACN ---


22:03:20 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 104.3793 R2: -8.8734 MAE: $ 93.3749


22:03:20 - cmdstanpy - INFO - Chain [1] start processing



--- Processing LLY ---


22:03:21 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 241.6889 R2: -0.5134 MAE: $ 198.9738


22:03:21 - cmdstanpy - INFO - Chain [1] start processing



--- Processing AVGO ---


22:03:23 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 73.6399 R2: -1.9019 MAE: $ 59.8460


22:03:23 - cmdstanpy - INFO - Chain [1] start processing



--- Processing QCOM ---


22:03:24 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 55.5056 R2: -2.4121 MAE: $ 41.3832


22:03:25 - cmdstanpy - INFO - Chain [1] start processing



--- Processing TXN ---


22:03:25 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 47.6665 R2: -5.8919 MAE: $ 40.1372


22:03:26 - cmdstanpy - INFO - Chain [1] start processing



--- Processing NEE ---


22:03:26 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 13.3328 R2: -1.4309 MAE: $ 10.0236


22:03:27 - cmdstanpy - INFO - Chain [1] start processing



--- Processing COST ---


22:03:28 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 187.7213 R2: -0.2655 MAE: $ 140.0837


22:03:29 - cmdstanpy - INFO - Chain [1] start processing



--- Processing TMUS ---


22:03:29 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 33.3698 R2: -0.2275 MAE: $ 24.3629


22:03:30 - cmdstanpy - INFO - Chain [1] start processing



--- Processing IBM ---


22:03:31 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 40.9043 R2: -0.4376 MAE: $ 30.7574


22:03:31 - cmdstanpy - INFO - Chain [1] start processing



--- Processing SBUX ---


22:03:32 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 44.3357 R2: -28.8695 MAE: $ 43.2732


22:03:33 - cmdstanpy - INFO - Chain [1] start processing



--- Processing MDT ---


22:03:34 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 41.6676 R2: -71.9454 MAE: $ 37.0410


22:03:34 - cmdstanpy - INFO - Chain [1] start processing



--- Processing HON ---


22:03:35 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 49.7559 R2: -14.6162 MAE: $ 44.8401


22:03:35 - cmdstanpy - INFO - Chain [1] start processing



--- Processing AMD ---


22:03:36 - cmdstanpy - INFO - Chain [1] done processing



Metrics:
Close >> RMSE: $ 98.3435 R2: -8.2223 MAE: $ 87.0375

Prophet processing complete.
Processed stocks: []
Skipped stocks: []
