In [None]:
from prophet import Prophet
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np
import os
import glob


In [None]:
# Batch Prophet forecasting: for every symbol, load its latest processed CSV,
# fit Prophet (with any available extra regressors) on the first 90% of rows,
# forecast the remaining 10%, report error metrics, and save the forecast
# table and plot to disk.

# List of 50 stock symbols
stocks = [
    "AAPL","MSFT","GOOGL","AMZN","TSLA","META","NVDA","JPM","V","UNH",
    "HD","PG","DIS","MA","BAC","NFLX","ADBE","PYPL","CMCSA","XOM",
    "PFE","KO","INTC","CSCO","NKE","MRK","PEP","ABBV","ABT","CRM",
    "ORCL","T","VZ","CVX","MCD","WMT","DHR","ACN","LLY","AVGO",
    "QCOM","TXN","NEE","COST","TMUS","IBM","SBUX","MDT","HON","AMD"
]

# Folders
processed_folder = "../data/processed"
predictions_folder = "../data/prophet_prediction"
plots_folder = "../plots/prophet_plot"
os.makedirs(predictions_folder, exist_ok=True)
os.makedirs(plots_folder, exist_ok=True)

# Columns every processed CSV must contain, and the optional extra regressors.
required_cols = ['Date', 'Close']
regressor_cols = ['MA_20', 'Volatility', 'Volume']

processed_stocks = []
skipped_stocks = []


def percent_accuracy(mae, y_actual):
    """Return 100 * (1 - MAE / mean(|actual|)) as a percentage.

    The error is normalised by the *actual* values: dividing by the mean of
    the predictions would let a model that over-forecasts look more accurate.

    Args:
        mae: Mean absolute error of the forecast.
        y_actual: Observed target values the forecast was scored against.

    Returns:
        The accuracy percentage, or NaN when the actuals are empty, non-finite
        or average to zero (the ratio is undefined in those cases).
    """
    baseline = float(np.mean(np.abs(np.asarray(y_actual, dtype=float))))
    if not np.isfinite(baseline) or baseline == 0:
        return float("nan")
    return 100 * (1 - mae / baseline)


for symbol in stocks:
    print(f"\n--- Processing {symbol} ---")

    # Find latest processed CSV for the symbol (lexicographically last name)
    files = glob.glob(f"{processed_folder}/{symbol}_processed*.csv")
    if not files:
        print(f"Processed file for {symbol} not found. Skipping.")
        skipped_stocks.append(symbol)
        continue
    processed_file = max(files)

    # Peek at the header first so a malformed file is skipped instead of
    # aborting the whole batch inside read_csv(parse_dates=...).
    header = pd.read_csv(processed_file, nrows=0).columns
    missing_required = [col for col in required_cols if col not in header]
    if missing_required:
        print(f"{processed_file} is missing required column(s) {missing_required}. Skipping.")
        skipped_stocks.append(symbol)
        continue

    # Load data
    data = pd.read_csv(processed_file, parse_dates=['Date'])

    # Only use regressors that actually carry data. Filling an absent column
    # with NaN and then calling dropna() would discard every row.
    regressors = [
        col for col in regressor_cols
        if col in data.columns and data[col].notna().any()
    ]
    unavailable = [col for col in regressor_cols if col not in regressors]
    if unavailable:
        print(f"Regressor(s) {unavailable} unavailable for {symbol}; fitting without them.")

    data_file = data.rename(columns={'Date': 'ds', 'Close': 'y'})
    # Sort chronologically so the 90/10 split below never trains on the future.
    data_file = data_file[['ds', 'y'] + regressors].dropna().sort_values('ds')

    split_index = int(len(data_file) * 0.9)
    if split_index < 2:
        # Prophet cannot fit on fewer than two rows.
        print(f"Not enough usable rows for {symbol} ({len(data_file)}). Skipping.")
        skipped_stocks.append(symbol)
        continue
    train_data = data_file.iloc[:split_index]
    test_data = data_file.iloc[split_index:]

    # Initialize Prophet
    model_prophet = Prophet()

    # Regressors to strengthen the model (must be registered before fit)
    for reg in regressors:
        model_prophet.add_regressor(reg)

    # Fit on the training window
    model_prophet.fit(train_data)

    # Predict over the held-out window using its known regressor values
    predict = test_data[['ds'] + regressors]
    predictions = model_prophet.predict(predict)

    # Evaluate (positional comparison: both frames are in the same date order)
    y_actual = test_data['y'].to_numpy()
    y_predicted = predictions['yhat'].to_numpy()

    mse = mean_squared_error(y_actual, y_predicted)
    mae = mean_absolute_error(y_actual, y_predicted)
    r2 = r2_score(y_actual, y_predicted)
    accuracy = percent_accuracy(mae, y_actual)

    print(f"Stock: {symbol} | MSE:  {mse:.4f} R2:  {r2:.4f}  Accuracy:  {accuracy:.4f}")

    # Store data
    predictions_file = os.path.join(predictions_folder, f"{symbol}_predictions.csv")
    predictions.to_csv(predictions_file)

    # Plot and save; close the figure so 50 symbols do not pile up in memory
    fig = model_prophet.plot(predictions)
    plot_file = os.path.join(plots_folder, f"{symbol}_prophet_plot.png")
    fig.savefig(plot_file)
    plt.close(fig)

    processed_stocks.append(symbol)


print("\nProphet processing complete.")
print("Processed stocks:", processed_stocks)
print("Skipped stocks:", skipped_stocks)

18:36:55 - cmdstanpy - INFO - Chain [1] start processing



--- Processing AAPL ---


18:36:55 - cmdstanpy - INFO - Chain [1] done processing


Stock: AAPL | MSE:  61.9730 R2:  0.9054  Accuracy:  96.7327

--- Processing MSFT ---


18:36:55 - cmdstanpy - INFO - Chain [1] start processing
18:36:56 - cmdstanpy - INFO - Chain [1] done processing


Stock: MSFT | MSE:  179.8186 R2:  0.4143  Accuracy:  97.4638

--- Processing GOOGL ---


18:36:56 - cmdstanpy - INFO - Chain [1] start processing
18:36:56 - cmdstanpy - INFO - Chain [1] done processing


Stock: GOOGL | MSE:  47.8583 R2:  0.7949  Accuracy:  96.7165

--- Processing AMZN ---


18:36:57 - cmdstanpy - INFO - Chain [1] start processing
18:36:57 - cmdstanpy - INFO - Chain [1] done processing


Stock: AMZN | MSE:  54.5170 R2:  0.8151  Accuracy:  97.0459

--- Processing TSLA ---


18:36:57 - cmdstanpy - INFO - Chain [1] start processing
18:36:58 - cmdstanpy - INFO - Chain [1] done processing


Stock: TSLA | MSE:  909.4883 R2:  0.8202  Accuracy:  89.8510

--- Processing META ---


18:36:58 - cmdstanpy - INFO - Chain [1] start processing
18:36:58 - cmdstanpy - INFO - Chain [1] done processing


Stock: META | MSE:  670.9611 R2:  0.8179  Accuracy:  96.1011

--- Processing NVDA ---


18:36:58 - cmdstanpy - INFO - Chain [1] start processing


KeyboardInterrupt: 