In [7]:
import os
import pandas as pd
import numpy as np
from datetime import datetime
# Plotting
import matplotlib.pyplot as plt
import seaborn as sns
from prophet import Prophet
from sklearn.metrics import mean_absolute_error
from tqdm import tqdm

pd.set_option('display.max_columns', None)

import warnings
import logging
warnings.filterwarnings('ignore')
%matplotlib inline

In [8]:
# Disable the "cmdstanpy" logger so its records are not emitted while models are fitted.
logging.getLogger("cmdstanpy").disabled = True

In [9]:
# Load every raw price file into a dict keyed by the file name without its extension.
price_df_dict = {}
dir_path = 'price/raw'
for filename in os.listdir(dir_path):
    file_path = os.path.join(dir_path, filename)

    # os.listdir also returns sub-directories and hidden entries
    # (e.g. .ipynb_checkpoints, .DS_Store); those are not price files.
    if filename.startswith('.') or not os.path.isfile(file_path):
        continue

    key = os.path.splitext(filename)[0]
    price_df_dict[key] = pd.read_csv(file_path)

In [10]:
# Show which keys (file names without extension) were loaded from dir_path.
price_df_dict.keys()

dict_keys(['EQIX', 'TM', 'V', 'BAC', 'SHEL', 'PEP', 'LLY', 'COST', 'VALE', 'D', 'TSM', 'UNP', 'ABBV', 'AMT', 'PG', 'CAT', 'AMZN', 'CCI', 'KO', 'NEE', 'UPS', 'MSFT', 'SHW', 'NVDA', 'HD', 'SO', 'XOM', 'CVX', 'CMCSA', 'PLD', 'BHP', 'DIS', 'TTE', 'JPM', 'TSLA', 'COP', 'META', 'GOOG', 'PFE', 'AVGO', 'DUK', 'VZ', 'BABA', 'UNH', 'MA', 'HON', 'BRK-A', 'AAPL', 'APD', 'WMT', 'LMT', 'RIO', 'PSA', 'AEP', 'JNJ'])

In [11]:
# TODO: create a DataFrame for each company from this dictionary.
# For now, work with the AAPL DataFrame only.
stock_df = price_df_dict['AAPL']


In [12]:
# Preview the first rows of the selected price DataFrame.
stock_df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2019-12-31,72.482498,73.419998,72.379997,73.412498,72.552094,100805600
1,2020-01-02,74.059998,75.150002,73.797501,75.087502,74.207466,135480400
2,2020-01-03,74.287498,75.144997,74.125,74.357498,73.486023,146322800
3,2020-01-06,73.447502,74.989998,73.1875,74.949997,74.071579,118387200
4,2020-01-07,74.959999,75.224998,74.370003,74.597504,73.723213,108872000


In [13]:
def prepare_prophet_data(df, end_idx):
    """
    Build a Prophet input frame from the leading rows of a price DataFrame.

    Parameters:
    df: DataFrame with Date, Open, High, Low, Close, 'Adj Close', Volume
    end_idx: exclusive slice bound; only rows [:end_idx] are used

    Returns:
    New DataFrame with 'ds' (Date parsed to datetime) and 'y' ('Adj Close'),
    followed by Open, Close, High, Low and Volume under their original names.
    """
    # Slice once, then pick columns from the window.
    window = df.iloc[:end_idx]

    # Columns copied through unchanged, in the order they appear in the result.
    carried_over = ['Open', 'Close', 'High', 'Low', 'Volume']

    columns = {
        'ds': pd.to_datetime(window['Date']),
        'y': window['Adj Close'],
    }
    for name in carried_over:
        columns[name] = window[name]

    return pd.DataFrame(columns)

In [14]:
# Preview the Prophet-formatted frame.
# NOTE: end_idx=-1 is used as a slice bound ([:-1]), so the final row of stock_df is excluded.
prophet_df = prepare_prophet_data(stock_df, -1)
prophet_df.head()

Unnamed: 0,ds,y,Open,Close,High,Low,Volume
0,2019-12-31,72.552094,72.482498,73.412498,73.419998,72.379997,100805600
1,2020-01-02,74.207466,74.059998,75.087502,75.150002,73.797501,135480400
2,2020-01-03,73.486023,74.287498,74.357498,75.144997,74.125,146322800
3,2020-01-06,74.071579,73.447502,74.949997,74.989998,73.1875,118387200
4,2020-01-07,73.723213,74.959999,74.597504,75.224998,74.370003,108872000


In [15]:
def make_single_prediction(historical_data, current_row, change_point_prior_scale=0.05):
    """
    Fit a fresh Prophet model on historical_data and forecast a single date.

    Parameters:
    historical_data: DataFrame with Prophet's 'ds' (dates) and 'y' (target) columns
    current_row: row (Series or dict-like) whose 'Date' entry is the day to predict
    change_point_prior_scale: forwarded to Prophet as changepoint_prior_scale

    Returns:
    The point forecast ('yhat') for current_row['Date'].
    """
    model = Prophet(
        weekly_seasonality=True,
        yearly_seasonality=True,
        changepoint_prior_scale=change_point_prior_scale,
    )
    model.add_country_holidays(country_name='US')

    # No extra regressors are registered, so only 'ds' and 'y' drive the fit.
    # NOTE(review): Close/High/Low/Volume regressors were previously sketched here
    # but left disabled - presumably because their values for the prediction day
    # are not known in advance; confirm before enabling.
    model.fit(historical_data)

    # Ask Prophet for exactly the target date. The earlier approach extended the
    # history by one calendar day and then patched the last 'ds' value through
    # chained assignment (future['ds'].iloc[-1] = ...); with pandas Copy-on-Write
    # that write never reaches the frame, so the forecast would silently be for
    # the day after the last training date instead of current_row['Date'].
    # A one-row frame also avoids re-predicting the whole history on every call.
    target_date = pd.to_datetime(current_row['Date'])
    future = pd.DataFrame({'ds': [target_date]})

    forecast = model.predict(future)
    return forecast.iloc[-1]['yhat']

In [16]:
def create_rolling_predictions(df, initial_training_days=7, change_point_prior_scale=0.05):
    """
    Create rolling predictions where each prediction only uses data available before that day

    For every row i >= initial_training_days a model is fitted on rows [0, i)
    and asked to predict row i. Rows whose prediction fails keep NaN.
    Error metrics (MAE, MAPE, RMSE) are printed for the rows that were predicted.

    Parameters:
    df: DataFrame with Date, Open, High, Low, Close, 'Adj Close', Volume
    initial_training_days: Number of leading rows used only for training (no prediction)
    change_point_prior_scale: forwarded to make_single_prediction

    Returns:
    Copy of df with an extra 'prophet_predicted_price' column
    """
    # Work on a copy so the caller's frame is left untouched
    result_df = df.copy()
    result_df['prophet_predicted_price'] = np.nan
    # Column position is loop-invariant, so look it up once
    pred_col = result_df.columns.get_loc('prophet_predicted_price')

    print("Generating rolling predictions...")

    for i in tqdm(range(initial_training_days, len(df))):
        try:
            # Training window is rows strictly before day i
            historical_data = prepare_prophet_data(df, i)
            current_row = df.iloc[i]
            prediction = make_single_prediction(historical_data, current_row, change_point_prior_scale)
            result_df.iloc[i, pred_col] = prediction
        except Exception as e:
            # Best-effort: report the failure and leave this day as NaN
            print(f"Error making prediction for day {i}: {e}")
            continue

    print("\nModel Performance Metrics:")

    # Only score rows that actually received a prediction
    mask = result_df['prophet_predicted_price'].notna()
    if not mask.any():
        # Happens when df has no rows past the warm-up window or every fit failed;
        # previously the metric prints below raised on the undefined variables.
        print("############ No predictions available; metrics not computed")
        return result_df

    # The model is trained on 'Adj Close' (the 'y' column built by
    # prepare_prophet_data), so errors are measured against 'Adj Close' as well.
    actual = result_df.loc[mask, 'Adj Close']
    errors = actual - result_df.loc[mask, 'prophet_predicted_price']

    mae = np.mean(np.abs(errors))
    mape = np.mean(np.abs(errors / actual)) * 100
    rmse = np.sqrt(np.mean(errors ** 2))

    print(f"############ MAE: ${mae:.2f}")
    print(f"############ MAPE: {mape:.2f}%")
    print(f"############ RMSE: ${rmse:.2f}")
    return result_df

In [17]:
# change_point_prior_scales = [0.01, 0.025, 0.05, 0.1, 0.5, 1]
# for scale in change_point_prior_scales:
#     print(f"Change Point Prior Scale: {scale}")
#     result_df = create_rolling_predictions(stock_df, initial_training_days=7, change_point_prior_scale=scale)
#     print("\n\n")
# result_df = create_rolling_predictions(stock_df, initial_training_days=7, change_point_prior_scale=0.05)

In [18]:
# Rolling forecast for the selected stock: the first 28 rows are training-only,
# and 0.1 is passed as the change point prior scale.
result_df = create_rolling_predictions(stock_df, initial_training_days=28, change_point_prior_scale=0.1)

Generating rolling predictions...


 65%|██████▍   | 472/729 [01:28<01:23,  3.07it/s]

In [None]:
# Write the predictions to CSV
output_dir = 'price/raw_with_prophet_adj'
# to_csv does not create missing folders; make sure the target exists so the
# long-running result above is not lost to a missing-directory error.
os.makedirs(output_dir, exist_ok=True)
result_df.to_csv(f'{output_dir}/AAPL_prophet_predictions.csv', index=False)

In [None]:
# Generate and save rolling Prophet predictions for several tickers.
# Every ticker uses the same settings; only the symbol varies.
output_dir = 'price/raw_with_prophet_adj'
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs(output_dir, exist_ok=True)

for stock in ['KO', 'TSLA', 'V', 'XOM']:
    result_df = create_rolling_predictions(price_df_dict[stock], initial_training_days=28, change_point_prior_scale=0.1)
    result_df.to_csv(f'{output_dir}/{stock}_prophet_predictions.csv', index=False)