In [1]:
from prophet import Prophet
from yahooquery import Ticker
import pandas as pd
from datetime import datetime as dt
import pandas_ta as ta
import plotly.express as px
import plotly.graph_objs as go
from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics

  from .autonotebook import tqdm as notebook_tqdm


In [2]:

# Ticker to analyse
symbol = "VOO"

# Yahoo Finance handle for the ticker
stock = Ticker(symbol)

# Pull 48 months of history so there are enough samples to work with
history = stock.history(period="48mo")

# Move the "symbol" index level into a column, leaving the date as the only index
history = history.reset_index(level=["symbol"])

# Keep the dates available as a regular column as well
history = history.assign(date=history.index)

# Swap the index for a proper DatetimeIndex built from the same values
history = history.set_index(pd.DatetimeIndex(history.index))

# Work on an independent copy holding only the columns of interest
data = history.loc[:, ['date', 'adjclose']].copy()

# Append a 21-period EMA of the adjusted close (pandas_ta adds column "EMA_21")
data.ta.ema(close='adjclose', length=21, append=True)

# The first rows have no EMA value yet; discard rows with missing values
data = data.dropna()
data.head()

Unnamed: 0_level_0,date,adjclose,EMA_21
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-06-18,2019-06-18,250.974899,244.094171
2019-06-19,2019-06-19,251.638474,244.780017
2019-06-20,2019-06-20,253.984558,245.616794
2019-06-21,2019-06-21,253.685425,246.350305
2019-06-24,2019-06-24,253.358322,246.987398


In [3]:
# Interactive line chart of the adjusted close over time
fig = px.line(data, x='date', y='adjclose')

# Preset zoom ranges offered by the range selector
zoom_buttons = [
    {"count": 1, "label": "1m", "step": "month", "stepmode": "backward"},
    {"count": 6, "label": "6m", "step": "month", "stepmode": "backward"},
    {"count": 1, "label": "YTD", "step": "year", "stepmode": "todate"},
    {"count": 1, "label": "1y", "step": "year", "stepmode": "backward"},
    {"step": "all"},
]

# Attach the range slider and the preset buttons to the x axis.
# This call stays the last expression of the cell so its result is displayed.
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector={"buttons": zoom_buttons},
)

In [4]:
# Build the Prophet training frame from the whole dataset.
# .copy() makes df_train independent of `data`, so the column assignment below
# does not write into a slice of another frame (avoids SettingWithCopyWarning).
df_train = data[['date', 'adjclose', 'EMA_21']].copy()

# Make sure the date column is datetime64; unparseable values become NaT
df_train['date'] = pd.to_datetime(df_train['date'], errors='coerce')

# Prophet requires the time column to be named "ds" and the target "y"
df_train = df_train.rename(columns={"date": "ds", "adjclose": "y"})

# Create the model
# NOTE(review): the input has one row per trading day, while Prophet's daily
# seasonality component is meant for sub-daily data — confirm it is wanted here.
m = Prophet(daily_seasonality=True)

# Train on the full history
m.fit(df_train)

# Extend the training dates by 30 calendar days into the future
future = m.make_future_dataframe(periods=30)

# Drop weekends (weekday: Monday=0 ... Sunday=6)
future['day'] = future['ds'].dt.weekday
future = future[future['day'] <= 4]

# Predict for every remaining date (history + future)
forecast = m.predict(future)
df_train.head()

23:41:21 - cmdstanpy - INFO - Chain [1] start processing
23:41:22 - cmdstanpy - INFO - Chain [1] done processing


Unnamed: 0_level_0,ds,y,EMA_21
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-06-18,2019-06-18,250.974899,244.094171
2019-06-19,2019-06-19,251.638474,244.780017
2019-06-20,2019-06-20,253.984558,245.616794
2019-06-21,2019-06-21,253.685425,246.350305
2019-06-24,2019-06-24,253.358322,246.987398


In [5]:
# Plot the actual prices, the Prophet forecast and the 21-period EMA
fig = go.Figure([
    go.Scatter(x=df_train['ds'], y=df_train['y'], name='Actual', mode='lines'),
    go.Scatter(x=forecast['ds'], y=forecast['yhat'], name='Predicted', mode='lines'),
    # EMA_21 is a column of df_train, so it is drawn against df_train's own
    # dates; forecast['ds'] also contains the future dates and has a different
    # length, so pairing it with df_train values mixes two unrelated frames.
    go.Scatter(x=df_train['ds'], y=df_train['EMA_21'], name='EMA', mode='lines')
])

# Range slider plus preset zoom buttons on the x axis
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(
        buttons=list([
            dict(count=1, label="1d", step="day", stepmode="backward"),
            dict(count=1, label="1m", step="month", stepmode="backward"),
            dict(count=6, label="6m", step="month", stepmode="backward"),
            dict(count=1, label="YTD", step="year", stepmode="todate"),
            dict(count=1, label="1y", step="year", stepmode="backward"),
            dict(step="all")
        ])
    )
)
fig.show()

In [7]:
# Rows of the forecast dated after the current moment
is_future = forecast['ds'] > dt.today()

# Keep date and predicted value only, renumber the rows from 0 and give the
# columns reader-friendly names
pred_df = (
    forecast.loc[is_future, ['ds', 'yhat']]
    .reset_index(drop=True)
    .rename(columns={'ds': 'date', 'yhat': 'predicted price'})
)

# Display the predictions
pred_df

Unnamed: 0,date,predicted price
0,2023-05-22,374.381414
1,2023-05-23,374.752275
2,2023-05-24,375.363416
3,2023-05-25,375.799914
4,2023-05-26,376.355513
5,2023-05-29,377.565092
6,2023-05-30,377.740632
7,2023-05-31,378.101177
8,2023-06-01,378.237818
9,2023-06-02,378.451339


In [8]:
# Cross-validate the fitted model
#
# Reference:
# https://facebook.github.io/prophet/docs/diagnostics.html#:~:text=Cross%20validation,up%20to%20that%20cutoff%20point.

# Window settings passed to Prophet's cross_validation
cv_windows = {
    "initial": '720 days',
    "period": '30 days',
    "horizon": '365 days',
}
df_cv = cross_validation(m, **cv_windows)

# Peek at the cross-validation predictions
print(df_cv.head())

# Aggregate error metrics from the cross-validation predictions
df_p = performance_metrics(df_cv)
print(df_p.head())

  0%|          | 0/12 [00:00<?, ?it/s]23:59:50 - cmdstanpy - INFO - Chain [1] start processing
23:59:50 - cmdstanpy - INFO - Chain [1] done processing
  8%|▊         | 1/12 [00:00<00:06,  1.58it/s]23:59:50 - cmdstanpy - INFO - Chain [1] start processing
23:59:51 - cmdstanpy - INFO - Chain [1] done processing
 17%|█▋        | 2/12 [00:01<00:05,  1.72it/s]23:59:51 - cmdstanpy - INFO - Chain [1] start processing
23:59:51 - cmdstanpy - INFO - Chain [1] done processing
 25%|██▌       | 3/12 [00:01<00:05,  1.67it/s]23:59:52 - cmdstanpy - INFO - Chain [1] start processing
23:59:52 - cmdstanpy - INFO - Chain [1] done processing
 33%|███▎      | 4/12 [00:02<00:04,  1.74it/s]23:59:52 - cmdstanpy - INFO - Chain [1] start processing
23:59:53 - cmdstanpy - INFO - Chain [1] done processing
 42%|████▏     | 5/12 [00:03<00:04,  1.51it/s]23:59:53 - cmdstanpy - INFO - Chain [1] start processing
23:59:53 - cmdstanpy - INFO - Chain [1] done processing
 50%|█████     | 6/12 [00:03<00:04,  1.43it/s]23:59:54

          ds        yhat  yhat_lower  yhat_upper           y     cutoff
0 2021-06-24  374.122945  366.792768  381.151480  380.065216 2021-06-23
1 2021-06-25  373.941028  366.641651  380.404127  381.470886 2021-06-23
2 2021-06-28  373.543146  366.449556  380.107650  382.265900 2021-06-23
3 2021-06-29  373.663229  366.500666  380.430864  382.434113 2021-06-23
4 2021-06-30  373.726433  366.350523  380.817943  382.784271 2021-06-23
  horizon         mse       rmse        mae      mape     mdape     smape   
0 37 days  592.480897  24.340930  18.372822  0.047647  0.032015  0.046831  \
1 38 days  614.413720  24.787370  18.727612  0.048541  0.032227  0.047729   
2 39 days  633.405718  25.167553  19.038858  0.049330  0.032227  0.048538   
3 40 days  651.962082  25.533548  19.376352  0.050207  0.032685  0.049422   
4 41 days  664.598746  25.779813  19.568102  0.050703  0.032685  0.049930   

   coverage  
0  0.284976  
1  0.280731  
2  0.278595  
3  0.271951  
4  0.274086  
