#### Import libraries & set up holidays

In [None]:
import os

import numpy as np
import pandas as pd
import requests
#import tiingo
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
from tqdm import tqdm



## US federal holidays between 2000-01-01 and 2030-12-30
cal = calendar()
holidays = cal.holidays(start='2000-01-01', end='2030-12-30')

# One row per holiday: `ds` holds the date and `holiday` a constant label.
# This frame is later passed to Prophet(holidays=...) and used to filter the
# price history.
holiday_df = pd.DataFrame({'ds': holidays, 'holiday': 'USFederalHoliday'})
# Optional export of the calendar:
#holiday_df.to_csv('us_holidays_until_2030.csv')

In [None]:
## The full 5-minute data set is about 120k rows starting from late 2016. Training the Prophet model takes about 5 minutes and making the forecast takes about a minute.

#### Connect to tiingo

In [None]:
# One-off request to the Tiingo IEX endpoint: SPY 5-minute bars for
# 2015-01-02 .. 2022-12-31 in a single call.
# NOTE(review): the API token is embedded in the URL below; consider loading
# it from configuration instead of keeping it in the notebook.
headers = {'Content-Type': 'application/json'}
probe_url = (
    "https://api.tiingo.com/iex/SPY/prices"
    "?startDate=2015-01-02&endDate=2022-12-31"
    "&resampleFreq=5min"
    "&columns=open,high,low,close,volume"
    "&token=0ed744c7db5ef348139953912e2f3cee79f4608b"
)
requestResponse = requests.get(probe_url, headers=headers)
# Uncomment to inspect the raw payload:
#print(requestResponse.json())

In [None]:
## The earliest Tiingo 5 min data starts from 2016-12-01
ticker = 'SPY'
tiingo_time_interval = '5min'
## Tiingo time intervals accepted = 5min, 15min, 30min, 1hour, 4hour

# The token can be overridden with the TIINGO_API_TOKEN environment variable.
# NOTE(review): the fallback value is a credential committed to source;
# rotate it and drop the fallback once the variable is set wherever this runs.
tiingo_token = os.environ.get('TIINGO_API_TOKEN',
                              '0ed744c7db5ef348139953912e2f3cee79f4608b')

# Month boundaries used to download the history one calendar month per
# request. Month ends are derived from the month starts so the two sequences
# always pair up one-to-one (this also avoids the deprecated 'M' alias).
month_starts = pd.date_range(start='12/1/2016', end='12/1/2022', freq='MS')
month_start_date = month_starts.strftime('%Y-%m-%d')
month_end_date = (month_starts + pd.offsets.MonthEnd(0)).strftime('%Y-%m-%d')
print("the size of the monthly data is " + str(month_end_date.size))


# Download every month for the chosen ticker/interval and collect the frames.
monthly_frames = []
for month_start, month_end in tqdm(zip(month_start_date, month_end_date),
                                   total=month_start_date.size):
    request_url = (
        "https://api.tiingo.com/iex/" + str(ticker) + "/prices"
        "?startDate=" + str(month_start)
        + "&endDate=" + str(month_end)
        + "&resampleFreq=" + str(tiingo_time_interval)
        + "&columns=open,high,low,close,volume"
        + "&token=" + str(tiingo_token)
    )
    # `headers` is defined in the earlier request cell.
    requestResponse = requests.get(request_url, headers=headers, timeout=60)
    # Surface HTTP errors directly instead of failing later while trying to
    # build a DataFrame from an error payload.
    requestResponse.raise_for_status()
    monthly_frames.append(pd.DataFrame(requestResponse.json()))

# DataFrame.append was removed in pandas 2.0; concatenate once instead.
# The per-month row index is kept (no ignore_index) so the CSV layout matches
# what the append-based version wrote.
total_df = pd.concat(monthly_frames)

print('Data download done for the ticker '+str(ticker)+' with a earliest date ' +str(total_df.date.min())+' and latest date '+str(total_df.date.max()))
total_df.to_csv('spy_5_min_all_data.csv')

In [None]:
df_stock = total_df
df_stock['date'] = pd.to_datetime(df_stock['date'])

## Remove the US public holidays
# Match on normalized, timezone-naive timestamps. The previous `.dt.date`
# produced Python `date` objects, which never compare equal to the datetime64
# values in holiday_df.ds, so no row was ever removed.
# NOTE(review): for timezone-aware input the calendar day is taken in UTC —
# confirm that is the intended day boundary.
bar_day = df_stock['date']
if bar_day.dt.tz is not None:
    bar_day = bar_day.dt.tz_convert(None)
bar_day = bar_day.dt.normalize()

# .copy() makes the filtered frame independent, so later column assignments
# do not raise SettingWithCopyWarning.
df_stock = df_stock[~bar_day.isin(holiday_df['ds'])].copy()
df_stock.dtypes

In [None]:
# Rename to the `ds` / `y` column names used by the modelling cells below.
# A new frame is returned instead of renaming in place, so the following
# column assignment does not operate on a filtered slice
# (avoids SettingWithCopyWarning).
df_stock = df_stock.rename(columns={"date": "ds", "close": "y"})
# Drop the timezone from the timestamps (tz_convert(None) converts to UTC
# and removes the tz information).
df_stock['ds'] = df_stock['ds'].dt.tz_convert(None)
# Row count per weekday, as a quick check of the data coverage.
df_stock['ds'].dt.day_name().value_counts()

In [None]:
print(df_stock.shape)
# Row count per hour. As a bare expression in the middle of the cell this
# result was silently discarded, so print it explicitly.
print(df_stock['ds'].dt.hour.value_counts())

# Keep only bars whose hour is within 13..20 (both ends inclusive).
# NOTE(review): `ds` had its timezone dropped earlier (presumably UTC); a
# fixed 13-20 window does not follow US daylight-saving shifts — confirm.
in_session = df_stock['ds'].dt.hour.between(13, 20)
df_stock = (
    df_stock[in_session]
    .sort_values('ds', ascending=True)
    .reset_index(drop=True)
)
print(df_stock.shape)

In [None]:
# Display the cleaned frame for a visual check.
df_stock

#### Prophet

In [None]:
# Build a Prophet model with a linear trend and the federal-holiday frame,
# then fit it on the timestamp (`ds`) and close price (`y`) columns.
m = Prophet(
    growth='linear',
    holidays=holiday_df,
)
m.fit(df_stock.loc[:, ['ds', 'y']])

In [None]:
# Ask the fitted model for a frame of timestamps that reaches 2000 steps of
# 5 minutes past the training data, then preview its last rows.
future = m.make_future_dataframe(freq="5min", periods=2000)
future.tail(n=5)

In [None]:
# Restrict the prediction grid to timestamps comparable to the training data:
# the same 13..20 hour window (inclusive) that was applied to df_stock, and
# weekdays only. The hour-only filter used before left Saturday/Sunday
# timestamps in the grid.
# NOTE(review): assumes the price history has no weekend bars — confirm.
# Holidays are not removed from the future grid here.
within_hours = future['ds'].dt.hour.between(13, 20)
is_weekday = future['ds'].dt.dayofweek < 5  # Monday=0 ... Friday=4
future = (
    future[within_hours & is_weekday]
    .sort_values('ds', ascending=True)
    .reset_index(drop=True)
)
future.shape

#### Prediction results

In [None]:
# Run the fitted model over every timestamp in `future` and preview the
# point forecast with its lower/upper bounds for the last 100 rows.
forecast = m.predict(future)
forecast.loc[:, ['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(100)

In [None]:
# Wider preview: the last 500 forecast rows with their bounds.
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(500)

In [None]:
# Static plot of the forecast produced by the fitted model `m`.
fig1 = m.plot(forecast)

In [None]:
# Static plot of the individual forecast components.
fig2 = m.plot_components(forecast)

In [None]:
# Interactive (plotly) version of the forecast plot.
plot_plotly(m, forecast)

In [None]:
# Interactive (plotly) version of the components plot.
plot_components_plotly(m, forecast)

#### Appendix

In [None]:
#### Test the earliest data available for a given ticker

# ticker = 'SPY'

# x = 77


# # Generate month_start and month end to use for looping over and getting data month by month
# month_start_date  = pd.date_range(start='12/1/2010', end='12/1/2022', freq='MS').strftime('%Y-%m-%d')
# month_end_date  = pd.date_range(start='12/1/2010', end='12/31/2022', freq='M').strftime('%Y-%m-%d')
# print("the size of the monthly data is " + str(month_end_date.size))


# month_start = month_start_date[x]
# month_end = month_start_date[x]

# print(month_start_date[x])


# requestResponse = requests.get("https://api.tiingo.com/iex/"+str(ticker)+"/prices?startDate="+str(month_start)+"&endDate="+str(month_end)+"&resampleFreq=15min&columns=open,high,low,close,volume&token=0ed744c7db5ef348139953912e2f3cee79f4608b", headers=headers)
# print(x) 
# #print("https://api.tiingo.com/iex/SPY/prices?startDate="+str(month_start)+"&endDate="+str(month_end)+"&resampleFreq=5min&columns=open,high,low,close,volume&token=0ed744c7db5ef348139953912e2f3cee79f4608b") 
# df_stock = pd.DataFrame(requestResponse.json()) 
# df_stock


In [None]:
# import xgboost as xgb 
# clf_xgb = xgb.XGBRegressor(objective='reg:squarederror', 
#                             tree_method = 'gpu_hist',
#                             gamma = 0.5,
#                            max_depth = 25,
#                             seed=42,
#                             n_estimators=30)

In [None]:
# clf_xgb.fit(X_train,
#             y_train,
#            verbose=True,
#             early_stopping_rounds=20,
#             eval_metric='rmse',   #aucpr,auc, map
#             eval_set=[(X_test,y_test)])