In [17]:
import pandas as pd
import numpy as np

In [18]:
# Read the raw price table; the path is relative to the notebook's working directory.
dat = pd.read_csv(filepath_or_buffer="../../stockData.csv")

In [19]:
# Label the raw columns positionally; the assignment raises unless the frame has
# exactly these 12 columns.
dat.columns = [
    'Date', 'SPX',
    'TSLA', 'AAPL', 'NKE', 'AMZN', 'WMT', 'KO',
    'Gold', 'Oil', 'UST10', 'UST2',
]

# Discard the single-stock columns, drop the final row and parse the dates.
# NOTE(review): the reason for dropping the last row is not visible here - TODO confirm.
dat = (
    dat
    .drop(columns=['TSLA', 'AAPL', 'NKE', 'AMZN', 'WMT', 'KO'])
    .iloc[:-1]
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)
# Disabled feature idea, kept for reference:
#dat['USTspread'] = dat.UST10 - dat.UST2
dat.head()

Unnamed: 0,Date,SPX,Gold,Oil,UST10,UST2
0,2017-01-02,2238.83,1147.5,53.72,2.4443,1.1883
1,2017-01-03,2257.83,1158.84,52.33,2.4444,1.2141
2,2017-01-04,2270.75,1163.65,53.26,2.439,1.214
3,2017-01-05,2269.0,1180.12,53.76,2.3443,1.1621
4,2017-01-06,2276.98,1172.63,53.99,2.4193,1.2098


### Start time series analysis proper

In [20]:
from prophet import Prophet
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import plotly.io as pio
import plotly.express as px

In [21]:
# Visual sanity check of the SPX column against Date before any modelling.
fig = px.line(
    data_frame=dat,
    x="Date",
    y="SPX",
    title='S&P time series',
)
fig.show()

In [22]:
# Put the frame in the layout used for the Prophet model below: the date column
# becomes 'ds' and the target (SPX) becomes 'y'. The row index is rebuilt from 0.
# NOTE: this overwrites `dat`, so cells above that reference 'Date'/'SPX' will not
# re-run after this point.
dat = (
    dat
    .rename(columns={'Date':'ds','SPX':'y'})
    .reset_index(drop=True)
)

In [23]:
# Show the last five rows to confirm the renamed columns and the end of the sample.
dat.tail(n=5)

Unnamed: 0,ds,y,Gold,Oil,UST10,UST2
1429,2022-06-24,3911.74,1826.88,107.62,3.1301,3.0632
1430,2022-06-27,3900.11,1822.85,109.57,3.1997,3.1212
1431,2022-06-28,3821.55,1820.01,111.76,3.1715,3.1096
1432,2022-06-29,3818.83,1817.73,109.78,3.0891,3.0385
1433,2022-06-30,3785.38,1807.27,105.76,3.0129,2.9533


In [24]:
# Number of rows available for the rolling evaluation.
dat.shape[0]

1434

In [25]:
# Rolling evaluation scheme: 1000 splits, each with a training window capped at
# 150 rows and a test set of exactly one row.
ts_cv = TimeSeriesSplit(
    n_splits=1000,
    test_size=1,
    max_train_size=150,
)

In [26]:
# Materialise every (train_indices, test_indices) pair produced by the splitter.
all_splits = [(train_idx, test_idx) for train_idx, test_idx in ts_cv.split(dat)]

# Report the split count and the size of the first split's train/test index arrays.
first_train_idx, first_test_idx = all_splits[0]
print("Number of sections: {}".format(len(all_splits)))
print("Number of X values per section: {}".format(len(first_train_idx)))
print("Number of y values per section: {}".format(len(first_test_idx)))

Number of sections: 1000
Number of X values per section: 150
Number of y values per section: 1


In [27]:
# List the column names; every column other than 'ds' and 'y' is used as a
# regressor in the evaluation loop below.
dat.columns

Index(['ds', 'y', 'Gold', 'Oil', 'UST10', 'UST2'], dtype='object')

In [28]:
# Number of splits, i.e. how many Prophet models the evaluation loop will fit.
len(all_splits)

1000

In [29]:
# Walk-forward evaluation: for every split, fit a fresh Prophet model on the
# training window, predict the single held-out row, and record the date, the
# true value, the prediction and the absolute percentage error.

# Every column except 'ds' and 'y' is registered as an extra regressor. The list
# is identical for every split, so it is built once rather than once per fit.
regressors = [col for col in dat.columns if col not in ('ds', 'y')]

# NOTE(review): predict() receives the held-out row including its own same-day
# regressor values. If those would not be known when the forecast is made, the
# reported error is optimistic - TODO confirm this is intended.
# NOTE(review): each fit emits two cmdstanpy INFO lines (about 2000 lines for all
# splits); consider raising that logger's level before running the full loop.
results = []

for train_idx, test_idx in all_splits:
    train_rows = dat.iloc[train_idx]
    test_row = dat.iloc[test_idx]  # one-row frame (test_size=1)

    m = Prophet()
    for reg in regressors:
        m.add_regressor(reg)
    m.fit(train_rows)

    # Select by column name rather than by position so that reordering the
    # columns of `dat` cannot silently pick up the wrong field.
    test_date = test_row['ds'].values[0]
    true_y = test_row['y'].values[0]
    forecast = m.predict(test_row).yhat.values[0]

    # Absolute percentage error of this single forecast; the key is named 'MAPE'
    # because the mean of this column is what gets reported.
    mape = np.abs((true_y - forecast) / true_y) * 100
    results.append(
        {'test_date':test_date, 'true_y':true_y, 'prediction':forecast, 'MAPE':mape}
    )

16:07:22 - cmdstanpy - INFO - Chain [1] start processing
16:07:22 - cmdstanpy - INFO - Chain [1] done processing
16:07:23 - cmdstanpy - INFO - Chain [1] start processing
16:07:23 - cmdstanpy - INFO - Chain [1] done processing
16:07:24 - cmdstanpy - INFO - Chain [1] start processing
16:07:24 - cmdstanpy - INFO - Chain [1] done processing
16:07:26 - cmdstanpy - INFO - Chain [1] start processing
16:07:26 - cmdstanpy - INFO - Chain [1] done processing
16:07:27 - cmdstanpy - INFO - Chain [1] start processing
16:07:27 - cmdstanpy - INFO - Chain [1] done processing
16:07:28 - cmdstanpy - INFO - Chain [1] start processing
16:07:28 - cmdstanpy - INFO - Chain [1] done processing
16:07:30 - cmdstanpy - INFO - Chain [1] start processing
16:07:30 - cmdstanpy - INFO - Chain [1] done processing
16:07:31 - cmdstanpy - INFO - Chain [1] start processing
16:07:31 - cmdstanpy - INFO - Chain [1] done processing
16:07:32 - cmdstanpy - INFO - Chain [1] start processing
16:07:32 - cmdstanpy - INFO - Chain [1]

In [30]:
# One row per evaluated split: test_date, true_y, prediction, MAPE.
r = pd.DataFrame(data=results)

In [31]:
# Mean absolute percentage error across all evaluated splits.
r["MAPE"].mean()

1.6617426600055836

In [39]:
# Persist the per-split results next to the notebook (the row index is written too,
# since `index` is left at its default).
r.to_csv(path_or_buf="results_ex.csv")

In [38]:
#pred_ex = r.loc[:,['test_date','prediction']]
#pred_ex.columns= ['date','prediction']
#pred_ex.to_csv("pred_ex.csv")

In [None]:
# Regressor assessment. Consider regularisation and hyperparameter optimisation.
#from prophet.utilities import regressor_coefficients

In [None]:
#regressor_coefficients(m)

Unnamed: 0,regressor,regressor_mode,center,coef_lower,coef,coef_upper
0,Gold,additive,1286.724533,0.891458,0.891458,0.891458
1,Oil,additive,66.810933,-2.819052,-2.819052,-2.819052
2,UST10,additive,2.89111,-148.828024,-148.828024,-148.828024
3,UST2,additive,2.445769,612.452169,612.452169,612.452169
