### Packages

In [1]:
import numpy as np
import pandas as pd
import scipy
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from statsmodels.tsa.arima.model import ARIMA
from arch import arch_model
import seaborn as sns
import yfinance
import warnings
# Silence every warning for the rest of the session. Note that this also hides
# deprecation warnings raised by the libraries used in the cells below.
warnings.filterwarnings("ignore")
# Apply seaborn's default theme to all subsequent matplotlib figures.
sns.set()

### Loading the data

In [3]:
# Download daily, auto-adjusted price history for the four tickers in a single
# request; group_by='ticker' asks for the columns to be grouped per symbol.
raw_data = yfinance.download(
    tickers="^GSPC ^FTSE ^N225 ^GDAXI",
    start="1994-01-07",
    end="2018-01-29",
    interval="1d",
    group_by='ticker',
    auto_adjust=True,
)

[*********************100%%**********************]  4 of 4 completed


In [4]:
# Work on a copy so the downloaded frame in raw_data stays untouched and the
# cells below can be re-run without downloading again.
df_comp = raw_data.copy()

In [5]:
# Copy each ticker's closing price into its own short-named column.
close_columns = {'spx': '^GSPC', 'dax': '^GDAXI', 'ftse': '^FTSE', 'nikkei': '^N225'}
for short_name, ticker in close_columns.items():
    df_comp[short_name] = df_comp[ticker]['Close']

In [6]:
# Tidy the frame in one pipeline:
#   1. drop the first downloaded row,
#   2. remove the per-ticker column groups (only the short-named close columns
#      created in the previous cell are kept),
#   3. re-index to a business-day frequency,
#   4. forward-fill the gaps that the re-indexing introduces.
# `.ffill()` replaces `fillna(method='ffill')`, which is deprecated in
# pandas 2.x, and the canonical 'B' alias replaces lower-case 'b'. Both
# deprecation warnings would otherwise be hidden by the global warnings filter.
# NOTE: like the original cell, this is not idempotent -- re-running it without
# re-running the cells above drops another row and fails on the missing tickers.
df_comp = (
    df_comp.iloc[1:]
    .drop(columns=['^N225', '^GSPC', '^GDAXI', '^FTSE'], level=0)
    .asfreq('B')
    .ffill()
)

### Creating Returns

In [7]:
# One-period percentage returns (in percent) for every price column.
for index_name in ('spx', 'ftse', 'dax', 'nikkei'):
    df_comp['ret_' + index_name] = df_comp[index_name].pct_change(1) * 100

### Splitting the Data

In [8]:
# Chronological 80/20 split: the first 80% of the rows go to `df`,
# the remaining rows go to `df_test`.
size = int(len(df_comp)*0.8)
df = df_comp.iloc[:size]
df_test = df_comp.iloc[size:]

### Fitting a Model

In [10]:
from pmdarima.arima import auto_arima

In [11]:
# Let auto_arima choose a model with its default settings on the FTSE returns
# of the training split; the first row is skipped because pct_change leaves
# the first return as NaN.
model_auto = auto_arima(df.ret_ftse[1:])

In [16]:
# Display the repr of the model that auto_arima selected.
model_auto

In [15]:
# Summary table of the selected model (order, coefficients, diagnostics).
model_auto.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,5019.0
Model:,"SARIMAX(4, 0, 5)",Log Likelihood,-7882.776
Date:,"Thu, 15 Feb 2024",AIC,15785.552
Time:,17:04:16,BIC,15850.762
Sample:,01-11-1994,HQIC,15808.403
,- 04-05-2013,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ar.L1,0.0121,0.082,0.148,0.882,-0.148,0.172
ar.L2,-0.6542,0.077,-8.456,0.000,-0.806,-0.503
ar.L3,-0.1627,0.071,-2.290,0.022,-0.302,-0.023
ar.L4,0.2015,0.074,2.713,0.007,0.056,0.347
ma.L1,-0.0357,0.081,-0.440,0.660,-0.195,0.123
ma.L2,0.6067,0.078,7.768,0.000,0.454,0.760
ma.L3,0.0621,0.068,0.908,0.364,-0.072,0.196
ma.L4,-0.1934,0.073,-2.650,0.008,-0.336,-0.050
ma.L5,-0.1052,0.010,-11.066,0.000,-0.124,-0.087

0,1,2,3
Ljung-Box (L1) (Q):,0.0,Jarque-Bera (JB):,6354.66
Prob(Q):,0.96,Prob(JB):,0.0
Heteroskedasticity (H):,1.99,Skew:,-0.2
Prob(H) (two-sided):,0.0,Kurtosis:,8.5


### Important Arguments

In [27]:
# Re-run the search on the full data set with the other indices' returns as
# regressors. The first row is skipped for both the target and the regressors
# because pct_change leaves the first return as NaN.
model_auto = auto_arima(
    df_comp.ret_ftse[1:],
    X = df_comp[['ret_spx', 'ret_dax', 'ret_nikkei']][1:],  # outside factors (other time series)
    max_order = None,                                        # no cap on the combined model order
    n_jobs = -1,                                             # -1 means "as many models at a time as possible"
    trend = 'ct',                                            # constant + linear time trend
    seasonal = True,                                         # allow seasonal terms
    information_criterion = 'oob',                           # rank candidates by out-of-sample score
    out_of_sample_size = int(len(df_comp)*0.2),              # hold out the last 20% for that score
)

# NOTE(review): according to the pmdarima documentation, `seasonal=True` is
# switched off when `m` is left at its default of 1, and `n_jobs` is only used
# for a non-stepwise (grid) search. With the default `m` and `stepwise` both
# arguments appear to have no effect here -- confirm whether `m=...` and/or
# `stepwise=False` were intended.

# Glossary of the most useful auto_arima arguments:
# X -> outside factors, e.g. other time series (called `exogenous` in older pmdarima releases)
# m -> seasonal cycle length (number of periods per season)
# max_order -> maximum total order of the model (p + q, plus P + Q for seasonal models); None removes the cap
# max_p -> maximum number of AR components
# max_q -> maximum number of MA components
# max_d -> maximum number of integrations (order of differencing)
# maxiter -> maximum number of iterations the optimiser gets to converge the coefficients
#        (convergence becomes harder as the order increases)
# return_valid_fits -> whether to return every valid fitted model instead of only the best one
# alpha -> level of significance for the tests; the default of 5% is appropriate most of the time
# n_jobs -> how many models to fit at a time (-1 indicates "as many as possible")
# trend -> 'c' (constant), 't' (linear trend) or 'ct' (both); "ct" usually
# information_criterion -> 'aic', 'aicc', 'bic', 'hqic', 'oob'
#        (Akaike Information Criterion, Corrected Akaike Information Criterion,
#        Bayesian Information Criterion, Hannan-Quinn Information Criterion, or
#        "out of bag"--for validation scoring--respectively)
# out_of_sample_size -> number of trailing observations held out to validate the model selection
#        (pass the entire dataset, and set 20% of it to be the out_of_sample_size)

In [28]:
# Summary table of the model selected with the regressors and trend terms.
model_auto.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,6274.0
Model:,"SARIMAX(2, 0, 2)",Log Likelihood,-6358.228
Date:,"Thu, 15 Feb 2024",AIC,12736.456
Time:,18:10:07,BIC,12803.898
Sample:,0,HQIC,12759.824
,- 6274,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,-0.0093,0.008,-1.127,0.260,-0.025,0.007
drift,1.032e-06,2.49e-06,0.414,0.679,-3.85e-06,5.91e-06
x1,0.0852,0.006,13.610,0.000,0.073,0.097
x2,0.5630,0.005,103.675,0.000,0.552,0.574
x3,0.0748,0.005,16.061,0.000,0.066,0.084
ar.L1,-0.1321,0.083,-1.586,0.113,-0.295,0.031
ar.L2,0.5566,0.045,12.432,0.000,0.469,0.644
ma.L1,0.0113,0.083,0.136,0.892,-0.151,0.174
ma.L2,-0.5844,0.046,-12.807,0.000,-0.674,-0.495

0,1,2,3
Ljung-Box (L1) (Q):,2.19,Jarque-Bera (JB):,14210.53
Prob(Q):,0.14,Prob(JB):,0.0
Heteroskedasticity (H):,0.55,Skew:,0.25
Prob(H) (two-sided):,0.0,Kurtosis:,10.36
