### Packages

In [1]:
!pip install pmdarima 

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pmdarima
  Downloading pmdarima-2.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (1.8 MB)
[K     |████████████████████████████████| 1.8 MB 5.2 MB/s 
Collecting statsmodels>=0.13.2
  Downloading statsmodels-0.13.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.9 MB)
[K     |████████████████████████████████| 9.9 MB 41.5 MB/s 
Installing collected packages: statsmodels, pmdarima
  Attempting uninstall: statsmodels
    Found existing installation: statsmodels 0.12.2
    Uninstalling statsmodels-0.12.2:
      Successfully uninstalled statsmodels-0.12.2
Successfully installed pmdarima-2.0.1 statsmodels-0.13.5


In [2]:
!pip install arch

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting arch
  Downloading arch-5.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (903 kB)
[K     |████████████████████████████████| 903 kB 5.2 MB/s 
[?25hCollecting property-cached>=1.6.4
  Downloading property_cached-1.6.4-py2.py3-none-any.whl (7.8 kB)
Installing collected packages: property-cached, arch
Successfully installed arch-5.3.1 property-cached-1.6.4


In [3]:
!pip install yfinance

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting yfinance
  Downloading yfinance-0.1.87-py2.py3-none-any.whl (29 kB)
Collecting requests>=2.26
  Downloading requests-2.28.1-py3-none-any.whl (62 kB)
[K     |████████████████████████████████| 62 kB 1.5 MB/s 
Installing collected packages: requests, yfinance
  Attempting uninstall: requests
    Found existing installation: requests 2.23.0
    Uninstalling requests-2.23.0:
      Successfully uninstalled requests-2.23.0
Successfully installed requests-2.28.1 yfinance-0.1.87


In [4]:
import numpy as np
import pandas as pd
import scipy
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from statsmodels.tsa.arima_model import ARIMA
from arch import arch_model
import seaborn as sns
import yfinance
import warnings
warnings.filterwarnings("ignore")
sns.set()

### Loading the data

In [5]:
raw_data = yfinance.download (tickers = "^GSPC ^FTSE ^N225 ^GDAXI", start = "1994-01-07", end = "2018-01-29", 
                              interval = "1d", group_by = 'ticker', auto_adjust = True, treads = True)

[*********************100%***********************]  4 of 4 completed


In [6]:
df_comp = raw_data.copy()

In [7]:
df_comp['spx'] = df_comp['^GSPC'].Close[:]
df_comp['dax'] = df_comp['^GDAXI'].Close[:]
df_comp['ftse'] = df_comp['^FTSE'].Close[:]
df_comp['nikkei'] = df_comp['^N225'].Close[:]

In [8]:
df_comp = df_comp.iloc[1:]
del df_comp['^N225']
del df_comp['^GSPC']
del df_comp['^GDAXI']
del df_comp['^FTSE']
df_comp=df_comp.asfreq('b')
df_comp=df_comp.fillna(method='ffill')

### Creating Returns

In [9]:
df_comp['ret_spx'] = df_comp.spx.pct_change(1)*100
df_comp['ret_ftse'] = df_comp.ftse.pct_change(1)*100
df_comp['ret_dax'] = df_comp.dax.pct_change(1)*100
df_comp['ret_nikkei'] = df_comp.nikkei.pct_change(1)*100

### Splitting the Data

In [10]:
size = int(len(df_comp)*0.8)
df, df_test = df_comp.iloc[:size], df_comp.iloc[size:]

### Fitting a Model

In [11]:
from pmdarima import auto_arima

In [12]:
model_auto = auto_arima(df.ret_ftse[1:])

In [13]:
model_auto

      with_intercept=False)

In [14]:
model_auto.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,5019.0
Model:,"SARIMAX(4, 0, 5)",Log Likelihood,-7882.776
Date:,"Sat, 19 Nov 2022",AIC,15785.552
Time:,08:21:37,BIC,15850.762
Sample:,01-11-1994,HQIC,15808.403
,- 04-05-2013,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ar.L1,0.0121,0.082,0.148,0.882,-0.148,0.172
ar.L2,-0.6541,0.077,-8.456,0.000,-0.806,-0.503
ar.L3,-0.1627,0.071,-2.289,0.022,-0.302,-0.023
ar.L4,0.2015,0.074,2.714,0.007,0.056,0.347
ma.L1,-0.0358,0.081,-0.441,0.659,-0.195,0.123
ma.L2,0.6066,0.078,7.767,0.000,0.454,0.760
ma.L3,0.0621,0.068,0.907,0.364,-0.072,0.196
ma.L4,-0.1935,0.073,-2.652,0.008,-0.337,-0.050
ma.L5,-0.1052,0.010,-11.066,0.000,-0.124,-0.087

0,1,2,3
Ljung-Box (L1) (Q):,0.0,Jarque-Bera (JB):,6354.72
Prob(Q):,0.96,Prob(JB):,0.0
Heteroskedasticity (H):,1.99,Skew:,-0.2
Prob(H) (two-sided):,0.0,Kurtosis:,8.5


### Important Arguments

In [15]:

model_auto = auto_arima(df_comp.ret_ftse[1:], exogenous=df_comp[['ret_spx','ret_nikkei','ret_dax']][1:],
                        m=5 , max_order = None, max_p = 7, max_q = 7, max_d=2,
                        max_P= 4, max_Q = 4, max_D = 2,
                        max_iter=50, alpha = 0.05, n_jobs = -1, trend = 'ct',
                        information_criterion='oob', out_of_sample_size=int(len(df_comp)*0.2)) 

# exogenous -> outside factors (e.g other time series)
# m -> seasonal cycle length
# max_order -> maximum amount of variables to be used in the regression (p + q)
# max_p -> maximum AR components
# max_q -> maximum MA components
# max_d -> maximum Integrations
# maxiter -> maximum iterations we're giving the model to converge the coefficients (becomes harder as the order increases)
# return_valid_fits -> whether or not the method should validate the results 
# alpha -> level of significance, default is 5%, which we should be using most of the time
# n_jobs -> how many models to fit at a time (-1 indicates "as many as possible")
# trend -> "ct" usually
# information_criterion -> 'aic', 'aicc', 'bic', 'hqic', 'oob' 
#        (Akaike Information Criterion, Corrected Akaike Information Criterion,
#        Bayesian Information Criterion, Hannan-Quinn Information Criterion, or
#        "out of bag"--for validation scoring--respectively)
# out_of_smaple_size -> validates the model selection (pass the entire dataset, and set 20% to be the out_of_sample_size)

In [16]:
model_auto.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,6274.0
Model:,"SARIMAX(0, 0, 3)x(2, 0, [1, 2], 5)",Log Likelihood,-9581.139
Date:,"Sat, 19 Nov 2022",AIC,19182.278
Time:,09:00:21,BIC,19249.719
Sample:,0,HQIC,19205.645
,- 6274,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.0283,0.039,0.716,0.474,-0.049,0.106
drift,-2.361e-06,1.06e-05,-0.223,0.824,-2.31e-05,1.84e-05
ma.L1,-0.0242,0.009,-2.752,0.006,-0.041,-0.007
ma.L2,-0.0503,0.008,-6.351,0.000,-0.066,-0.035
ma.L3,-0.0840,0.008,-10.746,0.000,-0.099,-0.069
ar.S.L5,-0.0949,0.724,-0.131,0.896,-1.514,1.324
ar.S.L10,-0.1821,0.203,-0.899,0.369,-0.579,0.215
ma.S.L5,0.0420,0.724,0.058,0.954,-1.377,1.461
ma.S.L10,0.1657,0.231,0.717,0.473,-0.287,0.619

0,1,2,3
Ljung-Box (L1) (Q):,0.14,Jarque-Bera (JB):,9004.63
Prob(Q):,0.71,Prob(JB):,0.0
Heteroskedasticity (H):,0.86,Skew:,-0.23
Prob(H) (two-sided):,0.0,Kurtosis:,8.85
