In [4]:
!pip install arch
!pip install yfinance
!pip install pmdarima



In [0]:
import numpy as np
import pandas as pd
import scipy
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import sklearn
from statsmodels.tsa.arima_model import ARIMA
from arch import arch_model
import yfinance
import warnings; warnings.filterwarnings("ignore")

In [8]:
raw_data = yfinance.download(tickers="^GSPC ^FTSE ^N225 ^GDAXI", 
                             start="1994-01-07", end="2018-01-29", 
                             interval="1d", group_by="ticker", auto_adjust=True, 
                             treads=True)

[*********************100%***********************]  4 of 4 completed


In [0]:
df_comp = raw_data.copy()

In [0]:
df_comp["spx"] = df_comp["^GSPC"].Close[:]
df_comp["dax"] = df_comp["^GDAXI"].Close[:]
df_comp["ftse"] = df_comp["^FTSE"].Close[:]
df_comp["nikkei"] = df_comp["^N225"].Close[:]

In [0]:
df_comp = df_comp.iloc[1:]
del df_comp["^GSPC"]
del df_comp["^GDAXI"]
del df_comp["^FTSE"]
del df_comp["^N225"]

df_comp = df_comp.asfreq('b')
df_comp = df_comp.fillna(method="ffill")

In [0]:
df_comp["ret_spx"] = df_comp.spx.pct_change(1)*100
df_comp["ret_ftse"] = df_comp.ftse.pct_change(1)*100
df_comp["ret_dax"] = df_comp.dax.pct_change(1)*100
df_comp["ret_nikkei"] = df_comp.nikkei.pct_change(1)*100

In [0]:
size = int(len(df_comp)*0.8)
df, df_test = df_comp.iloc[:size], df_comp.iloc[size:]

In [0]:
from pmdarima.arima import auto_arima

In [0]:
model_auto = auto_arima(df.ret_ftse[1:])

In [16]:
model_auto

ARIMA(maxiter=50, method='lbfgs', order=(4, 0, 5), out_of_sample_size=0,
      scoring='mse', scoring_args=None, seasonal_order=(0, 0, 0, 0),
      with_intercept=True)

In [17]:
model_auto.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,5019.0
Model:,"SARIMAX(4, 0, 5)",Log Likelihood,-7882.658
Date:,"Tue, 17 Mar 2020",AIC,15787.316
Time:,22:46:01,BIC,15859.047
Sample:,0,HQIC,15812.452
,- 5019,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.0309,0.025,1.246,0.213,-0.018,0.080
ar.L1,0.0135,0.082,0.165,0.869,-0.147,0.174
ar.L2,-0.6690,0.077,-8.645,0.000,-0.821,-0.517
ar.L3,-0.1616,0.072,-2.260,0.024,-0.302,-0.021
ar.L4,0.1898,0.074,2.553,0.011,0.044,0.335
ma.L1,-0.0384,0.081,-0.471,0.637,-0.198,0.121
ma.L2,0.6205,0.078,7.933,0.000,0.467,0.774
ma.L3,0.0592,0.069,0.858,0.391,-0.076,0.194
ma.L4,-0.1836,0.073,-2.510,0.012,-0.327,-0.040

0,1,2,3
Ljung-Box (Q):,67.77,Jarque-Bera (JB):,6360.08
Prob(Q):,0.0,Prob(JB):,0.0
Heteroskedasticity (H):,2.0,Skew:,-0.19
Prob(H) (two-sided):,0.0,Kurtosis:,8.5


In [0]:
model_auto = auto_arima(df_comp.ret_ftse[1:], 
                        exogenous=df_comp[["ret_spx", "ret_dax", "ret_nikkei"]][1:], 
                        m=5, max_order=None, max_p=7, max_q=7, max_d=2,
                        max_P=4, max_Q=4, max_D=2,
                        maxiter=50, alpha=0.05, n_jobs=-1, trend="ct",
                        information_critereion="oob", 
                        out_of_sample_size=int(len(df_comp)*0.20))

In [19]:
model_auto.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,6275.0
Model:,"SARIMAX(0, 0, 1)x(0, 0, 3, 5)",Log Likelihood,-6354.882
Date:,"Tue, 17 Mar 2020",AIC,12729.764
Time:,23:10:12,BIC,12797.208
Sample:,0,HQIC,12753.132
,- 6275,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,-0.0062,0.015,-0.414,0.679,-0.035,0.023
drift,-6.875e-07,4.59e-06,-0.150,0.881,-9.68e-06,8.31e-06
x1,0.0854,0.006,13.667,0.000,0.073,0.098
x2,0.5634,0.005,103.668,0.000,0.553,0.574
x3,0.0739,0.005,15.909,0.000,0.065,0.083
ma.L1,-0.1196,0.008,-14.856,0.000,-0.135,-0.104
ma.S.L5,-0.0320,0.009,-3.408,0.001,-0.050,-0.014
ma.S.L10,-0.0558,0.010,-5.544,0.000,-0.076,-0.036
ma.S.L15,-0.0269,0.010,-2.752,0.006,-0.046,-0.008

0,1,2,3
Ljung-Box (Q):,78.5,Jarque-Bera (JB):,13953.68
Prob(Q):,0.0,Prob(JB):,0.0
Heteroskedasticity (H):,0.54,Skew:,0.24
Prob(H) (two-sided):,0.0,Kurtosis:,10.29
