In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.graphics.tsaplots as sgt
import statsmodels.tsa.stattools as sts
from statsmodels.tsa.arima_model import ARIMA
from scipy.stats import chi2
import itertools

%matplotlib inline

import warnings
# NOTE(review): this ignores every warning category for the rest of the session,
# so deprecation notices and any warnings raised while fitting the models below
# are hidden as well — consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

## ARIMAX

***ARIMAX stands for Autoregressive Integrated Moving Average exogenous Model.***

***ARIMAX is an ARIMA model that can also take other variables (called exogenous variables), which help predict the current value of the series in addition to its previous values and previous residuals.***

$$\Delta X(t) = C + \beta \cdot Y + (\delta_1 \cdot \Delta X(t - 1) + \dotso + \delta_p \cdot \Delta X(t - p)) +
                                      (\theta_1 \cdot Er(t - 1) + \dotso + \theta_q \cdot Er(t - q)) + Er(t) $$
$$\text{where,}$$
$$\beta = \text{value which will be fitted based on model selection and data}$$
$$Y = \text{variable(s) which affect the predicted value (also called exogenous variable(s))}$$

In [2]:
# Read the raw index prices once into `actual_data` and leave that frame alone;
# `df` is an independent (deep) copy that the following cell reshapes.
actual_data = pd.read_csv("../../datasets/stock_prices/index_2018.csv")
df = actual_data.copy(deep=True)

# Preview the first rows of the working copy.
df.head()

Unnamed: 0,date,spx,dax,ftse,nikkei
0,07/01/1994,469.9,2224.95,3445.98,18124.01
1,10/01/1994,475.27,2225.0,3440.58,18443.44
2,11/01/1994,474.13,2228.1,3413.77,18485.25
3,12/01/1994,474.17,2182.06,3372.02,18793.88
4,13/01/1994,472.47,2142.37,3360.01,18577.26


In [3]:
# Build the working frame from the untouched `actual_data` frame so that this
# cell can be re-run safely. The previous version dropped the "date" column from
# `df` in place, so running the cell a second time raised a KeyError.

# Dates in the CSV are day-first (e.g. 13/01/1994), hence dayfirst=True.
df_index = pd.to_datetime(actual_data["date"], dayfirst=True)

df = (
    actual_data
    .drop(columns="date")
    .set_index(df_index)
    .rename_axis(None)  # the index carries no label, as before
    # Put the data on a business-day grid; days absent from the CSV become
    # NaN rows. Uppercase "B" is the documented alias (lowercase "b" is
    # deprecated in recent pandas releases).
    .asfreq("B")
    # Carry the last known value forward into those gaps. DataFrame.ffill()
    # replaces the deprecated fillna(method="ffill").
    .ffill()
)

# One-column frames per index; .copy() guarantees that adding columns to them
# can never write back into `df`.
df_spx = df[["spx"]].copy()
df_ftse = df[["ftse"]].copy()

# One-period relative change of the FTSE (a fraction, not a percentage).
df_ftse["returns"] = df_ftse["ftse"].pct_change(1)

In [12]:
# ARIMAX(1, 1, 1) on the FTSE level: one AR lag, first-order differencing and
# one MA lag, with the same-day SPX and DAX values as exogenous regressors.
x_ = ARIMA(
    endog=df_ftse["ftse"],
    order=(1, 1, 1),
    exog=df.loc[:, ["spx", "dax"]],
)
r_ = x_.fit()

# Plain-text summary: fit statistics plus the coefficient table.
print(r_.summary())

                             ARIMA Model Results                              
Dep. Variable:                 D.ftse   No. Observations:                 6276
Model:                 ARIMA(1, 1, 1)   Log Likelihood              -34416.179
Method:                       css-mle   S.D. of innovations             58.255
Date:                Mon, 16 Mar 2020   AIC                          68844.357
Time:                        11:25:21   BIC                          68884.824
Sample:                    01-10-1994   HQIC                         68858.378
                         - 01-29-2018                                         
                   coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
const           -2.0857      1.840     -1.133      0.257      -5.692       1.521
spx              0.0091      0.005      1.815      0.070      -0.001       0.019
dax             -0.0015      0.001     -1.66

## SARIMAX

***SARIMAX stands for Seasonal Autoregressive Integrated Moving Average Exogenous Model.***

***This model takes into consideration the seasonal effect on the trends in the series. The previous models did not take this effect into consideration, which resulted in poor predictions. Here, we can use the values from previous seasons along with, if we wish, the values just before those seasonal values.***


$$SARIMA(p, d, q)(P, D, Q, s) = C + (\delta_1 \cdot X(t - 1) + \delta_{10} \cdot X(t - 10) + \delta_{11} \cdot X(t - 11)
                                     + \delta_{20} \cdot X(t - 20) + \delta_{21} \cdot X(t - 21)) + (\theta_1 \cdot Er(t - 1)
                                     + \theta_{10} \cdot Er(t - 10) + \theta_{11} \cdot Er(t - 11) +
                                     \theta_{12} \cdot Er(t - 12)) + Er(t)$$

In [13]:
from statsmodels.tsa.statespace.sarimax import SARIMAX

In [16]:
# SARIMAX on the FTSE level with the same-day SPX value as the only exogenous
# regressor. Non-seasonal part: (1, 0, 1); seasonal part: (2, 0, 1) with a
# period of 5 observations. enforce_stationarity=False means the AR
# coefficients are not constrained to the stationary region during estimation.
m = SARIMAX(
    endog=df_ftse["ftse"],
    exog=df.loc[:, "spx"],
    order=(1, 0, 1),
    seasonal_order=(2, 0, 1, 5),
    enforce_stationarity=False,
)
r = m.fit()

# Plain-text summary: fit statistics plus the coefficient table.
print(r.summary())

                                 Statespace Model Results                                
Dep. Variable:                              ftse   No. Observations:                 6277
Model:             SARIMAX(1, 0, 1)x(2, 0, 1, 5)   Log Likelihood              -33260.628
Date:                           Mon, 16 Mar 2020   AIC                          66535.255
Time:                                   12:12:53   BIC                          66582.455
Sample:                               01-07-1994   HQIC                         66551.610
                                    - 01-29-2018                                         
Covariance Type:                             opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
spx            2.5786      0.036     72.428      0.000       2.509       2.648
ar.L1          0.9998      0.000   4565.998