In [10]:
import pandas as pd
import numpy as np
import tqdm
import statsmodels.api as sm
import statsmodels.tsa.api as smt

from arch import arch_model

from warnings import filterwarnings 
filterwarnings('ignore')

In [4]:
sales_of_company = pd.read_csv('monthly-sales-of-company-x-jan-6.csv')
robberies_in_boston = pd.read_csv('monthly-boston-armed-robberies-j.csv')
#airlines_passengers = pd.read_csv("international-airline-passengers.csv")
mean_monthly_temp = pd.read_csv("mean-monthly-air-temperature-deg.csv")
dowjones_closing = pd.read_csv("weekly-closings-of-the-dowjones-.csv")
female_births = pd.read_csv("daily-total-female-births-in-cal.csv")

all_series = {
    "Sales": sales_of_company["Count"],
    "Robberies": robberies_in_boston["Count"],
    "Temperature": mean_monthly_temp["Deg"],
    "Dow_Jones": dowjones_closing["Close"],
    "Births": female_births["Count"]
}

Построим ARCH и GARCH модели для разных рядов

Для GARCH моделей будем использовать "лучшие" параметры ARIMA модели

1. Ряд Sales

In [16]:
%%time

best_aic_sales = np.inf 
best_order_sales = None
best_mdl_sales = None

for i in range(10):
    for d in range(10):
        for j in range(10):
            try:
                tmp_mdl = smt.ARIMA(all_series['Sales'], order=(i,d,j)).fit(method='mle', trend='nc')
                tmp_aic = tmp_mdl.aic
                if tmp_aic < best_aic_sales:
                    best_aic_sales = tmp_aic
                    best_order_sales = (i, d, j)
                    best_mdl_sales = tmp_mdl
            except: continue


print('aic: {:6.5f} | order: {}'.format(best_aic_sales, best_order_sales))


aic: 858.17298 | order: (3, 1, 2)
Wall time: 37.5 s


In [20]:
p_sales, o_sales, q_sales = best_order_sales

am_sales_garch = arch_model(all_series['Dow_Jones'], p=p_sales, o=o_sales, q=q_sales, dist='StudentsT')
res_sales_garch = am_sales_garch.fit(update_freq=5, disp='off')

am_sales_arch = arch_model(all_series['Dow_Jones'], p=p_sales, o=o_sales, q=q_sales, dist='StudentsT', vol='ARCH')
res_sales_arch = am_sales_arch.fit(update_freq=5, disp='off')
print('GARCH model')
print(res_sales_garch.summary())
print('+++++++++++++++++++++++++')
print('ARCH model')
print(res_sales_arch.summary())

GARCH model
                      Constant Mean - GJR-GARCH Model Results                       
Dep. Variable:                        Close   R-squared:                      -0.153
Mean Model:                   Constant Mean   Adj. R-squared:                 -0.153
Vol Model:                        GJR-GARCH   Log-Likelihood:               -853.880
Distribution:      Standardized Student's t   AIC:                           1725.76
Method:                  Maximum Likelihood   BIC:                           1753.55
                                              No. Observations:                  162
Date:                      Fri, Jan 29 2021   Df Residuals:                      153
Time:                              17:27:22   Df Model:                            9
                                 Mean Model                                 
                 coef    std err          t      P>|t|      95.0% Conf. Int.
---------------------------------------------------------------------

2. Ряд Dow_Jones

In [14]:
%%time

best_aic_dj = np.inf 
best_order_dj = None
best_mdl_dj = None

for i in range(10):
    for d in range(10):
        for j in range(10):
            try:
                tmp_mdl = smt.ARIMA(all_series['Dow_Jones'], order=(i,d,j)).fit(method='mle', trend='nc')
                tmp_aic = tmp_mdl.aic
                if tmp_aic < best_aic_dj:
                    best_aic_dj = tmp_aic
                    best_order_dj = (i, d, j)
                    best_mdl_dj = tmp_mdl
            except: continue


print('aic: {:6.5f} | order: {}'.format(best_aic_dj, best_order_dj))

#print(best_mdl_dj.summary())
#tsplot(best_mdl_dj.resid, lags=30)

aic: 1416.27670 | order: (2, 2, 3)
Wall time: 20min 25s


In [15]:
p_dj, o_dj, q_dj = best_order_dj

# Using student T distribution usually provides better fit
am_dj = arch_model(all_series['Dow_Jones'], p=p_dj, o=o_dj, q=q_dj, dist='StudentsT')
res_dj = am_dj.fit(update_freq=5, disp='off')
print(res_dj.summary())

                      Constant Mean - GJR-GARCH Model Results                       
Dep. Variable:                        Close   R-squared:                      -0.000
Mean Model:                   Constant Mean   Adj. R-squared:                 -0.000
Vol Model:                        GJR-GARCH   Log-Likelihood:               -853.463
Distribution:      Standardized Student's t   AIC:                           1726.93
Method:                  Maximum Likelihood   BIC:                           1757.80
                                              No. Observations:                  162
Date:                      Fri, Jan 29 2021   Df Residuals:                      152
Time:                              17:16:45   Df Model:                           10
                                 Mean Model                                 
                 coef    std err          t      P>|t|      95.0% Conf. Int.
----------------------------------------------------------------------------
mu  

3. Ряд Births

In [12]:
%%time

best_aic_b = np.inf 
best_order_b = None
best_mdl_b = None

for i in range(7):
    for d in range(7):
        for j in range(7):
            try:
                tmp_mdl = smt.ARIMA(all_series['Births'], order=(i,d,j)).fit(method='mle', trend='nc')
                tmp_aic = tmp_mdl.aic
                if tmp_aic < best_aic_b:
                    best_aic_b = tmp_aic
                    best_order_b = (i, d, j)
                    best_mdl_b = tmp_mdl
            except: continue


print('aic: {:6.5f} | order: {}'.format(best_aic_b, best_order_b))

#print(best_mdl_b.summary())
#tsplot(best_mdl_b.resid, lags=30)

aic: 2452.86209 | order: (3, 1, 4)
Wall time: 1min 58s


In [13]:
p_, o_, q_ = best_order_b

# Using student T distribution usually provides better fit
am = arch_model(all_series['Births'], p=p_, o=o_, q=q_, dist='StudentsT')
res = am.fit(update_freq=5, disp='off')
print(res.summary())

                      Constant Mean - GJR-GARCH Model Results                       
Dep. Variable:                        Count   R-squared:                      -0.001
Mean Model:                   Constant Mean   Adj. R-squared:                 -0.001
Vol Model:                        GJR-GARCH   Log-Likelihood:               -1241.35
Distribution:      Standardized Student's t   AIC:                           2504.70
Method:                  Maximum Likelihood   BIC:                           2547.60
                                              No. Observations:                  365
Date:                      Fri, Jan 29 2021   Df Residuals:                      354
Time:                              16:34:03   Df Model:                           11
                               Mean Model                               
                 coef    std err          t      P>|t|  95.0% Conf. Int.
------------------------------------------------------------------------
mu            41