In [1]:
! pip install PythonTsa
! pip install arch
! pip install yfinance


Collecting PythonTsa
  Downloading PythonTsa-1.4.8-py3-none-any.whl.metadata (740 bytes)
Downloading PythonTsa-1.4.8-py3-none-any.whl (440 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/440.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━[0m [32m256.0/440.7 kB[0m [31m7.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m440.7/440.7 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PythonTsa
Successfully installed PythonTsa-1.4.8
Collecting arch
  Downloading arch-7.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading arch-7.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (985 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m985.3/985.3 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: arch
Successfully installed arch-7

In [2]:
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.stattools import kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.api import qqplot
from arch import arch_model
from scipy.stats import norm, jarque_bera, skew, kurtosis
from PythonTsa.LjungBoxtest import plot_LB_pvalue
from PythonTsa.Selecting_arma2 import choose_arma2
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# ------------------------ Utility Functions ------------------------

def save_figure(fig, filename):
    """Export ``fig`` to ``filename`` and release the figure.

    The image is written at 300 dpi with a tight bounding box and a
    transparent background; afterwards the figure is closed through pyplot.
    """
    export_options = dict(dpi=300, bbox_inches='tight', transparent=True)
    fig.savefig(filename, **export_options)
    plt.close(fig)

def acf_pacf_fig_save(series, lags=48, both=True, filename="acf_pacf.png"):
    """Plot the correlogram of ``series`` and save it to ``filename``.

    Parameters
    ----------
    series : data accepted by ``plot_acf`` / ``plot_pacf``.
    lags : number of lags forwarded to the plotting functions.
    both : when true, draw the ACF above the PACF in a two-row figure;
        otherwise draw only the ACF in a single, shorter figure.
    filename : output path handed to ``save_figure``.
    """
    if both:
        fig, (acf_axis, pacf_axis) = plt.subplots(2, 1, figsize=(10, 8))
    else:
        fig, acf_axis = plt.subplots(figsize=(10, 4))
        pacf_axis = None

    plot_acf(series, lags=lags, ax=acf_axis, title="ACF")
    if pacf_axis is not None:
        plot_pacf(series, lags=lags, ax=pacf_axis, title="PACF")

    # Leave a small margin at the top and bottom of the canvas.
    fig.tight_layout(rect=[0, 0.05, 1, 0.95])
    save_figure(fig, filename)

def format_float(x, decimals=3):
    """Return ``x`` rendered in fixed-point notation with ``decimals`` digits
    after the decimal point (three by default)."""
    spec = f".{decimals}f"
    return format(x, spec)

# ------------------------ Load and Preprocess Data ------------------------

# Download daily DAX (^GDAXI) quotes through yfinance.
# `auto_adjust` is passed explicitly: the recorded run relied on the library
# default (True, according to the notice yfinance printed), and that default
# differs between yfinance releases.
dax_raw = yf.download('^GDAXI', start='2014-10-23', end='2019-07-07',
                      auto_adjust=True)
if dax_raw.empty:
    # Fail here with a clear message instead of further down in the analysis.
    raise RuntimeError("yfinance returned no data for ^GDAXI")

# Depending on the yfinance release the columns are either flat or a two-level
# (field, ticker) MultiIndex; in the latter case `['Close']` is a one-column
# frame. Reduce it to a plain Series in both cases.
close = dax_raw['Close']
if isinstance(close, pd.DataFrame):
    close = close.iloc[:, 0]

# 'index' holds the closing level; 'logreturns' is ln(P_t / P_{t-1}).
dax = close.dropna().to_frame(name='index')
dax['logreturns'] = np.log(dax['index'] / dax['index'].shift(1))
dax = dax.dropna()  # the first row has no previous close, so its return is NaN

# Work on a copy so that re-labelling the index below does not touch `dax`.
# The dates are kept as string labels, as the rest of the notebook expects.
logret = dax['logreturns'].copy()
logret.index = dax.index.astype(str)

# ------------------------ KPSS Test ------------------------

# KPSS test on the log-returns with a constant-only regression and automatic
# lag selection; only the statistic and the p-value are reported.
kpss_result = kpss(logret, regression='c', nlags='auto')
stat, pvalue, _, crit = kpss_result
for label, value in (("KPSS Statistic", stat), ("p-value", pvalue)):
    print(f"{label}: {format_float(value)}")

# ------------------------ Histogram + KDE + Normal ------------------------

# Sample moments of the log-returns (standard deviation with ddof=1).
smean, scal = logret.mean(), logret.std(ddof=1)
print(f"Sample Mean: {format_float(smean, 4)}")
print(f"Sample Std Dev: {format_float(scal, 4)}")

# Kernel density estimate (default fit settings) and, on the same grid, the
# normal density that shares the sample mean and standard deviation.
kde = sm.nonparametric.KDEUnivariate(logret)
kde.fit()
kde_grid = kde.support
normal_density = norm.pdf(kde_grid, loc=smean, scale=scal)
normal_label = f'Normal PDF\nμ={format_float(smean, 4)}, σ={format_float(scal, 4)}'

# Overlay histogram, KDE and fitted normal; the legend sits below the axes.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(logret, bins=40, density=True, label='Histogram', alpha=0.6)
ax.plot(kde_grid, kde.density, label='KDE')
ax.plot(kde_grid, normal_density, label=normal_label)
ax.set_title("Histogram of Log-Returns")
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.1), ncol=3)
save_figure(fig, "histogram_kde_normal.png")

# ------------------------ ACF/PACF ------------------------

# Correlogram (ACF and PACF, 48 lags) of the raw log-returns.
acf_pacf_fig_save(
    series=logret,
    lags=48,
    both=True,
    filename="acf_pacf_logret.png",
)

# ------------------------ Ljung–Box on Returns ------------------------

# Ljung–Box p-value plot for the raw log-returns (nolags=36). No model has been
# fitted at this point, hence noestimatedcoef=0.
plot_LB_pvalue(logret, noestimatedcoef=0, nolags=36)
# The helper's figure is not captured, so it is fetched through pyplot's
# current-figure state and then saved and closed.
# NOTE(review): this only works if plot_LB_pvalue leaves its figure open as the
# current one. If it calls plt.show() internally, plt.gcf() may return a new,
# empty figure — confirm that the saved PNG is not blank.
save_figure(plt.gcf(), "lb_pvalue_logret.png")

# ------------------------ ARMA Order Selection ------------------------

# Search ARMA(p, q) orders with p, q <= 5; the helper prints AIC and BIC tables
# together with the minimising (p, q) pairs (see the recorded output).
# NOTE(review): the meaning of ctrl=1.02 is not visible in this file — confirm
# against the PythonTsa documentation.
choose_arma2(logret, max_p=5, max_q=5, ctrl=1.02)

# ------------------------ Fit ARMA(0,1) ------------------------

# Fit an MA(1) model, i.e. ARMA(0,1), to the log-returns without a constant or
# trend term (trend='n').
arma01 = ARIMA(logret, order=(0, 0, 1), trend='n').fit()
print("\nARMA(0,1) Coefficients:")
# Build the confidence-interval table once rather than once per parameter, and
# read the bounds by position so the code does not depend on how the library
# labels the lower/upper columns.
arma01_ci = arma01.conf_int()
for name, val in arma01.params.items():
    ci = arma01_ci.loc[name]
    print(f"{name}: {format_float(val)} (95% CI: {format_float(ci.iloc[0])}, {format_float(ci.iloc[1])})")

# ------------------------ Residual Diagnostics ------------------------

# Ljung–Box p-value plot for the ARMA(0,1) residuals (nolags=36);
# noestimatedcoef=1 accounts for the single estimated MA coefficient.
plot_LB_pvalue(arma01.resid, noestimatedcoef=1, nolags=36)
# The figure is fetched via pyplot's current-figure state.
# NOTE(review): if plot_LB_pvalue shows/closes its figure internally, plt.gcf()
# may return an empty figure — confirm that the saved PNG is not blank.
save_figure(plt.gcf(), "lb_pvalue_arma_resid.png")

# Correlogram of the squared residuals (30 lags), saved alongside the other
# diagnostics as a check for remaining dependence in the second moment.
acf_pacf_fig_save(arma01.resid**2, lags=30, both=True, filename="acf_pacf_arma_resid_squared.png")

# ------------------------ GARCH(1,1) Model ------------------------

# Fit a zero-mean GARCH(1,1) to the ARMA(0,1) residuals. The volatility model
# and its orders are spelled out so the "GARCH(1,1)" label below does not depend
# on arch_model's defaults.
garchmod = arch_model(arma01.resid, mean='Zero', vol='GARCH', p=1, q=1).fit(disp='off')
print("\nGARCH(1,1) Coefficients:")
# Build the confidence-interval table once rather than once per parameter.
garch_ci = garchmod.conf_int()
for name, val in garchmod.params.items():
    ci = garch_ci.loc[name]
    # arch labels the interval columns 'lower'/'upper', so integer keys such as
    # ci[0] would rely on pandas' deprecated positional fallback for
    # Series.__getitem__; read the bounds explicitly by position instead.
    print(f"{name}: {format_float(val)} (95% CI: {format_float(ci.iloc[0])}, {format_float(ci.iloc[1])})")

# ------------------------ GARCH Residual Diagnostics ------------------------

# Standardized residuals of the fitted GARCH model; reused by the QQ plot below.
garchresid = garchmod.std_resid
# Correlogram of the squared standardized residuals (40 lags).
acf_pacf_fig_save(garchresid**2, lags=40, both=True, filename="acf_pacf_garchresid_squared.png")

# Ljung–Box p-value plot for the squared standardized residuals (nolags=30).
plot_LB_pvalue(garchresid**2, noestimatedcoef=0, nolags=30)
# The figure is fetched via pyplot's current-figure state.
# NOTE(review): if plot_LB_pvalue shows/closes its figure internally, plt.gcf()
# may return an empty figure — confirm that the saved PNG is not blank.
save_figure(plt.gcf(), "lb_pvalue_garch_squared.png")

# QQ plot of the standardized residuals against a fitted normal, with a line
# through the quartiles (line='q'). The title is set on the figure's own axes.
fig = qqplot(garchresid, line='q', fit=True)
fig.gca().set_title("QQ Plot of Standardized GARCH Residuals")
save_figure(fig, "qqplot_garchresid.png")

# ------------------------ Distributional Summary ------------------------

# Normality summary of the log-returns: Jarque–Bera statistic with its p-value,
# sample skewness, and kurtosis on the non-excess scale (fisher=False).
jb_result = jarque_bera(logret)
jb_stat, jb_p = jb_result[0], jb_result[1]
sample_skewness = skew(logret)
sample_kurtosis = kurtosis(logret, fisher=False)
print(f"\nJarque–Bera: {format_float(jb_stat)} (p = {format_float(jb_p)})")
print(f"Skewness: {format_float(sample_skewness)}")
print(f"Kurtosis: {format_float(sample_kurtosis)}")


YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed
InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  stat, pvalue, _, crit = kpss(logret, regression='c', nlags='auto')


KPSS Statistic: 0.081
p-value: 0.100
Sample Mean: 0.0003
Sample Std Dev: 0.0114
AIC:  
          0        1        2        3        4        5
0      NaN -7245.03 -7243.08 -7243.76 -7248.39 -7253.86
1 -7245.03 -7242.95 -7241.03 -7242.06 -7248.68 -7251.99
2 -7243.13 -7240.95 -7245.92 -7247.82 -7249.06 -7250.05
3 -7244.16 -7242.13 -7243.05 -7241.86 -7247.97 -7248.44
4 -7248.48 -7246.50 -7248.79 -7248.04 -7247.27 -7246.23
5 -7253.36 -7251.33 -7248.37 -7248.53 -7247.09 -7245.28
AIC minimum is -7253.86
(p, q)= (array([0]), array([5]))
BIC:  
          0        1        2        3        4        5
0      NaN -7229.80 -7222.77 -7218.38 -7217.93 -7218.32
1 -7229.80 -7222.64 -7215.65 -7211.60 -7213.13 -7211.37
2 -7222.82 -7215.57 -7215.46 -7212.28 -7208.44 -7204.35
3 -7218.77 -7211.66 -7207.50 -7201.24 -7202.27 -7197.67
4 -7218.02 -7210.96 -7208.17 -7202.35 -7196.49 -7190.38
5 -7217.82 -7210.71 -7202.68 -7197.75 -7191.24 -7184.35
BIC minimum is -7229.8
(p, q)= (array([0, 1]), array([1, 0]))
H