In [None]:
import statsmodels.api as sm
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pandas_datareader.data import DataReader
from datetime import datetime
from statsmodels.tsa.api import acf, graphics, pacf
from statsmodels.tsa.ar_model import AutoReg, ar_select_order
from statsmodels.tsa.arima.model import ARIMA

# Apply seaborn's default theme once, up front, so it is in effect for the figures drawn below.
sns.set_theme()

def plot_correlogram(y: np.ndarray, label: str, lags: int = 30) -> None:
    """Show a series together with its sample autocorrelation function.

    The figure has two stacked panels: the raw series on top and the
    correlogram (ACF plot) underneath.

    Parameters
    ----------
    y
        Observations of the series to display.
    label
        Text used as the figure title.
    lags
        Number of lags drawn in the ACF panel. Defaults to 30, the value that
        was previously hard-coded, so existing calls behave as before.
    """
    fig, (series_ax, acf_ax) = plt.subplots(2, figsize=(20, 12))
    fig.suptitle(label)
    series_ax.plot(y)
    sm.graphics.tsa.plot_acf(y, lags=lags, ax=acf_ax)
    plt.show()

In [None]:
# Seed NumPy's global RNG before the first random draw so that
# "Restart Kernel -> Run All" reproduces the same simulated series.
np.random.seed(42)

# 1000 independent draws from a standard normal distribution.
white_noise = np.random.normal(loc=0, scale=1, size=(1000,))
plot_correlogram(white_noise, 'White Noise')

## Random Walk
A random walk is a time series model where the current observation is equal to the previous observation with a random step up or down.

$x_t = x_{t-1} + w_t = \text{B}x_t + w_t$

Here $\text{B}$ is the backshift operator, i.e. $\text{B}x_t = x_{t-1}$, and $w_t$ is white noise.

$x_t = (1 + \text{B} + \text{B}^2 + \text{B}^3 + ...) w_t$

In [None]:
# A random walk is the running sum of the white-noise steps.
random_walk = np.cumsum(white_noise)
plot_correlogram(random_walk, 'Random Walk')

In [None]:
from pandas_datareader import data as pdr
from datetime import date
from pathlib import Path
import yfinance as yf
# Route pandas_datareader's Yahoo reader through yfinance.
# NOTE(review): `pdr_override` may be unavailable in recent yfinance releases —
# confirm against the pinned version.
yf.pdr_override()
import pandas as pd

# Tickers to download; add or remove symbols to change the universe.
ticker_list = ['EXPE', 'PXD', 'MCHP', 'CRM', 'NRG', 'AMZN', 'MSFT', '^GSPC']
today = date.today()

# Download window. (The string dates that used to be assigned here were
# overwritten immediately and contained en-dashes, so they were dropped.)
start_date = datetime(2007, 1, 1)
end_date = datetime(2015, 7, 17)

# Make sure the output folder exists; to_csv does not create directories.
output_dir = Path('./data')
output_dir.mkdir(parents=True, exist_ok=True)

# One price DataFrame per ticker, keyed by symbol; each is also saved as CSV
# with the download date in the file name.
df_by_ticker = {}
for ticker in ticker_list:
    print(ticker)
    data = pdr.get_data_yahoo(ticker, start=start_date, end=end_date)
    dataname = ticker + '_' + str(today)
    df_by_ticker[ticker] = data
    data.to_csv(output_dir / (dataname + '.csv'))

In [None]:
# Adjusted-close series for the three instruments analysed below.
microsoft, amazon, s_and_p_500 = (
    df_by_ticker[symbol]["Adj Close"] for symbol in ("MSFT", "AMZN", "^GSPC")
)
s_and_p_500

In [None]:
# First differences of the Microsoft price series: x_t - x_{t-1}.
microsoft_values = np.asarray(microsoft)
microsoft_diff = microsoft_values[1:] - microsoft_values[:-1]
plot_correlogram(microsoft_diff, 'Microsoft Diff')

In [None]:
# First differences of the S&P 500 index level: x_t - x_{t-1}.
s_and_p_500_diff = np.diff(np.asarray(s_and_p_500), n=1)
plot_correlogram(s_and_p_500_diff, 'S&P500 Diff')

## AR(p) process
The autoregressive model is simply an extension of the random walk that includes terms further back in time. The structure of the model is linear, that is the model depends linearly on the previous terms, with coefficients for each term. This is where the "regressive" comes from in "autoregressive". It is essentially a regression model where the previous terms are the predictors.

$x_t = \alpha_1 x_{t-1} + \alpha_2 x_{t-2} + ... + \alpha_p x_{t-p} + w_t$

$x_t = \alpha_1 \text{B} x_{t} + \alpha_2 \text{B}^2 x_{t} + ... + \alpha_p \text{B}^p x_t + w_t$

$(1 - \alpha_1 \text{B} - \alpha_2 \text{B}^2 - ... - \alpha_p \text{B}^p) x_t = w_t$

In [None]:
white_noise = np.random.normal(loc=0, scale=1, size=(1000,))

# AR(2) coefficients: x_t = alpha1 * x_{t-1} + alpha2 * x_{t-2} + w_t
alpha1 = 1
alpha2 = -0.1

# Start from the noise itself; the first two samples have no full history and
# are left as plain white noise. Note the series is named `ar1` although the
# recursion is second order.
ar1 = white_noise.copy()
for i in range(2, len(white_noise)):
    ar1[i] = alpha1 * ar1[i-1] + alpha2 * ar1[i-2] + white_noise[i]

plot_correlogram(ar1, 'AR(2)')

In [None]:
# Choose the AR order with the same deterministic specification that the final
# fit uses (no constant). The selection previously ran with the default
# constant trend, so the order was picked for a different model.
orders = ar_select_order(ar1, maxlag=5, trend='n')

# ar_lags can be empty/None when no lag is selected; fail with a clear
# message instead of an opaque error from max().
selected_lags = orders.ar_lags
if not selected_lags:
    raise ValueError('ar_select_order selected no AR lags; nothing to fit')
order = max(selected_lags)
print(f'Order determined to be: {order}')

# Fit AR(order) without a constant and report the estimates.
mod = AutoReg(ar1, order, trend='n')
res = mod.fit()
print(res.summary())

In [None]:
from statsmodels.tsa.arima_process import ArmaProcess

# AR(2) coefficients as written in x_t = alpha1 x_{t-1} + alpha2 x_{t-2} + w_t.
alpha1, alpha2 = 2/3, -1/3

# ArmaProcess takes the lag polynomial (1 - alpha1 B - alpha2 B^2), hence the
# leading 1 and the sign flip on the coefficients. No MA part is supplied.
ar = [1, -alpha1, -alpha2]
ma = None
process = ArmaProcess(ar=ar, ma=ma)

# Simulate 1000 observations, passing a burn-in of 100 to the generator.
ar_samples = process.generate_sample(nsample=1_000, burnin=100)
plot_correlogram(ar_samples, 'AR(2) via statsmodels')

In [None]:
# Amazon adjusted close on a single 8x5 axes.
fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(8, 5))
fig.suptitle(t='AMAZON')
amazon.plot(ax=axs)

In [None]:
# Log returns: log(P_t) - log(P_{t-1}) = log(P_t / P_{t-1}).
amazon_diff = np.diff(np.log(amazon))
# Title now says "Log returns" (it used to say "Diff"), matching the S&P 500 plot.
plot_correlogram(amazon_diff, 'Amazon Log returns')

In [None]:
# S&P 500 adjusted close on a single 8x5 axes.
fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(8, 5))
fig.suptitle(t='S&P500')
s_and_p_500.plot(ax=axs)

In [None]:
# Log returns: log(P_t) - log(P_{t-1}) = log(P_t / P_{t-1}).
log_index_level = np.log(s_and_p_500)
gspc_diff = np.diff(log_index_level)
plot_correlogram(gspc_diff, 'S&P500 Log returns')

In [None]:
# Fit a no-constant autoregression with 22 lags to the S&P 500 log returns.
# NOTE(review): 22 presumably approximates one trading month — confirm.
order = 22
model = AutoReg(gspc_diff, lags=order, trend='n')
res = model.fit()
print(res.summary())

## MA(q) process

A Moving Average model is similar to an Autoregressive model, except that instead of being a linear combination of past time series values, it is a linear combination of the past white noise terms.

A key difference is that an MA(q) model only ever sees the last q shocks, whereas an AR(p) model takes all prior shocks into account, albeit with progressively decreasing weight.

$x_t = w_t + \beta_1 w_{t-1} + \beta_2 w_{t-2} + ... + \beta_q w_{t-q}$

$x_t = (1 + \beta_1 \text{B} + \beta_2 \text{B}^2 + ... + \beta_q \text{B}^q) w_t$

In [None]:
white_noise = np.random.normal(loc=0, scale=1, size=(1000,))

# MA(4) weights on the four most recent past shocks (the beta_1..beta_4 of the
# formula above; the alpha* names are kept for compatibility).
alpha1 = 1.1
alpha2 = 1.2
alpha3 = 0.9
alpha4 = 1.1

# x_t = w_t + alpha1*w_{t-1} + alpha2*w_{t-2} + alpha3*w_{t-3} + alpha4*w_{t-4}
# The first four samples lack a full history and are left as raw white noise.
# The original loop stopped after the second lag, so the series labelled
# "MA(4)" was in fact MA(2); all four lags are now included.
ma4 = white_noise.copy()
for i in range(4, len(white_noise)):
    ma4[i] = (
        white_noise[i]
        + alpha1 * white_noise[i-1]
        + alpha2 * white_noise[i-2]
        + alpha3 * white_noise[i-3]
        + alpha4 * white_noise[i-4]
    )

plot_correlogram(ma4, 'MA(4)')

In [None]:
from statsmodels.tsa.arima_process import ArmaProcess

# MA(3) weights on the three most recent past shocks.
beta1, beta2, beta3 = 0.6, 0.4, 0.3

# MA lag polynomial (1 + beta1 B + beta2 B^2 + beta3 B^3); no AR part is supplied.
ar = None
ma = (1, beta1, beta2, beta3)
process = ArmaProcess(ar=ar, ma=ma)

# Simulate 1000 observations, passing a burn-in of 10,000 to the generator.
ma_samples = process.generate_sample(nsample=1_000, burnin=10_000)
plot_correlogram(ma_samples, 'MA(3) via statsmodels')

In [None]:
# ARIMA order (p, d, q) = (0, 0, 3): a pure MA(3) with no differencing.
ar, i, ma = 0, 0, 3
order = (ar, i, ma)

# Fit without a trend term to the simulated MA(3) sample and report estimates.
model = ARIMA(ma_samples, order=order, trend='n')
result = model.fit()
print(result.summary())

In [None]:
# Correlogram of the residuals from the ARIMA(0, 0, 3) fit.
plot_correlogram(y=result.resid, label='MA(3) residuals')

## ARMA(p, q) process

 - The AR model attempts to capture market participant effects such as momentum and mean-reversion in stock trading. 
 - The MA model is used to characterise "shock" information to a series, such as a surprise earnings announcement. A good example of "shock" news would be the BP Deepwater Horizon oil spill.

The ARMA model attempts to capture both of these effects.

$x_t = \alpha_1 x_{t-1} + \alpha_2 x_{t-2} + ... + \alpha_p x_{t-p} + w_t + \beta_1 w_{t-1} + \beta_2 w_{t-2} + ... + \beta_q w_{t-q}$

$(1 - \alpha_1 \text{B} - \alpha_2 \text{B}^2 - ... - \alpha_p \text{B}^p) x_t = (1 + \beta_1 \text{B} + \beta_2 \text{B}^2 + ... + \beta_q \text{B}^q) w_t$

In [None]:
from statsmodels.tsa.arima_process import ArmaProcess

# AR(2) coefficients of the ARMA(2, 2) process.
alpha1 = 0.5
alpha2 = -0.25

# MA(2) coefficients of the ARMA(2, 2) process.
beta1 = 0.5
beta2 = -0.3

# Lag polynomials: (1 - alpha1 B - alpha2 B^2) x_t = (1 + beta1 B + beta2 B^2) w_t
ar = (1, -alpha1, -alpha2)
ma = (1,   beta1,   beta2)
process = ArmaProcess(ar, ma)

# The name `ma_samples` is kept because the following cells read it, even
# though the sample now comes from an ARMA(2, 2) process.
ma_samples = process.generate_sample(nsample=1_000, burnin=10)

# Title corrected: it was copy-pasted from the MA(3) cell.
plot_correlogram(ma_samples, 'ARMA(2, 2) via statsmodels')

## Fitting an ARMA(p, q) model using the lowest Akaike Information Criterion (AIC)

In [None]:
# Grid-search ARMA(p, q) for p, q in 1..3 (no differencing) and keep the order
# with the smallest AIC.
ar, i, ma = 0, 0, 0

lowest_aic = float('inf')

for ar in (1, 2, 3):
    for ma in (1, 2, 3):
        order = (ar, i, ma)
        model = ARIMA(ma_samples, order=order, trend='n')
        result = model.fit()
        print(f'ARMA[{ar}, {ma}]: {result.aic:.2f}')

        # Only a strictly smaller AIC replaces the current best.
        if not (result.aic < lowest_aic):
            continue
        lowest_aic = result.aic
        best_order = (ar, i, ma)

# Refit the winning specification and report it.
model = ARIMA(ma_samples, order=best_order, trend='n')
result = model.fit()
print(result.summary())

In [None]:
# Correlogram of the residuals from the best-AIC ARMA fit.
plot_correlogram(y=result.resid, label='Residuals given fit to ARMA[p, q]')

In [None]:
import pandas as pd
import arch
import arch.data.sp500

# Load the S&P 500 sample data bundled with the `arch` package and take the
# adjusted close column as the series to model.
data = arch.data.sp500.load()
timeseries = data["Adj Close"]

# Fit an ARMA(2,2) model to the time series.
# NOTE(review): this models price levels rather than returns — confirm that
# is intended, since earlier sections work with log returns.
order = (2, 0, 2)
arma_model = ARIMA(timeseries, order=order, trend='n')
# Fit the model built just above. The original called `model.fit()`, which
# re-fitted the stale `model` left over from the previous cell.
arma_results = arma_model.fit()

# Extract the residuals from the ARMA model
residuals = arma_results.resid

# Fit a GARCH(1,1) model to capture heteroscedasticity in the residuals
garch_model = arch.arch_model(residuals, vol='Garch', p=1, q=1)
garch_results = garch_model.fit()
print(garch_results.summary())
print('Here we have fitted a GARCH model to the residuals\n\n\n')

# Combine the ARMA and GARCH models into a single model: regress the series on
# the ARMA fitted values with GARCH(1,1) errors. `mean='LS'` is required for
# the regressors to be used; under the default constant mean `x` is ignored.
arma_fitted = pd.DataFrame(
    {'arma_fitted': arma_results.fittedvalues},
    index=timeseries.index,
)
combined_model = arch.arch_model(
    y=timeseries, x=arma_fitted, mean='LS', vol='Garch', p=1, q=1
)

# Fit the combined model to the data
results = combined_model.fit()

# Access the model summary and parameters
print(results.summary())


In [None]:
# Forecast the next 10 steps from the fitted ARMA results.
arma_results.forecast(steps=10)