# GARCH (Generalized Autoregressive Conditional Heteroskedasticity)

### A popular time series forecasting method used to predict future stock price volatility.


In [39]:
import pandas as pd

file_name='DLINKINDIA.csv'
# Load stock price data
data = pd.read_csv(f'../data/raw/{file_name}', parse_dates=True)
data['datetime'] = pd.to_datetime(data['datetime'])

data.set_index('datetime', inplace=True)
data = data[['open', 'high', 'low', 'close', 'volume']]


### Resample data to daily frequency using OHLC dictionary


In [40]:
ohlc_dict = {                                                                                                             
    'open': 'first',                                                                                                    
    'high': 'max',                                                                                                       
    'low': 'min',                                                                                                        
    'close': 'last',                                                                                                    
    'volume': 'sum',
}

data = data.resample('D', closed='left', label='left').apply(ohlc_dict)
# data = pd.DatetimeIndex(data, freq='D')

data.drop(data.tail(2).index, inplace=True)
data = data.asfreq('D')
data.dropna(inplace=True)


data

Unnamed: 0_level_0,open,high,low,close,volume
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-01-04,222.55,224.95,221.50,223.90,100776
2023-01-05,224.05,225.55,217.10,221.00,149954
2023-01-06,222.80,222.80,210.95,215.80,175370
2023-01-09,217.15,219.95,215.50,216.80,82558
2023-01-10,217.90,217.90,212.10,213.40,110191
...,...,...,...,...,...
2023-07-25,322.00,322.90,294.15,301.85,1090657
2023-07-26,299.95,305.45,295.35,296.95,365650
2023-07-27,298.80,310.95,297.60,307.80,703015
2023-07-28,309.45,312.80,301.60,310.35,469952


In [41]:
from arch import arch_model
import numpy as np
import plotly.graph_objects as go
import warnings

# Suppress warnings
warnings.filterwarnings("ignore")

# Define a function to fit GARCH model and make predictions
def predict_stock_high_low_garch(data, p, q, forecast_length):
    # Fit GARCH model for 'high' prices
    model_high = arch_model(data['high'], vol='Garch', p=p, q=q)
    model_high_fit = model_high.fit(disp='off')
    forecast_high = model_high_fit.forecast(horizon=forecast_length).mean.iloc[-1].values

    # Fit GARCH model for 'low' prices
    model_low = arch_model(data['low'], vol='Garch', p=p, q=q)
    model_low_fit = model_low.fit(disp='off')
    forecast_low = model_low_fit.forecast(horizon=forecast_length).mean.iloc[-1].values

    return forecast_high, forecast_low


In [42]:
from sklearn.metrics import mean_squared_error
from itertools import product

# Define a function for hyperparameter tuning using grid search
def grid_search_garch(data, p_values, q_values, forecast_length):
    best_score, best_cfg = float("inf"), None
    for p, q in product(p_values, q_values):
        try:
            forecast_high, forecast_low = predict_stock_high_low_garch(data, p, q, forecast_length)
            actual_high = data['high'][-forecast_length:]
            actual_low = data['low'][-forecast_length:]
            error = mean_squared_error(actual_high, forecast_high) + mean_squared_error(actual_low, forecast_low)
            if error < best_score:
                best_score, best_cfg = error, (p, q)
            print(f'GARCH({p},{q}) MSE={error}')
        except Exception as e:
            print(f'GARCH({p},{q}) failed with error: {e}')
            continue
    return best_cfg

# Hyperparameter ranges
p_values = range(1, 6)
q_values = range(1, 6)

# Set forecast length
forecast_length = 2

# Prepare data for prediction by dropping the last forecast_length rows
prediction_df = data.copy(deep=True)
prediction_df.drop(prediction_df.tail(forecast_length).index, inplace=True)

# Perform grid search to find the best GARCH parameters
best_pq = grid_search_garch(prediction_df, p_values, q_values, forecast_length)


f'Best GARCH order: {best_pq}'

GARCH(1,1) MSE=6760.502187282686
GARCH(1,2) MSE=6931.530030856559
GARCH(1,3) MSE=6998.846036662034
GARCH(1,4) MSE=7008.334482892213
GARCH(1,5) MSE=6986.856007778712
GARCH(2,1) MSE=6760.498124759334
GARCH(2,2) MSE=6931.490224249483
GARCH(2,3) MSE=6998.73707355317
GARCH(2,4) MSE=7008.305580089649
GARCH(2,5) MSE=6986.839592769842
GARCH(3,1) MSE=6933.388007490629
GARCH(3,2) MSE=6933.421045964489
GARCH(3,3) MSE=7000.750947886033
GARCH(3,4) MSE=7010.403843037351
GARCH(3,5) MSE=6988.789512157544
GARCH(4,1) MSE=6999.946130908151
GARCH(4,2) MSE=6999.945029468086
GARCH(4,3) MSE=7000.7297194372395
GARCH(4,4) MSE=7010.280444121145
GARCH(4,5) MSE=6988.798982402261
GARCH(5,1) MSE=7006.405191568079
GARCH(5,2) MSE=7006.438079473899
GARCH(5,3) MSE=7006.487249536525
GARCH(5,4) MSE=7010.420039196465
GARCH(5,5) MSE=6988.843295442246


'Best GARCH order: (2, 1)'

In [43]:
# Get predictions with the best GARCH parameters
forecast_high, forecast_low = predict_stock_high_low_garch(prediction_df, best_pq[0], best_pq[1], forecast_length)



In [44]:
# Prepare data for visualization
forecast_dates = data.index[-forecast_length:]

# Add forecasted values to the original DataFrame
forecast_df = pd.DataFrame({
    'forecast_high': forecast_high,
    'forecast_low': forecast_low
}, index=forecast_dates)

# Combine original data and forecasted data
data = pd.concat([data, forecast_df], axis=1)

# Remove NaNs introduced during the concatenation
data.dropna(subset=['high', 'low'], inplace=True)


In [45]:

# Create Plotly graph
fig = go.Figure()
fig.add_trace(go.Scatter(x=data.index, y=data['high'], name='Actual High Price'))
fig.add_trace(go.Scatter(x=data.index, y=data['forecast_high'], name='Forecast High Price'))
fig.add_trace(go.Scatter(x=data.index, y=data['low'], name='Actual Low Price'))
fig.add_trace(go.Scatter(x=data.index, y=data['forecast_low'], name='Forecast Low Price'))
fig.update_layout(title=f'{file_name} Stock High & Low Price Prediction using GARCH', xaxis_title='Date', yaxis_title='Price')
fig.show()


In [46]:

# Print the last few rows of the data with predictions
data[['high','low','forecast_high','forecast_low']].tail()

Unnamed: 0_level_0,high,low,forecast_high,forecast_low
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-07-25,322.9,294.15,,
2023-07-26,305.45,295.35,,
2023-07-27,310.95,297.6,,
2023-07-28,312.8,301.6,248.505868,240.010138
2023-07-31,307.65,288.1,248.505868,240.010138
