In [8]:
import pandas as pd
import numpy as np
import ta as ta
from sklearn import preprocessing
from scipy.stats import t
import tensorflow as tf
from arch import arch_model

In [15]:
df = pd.read_csv("/Users/ayush/Documents/University/Year 03/Sem 01/DATA3888/Optiver-07/Data/S27FE-10K.csv")
df['time_id'] = pd.to_datetime(df['time_id'], errors='coerce')
df = df.set_index('time_id')

In [10]:
# Modified ReturnCalculation: use existing 'log_return' column.
def ReturnCalculation(Database, lag=1):
    # Use the already-computed log_return values.
    DailyReturns = Database['log_return'].values
    Index = Database.index
    # Optionally, if you want to shift the series for a lag greater than 1
    if lag > 1:
        DailyReturns = np.append(np.repeat(np.nan, lag), DailyReturns[lag:])
    return DailyReturns, Index

def SDCalculation(DailyReturns, LagSD):
    dimension = DailyReturns.shape[0]
    dif = LagSD
    Out = np.zeros(dimension - dif)
    for i in range(dimension - dif):
        Out[i] = np.std(DailyReturns[i:i+LagSD], ddof=1)
    return np.append(np.repeat(np.nan, dif), Out)

def TrueSDCalculation(DailyReturns, LagSD):
    dimension = DailyReturns.shape[0]
    dif = LagSD
    Out = np.zeros(dimension - dif + 1)
    for i in range(dimension - dif + 1):
        Out[i] = np.std(DailyReturns[i:i+LagSD], ddof=1)
    return np.append(Out, np.repeat(np.nan, dif-1))

# Create a database that contains the returns and volatility measures.
def DatabaseGeneration(Database, Lag=1, LagSD=5):
    DailyReturns, Index = ReturnCalculation(Database, Lag)
    DailyReturnsOld = np.append(np.repeat(np.nan, 1), DailyReturns[0:(DailyReturns.shape[0]-1)])
    SD = SDCalculation(DailyReturns, LagSD)
    TrueSD = TrueSDCalculation(DailyReturns, LagSD)
    Data = pd.DataFrame({
        'DailyReturns': DailyReturns,
        'SD': SD,
        'TrueSD': TrueSD,
        'DailyReturnsOld': DailyReturnsOld
    }, index=Index)
    return Data.dropna()

# Similar function for forecasting (without dropping NaNs).
def DatabaseGenerationForecast(Database, Lag, LagSD):
    DailyReturns, Index = ReturnCalculation(Database, Lag)
    DailyReturnsOld = np.append(np.repeat(np.nan, 1), DailyReturns[0:(DailyReturns.shape[0]-1)])
    SD = SDCalculation(DailyReturns, LagSD)
    TrueSD = TrueSDCalculation(DailyReturns, LagSD)
    Data = pd.DataFrame({
        'DailyReturns': DailyReturns,
        'SD': SD,
        'TrueSD': TrueSD,
        'DailyReturnsOld': DailyReturnsOld
    }, index=Index)
    return Data

In [11]:
# ARCH-Family Model Functions

def GARCH_Model_Student(Data):
    AR_Data = Data['DailyReturns'] * 100
    GARCH11 = arch_model(AR_Data, dist='t')
    res_GARCH11 = GARCH11.fit(disp='off')
    CV_GARCH11 = res_GARCH11.conditional_volatility
    For_CV_GARCH11 = np.array(res_GARCH11.forecast(horizon=1).variance.dropna())[0][0]
    return GARCH11, res_GARCH11, CV_GARCH11, For_CV_GARCH11

def GJR_GARCH_Model_Student(Data):
    AR_Data = Data['DailyReturns'] * 100
    GJR_GARCH11 = arch_model(AR_Data, p=1, o=1, q=1, dist='t')
    res_GJR_GARCH11 = GJR_GARCH11.fit(disp='off')
    CV_GJR_GARCH11 = res_GJR_GARCH11.conditional_volatility
    For_CV_GJR_GARCH11 = np.array(res_GJR_GARCH11.forecast(horizon=1).variance.dropna())[0][0]
    return GJR_GARCH11, res_GJR_GARCH11, CV_GJR_GARCH11, For_CV_GJR_GARCH11

def TARCH_Model_Student(Data):
    AR_Data = Data['DailyReturns'] * 100
    TARCH11 = arch_model(AR_Data, p=1, o=1, q=1, power=1.0, dist='t')
    res_TARCH11 = TARCH11.fit(disp='off')
    CV_TARCH11 = res_TARCH11.conditional_volatility
    For_CV_TARCH11 = np.array(res_TARCH11.forecast(horizon=1).variance.dropna())[0][0]
    return TARCH11, res_TARCH11, CV_TARCH11, For_CV_TARCH11

def EGARCH_Model_Student(Data):
    AR_Data = Data['DailyReturns'] * 100
    EGARCH11 = arch_model(AR_Data, dist='t', vol="EGARCH")
    res_EGARCH11 = EGARCH11.fit(disp='off')
    CV_EGARCH11 = res_EGARCH11.conditional_volatility
    For_CV_EGARCH11 = np.array(res_EGARCH11.forecast(horizon=1).variance.dropna())[0][0]
    return EGARCH11, res_EGARCH11, CV_EGARCH11, For_CV_EGARCH11

def AVGARCH_Model_Student(Data):
    AR_Data = Data['DailyReturns'] * 100
    AVGARCH11 = arch_model(AR_Data, dist='t', power=1)
    res_AVGARCH11 = AVGARCH11.fit(disp='off', options={'maxiter': 1000})
    CV_AVGARCH11 = res_AVGARCH11.conditional_volatility
    For_CV_AVGARCH11 = np.array(res_AVGARCH11.forecast(horizon=1).variance.dropna())[0][0]
    return AVGARCH11, res_AVGARCH11, CV_AVGARCH11, For_CV_AVGARCH11

def FIGARCH_Model_Student(Data):
    AR_Data = Data['DailyReturns'] * 100
    FIGARCH11 = arch_model(AR_Data, dist='t', vol="FIGARCH")
    res_FIGARCH11 = FIGARCH11.fit(disp='off')
    CV_FIGARCH11 = res_FIGARCH11.conditional_volatility
    For_CV_FIGARCH11 = np.array(res_FIGARCH11.forecast(horizon=1).variance.dropna())[0][0]
    return FIGARCH11, res_FIGARCH11, CV_FIGARCH11, For_CV_FIGARCH11

def AR_Models(Data):
    GARCH, GARCH_Parameters, CV_GARCH, For_CV_GARCH = GARCH_Model_Student(Data)
    GJR_GARCH, GJR_GARCH_Parameters, CV_GJR_GARCH, For_CV_GJR_GARCH = GJR_GARCH_Model_Student(Data)
    TARCH, TARCH_Parameters, CV_TARCH, For_CV_TARCH = TARCH_Model_Student(Data)
    EGARCH, EGARCH_Parameters, CV_EGARCH, For_CV_EGARCH = EGARCH_Model_Student(Data)
    AVGARCH, AVGARCH_Parameters, CV_AVGARCH, For_CV_AVGARCH = AVGARCH_Model_Student(Data)
    FIGARCH, FIGARCH_Parameters, CV_FIGARCH, For_CV_FIGARCH = FIGARCH_Model_Student(Data)
    return (GARCH_Parameters, CV_GARCH, For_CV_GARCH, 
            GJR_GARCH_Parameters, CV_GJR_GARCH, For_CV_GJR_GARCH,
            TARCH_Parameters, CV_TARCH, For_CV_TARCH,
            EGARCH_Parameters, CV_EGARCH, For_CV_EGARCH,
            AVGARCH_Parameters, CV_AVGARCH, For_CV_AVGARCH,
            FIGARCH_Parameters, CV_FIGARCH, For_CV_FIGARCH)

In [12]:
# LSTM Model and Data Preparation for Deep Learning

def LSTM_Model(Shape1, Shape2, Dropout, LearningRate):
    Inputs = tf.keras.Input(shape=(Shape1, Shape2), name="Input")
    X = tf.keras.layers.LSTM(units=32, dropout=Dropout, return_sequences=False)(Inputs)
    X = tf.keras.layers.Dense(8, activation=tf.nn.sigmoid)(X)
    X = tf.keras.layers.Dropout(Dropout)(X)
    Output = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid, name="Output")(X)
    model = tf.keras.Model(inputs=Inputs, outputs=Output)
    Opt = tf.keras.optimizers.Adam(learning_rate=LearningRate)
    model.compile(optimizer=Opt, loss='mean_squared_error', metrics=['mean_squared_error'])
    return model

def LSTM_Database(Timestep, XData_AR, YData_AR):
    Features = XData_AR.shape[1]
    Sample = XData_AR.shape[0] - Timestep + 1
    XDataTrainScaledRNN = np.zeros([Sample, Timestep, Features])
    YDataTrainRNN = np.zeros(Sample)
    for i in range(Sample):
        XDataTrainScaledRNN[i, :, :] = XData_AR[i:(Timestep + i)]
        YDataTrainRNN[i] = YData_AR[Timestep + i - 1]
    return XDataTrainScaledRNN, YDataTrainRNN

# Build forecast database including ARCH model outputs.
def DatabaseGenerationForecast_AR(Database, Lag, LagSD, For_CV_GARCH, For_CV_GJR_GARCH, For_CV_TARCH, For_CV_EGARCH, For_CV_AVGARCH, For_CV_FIGARCH):
    Data_Forecast = DatabaseGenerationForecast(Database, Lag, LagSD).iloc[(-LagSD + 1)]
    Index_Forecast = DatabaseGenerationForecast(Database, Lag, LagSD).index[(-LagSD + 1)]
    XDataForecast = {
        'SD': Data_Forecast['SD'],
        'DailyReturnsOld': Data_Forecast['DailyReturnsOld'],
        'CV_GARCH': For_CV_GARCH / 100,
        'CV_GJR_GARCH': For_CV_GJR_GARCH / 100,
        'CV_TARCH': For_CV_TARCH / 100,
        'CV_EGARCH': For_CV_EGARCH / 100,
        'CV_AVGARCH': For_CV_AVGARCH / 100,
        'CV_FIGARCH': For_CV_FIGARCH / 100
    }
    return pd.DataFrame([XDataForecast], index=[Index_Forecast]), Data_Forecast['DailyReturns']

# LSTM-ARCH forecast function: scales, reshapes data and makes prediction.
def LSTM_ARCH_Forecast(Database, Lag, LagSD, For_CV_GARCH, For_CV_GJR_GARCH, For_CV_TARCH,
                       For_CV_EGARCH, For_CV_AVGARCH, For_CV_FIGARCH, Scaled_Norm, XData_AR, model, Timestep):
    XDataForecast, ReturnForecast = DatabaseGenerationForecast_AR(
        Database, Lag, LagSD,
        For_CV_GARCH, For_CV_GJR_GARCH, For_CV_TARCH,
        For_CV_EGARCH, For_CV_AVGARCH, For_CV_FIGARCH
    )
    XDataForecast = pd.concat([XData_AR, XDataForecast])
    XDataForecastTotalScaled = Scaled_Norm.transform(XDataForecast)
    XDataForecastTotalScaled_T, Y_T = LSTM_Database(Timestep, XDataForecastTotalScaled, 
                                                    np.zeros(XDataForecastTotalScaled.shape[0]))
    TransformerPrediction = model.predict(XDataForecastTotalScaled_T)
    return (TransformerPrediction[-1][0],
            XDataForecast.index[-1],
            TransformerPrediction[0:(XDataForecastTotalScaled_T.shape[0] - 1)],
            ReturnForecast)

In [13]:
# VaR Calculation Functions

def LSTM_ARCH_VaR(Alpha, HistoricalReturns, ForecastedSigma, DF):
    HistoricalMean = np.mean(HistoricalReturns)
    ScaleParameter = np.sqrt((ForecastedSigma**2) * ((DF - 2) / DF))
    VaR = -t.ppf(Alpha, DF, loc=HistoricalMean, scale=ScaleParameter)
    return VaR

def VaR_AR_Model(AR_Model, AR_Model_Results, Alpha):
    Cond_Var = AR_Model_Results.forecast(horizon=1).variance.dropna()
    Cond_Mean = AR_Model_Results.forecast(horizon=1).mean.dropna()
    Quantile_Dist = AR_Model.distribution.ppf([Alpha], AR_Model_Results.params[-1:])
    VaR = (-Cond_Mean - np.sqrt(Cond_Var) * Quantile_Dist) / 100
    return VaR.values

def VaR_AR_Total(Alpha, GARCH_fit, GJR_GARCH_fit, TARCH_fit, EGARCH_fit, AVGARCH_fit, FIGARCH_fit, 
                 GARCH, GJR_GARCH, TARCH, EGARCH, AVGARCH, FIGARCH):
    VaR_GARCH = VaR_AR_Model(GARCH, GARCH_fit, Alpha)
    VaR_GJR_GARCH = VaR_AR_Model(GJR_GARCH, GJR_GARCH_fit, Alpha)
    VaR_TARCH = VaR_AR_Model(TARCH, TARCH_fit, Alpha)
    VaR_EGARCH = VaR_AR_Model(EGARCH, EGARCH_fit, Alpha)
    VaR_AVGARCH = VaR_AR_Model(AVGARCH, AVGARCH_fit, Alpha)
    VaR_FIGARCH = VaR_AR_Model(FIGARCH, FIGARCH_fit, Alpha)
    return {
        'VaR_GARCH': VaR_GARCH,
        'VaR_GJR_GARCH': VaR_GJR_GARCH,
        'VaR_TARCH': VaR_TARCH,
        'VaR_EGARCH': VaR_EGARCH,
        'VaR_AVGARCH': VaR_AVGARCH,
        'VaR_FIGARCH': VaR_FIGARCH
    }


In [14]:
# Final LSTM-ARCH Fitting Function

def LSTM_ARCH_Fit(Data, Lag=1, LagSD=5, Timestep=10, Dropout=0.05, LearningRate=0.001,
                  Epochs=10000, Alpha=0.005, DF=4, BatchSize=64):
    # Generate ARCH input database using the modified returns
    Data_AR = DatabaseGeneration(Data, Lag, LagSD)
    
    # Fit ARCH models on the preprocessed data
    GARCH, GARCH_Parameters, CV_GARCH, For_CV_GARCH = GARCH_Model_Student(Data_AR)
    GJR_GARCH, GJR_GARCH_Parameters, CV_GJR_GARCH, For_CV_GJR_GARCH = GJR_GARCH_Model_Student(Data_AR)
    TARCH, TARCH_Parameters, CV_TARCH, For_CV_TARCH = TARCH_Model_Student(Data_AR)
    EGARCH, EGARCH_Parameters, CV_EGARCH, For_CV_EGARCH = EGARCH_Model_Student(Data_AR)
    AVGARCH, AVGARCH_Parameters, CV_AVGARCH, For_CV_AVGARCH = AVGARCH_Model_Student(Data_AR)
    FIGARCH, FIGARCH_Parameters, CV_FIGARCH, For_CV_FIGARCH = FIGARCH_Model_Student(Data_AR)
    
    # Append conditional volatilities from ARCH models to the database
    Data_AR = pd.concat([
        Data_AR,
        CV_GARCH.rename('CV_GARCH') / 100,
        CV_GJR_GARCH.rename('CV_GJR_GARCH') / 100,
        CV_TARCH.rename('CV_TARCH') / 100,
        CV_EGARCH.rename('CV_EGARCH') / 100,
        CV_AVGARCH.rename('CV_AVGARCH') / 100,
        CV_FIGARCH.rename('CV_FIGARCH') / 100
    ], axis=1)
    
    if Data_AR.shape[0] != DatabaseGeneration(Data, Lag, LagSD).shape[0]:
        print("Error in DB Generation")
    
    # Prepare explanatory (X) and response (Y) variables.
    # In this setting, we drop 'DailyReturns' and 'TrueSD' from features and use 'TrueSD' as Y.
    XData_AR = Data_AR.drop(Data_AR.columns[[0, 2]], axis=1)
    YData_AR = Data_AR['TrueSD']
    
    # Normalize explanatory variables.
    Scaled_Norm = preprocessing.StandardScaler().fit(XData_AR)
    XData_AR_Norm = Scaled_Norm.transform(XData_AR)
    
    # Rearrange data for LSTM input.
    XData_AR_Norm_T, YData_AR_Norm_T = LSTM_Database(Timestep, XData_AR_Norm, YData_AR)
    
    # Define and train the LSTM model.
    model = LSTM_Model(XData_AR_Norm_T.shape[1], XData_AR_Norm_T.shape[2], Dropout, LearningRate)
    model.fit(XData_AR_Norm_T, YData_AR_Norm_T, epochs=Epochs, verbose=0, batch_size=BatchSize)
    tf.keras.backend.clear_session()
    
    # Forecast the next volatility value using the hybrid model.
    Forecast, Date_Forecast, TrainPrediction, ReturnForecast = LSTM_ARCH_Forecast(
        Data, Lag, LagSD,
        For_CV_GARCH, For_CV_GJR_GARCH, For_CV_TARCH,
        For_CV_EGARCH, For_CV_AVGARCH, For_CV_FIGARCH,
        Scaled_Norm, XData_AR, model, Timestep
    )
    
    # Compute Value-at-Risk (VaR) using the forecasted volatility.
    VaR = LSTM_ARCH_VaR(Alpha, Data['log_return'].values, Forecast, DF)
    return {
        'LSTM_ARCH_model': model,
        'Forecast_LSTM_ARCH': Forecast,
        'Date_Forecast': Date_Forecast,
        'TrainPrediction': TrainPrediction,
        'Scaler': Scaled_Norm,
        'Forecast_GARCH': For_CV_GARCH,
        'Forecast_GJR_GARCH': For_CV_GJR_GARCH,
        'Forecast_TARCH': For_CV_TARCH,
        'Forecast_EGARCH': For_CV_EGARCH,
        'Forecast_AVGARCH': For_CV_AVGARCH,
        'Forecast_FIGARCH': For_CV_FIGARCH,
        'ReturnForecast': ReturnForecast,
        'GARCH_fit': GARCH_Parameters,
        'GJR_GARCH_fit': GJR_GARCH_Parameters,
        'TARCH_fit': TARCH_Parameters,
        'EGARCH_fit': EGARCH_Parameters,
        'AVGARCH_fit': AVGARCH_Parameters,
        'FIGARCH_fit': FIGARCH_Parameters,
        'GARCH': GARCH,
        'GJR_GARCH': GJR_GARCH,
        'TARCH': TARCH,
        'EGARCH': EGARCH,
        'AVGARCH': AVGARCH,
        'FIGARCH': FIGARCH,
        'YData_Train': YData_AR_Norm_T,
        'VaR': VaR
    }

In [16]:
result = LSTM_ARCH_Fit(df, Lag=1, LagSD=5, Timestep=10, Dropout=0.05,
                        LearningRate=0.001, Epochs=100, Alpha=0.005, DF=4, BatchSize=64)

print("Forecasted Volatility:", result['Forecast_LSTM_ARCH'])
print("Forecast Date:", result['Date_Forecast'])
print("Calculated VaR:", result['VaR'])

estimating the model parameters. The scale of y is 0.00263. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.00263. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

model or by setting rescale=False.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

estimating the model parameters. The scale of y is 0.00263. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.00263. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.00263. Parameter
estimation work bette

[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 550us/step
Forecasted Volatility: 0.00042326696
Forecast Date: 1970-01-01 00:00:00.000000373
Calculated VaR: 0.0013788617845196735
