In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense



# Seed NumPy and TensorFlow before any weights are created so that the
# reported score can be reproduced when this cell is re-run.
np.random.seed(42)
tf.random.set_seed(42)

df = pd.read_csv('combined_dataset.csv')

# Parse the first column as datetimes and use it as the index, so the split
# below can compare the index against a Timestamp.
index_col = df.columns[0]
df[index_col] = pd.to_datetime(df[index_col])
df = df.set_index(index_col)

# Keep the first occurrence of every column label and discard later repeats.
# A positional mask is used on purpose: dropping by label would remove *all*
# columns that share a repeated label, including the first one.
df = df.loc[:, ~df.columns.duplicated()]

# Chronological split: rows dated before the cut-off are used for training,
# rows on or after it for testing.
split_date = pd.Timestamp('2023-10-01')
train = df.loc[df.index < split_date]
test = df.loc[df.index >= split_date]
assert len(train) > 0 and len(test) > 0, "split_date leaves an empty train or test set"

# Features are every column except the target; the target is NASDAQ_Volatility.
X_train = train.drop(columns=['NASDAQ_Volatility']).values
y_train = train['NASDAQ_Volatility'].values
X_test = test.drop(columns=['NASDAQ_Volatility']).values
y_test = test['NASDAQ_Volatility'].values

# Fit the scaler on the training rows only, then apply the same transform to
# the test rows so no test statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape to the 3D layout the LSTM layer takes: [samples, timesteps, features].
# Each row becomes a sequence of length 1 whose last axis holds all features.
n_timesteps = 1
n_features = X_train.shape[1]
X_train = X_train.reshape((X_train.shape[0], n_timesteps, n_features))
X_test = X_test.reshape((X_test.shape[0], n_timesteps, n_features))

# Build LSTM model: one 50-unit LSTM layer followed by a single linear output.
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(n_timesteps, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')

# Fit model
model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1)

# Predict and evaluate. predict() returns shape (n_samples, 1); flatten it so
# it lines up with the 1-D y_test.
y_pred = model.predict(X_test).ravel()
r_squared = r2_score(y_test, y_pred)
print(f'R-squared on Test Set: {r_squared}')


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
R-squared on Test Set: -5.4711619840037535


In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.layers import Dropout, Bidirectional

import os
import random
import numpy as np
import tensorflow as tf

def custom_set_seed(seed_value):
    """Apply one seed to every random source this notebook touches.

    Stores ``str(seed_value)`` in the ``PYTHONHASHSEED`` environment variable,
    then hands ``seed_value`` to ``random.seed``, ``np.random.seed`` and
    ``tf.random.set_seed``, in that order.

    Args:
        seed_value: The seed passed unchanged to each seeding function.

    Returns:
        None.
    """
    # NOTE(review): setting PYTHONHASHSEED here presumably only matters for
    # processes started afterwards, not the running interpreter - confirm.
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    for apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
        apply_seed(seed_value)
    
# Seed Python's random module, NumPy and TensorFlow in one call.
# custom_set_seed already invokes np.random.seed and tf.random.set_seed, so
# no separate calls are needed afterwards.
custom_set_seed(42)

# Load the dataset and use its first column, parsed as datetimes, as the index.
df = pd.read_csv('combined_dataset.csv')
df = df.set_index(df.columns[0])
df.index = pd.to_datetime(df.index)

# Keep the first occurrence of every column label and discard later repeats.
# A positional mask is used on purpose: dropping by label would remove *all*
# columns that share a repeated label, including the first one.
df = df.loc[:, ~df.columns.duplicated()]

# Split by date threshold: rows before the cut-off train the model, rows on
# or after it are held out for testing.
split_date = pd.Timestamp('2023-10-01')
train = df.loc[df.index < split_date]
test = df.loc[df.index >= split_date]
assert len(train) > 0 and len(test) > 0, "split_date leaves an empty train or test set"

# Features are every column except the target; the target is NASDAQ_Volatility.
X_train = train.drop(columns=['NASDAQ_Volatility']).values
y_train = train['NASDAQ_Volatility'].values
X_test = test.drop(columns=['NASDAQ_Volatility']).values
y_test = test['NASDAQ_Volatility'].values

# Normalize the features: fit on the training rows only and reuse that
# transform for the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape data for the LSTM layers: [samples, timesteps, features], with each
# row treated as a sequence of length 1.
n_timesteps = 1
X_train = X_train.reshape((X_train.shape[0], n_timesteps, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], n_timesteps, X_test.shape[1]))

# Build the model: a bidirectional LSTM that returns the full sequence, a
# second LSTM that collapses it, dropout, then two dense layers ending in a
# single linear output.
model = Sequential()
model.add(Bidirectional(LSTM(150, activation='relu', return_sequences=True), input_shape=(n_timesteps, X_train.shape[2])))
model.add(LSTM(58, activation='relu', return_sequences=False))
model.add(Dropout(0.20))
model.add(Dense(30, activation='relu'))
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train, y_train, epochs=30, batch_size=50, verbose=1)

# Predict using the test set. predict() returns shape (n_samples, 1); flatten
# it so it lines up with the 1-D y_test.
y_pred = model.predict(X_test).ravel()

# Calculate R-squared; the bare name on the last line makes the notebook
# display the value.
r_squared = r2_score(y_test, y_pred)
r_squared


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


-0.6632012177958557