In [1]:
!pip install pandas scikit-learn tensorflow



In [2]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

Loading the Options Data collected by the Chicago Board Options Exchange (CBOE), need to sieve through the data to find the right features for the Options Pricing Model.

In [3]:
data_2021 = pd.read_csv('daily_volume_2021.csv')
data_2022 = pd.read_csv('daily_volume_2022.csv')
data_2023 = pd.read_csv('daily_volume_2023.csv')

Now, the data needs to be merged, then the format of the time needs to be converted into a format that can be used to sort the various trades. Then, feature engineering is performed where, moving averages are created based on volume.

In [5]:
# Merging Darta
data_combined = pd.concat([data_2021, data_2022, data_2023])

# Changing the format
data_combined['Trade Date'] = pd.to_datetime(data_combined['Trade Date'])
data_combined = data_combined.sort_values('Trade Date')

# Feature Engineering
data_combined['Volume_MA_5'] = data_combined.groupby(['Options Class', 'Underlying'])['Volume'].transform(lambda x: x.rolling(window=5, min_periods=1).mean())
data_combined['Volume_MA_20'] = data_combined.groupby(['Options Class', 'Underlying'])['Volume'].transform(lambda x: x.rolling(window=20, min_periods=1).mean())
data_combined = data_combined.drop(columns=['Product Type', 'Exchange'])
data_combined.fillna(0, inplace=True)

Now that the data is easier to read for the Neural Network, we can split the data into train and test data, then it is normalized.

In [8]:
train_data = data_combined[data_combined['Trade Date'] < '2023-10-01']
test_data = data_combined[data_combined['Trade Date'] >= '2023-10-01']

X_train = train_data[['Volume', 'Volume_MA_5', 'Volume_MA_20']]
y_train = train_data['Volume']

X_test = test_data[['Volume', 'Volume_MA_5', 'Volume_MA_20']]
y_test = test_data['Volume']

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

Now, we can define the Neural Network model used for this particular program.

In [9]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mse')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


After the model is initialized, we can train it and evaluate the test data.

In [10]:
# Train the model
model.fit(X_train_scaled, y_train, validation_data=(X_test_scaled, y_test), epochs=10, batch_size=32, verbose=1)


Epoch 1/10
[1m66650/66650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 2ms/step - loss: 1412648320.0000 - val_loss: 1873854080.0000
Epoch 2/10
[1m66650/66650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 2ms/step - loss: 522726080.0000 - val_loss: 59508296.0000
Epoch 3/10
[1m66650/66650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 2ms/step - loss: 30532234.0000 - val_loss: 52110504.0000
Epoch 4/10
[1m66650/66650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 2ms/step - loss: 26483504.0000 - val_loss: 44515940.0000
Epoch 5/10
[1m66650/66650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 2ms/step - loss: 21850314.0000 - val_loss: 38816188.0000
Epoch 6/10
[1m66650/66650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 2ms/step - loss: 20984126.0000 - val_loss: 34233320.0000
Epoch 7/10
[1m66650/66650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 2ms/step - loss: 17453838.0000 - val_loss: 27188216.0000
Epoch 8/10
[1m

<keras.src.callbacks.history.History at 0x795857eab910>

In [None]:
# Evaluate the model on the test data
test_loss = model.evaluate(X_test_scaled, y_test)

# Print the test loss (Mean Squared Error)
print(f'Test Loss (MSE): {test_loss}')

# Making predictions for the next year (2024) - this is hypothetical and depends on what future data you have
predictions = model.predict(X_test_scaled)

# Display some predictions
for i in range(10):
    print(f'Predicted: {predictions[i][0]:.2f}, Actual: {y_test.iloc[i]:.2f}')