# Importing new dataset for evaluation


In [40]:
import pandas as pd


# Use read_csv() to load the CSV file into a pandas DataFrame
data = pd.read_csv('Data/new_data.csv',low_memory=False)
data.drop("Unnamed: 0", axis = 1, inplace = True)
data



Unnamed: 0,lastTradeDate,strike,price,bid,ask,change,percentChange,volume,openInterest,sigma,...,returnOnAssets,returnOnEquity,freeCashflow,operatingCashflow,revenueGrowth,grossMargins,ebitdaMargins,operatingMargins,tau,tau2
0,2024-07-05,2.5,0.50,14.84,14.92,0.0,0.0,88844.0,283.0,1.003608,...,0.01849,-0.01186,101813000.0,89253000.0,0.038,0.45766,0.14573,-0.03611,0.039683,0.038356
1,2024-06-28,5.0,0.05,14.84,14.92,0.0,0.0,88844.0,1527.0,1.003608,...,0.01849,-0.01186,101813000.0,89253000.0,0.038,0.45766,0.14573,-0.03611,0.059524,0.057534
2,2024-05-21,7.5,0.08,14.84,14.92,0.0,0.0,88844.0,3081.0,1.003608,...,0.01849,-0.01186,101813000.0,89253000.0,0.038,0.45766,0.14573,-0.03611,0.170635,0.161644
3,2024-06-27,2.5,0.10,14.84,14.92,0.0,0.0,88844.0,297.0,1.003608,...,0.01849,-0.01186,101813000.0,89253000.0,0.038,0.45766,0.14573,-0.03611,0.063492,0.060274
4,2024-06-07,5.0,2.10,14.84,14.92,0.0,0.0,88844.0,0.0,1.003608,...,0.01849,-0.01186,101813000.0,89253000.0,0.038,0.45766,0.14573,-0.03611,0.119048,0.115068
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2969,2024-01-30,12.5,3.20,14.84,14.92,0.0,0.0,88844.0,3.0,0.488141,...,0.01849,-0.01186,101813000.0,89253000.0,0.038,0.45766,0.14573,-0.03611,0.567460,0.545205
2970,2024-03-01,17.5,0.30,14.84,14.92,0.0,0.0,88844.0,1.0,0.488141,...,0.01849,-0.01186,101813000.0,89253000.0,0.038,0.45766,0.14573,-0.03611,0.476190,0.460274
2971,2024-02-29,10.0,0.05,14.84,14.92,0.0,0.0,88844.0,2.0,0.488141,...,0.01849,-0.01186,101813000.0,89253000.0,0.038,0.45766,0.14573,-0.03611,0.480159,0.463014
2972,2024-02-23,12.5,1.45,14.84,14.92,0.0,0.0,88844.0,1.0,0.488141,...,0.01849,-0.01186,101813000.0,89253000.0,0.038,0.45766,0.14573,-0.03611,0.496032,0.479452


# 1) Black and Scholes performance

In [41]:
bs_variables = [
    'strike',
    'stock',
    'tau',
    'sigma',
    'price',
    'call',
    'ticker'
]


bs_data = data[bs_variables]

In [33]:
call_data = bs_data[bs_data['call'] == 1]
put_data = bs_data[bs_data['call'] == 0]

call_data = call_data.drop(columns=['call'])
put_data = put_data.drop(columns=['call'])

In [61]:
import numpy as np
from scipy.stats import norm
from datetime import datetime

def black_scholes_call(row):
    
    S = row['stock']
    K = row['strike']
    tau = row['tau']
    r = 2.5
    sigma = row['sigma']
    
    
    d1 = (np.log(S / K) + (r + 0.5 * sigma ** 2) * tau) / (sigma * np.sqrt(tau))
    d2 = d1 - sigma * np.sqrt(tau)

    call_price = S * norm.cdf(d1) - K * np.exp(-r * tau) * norm.cdf(d2)

    return call_price

def black_scholes_put(row):
    
    S = row['stock'] 
    K = row['strike'] 
    tau = row['tau'] 
    r = 0.045
    sigma = row['sigma'] * 57
    

    d1 = (np.log(S / K) + (r + 0.5 * sigma ** 2) * tau) / (sigma * np.sqrt(tau))
    d2 = d1 - sigma * np.sqrt(tau)

    put_price = K * np.exp(-r * tau) * norm.cdf(-d2) - S *norm.cdf(-d1)

    return put_price


call_data['BS'] = call_data.apply(black_scholes_call, axis=1)
put_data['BS'] = put_data.apply(black_scholes_put, axis=1)

In [62]:
put_data

Unnamed: 0,strike,stock,tau,sigma,price,ticker,BS
3,2.5,2.81,0.063492,1.003608,0.10,PSTX,2.492867
4,5.0,2.86,0.119048,1.003608,2.10,PSTX,4.973286
25,80.0,163.67,0.071429,0.541935,0.05,INSP,79.739102
26,85.0,163.67,0.071429,0.541935,0.12,INSP,84.722926
27,90.0,148.51,0.111111,0.541935,0.10,INSP,89.551093
...,...,...,...,...,...,...,...
2965,15.0,17.08,0.039683,0.264581,0.05,AKR,12.846568
2968,20.0,17.96,0.083333,0.252560,2.68,MRTN,19.211740
2971,10.0,12.79,0.480159,0.488141,0.05,STER,9.786246
2972,12.5,12.62,0.496032,0.488141,1.45,STER,12.224073


# Evaluating performance for call options

In [63]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pandas as pd

# Calculate Mean Squared Error (MSE)
call_mse = mean_squared_error(call_data['price'], call_data['BS'])

# Calculate Root Mean Squared Error (RMSE)
call_rmse = np.sqrt(call_mse)

# Calculate Mean Absolute Error (MAE)
call_mae = mean_absolute_error(call_data['price'], call_data['BS'])

# Calculate Mean Absolute Percentage Error (MAPE)
call_mape = np.mean(np.abs((call_data['price'] - call_data['BS']) / call_data['price'])) * 100

# Calculate R-squared
call_r_squared = r2_score(call_data['price'], call_data['BS'])

print("Mean Squared Error (MSE):", call_mse)
print("Root Mean Squared Error (RMSE):", call_rmse)
print("Mean Absolute Error (MAE):", call_mae)
print("Mean Absolute Percentage Error (MAPE):", call_mape)
print("R-squared:", call_r_squared)


Mean Squared Error (MSE): 648.7657992161297
Root Mean Squared Error (RMSE): 25.47088139849365
Mean Absolute Error (MAE): 10.472769739614108
Mean Absolute Percentage Error (MAPE): 2109.139745794605
R-squared: -2.9916775168400904


# Evaluating performance for put option

In [60]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pandas as pd

# Calculate Mean Squared Error (MSE)
put_mse = mean_squared_error(put_data['price'], put_data['BS'])

# Calculate Root Mean Squared Error (RMSE)
put_rmse = np.sqrt(put_mse)

# Calculate Mean Absolute Error (MAE)
put_mae = mean_absolute_error(put_data['price'], put_data['BS'])

# Calculate Mean Absolute Percentage Error (MAPE)
put_mape = np.mean(np.abs((put_data['price'] - put_data['BS']) / put_data['price'])) * 100

# Calculate R-squared
put_r_squared = r2_score(put_data['price'], put_data['BS'])

print("Mean Squared Error (MSE):", put_mse)
print("Root Mean Squared Error (RMSE):", put_rmse)
print("Mean Absolute Error (MAE):", put_mae)
print("Mean Absolute Percentage Error (MAPE):", put_mape)
print("R-squared:", put_r_squared)


Mean Squared Error (MSE): 14036.170560434508
Root Mean Squared Error (RMSE): 118.47434557926246
Mean Absolute Error (MAE): 77.00992153491714
Mean Absolute Percentage Error (MAPE): 32100.354321672177
R-squared: -160.30323707886805


# 2) ANN1 performance

In [64]:
ann1_variables = [
    'strike',
    'stock',
    'tau',
    'sigma',
    'price',
    'call',
]


ann1_data = data[ann1_variables]

In [66]:
ann1_call_data = ann1_data[ann1_data.call == 1]
ann1_put_data = ann1_data[ann1_data.call == 0]

ann1_call_data.drop('call', axis = 1, inplace = True)
ann1_put_data.drop('call', axis = 1, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ann1_call_data.drop('call', axis = 1, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ann1_put_data.drop('call', axis = 1, inplace = True)


In [67]:
from keras.models import load_model

# Load the model
ANN1_call = load_model('models\\ann1\\ANN1_call.keras')
ANN1_put = load_model('models\\ann1\\ANN1_put.keras')






In [68]:
y_call = ann1_call_data['price']
ann1_call_data.drop('price', axis = 1, inplace = True)

y_pred = ANN1_call.predict(ann1_call_data)

ann1_call_data['price'] = y_call
ann1_call_data['ann1'] = y_pred


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ann1_call_data.drop('price', axis = 1, inplace = True)


In [74]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pandas as pd

# Calculate Mean Squared Error (MSE)
ANN1_call_mse = mean_squared_error(ann1_call_data['price'],ann1_call_data['ann1'])

# Calculate Root Mean Squared Error (RMSE)
ANN1_call_rmse = np.sqrt(ANN1_call_mse)

# Calculate Mean Absolute Error (MAE)
ANN1_call_mae = mean_absolute_error(ann1_call_data['price'], ann1_call_data['ann1'])

# Calculate Mean Absolute Percentage Error (MAPE)
ANN1_call_mape = np.mean(np.abs((ann1_call_data['price'] - ann1_call_data['ann1']) / ann1_call_data['price'])) * 100

# Calculate R-squared
ANN1_call_r_squared = r2_score(ann1_call_data['price'], ann1_call_data['ann1'])

print("Mean Squared Error (MSE):", ANN1_call_mse)
print("Root Mean Squared Error (RMSE):", ANN1_call_rmse)
print("Mean Absolute Error (MAE):", ANN1_call_mae)
print("Mean Absolute Percentage Error (MAPE):", ANN1_call_mape)
print("R-squared:", ANN1_call_r_squared)


Mean Squared Error (MSE): 24.909076463314346
Root Mean Squared Error (RMSE): 4.990899364174191
Mean Absolute Error (MAE): 2.2530457877878756
Mean Absolute Percentage Error (MAPE): 889.1526180242483
R-squared: 0.8467413038665147


In [75]:
y_put = ann1_put_data['price']
ann1_put_data.drop('price', axis = 1, inplace = True)

y_pred = ANN1_put.predict(ann1_put_data)

ann1_put_data['price'] = y_put
ann1_put_data['ann1'] = y_pred

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ann1_put_data.drop('price', axis = 1, inplace = True)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ann1_put_data['price'] = y_put
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ann1_put_data['ann1'] = y_pred


In [76]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pandas as pd

# Calculate Mean Squared Error (MSE)
ANN1_put_mse = mean_squared_error(ann1_put_data['price'], ann1_put_data['ann1'])

# Calculate Root Mean Squared Error (RMSE)
ANN1_put_rmse = np.sqrt(ANN1_put_mse)

# Calculate Mean Absolute Error (MAE)
ANN1_put_mae = mean_absolute_error(ann1_put_data['price'], ann1_put_data['ann1'])

# Calculate Mean Absolute Percentage Error (MAPE)
ANN1_put_mape = np.mean(np.abs((ann1_put_data['price'] - ann1_put_data['ann1']) / ann1_put_data['price'])) * 100

# Calculate R-squared
ANN1_put_r_squared = r2_score(ann1_put_data['price'], ann1_put_data['ann1'])

print("Mean Squared Error (MSE):", ANN1_put_mse)
print("Root Mean Squared Error (RMSE):", ANN1_put_rmse)
print("Mean Absolute Error (MAE):", ANN1_put_mae)
print("Mean Absolute Percentage Error (MAPE):", ANN1_put_mape)
print("R-squared:", ANN1_put_r_squared)

Mean Squared Error (MSE): 26.676925089792135
Root Mean Squared Error (RMSE): 5.164970966984436
Mean Absolute Error (MAE): 2.225272584696397
Mean Absolute Percentage Error (MAPE): 357.5398815606431
R-squared: 0.6934296036253969


# 3) ANN2 performance

In [2]:
import sklearn as sk
import matplotlib as plt
import pandas as pd


# Use read_csv() to load the CSV file into a pandas DataFrame
data = pd.read_csv('Data/new_data.csv',low_memory=False)

ann2_variables = [
    'strike',
    'stock',
    'tau',
    'sigma',
    'price',
    'call',
    'dividendRate',
    'dividendYield',
    'fiveYearAvgDividendYield',
]


ann2_data = data[ann2_variables]

In [4]:
ann2_call_data = ann2_data[ann2_data.call == 1]
ann2_put_data = ann2_data[ann2_data.call == 0]

ann2_call_data.drop('call', axis = 1, inplace = True)
ann2_put_data.drop('call', axis = 1, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ann2_call_data.drop('call', axis = 1, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ann2_put_data.drop('call', axis = 1, inplace = True)


In [5]:
from keras.models import load_model

# Load the model
ANN2_call = load_model('models\\ann2\\ANN2_call.keras')
ANN2_put = load_model('models\\ann2\\ANN2_put.keras')






In [6]:
y_call = ann2_call_data['price']
ann2_call_data.drop('price', axis = 1, inplace = True)

y_pred = ANN2_call.predict(ann2_call_data)

ann2_call_data['price'] = y_call
ann2_call_data['ann2'] = y_pred

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ann2_call_data.drop('price', axis = 1, inplace = True)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ann2_call_data['price'] = y_call
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ann2_call_data['ann2'] = y_pred


In [8]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pandas as pd

# Calculate Mean Squared Error (MSE)
ANN2_call_mse = mean_squared_error(ann2_call_data['price'],ann2_call_data['ann2'])

# Calculate Root Mean Squared Error (RMSE)
ANN2_call_rmse = np.sqrt(ANN2_call_mse)

# Calculate Mean Absolute Error (MAE)
ANN2_call_mae = mean_absolute_error(ann2_call_data['price'], ann2_call_data['ann2'])

# Calculate Mean Absolute Percentage Error (MAPE)
ANN2_call_mape = np.mean(np.abs((ann2_call_data['price'] - ann2_call_data['ann2']) / ann2_call_data['price'])) * 100

# Calculate R-squared
ANN2_call_r_squared = r2_score(ann2_call_data['price'], ann2_call_data['ann2'])

print("Mean Squared Error (MSE):", ANN2_call_mse)
print("Root Mean Squared Error (RMSE):", ANN2_call_rmse)
print("Mean Absolute Error (MAE):", ANN2_call_mae)
print("Mean Absolute Percentage Error (MAPE):", ANN2_call_mape)
print("R-squared:", ANN2_call_r_squared)


Mean Squared Error (MSE): 24.474761452209577
Root Mean Squared Error (RMSE): 4.947197333057332
Mean Absolute Error (MAE): 2.3249614065609983
Mean Absolute Percentage Error (MAPE): 1161.6003523262773
R-squared: 0.8494135246696886


In [9]:
y_put = ann2_put_data['price']
ann2_put_data.drop('price', axis=1, inplace=True)

y_pred = ANN2_put.predict(ann2_put_data)

ann2_put_data['price'] = y_put
ann2_put_data['ann2'] = y_pred


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ann2_put_data.drop('price', axis=1, inplace=True)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ann2_put_data['price'] = y_put
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ann2_put_data['ann2'] = y_pred


In [10]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pandas as pd

# Calculate Mean Squared Error (MSE)
ANN2_put_mse = mean_squared_error(ann2_put_data['price'], ann2_put_data['ann2'])

# Calculate Root Mean Squared Error (RMSE)
ANN2_put_rmse = np.sqrt(ANN2_put_mse)

# Calculate Mean Absolute Error (MAE)
ANN2_put_mae = mean_absolute_error(ann2_put_data['price'], ann2_put_data['ann2'])

# Calculate Mean Absolute Percentage Error (MAPE)
ANN2_put_mape = np.mean(np.abs((ann2_put_data['price'] - ann2_put_data['ann2']) / ann2_put_data['price'])) * 100

# Calculate R-squared
ANN2_put_r_squared = r2_score(ann2_put_data['price'], ann2_put_data['ann2'])

print("Mean Squared Error (MSE):", ANN2_put_mse)
print("Root Mean Squared Error (RMSE):", ANN2_put_rmse)
print("Mean Absolute Error (MAE):", ANN2_put_mae)
print("Mean Absolute Percentage Error (MAPE):", ANN2_put_mape)
print("R-squared:", ANN2_put_r_squared)


Mean Squared Error (MSE): 25.861759864395307
Root Mean Squared Error (RMSE): 5.085445886487762
Mean Absolute Error (MAE): 2.40247138571141
Mean Absolute Percentage Error (MAPE): 558.1418750288977
R-squared: 0.7027974571324835


# 4) ANN3 performance

In [72]:
import sklearn as sk
import matplotlib as plt
import pandas as pd


# Use read_csv() to load the CSV file into a pandas DataFrame
data = pd.read_csv('Data/new_data.csv',low_memory=False)

columns_to_remove = ['Unnamed: 0','lastTradeDate','address1', 'city', 'country', 'industry', 'sector','recommendationKey','expirationDate','exchange','timeZoneShortName']

data.drop(columns_to_remove, axis = 1, inplace= True)
data = pd.get_dummies(data, columns=['ticker','state'])

data = data.replace({"False": 0, "True": 1})
data = data.replace({False: 0, True: 1})
data = data.replace({"2024-03-08": 0})


In [73]:
ann3_data = data.replace({"2024-03-08": 0})

In [74]:
ann3_call_data = ann3_data[ann3_data.call == 1]
ann3_put_data = ann3_data[ann3_data.call == 0]

ann3_call_data.drop('call', axis = 1, inplace = True)
ann3_put_data.drop('call', axis = 1, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ann3_call_data.drop('call', axis = 1, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ann3_put_data.drop('call', axis = 1, inplace = True)


In [75]:
from keras.models import load_model

# Load the model
ANN3_call = load_model('models\\ann3\\ANN3_call.keras')
ANN3_put = load_model('models\\ann3\\ANN3_put.keras')

In [67]:
import numpy as np
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model

# Load the training data (assuming you have access to it)
# This is necessary to fit the scaler properly.
# Replace `training_data` with the actual training DataFrame used earlier
training_data = pd.DataFrame(ann3_call_data)  # Replace with actual training data

# Separate features and target in the training data
y_train = training_data['price']
X_train = training_data.drop('price', axis=1)

# Fit the scaler on the training data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)

# Save the fitted scaler
joblib.dump(scaler, 'scaler_ann3_call.pkl')

# Now proceed with predictions on new data

# Load the new data (which you want to predict)
# Assuming `ann3_call_data` is your new data that needs predictions
y_call = ann3_call_data['price'].values  # Save target column for later use
ann3_call_data = ann3_call_data.drop('price', axis=1)  # Drop target column

# Convert new data to NumPy array and ensure correct data type
X_new = np.array(ann3_call_data, dtype=np.float32)

# Load the scaler and transform the new data
scaler = joblib.load('scaler_ann3_call.pkl')
X_new = scaler.transform(X_new)

# Check the shape of X_new
current_shape = X_new.shape[1]
expected_shape = 4181
if current_shape < expected_shape:
    # Add columns of zeros to match the expected shape
    zeros_to_add = expected_shape - current_shape
    X_new = np.hstack((X_new, np.zeros((X_new.shape[0], zeros_to_add), dtype=np.float32)))


# Make predictions
y_pred = ANN3_call.predict(X_new)

# Reconstruct the original DataFrame with predictions
ann3_call_data['price'] = y_call
ann3_call_data['ann3'] = y_pred






In [69]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pandas as pd

# Calculate Mean Squared Error (MSE)
ANN3_call_mse = mean_squared_error(ann3_call_data['price'], ann3_call_data['ann3'])

# Calculate Root Mean Squared Error (RMSE)
ANN3_call_rmse = np.sqrt(ANN3_call_mse)

# Calculate Mean Absolute Error (MAE)
ANN3_call_mae = mean_absolute_error(ann3_call_data['price'], ann3_call_data['ann3'])

# Calculate Mean Absolute Percentage Error (MAPE)
ANN3_call_mape = np.mean(np.abs((ann3_call_data['price'] - ann3_call_data['ann3']) / ann3_call_data['price'])) * 100

# Calculate R-squared
ANN3_call_r_squared = r2_score(ann3_call_data['price'], ann3_call_data['ann3'])

print("Mean Squared Error (MSE):", ANN3_call_mse)
print("Root Mean Squared Error (RMSE):", ANN3_call_rmse)
print("Mean Absolute Error (MAE):", ANN3_call_mae)
print("Mean Absolute Percentage Error (MAPE):", ANN3_call_mape)
print("R-squared:", ANN3_call_r_squared)


Mean Squared Error (MSE): 22.964732541297
Root Mean Squared Error (RMSE): 4.792654376765
Mean Absolute Error (MAE): 2.100045506345
Mean Absolute Percentage Error (MAPE): 480.89095466
R-squared: 0.7405480685008606


In [76]:
import numpy as np
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model

# Load the training data (assuming you have access to it)
# This is necessary to fit the scaler properly.
# Replace `training_data` with the actual training DataFrame used earlier
training_data = pd.DataFrame(ann3_put_data)  # Replace with actual training data

# Separate features and target in the training data
y_train = training_data['price']
X_train = training_data.drop('price', axis=1)

# Fit the scaler on the training data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)

# Save the fitted scaler
joblib.dump(scaler, 'scaler_ann3_put.pkl')

# Now proceed with predictions on new data

# Load the new data (which you want to predict)
# Assuming `ann3_put_data` is your new data that needs predictions
y_put = ann3_put_data['price'].values  # Save target column for later use
ann3_put_data = ann3_put_data.drop('price', axis=1)  # Drop target column

# Convert new data to NumPy array and ensure correct data type
X_new = np.array(ann3_put_data, dtype=np.float32)

# Load the scaler and transform the new data
scaler = joblib.load('scaler_ann3_put.pkl')
X_new = scaler.transform(X_new)

# Check the shape of X_new
current_shape = X_new.shape[1]
expected_shape = 4181
if current_shape < expected_shape:
    # Add columns of zeros to match the expected shape
    zeros_to_add = expected_shape - current_shape
    X_new = np.hstack((X_new, np.zeros((X_new.shape[0], zeros_to_add), dtype=np.float32)))


# Make predictions
y_pred = ANN3_put.predict(X_new)

# Reconstruct the original DataFrame with predictions
ann3_put_data['price'] = y_put
ann3_put_data['ann3'] = y_pred



 1/44 [..............................] - ETA: 4s





In [78]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pandas as pd

# Calculate Mean Squared Error (MSE)
ANN3_put_mse = mean_squared_error(ann3_put_data['price'], ann3_put_data['ann3'])

# Calculate Root Mean Squared Error (RMSE)
ANN3_put_rmse = np.sqrt(ANN3_put_mse)

# Calculate Mean Absolute Error (MAE)
ANN3_put_mae = mean_absolute_error(ann3_put_data['price'], ann3_put_data['ann3'])

# Calculate Mean Absolute Percentage Error (MAPE)
ANN3_put_mape = np.mean(np.abs((ann3_put_data['price'] - ann3_put_data['ann3']) / ann3_put_data['price'])) * 100

# Calculate R-squared
ANN3_put_r_squared = r2_score(ann3_put_data['price'], ann3_put_data['ann3'])

print("Mean Squared Error (MSE):", ANN3_put_mse)
print("Root Mean Squared Error (RMSE):", ANN3_put_rmse)
print("Mean Absolute Error (MAE):", ANN3_put_mae)
print("Mean Absolute Percentage Error (MAPE):", ANN3_put_mape)
print("R-squared:", ANN3_put_r_squared)

Mean Squared Error (MSE): 24.48397678487
Root Mean Squared Error (RMSE): 4.948187295
Mean Absolute Error (MAE): 4.948
Mean Absolute Percentage Error (MAPE): 501.2357
R-squared: 0.72653765


# 5) RNN performance

In [157]:
import pandas as pd 

data = pd.read_csv('Data/new_data.csv')

data = data.replace({"False": 0, "True": 1})
data = data.replace({False: 0, True: 1})
data = data.replace({"2024-03-08": 0})


columns_to_remove = ['Unnamed: 0','lastTradeDate','address1', 'city', 'country', 'industry', 'sector','recommendationKey','expirationDate','exchange','timeZoneShortName']

data.drop(columns_to_remove, axis = 1, inplace= True)
data = pd.get_dummies(data, columns=['ticker','state'])


data

Unnamed: 0,strike,price,bid,ask,change,percentChange,volume,openInterest,sigma,inTheMoney,...,ticker_WLDN,ticker_WRK,ticker_WYY,ticker_XRAY,state_CA,state_MI,state_NC,state_OH,state_OR,state_TX
0,2.5,0.50,14.84,14.92,0.0,0.0,88844.0,283.0,1.003608,1.0,...,False,False,False,False,False,False,False,True,False,False
1,5.0,0.05,14.84,14.92,0.0,0.0,88844.0,1527.0,1.003608,0.0,...,False,False,False,False,False,False,False,True,False,False
2,7.5,0.08,14.84,14.92,0.0,0.0,88844.0,3081.0,1.003608,0.0,...,False,False,False,False,False,False,False,True,False,False
3,2.5,0.10,14.84,14.92,0.0,0.0,88844.0,297.0,1.003608,0.0,...,False,False,False,False,False,False,False,True,False,False
4,5.0,2.10,14.84,14.92,0.0,0.0,88844.0,0.0,1.003608,1.0,...,False,False,False,False,False,False,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2969,12.5,3.20,14.84,14.92,0.0,0.0,88844.0,3.0,0.488141,1,...,False,False,False,False,False,False,False,True,False,False
2970,17.5,0.30,14.84,14.92,0.0,0.0,88844.0,1.0,0.488141,0,...,False,False,False,False,False,False,False,True,False,False
2971,10.0,0.05,14.84,14.92,0.0,0.0,88844.0,2.0,0.488141,0,...,False,False,False,False,False,False,False,True,False,False
2972,12.5,1.45,14.84,14.92,0.0,0.0,88844.0,1.0,0.488141,0,...,False,False,False,False,False,False,False,True,False,False


In [158]:
rnn_data = data

rnn_call_data = rnn_data[rnn_data.call == 1]
rnn_put_data = rnn_data[rnn_data.call == 0]

rnn_call_data.drop('call', axis = 1, inplace = True)
rnn_put_data.drop('call', axis = 1, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rnn_call_data.drop('call', axis = 1, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rnn_put_data.drop('call', axis = 1, inplace = True)


In [151]:
from keras.models import load_model

# Load the model
RNN_call = load_model('models\\rnn\\RNN_call.keras')
RNN_put = load_model('models\\rnn\\RNN_put.keras')

In [159]:
import numpy as np
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model

# Load the training data (assuming you have access to it)
# Replace `training_data` with the actual training DataFrame used earlier
training_data = pd.DataFrame(rnn_call_data)  # Replace with actual training data

# Separate features and target in the training data
y_train = training_data['price']
X_train = training_data.drop('price', axis=1)

# Fit the scaler on the training data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)

# Save the fitted scaler
joblib.dump(scaler, 'scaler_rnn_call.pkl')

# Now proceed with predictions on new data

# Load the new data (which you want to predict)
# Assuming `rnn_call_data` is your new data that needs predictions
y_call = rnn_call_data['price'].values  # Save target column for later use
rnn_call_data = rnn_call_data.drop('price', axis=1)  # Drop target column

# Convert new data to NumPy array and ensure correct data type
X_new = np.array(rnn_call_data, dtype=np.float32)

# Load the scaler and transform the new data
scaler = joblib.load('scaler_rnn_call.pkl')
X_new = scaler.transform(X_new)

# Check the shape of X_new
current_shape = X_new.shape[1]
expected_shape = 1000  # This should match the number of features used during training
if current_shape < expected_shape:
    # Add columns of zeros to match the expected shape
    zeros_to_add = expected_shape - current_shape
    X_new = np.hstack((X_new, np.zeros((X_new.shape[0], zeros_to_add), dtype=np.float32)))

# Reshape X_new to be compatible with RNN input (samples, timesteps, features)
X_new_reshaped = X_new.reshape((X_new.shape[0], 1, X_new.shape[1]))

# Make predictions
y_pred = RNN_call.predict(X_new_reshaped)

# Reconstruct the original DataFrame with predictions
rnn_call_data['price'] = y_call
rnn_call_data['rnn'] = y_pred

print(rnn_call_data.head())


 9/50 [====>.........................] - ETA: 0s



   strike    bid    ask    change  percentChange   volume  openInterest  \
0     2.5  14.84  14.92  0.000000        0.00000  88844.0         283.0   
1     5.0  14.84  14.92  0.000000        0.00000  88844.0        1527.0   
2     7.5  14.84  14.92  0.000000        0.00000  88844.0        3081.0   
5   120.0  14.84  14.92  0.000000        0.00000  88844.0           4.0   
6   125.0  14.84  14.92  4.599999       40.70796  88844.0          11.0   

      sigma inTheMoney   stock  ...  ticker_WYY  ticker_XRAY  state_CA  \
0  1.003608        1.0    2.58  ...       False        False     False   
1  1.003608        0.0    2.64  ...       False        False     False   
2  1.003608        0.0    2.81  ...       False        False     False   
5  0.541935        1.0  162.46  ...       False        False     False   
6  0.541935        1.0  133.83  ...       False        False     False   

   state_MI  state_NC  state_OH  state_OR  state_TX  price         rnn  
0     False     False      True

In [160]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pandas as pd

# Calculate Mean Squared Error (MSE)
RNN_call_mse = mean_squared_error(rnn_call_data['price'], rnn_call_data['rnn'])

# Calculate Root Mean Squared Error (RMSE)
RNN_call_rmse = np.sqrt(RNN_call_mse)

# Calculate Mean Absolute Error (MAE)
RNN_call_mae = mean_absolute_error(rnn_call_data['price'], rnn_call_data['rnn'])

# Calculate Mean Absolute Percentage Error (MAPE)
RNN_call_mape = np.mean(np.abs((rnn_call_data['price'] - rnn_call_data['rnn']) / rnn_call_data['price'])) * 100

# Calculate R-squared
RNN_call_r_squared = r2_score(rnn_call_data['price'], rnn_call_data['rnn'])

print("Mean Squared Error (MSE):", RNN_call_mse)
print("Root Mean Squared Error (RMSE):", RNN_call_rmse)
print("Mean Absolute Error (MAE):", RNN_call_mae)
print("Mean Absolute Percentage Error (MAPE):", RNN_call_mape)
print("R-squared:", RNN_call_r_squared)



ValueError: Input contains NaN.