In [None]:
# General packages
import pandas as pd
import numpy as np
import hvplot.pandas
import datetime as dt

# Packages related to machine learning
# for neural networks
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K

# Reset Keras' global state and seed NumPy and TensorFlow so that repeated runs
# start from the same random state (weight initialisation is stochastic).
# K is tensorflow.keras.backend, so a single clear_session() call does what the
# two back-to-back calls used to do.
# NOTE(review): full run-to-run determinism may also depend on settings that are
# not visible in this notebook (e.g. Python's own `random` module, GPU kernels).
SEED = 42
K.clear_session()
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Silence every Python warning so that cell output stays uncluttered.
# NOTE(review): this is global for the kernel and also hides deprecation and
# numerical warnings raised by the libraries used below.
from warnings import filterwarnings
filterwarnings(action="ignore")

# import modules
from functions.vix_mod import vix_analysis
from functions.spy_mod import spy_analysis
from functions.econ_mod import get_econ_data
from functions.sent_mod import market_sent
from functions.create_train_test_mod import create_train_test_tables
from functions.test_scaled_mod import scale_test
from functions.train_scaled_mod import scale_train
from functions.nn_model_mod import nn_reg_model
from functions.nn_class_model_mod import nn_class_model

In [4]:
# Build the train/test tables via the project helper (per the original note it
# pulls the data from external APIs) and unpack its six return values.
(
    X_train,
    y_train,
    X_test,
    y_test,
    X_prep_train,
    X_prep_test,
) = create_train_test_tables()
print("data has been prepared")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
data has been prepared


In [5]:
# Scale the features and split each data set into an overall table plus three
# sub-groups (0/1/2); per the original note these are the high, low and average
# volatility groups.
# y_train and y_test are rebound here to the values returned by the scaling helpers.

# Training data.
(
    X_train_scaled,
    X0_train_scaled,
    X1_train_scaled,
    X2_train_scaled,
    y_train,
    y0_train,
    y1_train,
    y2_train,
) = scale_train(X_train, X_prep_train)

# Test data. X_train is passed in as well -- presumably so the scaling fitted on
# the training set is reused for the test set; confirm in functions/test_scaled_mod.
(
    X_test_scaled,
    X0_test_scaled,
    X1_test_scaled,
    X2_test_scaled,
    y_test,
    y0_test,
    y1_test,
    y2_test,
) = scale_test(X_train, X_test, X_prep_test)

print("data has been scaled")

data has been scaled


In [None]:
# Train the regression network on the scaled training features.
# Per the original note, the helper keeps re-running in a loop until it produces
# desirable testing outcomes (that logic lives in functions/nn_model_mod and is
# not visible here).
# `saved_nn_model` is used later for .predict(); `nn_model` is used later for its
# .history (loss / val_loss curves).
(
    saved_nn_model,
    nn_model,
) = nn_reg_model(X_train_scaled, y_train)
print("finished training regression model")

In [None]:
# Train the classification network on the scaled training features.
# Per the original note, the helper keeps re-running in a loop until it produces
# desirable testing outcomes (that logic lives in functions/nn_class_model_mod
# and is not visible here).
# Note the return order differs from nn_reg_model: the fit object (used later
# for .history) comes first, the model used for .predict() second.
(
    nn_class_fit,
    saved_nn_class_model,
) = nn_class_model(X_train_scaled, X_prep_train)
print("finished training classification model")

In [None]:
# Predict on the test set with BOTH trained networks, score their directional
# accuracy against the real values, and compare a simple trading strategy
# against holding the market.
#
# Fix: `prediction1` (stored as 'prediction_reg') used to come from the classifier
# as well, so the "regression" metrics and the regression half of the strategy
# signal merely duplicated the classifier. It now comes from the regression model.
prediction1 = saved_nn_model.predict(X_test_scaled)        # regression network
prediction2 = saved_nn_class_model.predict(X_test_scaled)  # classification network

# np.ravel makes each prediction explicitly 1-D before it becomes a column
# (assumes each model emits one value per test row -- TODO confirm against the
# model definitions in functions/).
results_df = pd.DataFrame(np.ravel(prediction1), columns=['prediction_reg'], index = X_prep_test.index)
results_df['prediction_class'] = np.ravel(prediction2)
results_df['real'] = X_prep_test['y']

# Direction of the real value and of each model's signal.
# Regression: sign of the prediction; classification: .5 cut-off on the output.
real_up = results_df['real'] >= 0
real_down = results_df['real'] < 0

# "type 1 correct": real value non-negative and the model agreed;
# "type 2 correct": real value negative and the model agreed.
results_df['type_1_correct_reg'] = np.where(real_up & (results_df['prediction_reg'] >= 0), 1, 0)
results_df['type_2_correct_reg'] = np.where(real_down & (results_df['prediction_reg'] < 0), 1, 0)
results_df['type_1_correct_class'] = np.where(real_up & (results_df['prediction_class'] >= .5), 1, 0)
results_df['type_2_correct_class'] = np.where(real_down & (results_df['prediction_class'] < .5), 1, 0)

# 'real' is divided by 100 before compounding (presumably it is a percentage
# return -- confirm against create_train_test_tables).
real_fraction = results_df['real'] / 100
results_df['real_returns'] = (1 + real_fraction).cumprod() - 1

# Each model contributes a position of 1 (bullish signal) or -.5 (bearish signal).
# NOTE(review): the two positions are summed, so the strategy's exposure is 2x,
# 0.5x or -1x while the benchmark above is always 1x -- confirm this leverage is
# intended before comparing the two curves.
position_reg = np.where(results_df['prediction_reg'] >= 0, 1, -.5)
position_class = np.where(results_df['prediction_class'] >= .5, 1, -.5)
results_df['strategy_returns'] = (1 + (position_reg * real_fraction + position_class * real_fraction)).cumprod() - 1


def _as_percent(numerator, denominator):
    """Return numerator / denominator as a percentage rounded to 4 decimals."""
    return round((numerator / denominator) * 100, 4)


# Denominators shared by both models' metrics.
rows_total = results_df['type_1_correct_reg'].count()
rows_up = results_df.loc[real_up, 'real'].count()
rows_down = results_df.loc[real_down, 'real'].count()

# Calculate accuracy and type 1 & 2 accuracy for regression
accuracy_reg = _as_percent(results_df['type_1_correct_reg'].sum() + results_df['type_2_correct_reg'].sum(), rows_total)
type1_reg = _as_percent(results_df['type_1_correct_reg'].sum(), rows_up)
type2_reg = _as_percent(results_df['type_2_correct_reg'].sum(), rows_down)
# Calculate accuracy and type 1 & 2 accuracy for classification
accuracy_class = _as_percent(results_df['type_1_correct_class'].sum() + results_df['type_2_correct_class'].sum(), rows_total)
type1_class = _as_percent(results_df['type_1_correct_class'].sum(), rows_up)
type2_class = _as_percent(results_df['type_2_correct_class'].sum(), rows_down)

# Print results
print(f'Our nural network regression model had total test accuracy of {accuracy_reg}%, with a type 1 accuracy of {type1_reg}% and a type 2 accuracy of {type2_reg}%.')
print(f'Our nural network classification model had total test accuracy of {accuracy_class}%, with a type 1 accuracy of {type1_class}% and a type 2 accuracy of {type2_class}%.')
display(results_df.tail())
display(pd.DataFrame(nn_class_fit.history).tail())
display(results_df.hvplot(y=['real_returns','strategy_returns'], title="Machine Learning Strategy Return vs Market"))
# Training vs. validation loss curves for each network.
display(pd.DataFrame(nn_model.history)[['loss','val_loss']].hvplot(title = "Mean Square Error Training (blue) vs. Mean Square Error Validation (red)"))
display(pd.DataFrame(nn_class_fit.history)[['loss','val_loss']].hvplot(title = "Binary Crossentropy Training (blue) vs. Binary Crossentropy Validation (red)"))