In [2]:
#import Deep Learning related libraries
import pandas as pd
import sklearn
import tensorflow
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

ModuleNotFoundError: No module named 'gast'

In [None]:
#load data
filename = "SNP500Dataset.csv"
dataset = pd.read_csv(filename)

In [None]:
#show dataset
dataset

Unnamed: 0.1,Unnamed: 0,Date,SNP500_High,SNP500_Low,Bitcoin_High,Bitcoin_Low,Oil_High,Oil_Low,Gold_High,Gold_Low,EURO_To_USD_Rate,Interest
0,0,11/28/2022,76.17,73.84,16481.4,16060.8,76.17,73.84,1753.85,1745.80,1.0472,1.41
1,1,11/27/2022,76.35,75.84,16595.4,16414.6,76.35,75.84,1770.60,1750.10,1.0367,1.41
2,2,11/25/2022,79.89,76.23,16686.3,16387.9,79.89,76.23,1761.20,1745.90,1.0402,1.41
3,3,11/24/2022,78.19,76.92,16611.8,16360.9,78.19,76.92,1758.95,1752.55,1.0408,1.41
4,4,11/23/2022,81.95,76.83,16785.6,16472.0,81.95,76.83,1754.90,1719.00,1.0418,1.41
...,...,...,...,...,...,...,...,...,...,...,...,...
2039,2039,01/08/2015,49.65,47.73,1260.2,1119.0,49.65,47.73,1216.40,1206.90,1.1793,0.13
2040,2040,01/07/2015,49.31,46.83,1256.9,1236.4,49.31,46.83,1217.50,1210.00,1.1839,0.13
2041,2041,01/06/2015,50.37,47.55,1255.5,1218.4,50.37,47.55,1222.00,1206.00,1.1889,0.13
2042,2042,01/05/2015,52.73,49.68,1245.6,1205.6,52.73,49.68,1205.20,1189.10,1.1934,0.13


In [None]:
#processing dataset

#delete the columns that we can't generalize over, thus useless for predictive model.
dataset = dataset.drop(columns=['Unnamed: 0'])
dataset = dataset.drop(columns=['Date']) 
dataset = dataset.drop(columns=['SNP500_Low'])

dataset

#defining features and labels
labels = dataset.iloc[:,-1]
features = dataset.iloc[:,0:-1]
print(labels.shape, features.shape)

#apply one-hot-encoding to categorical columns
features = pd.get_dummies(features)

#split features and labels for training and testing
features_train, features_test, labels_train, labels_test = train_test_split(features, labels, test_size=0.33, random_state=12)

#select columns with numerical features (returns a list of column name with numerical data)
numerical_features = features.select_dtypes(include = ['float64', 'int64'])
numerical_columns = numerical_features.columns

#defines a ColumnTransformer
ct = ColumnTransformer([("only numeric", StandardScaler(), numerical_columns)], remainder='passthrough')

#normalize values in features dataset
features_train_scaled = ct.fit_transform(features_train)
features_test_scaled = ct.transform(features_test)

#ct.transform and fit_transform returns numpy array, convert back to dataframe
features_train_scaled = pd.DataFrame(features_train_scaled, columns = features_train.columns)
features_test_scaled = pd.DataFrame(features_test_scaled, columns = features_test.columns)

(2044,) (2044, 8)


In [None]:
#building the deep learning model

#create an instance of a Sequential model
my_model = Sequential()

#defining and adding an input layer
num_features = features_train_scaled.shape[1]
input = InputLayer(input_shape = (num_features),)
my_model.add(input)

#adding hidden layer with 64 units and relu activation function
my_model.add(Dense(64, activation = "relu"))

#adding an output layer
my_model.add(Dense(1))

#defining an Adam optimizer
my_opt = Adam(learning_rate = 0.01)

#specify loss function, metrics, and optimizer
my_model.compile(loss = 'mse', metrics = ['mae'], optimizer = my_opt)

In [None]:
features_train_scaled

Unnamed: 0,SNP500_High,Bitcoin_High,Bitcoin_Low,Oil_High,Oil_Low,Gold_High,Gold_Low,EURO_To_USD_Rate
0,1.869272,0.202120,0.238668,1.869272,1.845602,1.238635,1.263125,-1.902974
1,-1.333164,-0.383858,-0.421963,-1.333164,-1.368959,-1.307390,-1.334102,-0.646974
2,-0.536654,-0.662702,-0.659082,-0.536654,-0.494834,-0.623351,-0.680798,-0.080107
3,1.058482,1.111483,1.139609,1.058482,1.100473,1.102045,1.109560,0.575680
4,-1.346916,0.942454,0.929856,-1.346916,-1.444071,1.021184,1.044490,-0.713664
...,...,...,...,...,...,...,...,...
1364,0.132393,-0.593607,-0.600734,0.132393,0.222537,-0.523549,-0.491537,1.262960
1365,-0.096617,-0.756469,-0.747754,-0.096617,-0.004975,-0.728251,-0.698646,1.027692
1366,-0.702726,-0.889028,-0.884928,-0.702726,-0.652133,-0.669973,-0.682286,-0.167175
1367,-0.686330,-0.404151,-0.378003,-0.686330,-0.626007,-0.695470,-0.681914,0.042158


In [None]:
#training the model
my_model.fit(features_train_scaled, labels_train, epochs = 40, batch_size = 1, verbose = 1)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x7f4a5f4197d0>

In [None]:
#evaluating the model
res_mse, res_mae = my_model.evaluate(features_test_scaled, labels_test, verbose = 0)
print("mse: ",res_mse," mae: ",res_mae)

mse:  0.11001990735530853  mae:  0.22972986102104187
