In [166]:
#import Deep Learning related libraries
import pandas as pd
import sklearn
import tensorflow
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

In [167]:
#load data
dataset = pd.read_csv("full all label and feature.csv")

In [168]:
#show dataset
dataset

Unnamed: 0.1,Unnamed: 0,Date,SNP500_High,SNP500_Low,Bitcoin_High,Bitcoin_Low,Oil_High,Oil_Low,Gold_High,Gold_Low,EURO_To_USD_Rate,Interest
0,0,11/28/2022,76.17,73.84,16481.4,16060.8,76.17,73.84,1753.85,1745.80,1.0472,1.41
1,1,11/27/2022,76.35,75.84,16595.4,16414.6,76.35,75.84,1770.60,1750.10,1.0367,1.41
2,2,11/25/2022,79.89,76.23,16686.3,16387.9,79.89,76.23,1761.20,1745.90,1.0402,1.41
3,3,11/24/2022,78.19,76.92,16611.8,16360.9,78.19,76.92,1758.95,1752.55,1.0408,1.41
4,4,11/23/2022,81.95,76.83,16785.6,16472.0,81.95,76.83,1754.90,1719.00,1.0418,1.41
...,...,...,...,...,...,...,...,...,...,...,...,...
2039,2039,01/08/2015,49.65,47.73,1260.2,1119.0,49.65,47.73,1216.40,1206.90,1.1793,0.13
2040,2040,01/07/2015,49.31,46.83,1256.9,1236.4,49.31,46.83,1217.50,1210.00,1.1839,0.13
2041,2041,01/06/2015,50.37,47.55,1255.5,1218.4,50.37,47.55,1222.00,1206.00,1.1889,0.13
2042,2042,01/05/2015,52.73,49.68,1245.6,1205.6,52.73,49.68,1205.20,1189.10,1.1934,0.13


In [169]:
#processing dataset
dataset = dataset.drop(columns=['Unnamed: 0'])

#delete the column 'Date' because its not a column we can generalize over, thus useless for predictive model.
dataset = dataset.drop(columns=['Date']) 

dataset

Unnamed: 0,SNP500_High,SNP500_Low,Bitcoin_High,Bitcoin_Low,Oil_High,Oil_Low,Gold_High,Gold_Low,EURO_To_USD_Rate,Interest
0,76.17,73.84,16481.4,16060.8,76.17,73.84,1753.85,1745.80,1.0472,1.41
1,76.35,75.84,16595.4,16414.6,76.35,75.84,1770.60,1750.10,1.0367,1.41
2,79.89,76.23,16686.3,16387.9,79.89,76.23,1761.20,1745.90,1.0402,1.41
3,78.19,76.92,16611.8,16360.9,78.19,76.92,1758.95,1752.55,1.0408,1.41
4,81.95,76.83,16785.6,16472.0,81.95,76.83,1754.90,1719.00,1.0418,1.41
...,...,...,...,...,...,...,...,...,...,...
2039,49.65,47.73,1260.2,1119.0,49.65,47.73,1216.40,1206.90,1.1793,0.13
2040,49.31,46.83,1256.9,1236.4,49.31,46.83,1217.50,1210.00,1.1839,0.13
2041,50.37,47.55,1255.5,1218.4,50.37,47.55,1222.00,1206.00,1.1889,0.13
2042,52.73,49.68,1245.6,1205.6,52.73,49.68,1205.20,1189.10,1.1934,0.13


In [170]:
#defining features and labels
labels = dataset["SNP500_High"]
features = dataset.drop(columns=["SNP500_High","SNP500_Low"])
print(labels.shape, features.shape)

(2044,) (2044, 8)


In [171]:
labels

0       76.17
1       76.35
2       79.89
3       78.19
4       81.95
        ...  
2039    49.65
2040    49.31
2041    50.37
2042    52.73
2043    55.11
Name: SNP500_High, Length: 2044, dtype: float64

In [172]:
features

Unnamed: 0,Bitcoin_High,Bitcoin_Low,Oil_High,Oil_Low,Gold_High,Gold_Low,EURO_To_USD_Rate,Interest
0,16481.4,16060.8,76.17,73.84,1753.85,1745.80,1.0472,1.41
1,16595.4,16414.6,76.35,75.84,1770.60,1750.10,1.0367,1.41
2,16686.3,16387.9,79.89,76.23,1761.20,1745.90,1.0402,1.41
3,16611.8,16360.9,78.19,76.92,1758.95,1752.55,1.0408,1.41
4,16785.6,16472.0,81.95,76.83,1754.90,1719.00,1.0418,1.41
...,...,...,...,...,...,...,...,...
2039,1260.2,1119.0,49.65,47.73,1216.40,1206.90,1.1793,0.13
2040,1256.9,1236.4,49.31,46.83,1217.50,1210.00,1.1839,0.13
2041,1255.5,1218.4,50.37,47.55,1222.00,1206.00,1.1889,0.13
2042,1245.6,1205.6,52.73,49.68,1205.20,1189.10,1.1934,0.13


In [173]:

#split features and labels for training and testing
features_train, features_test, labels_train, labels_test = train_test_split(features, labels, test_size=0.33, random_state=12)

#select columns with numerical features (returns a list of column name with numerical data)
numerical_features = features.select_dtypes(include = ['float64', 'int64'])
numerical_columns = numerical_features.columns

#defines a ColumnTransformer
ct = ColumnTransformer([("only numeric", StandardScaler(), numerical_columns)], remainder='passthrough')

#normalize values in features dataset
features_train_scaled = ct.fit_transform(features_train)
features_test_scaled = ct.transform(features_test)

#ct.transform and fit_transform returns numpy array, convert back to dataframe
features_train_scaled = pd.DataFrame(features_train_scaled, columns = features_train.columns)
features_test_scaled = pd.DataFrame(features_test_scaled, columns = features_test.columns)

In [174]:
#building the deep learning model

#create an instance of a Sequential model
my_model = Sequential()

#defining and adding an input layer
num_features = dataset.shape[1]
input = InputLayer(input_shape = (num_features),)
my_model.add(input)

#adding hidden layer with 64 units and relu activation function
my_model.add(Dense(64, activation = "relu"))

#adding an output layer
my_model.add(Dense(1))

#defining an Adam optimizer
my_opt = Adam(learning_rate = 0.01)

#specify loss function, metrics, and optimizer
my_model.compile(loss = 'mse', metrics = ['mae'], optimizer = my_opt)

In [176]:
#training the model
my_model.fit(features_train, labels_train, epochs = 40, batch_size = 1, verbose = 1)

Epoch 1/40


ValueError: in user code:

    File "d:\Users\ryank\anaconda3\lib\site-packages\keras\engine\training.py", line 1051, in train_function  *
        return step_function(self, iterator)
    File "d:\Users\ryank\anaconda3\lib\site-packages\keras\engine\training.py", line 1040, in step_function  **
        Epoch at which to start training
    File "d:\Users\ryank\anaconda3\lib\site-packages\keras\engine\training.py", line 1030, in run_step  **
        Numpy array with the same length as the input samples
    File "d:\Users\ryank\anaconda3\lib\site-packages\keras\engine\training.py", line 889, in train_step
        train_function = tf.function(
    File "d:\Users\ryank\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "d:\Users\ryank\anaconda3\lib\site-packages\keras\engine\input_spec.py", line 264, in assert_input_compatibility
        'incompatible with the layer: '

    ValueError: Input 0 of layer "sequential_10" is incompatible with the layer: expected shape=(None, 10), found shape=(1, 8)


In [None]:
#evaluating the model
res_mse, res_mae = my_model.evaluate(features_test_scaled, labels_test, verbose = 0)
print("mse: ",res_mse," mae: ",res_mae)