In [1]:
# Initial imports
import pandas as pd
import numpy as np
from pathlib import Path
from keras.models import Sequential
from keras.layers import Dense, Activation

In [2]:
# Loading data
# Read the CSV at the relative path below into a DataFrame. The path is
# relative, so it is resolved against the kernel's current working directory.
file_path = Path("../Datasets/1dayForward/algoData_gold_1dayForward.csv")
df = pd.read_csv(file_path)

In [None]:
# Generate the categorical outcome variable.
# Signal == 1 -> 'Buy', Signal == -1 -> 'Sell'; any other Signal value is left
# as NaN. `map` creates the column with object dtype directly, instead of
# writing strings into a float (NaN-filled) column, which recent pandas
# versions deprecate.
df['outcome'] = df['Signal'].map({1: 'Buy', -1: 'Sell'})

In [None]:
# Target: the categorical outcome label
y = df['outcome']

# Save the number of unique labels for future use.
# Note: drop_duplicates keeps a single NaN, so a missing outcome (if any)
# counts as its own value here.
number_of_classes = len(list(y.drop_duplicates()))

# Specify X (predictor) variables.
# 'Signal' is dropped as well: 'outcome' is derived directly from it, so
# leaving it among the predictors would leak the target into the features.
X = df.drop(columns=["Result", "Signal", "outcome"])

In [None]:
# Encode the outcome labels as integers
from sklearn.preprocessing import LabelEncoder

# fit_transform learns the label set and returns the integer codes in one
# step; the fitted `encoder` is kept so codes can be mapped back to labels.
encoder = LabelEncoder()
encoded_y = encoder.fit_transform(y)

In [None]:
# Convert the integer labels to a one-hot (Keras `categorical`) matrix.
# `to_categorical` is imported from the public `keras.utils` namespace; the
# `keras.utils.np_utils` module path is not importable in newer Keras releases.
from keras.utils import to_categorical
y_categorical = to_categorical(encoded_y, num_classes=number_of_classes)

In [None]:
# Hold out a test partition, stratified so both partitions keep the same
# label proportions
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_categorical,
    stratify=y,        # stratify on the original (un-encoded) labels
    random_state=42,   # fixed seed so the split is repeatable
)

In [None]:
# Check for class balance
#pd.DataFrame(y_train).sum()

In [None]:
# Build the hidden part of the network: two fully connected ReLU layers

number_of_predictors = X.shape[1]  # one input per predictor column
hidden_nodes_layer1 = 15
hidden_nodes_layer2 = 9

# Layers are passed to the constructor in order; the output layer is added
# to `model` in a later cell.
model = Sequential([
    Dense(hidden_nodes_layer1, input_dim=number_of_predictors, activation='relu'),
    Dense(hidden_nodes_layer2, activation='relu'),
])

In [None]:
# Add the final output layer, with one unit per class.
# The targets are one-hot encoded and every row belongs to exactly one class,
# so softmax is used: the outputs form a probability distribution over the
# classes instead of independent per-class sigmoid scores.
model.add(Dense(number_of_classes, activation='softmax'))

In [None]:
# Compile the model with multi-class parameters: the targets are one-hot
# vectors, so categorical cross-entropy is the matching loss and categorical
# accuracy the matching metric.
model.compile(loss="categorical_crossentropy",
              optimizer="adam",
              metrics=['categorical_accuracy'])

In [None]:
# Summarise the structure of the model (prints the layer-by-layer overview)
model.summary()

In [None]:
# Train the network on the training partition
model.fit(
    x=X_train,
    y=y_train,
    batch_size=750,
    epochs=500,
    shuffle=True,  # reshuffle the training rows before every epoch
)

In [None]:
# Evaluate model on the test data (the partition held out by train_test_split);
# reports the compiled loss and metric values.
model.evaluate(X_test,y_test, verbose=2)

In [None]:
# Save predictions on the test data: one row of per-class scores for each
# test observation (the row-wise argmax is taken in a later cell).
predictions = model.predict(X_test)

In [None]:
# For every observation, pick the class index with the highest predicted score
results = np.argmax(predictions, axis=1)

# `most_likely` refers to the same index array as `results`
most_likely = results
#most_likely

In [None]:
# Convert most likely category back to original labels.
# The integer predictions are read from `results` rather than `most_likely`,
# so re-running this cell does not feed already-decoded labels back into the
# encoder (which would raise on the unseen values).
most_likely = encoder.inverse_transform(results)
#most_likely

In [None]:
# Evaluate prediction balance
#pd.DataFrame(most_likely).value_counts()

In [None]:
from sklearn.metrics import classification_report

# Recover the integer class index of each one-hot test row. argmax handles any
# number of classes, whereas testing only column 0 would fold every other
# class into label 1.
test_results = np.argmax(y_test, axis=1)

# Retained under their original names for any later cell that reads them.
count = len(y_test)
y_test_mod = test_results.tolist()

# Compare the true class indices with the predicted class indices (`results`).
print("Classification Report")
print(classification_report(test_results, results))