# Keras Sequential Artificial Neural Network (Output: ReLU/Sigmoid Activation) for Market Trading - updated 12/03/2022 ver 2.0

## <i> i.) Import Libraries and Dependencies </i>

In [None]:
# Import libraries and dependencies
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from pathlib import Path
import os
import re

In [None]:
# Import Keras ANN Modeling Libraries & Sklearn Preprocessing Modules
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Dropout
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset

# Import Keras ANN Dropout & Regularizer Build Processing Modules
# Import dropout
from keras.layers import Dropout
# Import regularizers
from keras.regularizers import l1, l2

## <i> ii.) Prepare the DataFrames </i>

In [None]:
# Locate the input .csv and derive the asset tag ('algoData_asset') from its
# file name; the tag is reused later when titling plots and naming output files.
filename = Path("../Datasets/algoData_S&P500.csv")

# Split the path once and reuse the pieces for both the debug output and the tag.
_base_name = os.path.basename(filename)
_stem_and_ext = os.path.splitext(_base_name)

display(_base_name)
display(os.path.dirname(filename))
display(os.path.splitext(filename))
display(_stem_and_ext)

# File names follow '<prefix>_<asset>.csv'; the asset is the second '_' field.
algoData_asset = _stem_and_ext[0].split('_')[1]
print(algoData_asset)

#print(os.path.splitext(filename))

In [None]:
# Confirm algoData_asset is type(str)
type(algoData_asset)

In [None]:
# Import the .csv file as a dataframe, then shift the 'ActualReturns' column
# forward by one row with .shift().
# Per the original author's note, both 'Signal' and 'ActualReturns' are stored
# shifted back (.shift(-1)) in the raw .csv file, so the forward shift re-aligns
# 'ActualReturns' to its neutral time-series position.

# Import filename & convert to dataframe, using the 'Date' column as a parsed
# datetime index.
# NOTE(review): 'infer_datetime_format' is deprecated in pandas >= 2.0 -
# confirm the pinned pandas version before removing it.
df = pd.read_csv(filename, 
                index_col='Date', 
                parse_dates=True,
                infer_datetime_format=True)
display(df.head(3))
display(df.tail(3))

# Hold the re-aligned returns in their own dataframe: .shift() with no argument
# moves every value down one row, leaving NaN in the first row.
actual_returns_shift_df = pd.DataFrame()
actual_returns_shift_df['Norm Actual Returns'] = df['ActualReturns'].shift()

In [None]:
actual_returns_shift_df = actual_returns_shift_df.dropna()

In [None]:
display(actual_returns_shift_df)

In [None]:
# Create list for non-normalized columns to be dropped out if required
%pprint
col_names = list(df.columns.values)
display(col_names)
display(f"Number of unique columns: {len(col_names)}")

## <i> ii.) Split Data into Training & Test Sets </i>

In [None]:
# Build the feature frame X and the target frame y.
# Columns removed from the raw data: identifiers ('Ticker', 'Date.1'), raw
# OHLCV prices, the un-normalized indicators and their z-score variants.
# The '*_normal' indicator columns are NOT dropped (normal_feat is defined for
# reference only), so they remain in X together with any other column that is
# not listed below.

must_drop_feat = ['Ticker', 'Date.1']

OHLCV = ['Open', 'High', 'Low', 'Close', 'Volume']

# Base indicator names; the normalized and z-scored variants share these names
# with a suffix appended, in the same order.
_indicators = [
    'EMAShort', 'EMALong', 'RSIline', 'MOMline', 'ROCline', 'SMAline',
    'lowerBB', 'middleBB', 'upperBB', '2stdBB',
    'MACDline', 'MACDHistogram', 'MACDSignal',
]

standard_feat = list(_indicators)
normal_feat = [f"{_name}_normal" for _name in _indicators]
zscore_feat = [f"{_name}_zscore" for _name in _indicators]

# Remove every excluded column in one pass (drop returns a new frame, so the
# raw df is left untouched).
X = df.drop(columns=zscore_feat + standard_feat + OHLCV + must_drop_feat)

# Overwrite the raw 'ActualReturns' with the forward-shifted (re-aligned) series.
X['ActualReturns'] = actual_returns_shift_df['Norm Actual Returns']

# Drop incomplete rows first so that X and y keep exactly the same index.
X = X.dropna()
y = X[['Signal']]
X = X.drop(columns=['Signal'])

display(f"Number of included columns in final X-features: {len(X.columns)}")
display(X)
display(y)

In [None]:
# Chronological train/test split: the first 145 MONTHS of data (measured from
# the earliest index value) are used for training, everything from that cut-off
# onwards for testing.

# Select training begin
training_begin = X.index.min()
print(f"Training begin: {training_begin}.")

# Select training end
# Note: months = 145 for all assets except ETH-USD (months= 14)
# Note: months = 145 for all assets except BTC-USD (months= 70)
training_end = X.index.min() + DateOffset(months = 145)
print(f"Training end: {training_end}.")

# Label slices with .loc include BOTH endpoints, so slicing train as
# [begin:end] and test as [end:] would place a row stamped exactly
# 'training_end' in both sets. The training side is therefore made exclusive;
# the test side keeps the inclusive slice so it stays aligned with the other
# frames that are cut with .loc[training_end:].
train_mask = X.index < training_end
X_train = X.loc[train_mask]
y_train = y.loc[train_mask]

X_test = X.loc[training_end:]
y_test = y.loc[training_end:]

# Create Dataframe just to hold 'ActualReturns' values for end of calculations
X_ActualReturns = actual_returns_shift_df.loc[training_end:]

# Also save a second X_test_aligned_index for final dataframe
X_test_aligned_index = X.loc[training_end:]

In [None]:
# Test output y_train
y_train

In [None]:
# Test output y_test
y_test

In [None]:
# Function to convert all sell signals (-1) to be compatible with the output sigmoid activation function

def convert_neg(y_df):
    """Replace every -1 in the 'Signal' column with 0.

    The network's sigmoid output can only represent labels in {0, 1}, so the
    -1 class is re-coded as 0 before training/evaluation.

    The replacement is done with a single label-based boolean assignment, so
    it works for any index type (dates, arbitrary integers, ...) and does not
    rely on chained indexing.

    Parameters
    ----------
    y_df : pandas.DataFrame
        Frame containing a 'Signal' column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``y_df`` object, for call-chaining convenience.
    """
    y_df.loc[y_df['Signal'] == -1, 'Signal'] = 0
    return y_df

In [None]:
# Convert an explicit copy: calling .copy() without using its result has no
# effect, and converting the slice directly would write into data shared with y.
y_train = convert_neg(y_train.copy())

In [None]:
# Convert an explicit copy: calling .copy() without using its result has no
# effect, and converting the slice directly would write into data shared with y.
y_test = convert_neg(y_test.copy())

In [None]:
# Re-test output y_train after -1->0 conversion for sigmoid output compatiblity 
y_train

In [None]:
# Re-test output y_test after -1->0 conversion for sigmoid output compatiblity 
y_test

In [None]:
# Training & Test Set Dimensions

display(f"X Dataframe shape: {X.shape}.")
display(f"y Dataframe shape: {y.shape}.")
display(f"X_train Dataframe shape: {X_train.shape}.")
display(f"y_train shape: {y_train.shape}.")
display(f"X_test Dataframe shape: {X_test.shape}.")
display(f"y_test shape: {y_test.shape}.")

In [None]:
# Convert y_train/y_test dataframe ['Signal'] columns values to array for Keras Sequential fitting 

y_train = y_train['Signal'].values
display(y_train[0:3])
display(y_train[-1])
y_test = y_test['Signal'].values
display(y_test[0:3])
display(y_test[-1])

In [None]:
# Finalize the y_train & y_test data into a trainable array
# Reshape into vertical vectors for use in Sequential Model algorithm

#y_train = y_train['Signal'].values.astype("int32")
#y_train = y_train.reshape(-1,1).astype("int32")
y_train = y_train.reshape(-1,1).astype("int32")

#y_test = y_test['Signal'].values.astype("int32")
y_test = y_test.reshape(-1,1).astype("int32")


display((f"Number of final X_test features to run into ANN: {len(X_test.columns)}."))
display(y_train[0:10])
display(y_test[0:10])

In [None]:
# Create the StandardScaler instance
X_scaler = StandardScaler()

# Fit the scaler on the training features only; the test features are then
# transformed with the training statistics (no separate fit on the test set).
X_scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Display X_train_scaled & X_test_scaled shapes
display(f"X_train_scaled shape: {X_train_scaled.shape}")
display(f"X_test_scaled shape: {X_test_scaled.shape}")

## <i> iii.) Constructing the ANN Model Using Sequential Model </i>

### <i> Defining the Input Parameters to the ANN: </i>

In [None]:
# Size the network: count the input features, fix a single output neuron, and
# derive a rule-of-thumb width for the hidden layer:
#     N_h = N_s / (alpha_ * (N_i + N_o))

# Number of inputs (features) to the model
number_input_features = len(X_train.iloc[0])
display(f"Number of input features (N_i): {number_input_features}.")

# Number of neurons in the output layer
number_output_neurons = 1
display(f"Number of output features (N_o): {number_output_neurons}.")

# N_i: number of input neurons, N_o: number of output neurons,
# N_s: number of samples in the training set,
# alpha_: arbitrary scaling factor (the original note suggests 2-10).
N_i, N_o = number_input_features, number_output_neurons
N_s = len(X_train)
alpha_ = 6
for _label, _value in (("N_i", N_i), ("N_o", N_o), ("N_s", N_s), ("alpha_", alpha_)):
    display(f"{_label}: {_value}")

# N_h: suggested number of hidden-layer neurons, rounded to the nearest integer
print()
N_h = int(round(N_s / (alpha_ * (N_i + N_o))))
display(f"Final ideal value of N_h (ideal number of hidden nodes in [minimal] first layer) should be: {N_h}.")

In [None]:
# Build the feed-forward network: three ReLU hidden layers of 8 units each,
# one L2-regularized ReLU layer of 14 units, and a single sigmoid output unit.

# Hidden-layer widths are fixed by hand here; the computed N_h is not used.
#hidden_nodes_layer1 = N_h
hidden_nodes_layer1 = 8
hidden_nodes_layer2 = 8
hidden_nodes_layer3 = 8

# Review the number of hidden nodes in the first layer
display(f"Number of hidden node layers (layer 1): {hidden_nodes_layer1}.")

# Instantiate the Artificial Neural Network 'Sequential' Model

nn = Sequential()

# Disabled experiment: LSTM (long short-term memory) input layer

#nn.add(LSTM(units=10, activation="relu", input_shape=(X_train_scaled.shape[0], X_train_scaled.shape[1])))

# First hidden layer; input_dim fixes the expected number of input features
nn.add(Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))

# Second hidden layer
nn.add(Dense(units=(hidden_nodes_layer2), activation="relu"))

# Third hidden layer
nn.add(Dense(units=(hidden_nodes_layer3), activation="relu"))

# Disabled experiment: dropout layer (no Dropout layer is part of this model)
#model.add(Dropout(.2,input_shape=(10,)))
#nn.add(Dropout(.1,input_shape=(number_input_features,)))

# Fourth hidden layer: 14 ReLU units with L2 penalties (0.01) on both the
# kernel weights and the biases
#nn.add(Dense(5, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)))
nn.add(Dense(units=(14), activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)))

# Output layer: one sigmoid unit, matching the 0/1 encoded 'Signal' target
nn.add(Dense(units=1, activation="sigmoid"))


# Check the structure of the model
nn.summary()

In [None]:
# Compile the Sequential Model: binary cross-entropy loss for the 0/1 target,
# Adam optimizer, accuracy tracked as the reported metric.
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Fit the model on the scaled training data for 275 epochs.
# 1 epoch      = one full pass of all training samples (forward + backward)
# batch size   = number of samples processed per weight update
# iterations per epoch = number of samples / batch_size
# shuffle=False keeps the samples in their original order within each epoch.

print("Fit model on X_train_scaled data")
fit_model = nn.fit(X_train_scaled, y_train, epochs=275, verbose=2, batch_size=256, shuffle=False)

# Plot the training history: one figure for accuracy, one for loss
fig, ax = plt.subplots(1) # Creates figure fig and add an axes, ax.
fig2, ax2 = plt.subplots(1) # Another figure

ax.set_title(f' Artificial Neural Network (ReLU/Sigmoid Output Act.) - {algoData_asset} Accuracy Plot')
ax.plot(fit_model.history['accuracy'], label='train')
ax.legend()  # the curve is labelled, so show the label
fig.savefig(f'../Datasets/algoData_ANN_model_acc_loss/{algoData_asset}_ann_accuracy_.png')


ax2.set_title(f' Artificial Neural Network (ReLU/Sigmoid Output Act.) - {algoData_asset} Loss Plot')
ax2.plot(fit_model.history['loss'], label='train')
ax2.legend()  # the curve is labelled, so show the label
fig2.savefig(f'../Datasets/algoData_ANN_model_acc_loss/{algoData_asset}_ann_loss_.png')

In [None]:
# Evaluate the model using testing data
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)

# Display evaluation results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Predict class labels for the scaled test features.
# NOTE(review): the message printed below says "Evaluate" although this cell
# only runs predictions - consider rewording it.

print("Evaluate model on X_test_scaled data")
# Alternative kept for reference: casting the raw outputs straight to int32
#predictions = nn.predict(X_test_scaled).astype("int32")
# Threshold the model outputs: values strictly greater than 0.51 become 1,
# everything else becomes 0.
predictions = (nn.predict(X_test_scaled) > 0.51).astype("int32")

In [None]:
display(predictions[0:10])

In [None]:
y_test = y_test.flatten()

In [None]:
results = pd.DataFrame({"predictions": predictions.flatten(), "actual": y_test.flatten()})
#results = pd.DataFrame({"predictions": predictions.ravel(), "actual": y_test}, index=[0])
display(results.value_counts())
display(results.head(10))
display(results.tail(10))

In [None]:
# Function to convert all 0 predictions back to -1 (inverse of convert_neg)

def convert_back_to_neg(y_df):
    """Replace every 0 in the 'predictions' column with -1.

    Restores the original -1/1 signal encoding after the model has produced
    0/1 class labels. Only the 'predictions' column is touched.

    The replacement is done with a single label-based boolean assignment, so
    it works for any index type and does not rely on chained indexing.

    Parameters
    ----------
    y_df : pandas.DataFrame
        Frame containing a 'predictions' column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``y_df`` object, for call-chaining convenience.
    """
    y_df.loc[y_df['predictions'] == 0, 'predictions'] = -1
    return y_df

In [None]:
# Convert an explicit copy: calling .copy() without using its result has no effect.
results = convert_back_to_neg(results.copy())

In [None]:
results

In [None]:
results.value_counts()

In [None]:
# Get the original Actual Returns column from the original imported dataset

df_final = df[['ActualReturns']]
df_final = df_final.loc[training_end:]
df_final

In [None]:
# Create function to compile cumulative returns for Artificial Neural Network Predicted y_test values

def annReturns(results, X_test, actual_returns=None):
    """Combine predicted signals with actual returns into a cumulative-returns frame.

    Parameters
    ----------
    results : pandas.DataFrame
        Frame with a 'predictions' column, one row per test sample, in the
        same order as ``X_test``. It is read positionally and left unmodified.
    X_test : pandas.DataFrame
        Test features; only its index is used, as the index of the output.
    actual_returns : array-like, optional
        Actual per-period returns, one per test sample, in the same order as
        ``X_test``. When omitted, the notebook-level
        ``df_final['ActualReturns']`` is used.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``X_test`` with the columns 'Prediction_Signal',
        'Actual_Returns', 'annStrategyReturns' (return * signal),
        'cumActual_Returns' and 'cumANNStrategyReturns' (cumulative products
        of 1 + the respective return column).

    Raises
    ------
    ValueError
        If ``results`` or ``actual_returns`` does not have exactly one entry
        per row of ``X_test``.
    """
    if actual_returns is None:
        # Backward-compatible default: the frame prepared earlier in the notebook.
        actual_returns = df_final['ActualReturns']

    # Strip the indexes so both inputs are matched to X_test purely by position.
    signals = results['predictions'].to_numpy()
    realized = np.asarray(actual_returns)

    n_rows = len(X_test.index)
    if len(signals) != n_rows or len(realized) != n_rows:
        raise ValueError(
            f"Length mismatch: X_test has {n_rows} rows, results has "
            f"{len(signals)} and actual returns has {len(realized)}."
        )

    annData = pd.DataFrame(index=X_test.index)
    annData['Prediction_Signal'] = signals
    annData['Actual_Returns'] = realized
    annData['annStrategyReturns'] = annData['Actual_Returns'] * annData['Prediction_Signal']
    annData['cumActual_Returns'] = (1 + annData['Actual_Returns']).cumprod()
    annData['cumANNStrategyReturns'] = (1 + annData['annStrategyReturns']).cumprod()

    return annData

In [None]:
# Run annReturns Function and display sample final cumulative returns dataframe

returns_df = annReturns(results, X_test)
display(returns_df.head(5))
display(returns_df.tail(5))

In [None]:
# Save Results to External .csv File
# Save Finalized Output 'returns_df' dataframe

returns_df.to_csv(f'../Datasets/algoData_ANN_results/{algoData_asset}_ANN_results.csv', index=True)