In [505]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Dropout, LSTM, GRU, Input

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, recall_score, precision_score, f1_score

In [506]:
# Read the raw price table from disk into a DataFrame.
DATA_PATH = "./data/source_price_alt.csv"
df = pd.read_csv(DATA_PATH)

In [507]:
# Use the 'date' column as the row index.
# Guarded so the cell is idempotent: re-running it after 'date' has already
# been moved into the index is a no-op instead of failing on the missing column.
if 'date' in df.columns:
    df = df.set_index('date')

In [508]:
# Extract the adjusted close column by name, so the values that get scaled are
# guaranteed to be the same column that is dropped below (the previous
# positional lookup silently depended on 'Adj Close' being the last column).
adjust_close = df['Adj Close'].values

# Normalize adjusted close data.
# NOTE(review): the scaler is fit on the full series before the train/test
# split, so the test rows' min/max influence the training inputs. Consider
# fitting on the training rows only.
scaler = MinMaxScaler()
scaled_adjust_close = scaler.fit_transform(adjust_close.reshape(-1, 1))

# Remove the un-normalized column.
source_data = df.drop(['Adj Close'], axis=1)

# Append the normalized values as the LAST column; later cells read the
# prediction target from column index -1.
source_data["scaled_adj_close"] = scaled_adjust_close

In [509]:
# Hold out the trailing 15% of rows as the test set. shuffle=False keeps the
# rows in their original order instead of sampling them at random.
train_validation_data, test_data = train_test_split(
    source_data,
    train_size=0.85,
    test_size=0.15,
    shuffle=False,
)

In [510]:
# The first TRAIN_ROWS rows are used for training; everything after them in
# the train/validation partition is kept for validation.
TRAIN_ROWS = 83
train_data = train_validation_data.iloc[:TRAIN_ROWS]
validation_data = train_validation_data.iloc[TRAIN_ROWS:]

In [511]:
# Split data into input X and output Y: `look_back` past rows as X and the
# `forward_days` rows that follow them as Y.
def processData(data, look_back, forward_days, jump=1):
    """Cut a sequence into (history window, future window) pairs.

    Parameters
    ----------
    data : sliceable sequence (list, ndarray, ...)
        Rows ordered in time; sliced along its first axis.
    look_back : int
        Number of consecutive rows in each input window.
    forward_days : int
        Number of rows immediately after the input window used as the target.
    jump : int, default 1
        Step between the start positions of successive windows.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``A`` stacks the input windows and ``B`` the matching target windows.
        Both are empty arrays when `data` is too short for a single pair.
    """
    # Stop early enough that every target window is complete. The old bound
    # (len(data) - look_back) ignored forward_days, so for forward_days > 1 the
    # trailing targets were truncated and the stacked result was ragged.
    # max(..., 1) keeps the bound identical to the old one for forward_days <= 1.
    last_start = len(data) - look_back - max(forward_days, 1) + 1

    A, B = [], []
    for start in range(0, last_start, jump):
        split = start + look_back
        A.append(data[start:split])
        B.append(data[split:split + forward_days])
    return np.array(A), np.array(B)

In [512]:
# Model configuration.

# Windowing: number of past rows fed to the model and number of future rows predicted.
look_back, forward_days = 5, 1

# Network: units per LSTM layer and the dropout rate applied after each LSTM layer.
NUM_NEURONS, DROUP_OUT = 25, 0.2

# Training schedule: passes over the training set and samples per gradient step.
EPOCHES, BATCH_SIZE = 10, 32

In [513]:
# Convert each partition to a plain NumPy matrix.
m_training_data, m_validation_data, m_test_data = (
    frame.to_numpy() for frame in (train_data, validation_data, test_data)
)

# Cut every partition into (past window, coming days) pairs.
X_train, Y_train = processData(m_training_data, look_back, forward_days)
X_validation, Y_validation = processData(m_validation_data, look_back, forward_days)
X_test, Y_test = processData(m_test_data, look_back, forward_days)

# Reduce each target to a single number: the first coming day's last column
# (the scaled adjusted close, which was appended as the final column).
Y_train, Y_validation, Y_test = (
    targets[:, 0, -1] for targets in (Y_train, Y_validation, Y_test)
)

In [514]:
# Build the stacked LSTM regressor: four LSTM layers, each followed by dropout,
# topped by a dense layer that emits one value per forecast day.
timesteps, n_features = X_train.shape[1], X_train.shape[2]

lstm_model = Sequential()

# First LSTM layer declares the input window shape and passes full sequences on.
lstm_model.add(
    LSTM(units=NUM_NEURONS, return_sequences=True, input_shape=(timesteps, n_features))
)
lstm_model.add(Dropout(DROUP_OUT))

# Two intermediate LSTM layers that also return full sequences.
for hidden_idx in range(2):
    lstm_model.add(LSTM(units=NUM_NEURONS, return_sequences=True))
    lstm_model.add(Dropout(DROUP_OUT))

# Last LSTM layer collapses the sequence to its final state.
lstm_model.add(LSTM(units=NUM_NEURONS))
lstm_model.add(Dropout(DROUP_OUT))

# Output layer.
lstm_model.add(Dense(forward_days))

In [515]:
# Compile the LSTM model with a mean-squared-error loss and the Adam optimizer.
lstm_model.compile(loss='mean_squared_error', optimizer='adam')

# Fit on the training windows. The validation windows were previously built but
# never used; passing them here reports val_loss after every epoch (they are
# only evaluated, not trained on), which makes over-fitting visible.
lstm_model.fit(
    X_train,
    Y_train,
    epochs=EPOCHES,
    batch_size=BATCH_SIZE,
    validation_data=(X_validation, Y_validation),
)

# Generate predictions from the test set (still in the scaler's normalized units).
lstm_predictions = lstm_model.predict(X_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [516]:
# Undo the min-max normalization so that both the targets and the predictions
# are expressed in the original adjusted-close units again.
Y_test = np.reshape(Y_test, (-1, 1))  # the scaler works on a 2-D column
Y_test_scaled = scaler.inverse_transform(Y_test)
lstm_X_test_scaled = scaler.inverse_transform(lstm_predictions)

In [517]:
def convert_to_classifier(values, debug=False):
    """Turn a value series into up/down labels for consecutive steps.

    Parameters
    ----------
    values : indexable sequence
        Ordered values; each element is compared with its predecessor.
    debug : bool, default False
        When True, print every comparison.

    Returns
    -------
    numpy.ndarray
        Float array of length ``max(len(values) - 1, 0)``. Entry ``i`` is 1 when
        ``values[i + 1] > values[i]`` and 0 otherwise (ties count as 0).
    """
    # An empty input used to request np.zeros(-1) and raise ValueError; clamp
    # so empty and single-element inputs simply yield an empty label array.
    n_moves = max(len(values) - 1, 0)
    binary_results = np.zeros(n_moves)

    for idx in range(1, len(values)):
        current, previous = values[idx], values[idx - 1]
        if current > previous:
            binary_results[idx - 1] = 1
            if debug:
                print(f'Increase: {current} vs {previous}')
        elif debug:
            print(f'Decrease: {current} vs {previous}')

    return binary_results

In [518]:
def calc_mda(test_binary, prediction_binary, debug=False):
    """Return the share of positions where predicted and actual labels agree.

    Every position of `test_binary` is compared with the same position of
    `prediction_binary`; the number of matches is divided by
    ``len(prediction_binary)``. With ``debug=True`` each comparison and the
    final tally are printed.
    """
    hits = 0
    for position, _actual in enumerate(test_binary):
        predicted, actual = prediction_binary[position], test_binary[position]
        if debug:
            print(f'Predicted: {predicted} vs actual {actual}')
        if predicted == actual:
            hits += 1

    total = len(prediction_binary)
    if debug:
        print(f'Correct Predictions: {hits} out of {total}.')

    return hits / total

In [519]:
# Regression error between actual and predicted values, both already mapped
# back to the original units (MSE is symmetric in its two arguments).
lstm_mse = mean_squared_error(y_true=Y_test_scaled, y_pred=lstm_X_test_scaled)

# Reduce both series to up/down labels, then measure how often the predicted
# direction matches the actual one.
test_binary = convert_to_classifier(Y_test_scaled, debug=False)
prediction_binary = convert_to_classifier(lstm_X_test_scaled, debug=False)
mda = calc_mda(test_binary, prediction_binary)


In [520]:
# Calculate recall.
recall = recall_score(test_binary, prediction_binary)

# Calculate precision.
precision = precision_score(test_binary, prediction_binary)

# Calculate f1_score.
f1_score = f1_score(test_binary, prediction_binary)

In [521]:
recall

0.4

In [522]:
precision

0.2857142857142857

In [523]:
f1_score

0.3333333333333333

In [524]:
# Mean directional accuracy: share of test steps whose predicted up/down move matches the actual one.
mda

0.38461538461538464

In [525]:
# Test-set mean squared error, computed on the inverse-transformed (original-unit) values.
lstm_mse

1059.5492302373573