# Define Dynamic Parameters

In [12]:
# Index of the CSV (within the list of files found in the dataset folder) to train on.
file_no = 0
# Seed passed to both the TensorFlow and NumPy random generators.
my_seed = 41291
# Number of units in the first LSTM layer.
lstm_units = 100
# Number of units in the optional second LSTM layer; 0 means no second layer is added.
lstm_2_units = 0
# Number of training epochs passed to fit().
no_epochs = 120
# Dropout rate passed to every LSTM layer.
dropout = 0
# Whether fit() shuffles the training samples.
shuffle_bool = False
# Activation of the final 3-unit Dense output layer.
activation = 'sigmoid'
# Sub-folder (with leading separator) appended to the processed-dataset path.
trendpath = '\\5d'

# Import Packages

In [13]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import glob
import os
from sklearn.model_selection import  train_test_split
import tensorflow as tf
import ntpath
import kerastuner as kt
import csv 


# Seed TensorFlow's and NumPy's global random generators with the configured seed.
tf.random.set_seed(my_seed)
np.random.seed(my_seed)

# Import CSVs

In [14]:
# Resolve the dataset folder relative to the notebook's working directory.
ROOT_DIR = os.path.abspath(os.curdir)
# trendpath carries a leading separator (e.g. '\\5d'); strip it so the folder
# name can be joined with the platform's own separator instead of a literal '\\'.
path = os.path.join(ROOT_DIR, '..', '..', 'Datasets', 'processed', 'lstm',
                    trendpath.lstrip('\\/'))
print(path)

# glob makes no ordering guarantee; sort so file_no always selects the same CSV.
csvfiles = sorted(glob.glob(os.path.join(path, '*.csv')))
if not csvfiles:
    raise FileNotFoundError('No CSV files found in ' + path)

dfs = [pd.read_csv(file) for file in csvfiles]

# Keep the file/stock names in plain variables rather than as ad-hoc attributes
# on the DataFrame: such attributes do not survive drop()/concat(), which
# return new objects.
df = dfs[file_no]
df_filename = ntpath.basename(csvfiles[file_no])
df_name = df_filename.split(".", 1)[0]
print("Stock: ")
print(df_name)
print("File: ")
print(df_filename)


C:\FYP - Luke Bezzina\Code\mlpLearning\..\..\Datasets\processed\lstm\5d
Stock: 
AAL_5dtrend
File: 
AAL_5dtrend.csv


# Encoding Output Variable

In [15]:
# Target classes, listed in the column order the one-hot labels must follow.
price_classification = ['Positive', 'Neutral', 'Negative']

# One-hot encode the label column. reindex pins the column order and
# zero-fills any class that never occurs in this file.
price_direction = df['PriceDirection']
one_hot_dummies = pd.get_dummies(price_direction).reindex(
    columns=price_classification, fill_value=0)

# Swap the raw label column for the encoded columns, appended as the last three.
df = pd.concat([df.drop(columns=['PriceDirection']), one_hot_dummies], axis=1)

print(df)


           Date  OpenPrice  ClosePrice  Trend  VolumeTrend  Volatility  \
0    2017-01-10      46.01       47.08 -0.107     0.004485    0.412824   
1    2017-01-11      47.00       48.48 -0.475    -0.017248    0.901230   
2    2017-01-12      49.29       48.64 -0.777    -0.007244    1.131777   
3    2017-01-13      48.67       48.10 -0.534     0.021062    0.922657   
4    2017-01-17      48.16       47.65 -0.076     0.061017    0.568929   
..          ...        ...         ...    ...          ...         ...   
995  2020-12-22      15.70       16.10  0.217    -0.014183    0.321907   
996  2020-12-23      16.35       15.48  0.346    -0.080775    0.511390   
997  2020-12-24      15.52       15.89  0.285    -0.046441    0.462800   
998  2020-12-28      16.04       15.66  0.191     0.227002    0.358408   
999  2020-12-29      15.96       16.06 -0.010     0.129281    0.236846   

     Positive  Neutral  Negative  
0           1        0         0  
1           0        0         1  
2     

# Splitting dataset into dependent and independent variables

In [16]:
# Drop the first column (Date) and convert the remaining feature and one-hot
# label columns to a float array. np.float64 replaces the np.float alias,
# which NumPy deprecated in 1.20 and removed in 1.24.
data = np.array(df.iloc[:, 1:].values, dtype=np.float64)

# Splitting dataset (training + testing)

In [17]:
# Hold out 10% of the rows for testing. shuffle=False keeps the rows in their
# original order, so the split is sequential rather than random.
data_train, data_test = train_test_split(
    data, test_size=0.1, shuffle=False, random_state=32)

for split_label, split_rows in (("Train", data_train), ("Test", data_test)):
    print(split_label)
    print(split_rows)

Train
[[ 4.601e+01  4.708e+01 -1.070e-01 ...  1.000e+00  0.000e+00  0.000e+00]
 [ 4.700e+01  4.848e+01 -4.750e-01 ...  0.000e+00  0.000e+00  1.000e+00]
 [ 4.929e+01  4.864e+01 -7.770e-01 ...  0.000e+00  0.000e+00  1.000e+00]
 ...
 [ 1.103e+01  1.108e+01  1.660e-01 ...  1.000e+00  0.000e+00  0.000e+00]
 [ 1.123e+01  1.147e+01 -4.000e-03 ...  1.000e+00  0.000e+00  0.000e+00]
 [ 1.185e+01  1.256e+01 -3.110e-01 ...  1.000e+00  0.000e+00  0.000e+00]]
Test
[[ 1.30000000e+01  1.30400000e+01 -5.32000000e-01 -5.65895056e-01
   7.98789084e-01  1.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 1.28600000e+01  1.30300000e+01 -5.47000000e-01 -1.99340386e-01
   8.13009225e-01  1.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 1.33000000e+01  1.40000000e+01 -5.53000000e-01 -1.56421214e-01
   8.21583836e-01  0.00000000e+00  0.00000000e+00  1.00000000e+00]
 [ 1.50000000e+01  1.37300000e+01 -3.30000000e-01 -1.48536942e-02
   5.21359761e-01  0.00000000e+00  0.00000000e+00  1.00000000e+00]
 [ 1.40000000

# Split Dataset into LSTM sequences

In [18]:
def split_sequences(dataset, n, n_labels=3):
    """Split a multivariate sequence into overlapping fixed-length samples.

    Each sample is a window of ``n`` consecutive rows. Its features are every
    column except the trailing ``n_labels`` columns; its target is the trailing
    ``n_labels`` columns of the window's last row.

    Parameters
    ----------
    dataset : 2-D array-like, shape (rows, features + n_labels)
    n : int
        Window length in rows; must be at least 1.
    n_labels : int, default 3
        Number of trailing columns that hold the (one-hot) target.

    Returns
    -------
    x : ndarray, shape (windows, n, features)
    y : ndarray, shape (windows, n_labels)
        ``windows`` is ``rows - n + 1``, or 0 when the dataset has fewer than
        ``n`` rows. The arrays keep their full rank in that case, so callers
        can still read ``x.shape[2]``.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")

    dataset = np.asarray(dataset)
    n_features = dataset.shape[1] - n_labels
    # Every window must contain exactly n rows, so the last valid start is rows - n.
    n_windows = max(len(dataset) - n + 1, 0)

    x = np.empty((n_windows, n, n_features), dtype=dataset.dtype)
    y = np.empty((n_windows, n_labels), dtype=dataset.dtype)
    for start in range(n_windows):
        sequence_end = start + n
        x[start] = dataset[start:sequence_end, :n_features]
        # The target comes from the final row of the window.
        y[start] = dataset[sequence_end - 1, n_features:]

    return x, y

# Window length: every sample spans this many consecutive rows.
no_steps = 10
# Train and test windows are built separately, so no window straddles the split.
x_train, y_train = split_sequences(data_train, no_steps)
x_test, y_test = split_sequences(data_test, no_steps)

# Number of features per time step, read from the last axis of the windowed array.
no_features = x_train.shape[2]
print(x_train.shape)

(891, 10, 5)


# Building LSTM model

In [19]:
# define model
# A second LSTM layer is stacked only when lstm_2_units is non-zero.
use_second_lstm = lstm_2_units != 0

lstm = tf.keras.models.Sequential()
# The first layer returns the full sequence only when another LSTM consumes it.
lstm.add(tf.keras.layers.LSTM(units=lstm_units, activation='relu',
                              input_shape=(no_steps, no_features),
                              dropout=dropout,
                              return_sequences=use_second_lstm))
if use_second_lstm:
    # NOTE(review): input_shape on a non-first layer is presumably redundant - confirm.
    lstm.add(tf.keras.layers.LSTM(units=lstm_2_units, activation='relu',
                                  input_shape=(no_steps, no_features),
                                  dropout=dropout))

# One output unit per price-direction class.
lstm.add(tf.keras.layers.Dense(3, activation=activation))

# NOTE(review): the targets are one-hot (one class per sample); softmax with
# categorical_crossentropy is the usual pairing - confirm that sigmoid with
# binary_crossentropy is intentional.
optimizer = tf.keras.optimizers.Adam()
lstm.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
lstm.fit(x_train, y_train, batch_size=32, epochs=no_epochs, shuffle=shuffle_bool)

Train on 891 samples
Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120


<tensorflow.python.keras.callbacks.History at 0x24599711c08>

# Predicting Test Set Results

In [20]:
y_pred = lstm.predict(x_test)

# Turn each row of scores into a one-hot boolean row marking the highest-scoring
# class (the first one on ties). No 0.5 threshold is applied.
# np.argmax replaces the hand-written loop, which rebound the builtins `max`
# and `iter` in the notebook namespace and always chose class 0 when no score
# was greater than 0.
predicted_class = np.argmax(y_pred, axis=1)
y_pred_binary = np.zeros(y_pred.shape, dtype=bool)
y_pred_binary[np.arange(len(y_pred)), predicted_class] = True

# Obtaining Confusion Matrix and Accuracy Score for predictions

In [21]:
%%capture cap

# The metric helpers are imported inside this cell. The import also rebinds
# `accuracy_score` to the function on every run, which matters because the name
# is reused below for the resulting score (the results cell reads it as a number).
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score, roc_auc_score

# Collapse the one-hot rows to class indices (0=Positive, 1=Neutral, 2=Negative).
y_true_labels = np.argmax(y_test, axis=1)
y_pred_labels = np.argmax(y_pred_binary, axis=1)

# One 2x2 matrix per class: rows = actual label (0, 1), columns = predicted label (0, 1).
cm = multilabel_confusion_matrix(y_true_labels, y_pred_labels)
print(cm)

print()
print("Accuracy Score: ")
accuracy_score = accuracy_score(y_true_labels, y_pred_labels)
print(accuracy_score)

print("Area Under Curve Score: ")
# The AUC is computed on the one-hot predictions, not on the raw scores in y_pred.
roc_score = roc_auc_score(y_test, y_pred_binary)
print(roc_score)

[[[ 9 33]
  [12 37]]

 [[86  0]
  [ 5  0]]

 [[41 13]
  [29  8]]]

Accuracy Score: 
0.4945054945054945
0.4674723698770526


NameError: name 'fadf' is not defined

# Output Results

In [None]:
# Tag identifying this run's configuration; used as the CSV row key and the log file name.
output_file_name = df_name + '_' + str(lstm_units) + 'units_' + str(no_epochs) + 'epc_' + str(my_seed) + '_' + str(dropout)+'_second'+str(lstm_2_units)

# Use a dedicated name for the result row so the feature array `data` consumed
# by the train/test split cell is not overwritten with a list.
result_row = [output_file_name, accuracy_score, roc_score]

# Build the output paths with os.path.join (no hard-coded '\\') and create the
# results folder if it is missing, so a fresh checkout can run this cell.
results_dir = os.path.join(ROOT_DIR, 'lstm_results')
os.makedirs(results_dir, exist_ok=True)

# Append this run's scores to the shared results table.
with open(os.path.join(results_dir, 'results_roc.csv'), 'a', newline="") as f:
    writer = csv.writer(f)
    writer.writerow(result_row)

# Save the stdout captured by the evaluation cell (%%capture cap) next to it.
with open(os.path.join(results_dir, output_file_name + '.txt'), 'w') as f:
    f.write(cap.stdout)


