# Importing libraries

In [1]:
import matplotlib.pyplot as plt
import statsmodels.tsa.seasonal as smt
import numpy as np
import pandas as pd
import random
import datetime as dt
from sklearn import linear_model 
from sklearn.metrics import mean_absolute_error
import plotly

# import the relevant Keras modules
from keras.models import Sequential
from keras.layers import Activation, Dense
from keras.layers import LSTM
from keras.layers import Dropout

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

from subprocess import check_output
# print(check_output(["ls", "/run/media/sphericalkat/archive/datasets/us-stocks-dataset/Stocks"]).decode("utf8"))
import os
os.chdir('/run/media/sphericalkat/archive/datasets/us-stocks-dataset/Stocks')

Using TensorFlow backend.


# Loading data

In [2]:
filenames = ['prk.us.txt', 'bgr.us.txt', 'jci.us.txt', 'aa.us.txt', 'fr.us.txt', 'star.us.txt', 'sons.us.txt', 'ipl_d.us.txt', 'sna.us.txt', 'utg.us.txt']
# filenames = [filenames[1]]
print(filenames)

data = []
for filename in filenames:
    df = pd.read_csv(filename, sep=',')

    label, _, _ = filename.split(sep='.')
    df['Label'] = filename
    df['Date'] = pd.to_datetime(df['Date'])
    data.append(df)

['prk.us.txt', 'bgr.us.txt', 'jci.us.txt', 'aa.us.txt', 'fr.us.txt', 'star.us.txt', 'sons.us.txt', 'ipl_d.us.txt', 'sna.us.txt', 'utg.us.txt']


# Plotting the data

In [3]:
r = lambda: random.randint(0,255)
traces = []

for df in data:
    df = df.sample(n=1000, replace=True) # sampling makes plotting faster
    df = df.sort_values('Date')
    label = df['Label'].iloc[0].split('.')[0]

    trace = plotly.graph_objs.Scattergl(
        x=df['Date'],
        y=df['Close'],
        mode='lines',
        line=dict(
            color = 'blue'
        ),
        name=label
    )
    traces.append(trace)
    
layout = plotly.graph_objs.Layout(
    title='Plot',
)
fig = plotly.graph_objs.Figure(data=traces, layout=layout)

plotly.offline.init_notebook_mode(connected=True)
plotly.offline.iplot(fig, filename='dataplot')

# Creating windows and normalizing the data

In [4]:
window_len = 10

# Create a data point (i.e. a date) which splits the training and testing set
split_date = list(data[0]["Date"][-(2*window_len+1):])[0]

# Split the training and test set
training_set, test_set = df[df['Date'] < split_date], df[df['Date'] >= split_date]

# Drop unnecessary columns
training_set = training_set.drop(['Date','Label', 'OpenInt'], 1)
test_set = test_set.drop(['Date','Label','OpenInt'], 1)

# Create windows for training
LSTM_training_inputs = []
for i in range(len(training_set)-window_len):
    temp_set = training_set[i:(i+window_len)].copy()
    
    for col in list(temp_set):
        temp_set[col] = temp_set[col]/temp_set[col].iloc[0] - 1
    
    LSTM_training_inputs.append(temp_set)
LSTM_training_outputs = (training_set['Close'][window_len:].values/training_set['Close'][:-window_len].values)-1

LSTM_training_inputs = [np.array(LSTM_training_input) for LSTM_training_input in LSTM_training_inputs]
LSTM_training_inputs = np.array(LSTM_training_inputs)

# Create windows for testing
LSTM_test_inputs = []
for i in range(len(test_set)-window_len):
    temp_set = test_set[i:(i+window_len)].copy()
    
    for col in list(temp_set):
        temp_set[col] = temp_set[col]/temp_set[col].iloc[0] - 1
    
    LSTM_test_inputs.append(temp_set)
LSTM_test_outputs = (test_set['Close'][window_len:].values/test_set['Close'][:-window_len].values)-1

LSTM_test_inputs = [np.array(LSTM_test_inputs) for LSTM_test_inputs in LSTM_test_inputs]
LSTM_test_inputs = np.array(LSTM_test_inputs)

print(LSTM_training_inputs.shape)

(986, 10, 5)


# Create sequential model

In [5]:
model = Sequential()

model.add(LSTM(32, input_shape=(LSTM_training_inputs.shape[1], LSTM_training_inputs.shape[2])))
model.add(Dropout(0.1))
model.add(Dense(units=1, activation='linear'))

model.compile(loss='mae', optimizer='adam')
model.summary()

model.fit(LSTM_training_inputs, LSTM_training_outputs, epochs=20, batch_size=512, verbose=1, validation_data=(LSTM_test_inputs, LSTM_test_outputs))

accuracy = model.evaluate(LSTM_test_inputs, LSTM_test_outputs, verbose=1)
print('\n', 'Test_Accuracy: ', accuracy[1])

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 32)                4864      
_________________________________________________________________
dropout_1 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
Total params: 4,897
Trainable params: 4,897
Non-trainable params: 0
_________________________________________________________________


ValueError: Error when checking input: expected lstm_1_input to have 3 dimensions, but got array with shape (0, 1)

In [None]:
plt.plot(LSTM_test_outputs, label = "actual")
plt.plot(model.predict(LSTM_test_inputs), label = "predicted")
plt.legend()
plt.show()
MAE = mean_absolute_error(LSTM_test_outputs, model.predict(LSTM_test_inputs))

print(model.predict(LSTM_test_inputs))
print('The Mean Absolute Error is: {}'.format(MAE))