In [1]:
#import packages
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Activation
import time
import plotly.plotly as py
import plotly.graph_objs as go

Using TensorFlow backend.


In [2]:
df = pd.read_csv('C:/Users/Desktop/Desktop/Dissertation/snp500/stock_dfs/IBM.csv')

In [3]:
df.head()

Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close
0,2000-01-03,116.0,111.875,112.4375,116.0,10347700.0,80.076149
1,2000-01-04,114.5,110.875,114.0,112.0625,8227800.0,77.358032
2,2000-01-05,119.75,112.125,112.9375,116.0,12733200.0,80.076149
3,2000-01-06,118.9375,113.5,118.0,114.0,7971900.0,78.695503
4,2000-01-07,117.9375,110.625,117.25,113.5,11856700.0,78.350372


In [4]:
df.shape

(4800, 7)

In [5]:
df.index = df['Date']
df_adjclose = df['Adj Close']

In [6]:
#creating dataframe
data = df.sort_index(ascending=True, axis=0)
new_data = pd.DataFrame(index=range(0,len(df)),columns=['Date', 'Adj Close'])
for i in range(0,len(data)):
    new_data['Date'][i] = data['Date'][i]
    new_data['Adj Close'][i] = data['Adj Close'][i]

In [7]:
trace = go.Scatter(
    x = df.index,
    y = df_adjclose, mode = 'lines'
)
data = [trace]
layout = dict(title = 'SP500',
              xaxis = dict(title = 'Year'),
              yaxis = dict(title = 'Adj Close'),
              )
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='SP500_LSTM')


Consider using IPython.display.IFrame instead



In [8]:
#setting index
new_data.index = new_data.Date
new_data.drop('Date', axis=1, inplace=True)

In [9]:
#split into train and validation
#creating train and test sets
dataset = new_data.values
train = dataset[:4700]
valid = dataset[4700:]

In [10]:
#Normalising data
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(dataset)


Data with input dtype object was converted to float64 by MinMaxScaler.



In [11]:
x_train, y_train = [], []
for i in range(60,len(train)):
    x_train.append(scaled_data[i-60:i,0])
    y_train.append(scaled_data[i,0])
x_train, y_train = np.array(x_train), np.array(y_train)

In [12]:
x_train = np.reshape(x_train, (x_train.shape[0],x_train.shape[1],1))

In [13]:
# create and fit the LSTM network
model = Sequential()

model.add(LSTM(input_dim=1, output_dim=50, return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(100, return_sequences=False))
model.add(Dropout(0.2))

model.add(Dense(output_dim=1))
model.add(Activation('linear'))

start = time.time()
model.compile(loss='mse', optimizer='adam')
print ('compilation time : ', time.time() - start)


The `input_dim` and `input_length` arguments in recurrent layers are deprecated. Use `input_shape` instead.


Update your `LSTM` call to the Keras 2 API: `LSTM(return_sequences=True, input_shape=(None, 1), units=50)`



compilation time :  0.0991201400756836



Update your `Dense` call to the Keras 2 API: `Dense(units=1)`



In [14]:
#model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train, y_train, epochs=18, batch_size=128, validation_split=0.05)

Train on 4408 samples, validate on 232 samples
Epoch 1/18
Epoch 2/18
Epoch 3/18
Epoch 4/18
Epoch 5/18
Epoch 6/18
Epoch 7/18
Epoch 8/18
Epoch 9/18
Epoch 10/18
Epoch 11/18
Epoch 12/18
Epoch 13/18
Epoch 14/18
Epoch 15/18
Epoch 16/18
Epoch 17/18
Epoch 18/18


<keras.callbacks.History at 0x241f2b94be0>

In [15]:
#predicting values, using past 60 from the train data

inputs = new_data[len(new_data) - len(valid) - 60:].values
inputs = inputs.reshape(-1,1)
inputs = scaler.transform(inputs)
X_test = []
for i in range(60, inputs.shape[0]):
    X_test.append(inputs[i-60:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = model.predict(X_test)
predicted_stock_price = scaler.inverse_transform(predicted_stock_price)

In [16]:
rms=np.sqrt(np.mean(np.power((valid-predicted_stock_price),2)))
rms

5.973112427782613

In [17]:
print(predicted_stock_price)

[[141.91306 ]
 [141.90092 ]
 [141.85907 ]
 [141.84479 ]
 [141.85536 ]
 [142.00755 ]
 [142.21492 ]
 [142.42549 ]
 [142.65697 ]
 [142.91492 ]
 [143.28513 ]
 [143.72069 ]
 [144.10492 ]
 [144.37167 ]
 [144.68465 ]
 [145.00742 ]
 [145.3036  ]
 [145.65869 ]
 [146.0736  ]
 [146.47757 ]
 [146.75023 ]
 [146.79831 ]
 [146.65898 ]
 [146.34389 ]
 [145.69292 ]
 [144.66185 ]
 [143.53378 ]
 [142.42702 ]
 [141.61319 ]
 [140.4639  ]
 [138.9625  ]
 [137.23175 ]
 [135.49689 ]
 [133.92583 ]
 [132.34512 ]
 [130.80527 ]
 [129.29056 ]
 [127.60193 ]
 [125.661255]
 [123.6603  ]
 [121.80887 ]
 [120.10333 ]
 [118.82722 ]
 [118.07679 ]
 [117.81196 ]
 [117.887955]
 [118.187675]
 [118.467766]
 [118.70513 ]
 [118.85988 ]
 [119.01432 ]
 [119.16671 ]
 [119.240555]
 [119.08756 ]
 [118.86363 ]
 [118.53266 ]
 [118.27893 ]
 [118.120865]
 [118.19877 ]
 [118.35006 ]
 [118.690216]
 [119.19352 ]
 [119.57443 ]
 [119.97057 ]
 [120.10632 ]
 [120.15295 ]
 [120.12657 ]
 [120.06961 ]
 [119.97436 ]
 [119.81524 ]
 [119.4167  ]
 [118.

In [18]:
#for plotting
train = new_data[:4700]
valid = new_data[4700:]
trace0 = go.Scatter(x = train.index, y=train['Adj Close'], mode = 'lines', name ='Train')
trace1 = go.Scatter(x = valid.index, y=valid['Adj Close'], mode = 'lines', name = 'Valid')
trace2 = go.Scatter(x = valid.index, y=predicted_stock_price, mode = 'lines', name = 'Predition')
layout = dict(title = 'LSTM',
              xaxis = dict(title = 'Date'),
              yaxis = dict(title = 'Adj Close'),
              )
data = [trace0, trace1, trace2]
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='LSTM')


Consider using IPython.display.IFrame instead

