In [1]:
import pandas
import numpy

from keras.layers.core import Dense, Activation, Dropout, Flatten
from keras.layers.recurrent import LSTM
from keras.models import Sequential

pandas.set_option('display.max_rows', 10)
pandas.set_option('display.max_columns', 10)

# set a fixed seed for numpy pseudo random generator
numpy.random.seed(100)

Using TensorFlow backend.


In [2]:
data = pandas.read_csv("./datasets/gold_data_20170407to20170810.csv", parse_dates=['date'])

In [3]:
data.head()

Unnamed: 0,date,time,derham.close,derham.high,derham.low,...,iran_gold.close,iran_gold.high,iran_gold.low,iran_gold.open,iran_gold.volume
0,2017-02-28,151200,,,,...,5035000,5035000,5035000,5035000,1
1,2017-02-28,151300,,,,...,5035000,5035000,5035000,5035000,1
2,2017-02-28,151400,,,,...,5035000,5035000,5035000,5035000,1
3,2017-02-28,151500,,,,...,5035000,5035000,5035000,5035000,1
4,2017-02-28,151600,,,,...,5035000,5035000,5035000,5035000,1


In [4]:
data.columns

Index(['date', 'time', 'derham.close', 'derham.high', 'derham.low',
       'derham.open', 'derham.volume', 'dollar.close', 'dollar.high',
       'dollar.low', 'dollar.open', 'dollar.volume', 'iran_gold.close',
       'iran_gold.high', 'iran_gold.low', 'iran_gold.open',
       'iran_gold.volume'],
      dtype='object')

In [5]:
target = 'iran_gold.close'

In [7]:
train_percent = 0.8
lag_step = 1
margin = int(train_percent*data.shape[0])
new_data = pandas.DataFrame({"x": [i for i in data[target]],
                             "y": [i for i in data[target].shift(lag_step)]})
new_data.dropna(inplace=True)
train = new_data.iloc[0:margin]
test = new_data.iloc[margin:]

In [8]:
train.head()
test.head()

Unnamed: 0,x,y
65561,4936000,4936000.0
65562,4936000,4936000.0
65563,4936000,4936000.0
65564,4936000,4936000.0
65565,4936000,4936000.0


In [9]:
def load_data(pdf, mem=100):
    
    docX, docY = [], []
    for i in range(len(pdf)-mem):
        docX.append(pdf.iloc[i:i+mem].as_matrix())
        docY.append(pdf.iloc[i+mem].as_matrix())
        
    return numpy.array(docX), numpy.array(docY)

X_train, Y_train = load_data(train[0:1000])
X_test, Y_test = load_data(test[0:1000])
Y_train

array([[ 5036000.,  5036000.],
       [ 5036000.,  5036000.],
       [ 5036000.,  5036000.],
       ..., 
       [ 5028000.,  5028000.],
       [ 5028000.,  5028000.],
       [ 5028000.,  5028000.]])

In [10]:
in_out_neurons = (None, 2)
hidden_neurons = 100

model = Sequential()
model.add(LSTM(hidden_neurons, input_shape=in_out_neurons, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(in_out_neurons[1], input_dim=hidden_neurons))
model.add(Activation("linear"))
model.compile(loss="mean_squared_error", optimizer="rmsprop")
model.fit(X_train, Y_train, batch_size=500, epochs=10, validation_split=0.1)

pred = model.predict(X_test)
y_hat = pandas.DataFrame(pred, columns=['t-1', 't'])
y = pandas.DataFrame(Y_test, columns=['t-1', 't'])

Train on 810 samples, validate on 90 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [11]:
from bokeh.layouts import gridplot
from bokeh.plotting import figure, show, output_file

In [12]:
p1 = figure(x_axis_type="datetime", title="Gold High Prices")
p1.grid.grid_line_alpha = 0.3
p1.xaxis.axis_label = 'Date'
p1.yaxis.axis_label = 'Price'

p1.line(y_hat.index, y_hat['t'], color='#A6CEE3', legend='y_hat')
p1.line(y.index, y['t'], color='#B2DF8A', legend='y')
p1.legend.location = "top_left"

output_file("/tmp/index.html", title="Iran Gold[High] Price Prediction")

show(gridplot([[p1]], plot_width=600, plot_height=600, sizing_mode='stretch_both'))