# Data pre-processing

In [1]:
# importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# importing the training set
dataset_train = pd.read_csv('concat.csv')

# Print the total no of rows in the dataset - dataset_train
print("Total number of rows: {0}".format(len(dataset_train)))

# Remove all the rows which has all NA values
dataset_train.dropna(how="all")


# No of rows and columns in the dataset after dropping the rows which contain the missing values
print("Total no: of rows and columns after dropping the rows which contain missing values: ", dataset_train.shape)

training_set = dataset_train.iloc[:,1:2].values

Total number of rows: 744314
Total no: of rows and columns after dropping the rows which contain missing values:  (744314, 6)


In [3]:
# print the first 5 rows of the dataset
print(dataset_train.head(5))

    Date time stamp  Bar OPEN Bid Quote  Bar HIGH Bid Quote  \
0  2016.01.03 17:00             1.08701             1.08713   
1  2016.01.03 17:01             1.08712             1.08712   
2  2016.01.03 17:02             1.08708             1.08722   
3  2016.01.03 17:03             1.08717             1.08723   
4  2016.01.03 17:04             1.08718             1.08718   

   Bar LOW Bid Quote  Bar CLOSE Bid Quote  Volume  
0            1.08701              1.08713       0  
1            1.08712              1.08712       0  
2            1.08708              1.08722       0  
3            1.08717              1.08723       0  
4            1.08711              1.08711       0  


In [4]:
# print the last 5 rows of the dataset
print(dataset_train.tail(5))

         Date time stamp  Bar OPEN Bid Quote  Bar HIGH Bid Quote  \
744309  2017.12.29 16:53             1.19972             1.19987   
744310  2017.12.29 16:54             1.19985             1.19985   
744311  2017.12.29 16:55             1.19969             1.20014   
744312  2017.12.29 16:56             1.20009             1.20023   
744313  2017.12.29 16:57             1.19982             1.20074   

        Bar LOW Bid Quote  Bar CLOSE Bid Quote  Volume  
744309            1.19972              1.19987       0  
744310            1.19970              1.19970       0  
744311            1.19961              1.20010       0  
744312            1.19974              1.19983       0  
744313            1.19980              1.20005       0  


In [5]:
# feature scaling
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range = (0,1))
training_set_scaled = sc.fit_transform(training_set)

In [6]:
# creating a data structure with 60 time steps and 1 output
X_train = []  # input of the neural network
y_train = []  # output of the neural network
for i in range(60,744313):
    X_train.append(training_set_scaled[i-60:i,0])
    y_train.append(training_set_scaled[i,0])
X_train, y_train = np.array(X_train),np.array(y_train)

In [7]:
# Reshaping
X_train = np.reshape(X_train, (X_train.shape[0],X_train.shape[1], 1))

# Building RNN

In [8]:
# importing keras libraries and packages
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [9]:
# Initializing the RNN
regressor = Sequential()

In [10]:
# Adding the first LSTM layer and some Dropout regularization
# units -> no: of neurons
# return_sequences -> as a stacked LSTM layers are created return_sequences should be set as True
# input_shape -> conatins the last 2 dimensions of the time steps along with the indicators
regressor.add(LSTM(units = 50, return_sequences = True, input_shape = (X_train.shape[1], 1)))

# 20% of the neurons willl be dropeed when training the 
regressor.add(Dropout(0.4))

In [11]:
# Adding the second LSTM layer and some Dropout regularization
regressor.add(LSTM(units = 50, return_sequences = True))
regressor.add(Dropout(0.4))

In [12]:
# Adding the third LSTM layer and some Dropout regularization
regressor.add(LSTM(units = 50, return_sequences = True))
regressor.add(Dropout(0.4))

In [13]:
# Adding the fourth LSTM layer and some Dropout regularization
regressor.add(LSTM(units = 50))
regressor.add(Dropout(0.4))

In [14]:
# Adding the output layer
regressor.add(Dense(units = 1))
# activation='linear'

In [15]:
# Compiling the RNN
regressor.compile(optimizer = 'rmsprop', loss = 'mean_squared_error', metrics = ['accuracy'])
# rmsprop

In [16]:
# Fitting the RNN to training set
# epochs -> no: of iterations neural network wants to be trained
regressor.fit(X_train, y_train, epochs = 2, batch_size = 42 )

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f32289b1518>

# Making the prediction and visualizing the results

In [17]:
# Getting the real forex rates for 2018
dataset_test = pd.read_csv('2018-test_data.csv')
real_forex_rate = dataset_test.iloc[:,1:2].values

In [18]:
# Getting the predicted forex rates for 2018
# verical concatanation - axis = 0
# horizontal concatanation - axis = 1
dataset_total = pd.concat((dataset_train['Bar OPEN Bid Quote'], dataset_test['Bar OPEN Bid Quote']), axis = 0)
#inputs = dataset_total

In [None]:
# computes the RMSE
# import math
# from sklearn.metrics import mean_squared_error
# rmse = math.sqrt(mean_squared_error(real_forex_rate, predicted_stock_price))

In [18]:
#new instance where we do not know the answer
Xnew = (2018/1/20 10:3)

SyntaxError: invalid syntax (<ipython-input-18-cf6a181f83b6>, line 2)

In [None]:
#make a prediction
ynew = regressor.predict(Xnew)

In [None]:
# show the inputs and predicted outputs
#print("X=%s, Predicted=%s" % (Xnew[0], ynew[0]))

In [None]:
# Visualizing the results
