## Import Libiray

In [1]:
import pandas as pd
import numpy as np
import math
import torch
import torch.nn as nn
from torch.autograd import Variable
import matplotlib.pyplot as plt

## Define Parameters

In [2]:
ts_data_save = 'CNN_Pred/'
origin_data_save = 'TS_data/'
model_save = 'lstm_model_save/'
header = ['Open Price', 'High', 'Low', 'Close']
window_size = 4      
pred_days = 1      

# Network Parameters
LSTM_input_dim = 4        
LSTM_output_dim = 2       
n_layers = 2         
n_hidden_size = 64   

# Hyper parameters
EPOCH = 1500        
train_ratio = 0.8
LR = 1e-2 #Learning rate

## DataLoader

In [3]:
# Training data processed by CNN
product = 'AUDUSD'
ts_d = pd.read_csv(ts_data_save+product+'.csv')


ts_d = ts_d.dropna(axis=0,how='any')

# Minimize the first three days
ts_d_len = len(ts_d) - window_size+1
print(ts_d_len)
# data.shape: (len(ts_w)*4*4) 
data = np.zeros((len(ts_d), window_size, LSTM_input_dim), dtype=float)

ts_d_n = ts_d.iloc[:,:].values
ts_d_n

1597


array([[0.9094634 , 0.9125382 , 0.90548056, 0.90901625],
       [0.90289366, 0.9061216 , 0.89917237, 0.9027511 ],
       [0.905483  , 0.9086966 , 0.90171885, 0.905323  ],
       ...,
       [0.7137139 , 0.7168654 , 0.7111266 , 0.7142442 ],
       [0.7131185 , 0.7163243 , 0.7105553 , 0.713661  ],
       [0.71819025, 0.7213686 , 0.7156482 , 0.71872145]])

In [4]:
# Generate Traing set
for i in range(0, ts_d_len):
    # Generate 4*4 temporary array
    temp = np.zeros((window_size, LSTM_input_dim))
    for j in range(i, i+window_size):
        # i=0, j=0,1,2,3 
        temp[j-i]=ts_d_n[j]
    # Fill data array
    data[i] = temp

train_num = ts_d_len
train_data = data[0:train_num]
# Initialize the training matrices
train_X = np.zeros((train_num, window_size-pred_days, LSTM_input_dim), dtype=float)
train_Y = np.zeros((train_num, LSTM_output_dim), dtype=float)
# Split 4*4 data into 3*4 and 1*2
for i in range(train_num):
    train_X[i] = train_data[i][0:window_size-1]
    for j in range(LSTM_output_dim):
        train_Y[i][j] = train_data[i][window_size-1][j+1]

# Encapsulate the data into torch tensor
train_X = torch.tensor(train_X,dtype=torch.float32).cuda()
train_Y = torch.tensor(train_Y,dtype=torch.float32).cuda()

print('Finish Loading........')
print('training data:',train_num)

Finish Loading........
training data: 1597


In [5]:
# Load Test Data from the origin data
ts_d = pd.read_csv(origin_data_save+product+'1440_Data.csv')
# Change Header


ts_d = ts_d.dropna(axis=0,how='any')
ts_d = ts_d.drop(['Date','Year','Month','Day','Hour','Minute','Volume','RSI','MACD_M','MACD_S','STO_K','STO_D'],axis=1)
# Minimize the first three days
ts_d_len = len(ts_d) - window_size+1
print(ts_d_len)
# data.shape: (len(ts_w)*4*4) 
data = np.zeros((len(ts_d), window_size, LSTM_input_dim), dtype=float)

ts_d_n = ts_d.iloc[:,:].values
ts_d_n

1997


array([[0.91307, 0.91366, 0.89976, 0.903  ],
       [0.903  , 0.90761, 0.9002 , 0.90641],
       [0.9064 , 0.91164, 0.89889, 0.9101 ],
       ...,
       [0.64233, 0.64755, 0.64163, 0.64399],
       [0.64398, 0.64523, 0.63795, 0.63847],
       [0.63851, 0.6508 , 0.63784, 0.65068]])

In [6]:
# Generate Test set
for i in range(0, ts_d_len):
    # Generate 4*4 temporary array
    temp = np.zeros((window_size, LSTM_input_dim))
    for j in range(i, i+window_size):
        # i=0, j=0,1,2,3 
        temp[j-i]=ts_d_n[j]
    # Fill data array
    data[i] = temp
train_num = math.ceil(train_ratio*ts_d_len)
test_num = ts_d_len-train_num

test_data = data[train_num: ts_d_len]

test_X = np.zeros((test_num, window_size-pred_days, LSTM_input_dim), dtype=float)
test_Y = np.zeros((test_num, LSTM_output_dim), dtype=float)

for i in range(test_num):
    test_X[i] = test_data[i][0:window_size-1]
    for j in range(LSTM_output_dim):
        test_Y[i][j] = test_data[i][window_size-1][j+1]


test_X = torch.tensor(test_X,dtype=torch.float32).cuda()
test_Y = torch.tensor(test_Y,dtype=torch.float32).cuda()

print('Finish Loading........')
print('test_data:',test_num)
test_Y

Finish Loading........
test_data: 399


tensor([[0.7176, 0.7138],
        [0.7273, 0.7150],
        [0.7295, 0.7246],
        [0.7284, 0.7237],
        [0.7250, 0.7243],
        [0.7254, 0.7211],
        [0.7265, 0.7194],
        [0.7246, 0.7102],
        [0.7117, 0.7089],
        [0.7101, 0.7061],
        [0.7108, 0.7082],
        [0.7108, 0.7057],
        [0.7108, 0.7054],
        [0.7136, 0.7085],
        [0.7132, 0.7072],
        [0.7149, 0.7079],
        [0.7148, 0.7134],
        [0.7161, 0.7125],
        [0.7174, 0.7104],
        [0.7183, 0.7141],
        [0.7207, 0.7071],
        [0.7150, 0.7083],
        [0.7158, 0.7131],
        [0.7184, 0.7134],
        [0.7199, 0.7142],
        [0.7196, 0.7127],
        [0.7167, 0.7090],
        [0.7122, 0.7070],
        [0.7107, 0.7093],
        [0.7103, 0.7075],
        [0.7097, 0.7059],
        [0.7092, 0.7021],
        [0.7052, 0.7005],
        [0.7052, 0.7003],
        [0.7042, 0.7031],
        [0.7080, 0.7027],
        [0.7092, 0.7057],
        [0.7098, 0.7049],
        [0.7

## Define LSTM Network

In [7]:
class LSTM(nn.Module): # extend nn.Module
    # Define the LSTM network
    def __init__(self,input_size,hidden_size, output_size=LSTM_output_dim,num_layers=2):
        super(LSTM,self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        # LSTM layer
        self.lstm = nn.LSTM(input_size,hidden_size,num_layers)
        self.linear = nn.Linear(hidden_size*(window_size-pred_days),output_size)
        self.batch_first = True
    
    def forward(self,x): # x: input
        # Only get the result， no hidden state
        lstm_out, _ = self.lstm(x)
        # get the index of hidden state
        batch_n,win_s,hidden_s = lstm_out.shape
        # Flatten to 2D: s*b rows and h cols
        linear_in = lstm_out.view(batch_n, win_s*hidden_s)
        out = self.linear(linear_in)
        output = out.view(batch_n,2)
        return output


In [8]:
lstm = LSTM(input_size=LSTM_input_dim, hidden_size=n_hidden_size).cuda()
print(lstm)

LSTM(
  (lstm): LSTM(4, 64, num_layers=2)
  (linear): Linear(in_features=192, out_features=2, bias=True)
)


In [9]:
optimizer = torch.optim.Adam(lstm.parameters(), lr=LR)
loss_function = nn.MSELoss()

In [10]:
def RMSELoss(yhat,y):
    return torch.sqrt(torch.mean((yhat-y)**2))

def train_model(train_X, save_model_name):
    losses = []
    losses2 = []
    print("Training Start. Epochs = ", EPOCH)
    for i in range(EPOCH):
        output = lstm(train_X)
        loss = loss_function(output, train_Y)
        loss2 = RMSELoss(output,train_Y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
        losses2.append(loss2.item())
        if (i+1) % 50 == 0:
            print('Epoch:',i+1,'Loss:', loss.item())
            
    torch.save(lstm, model_save+save_model_name)
    return loss.item(), losses,loss2.item()



In [11]:
def test_model(model_name, test_X):
    # Load the trained model
    lstm64 = torch.load(model_save+model_name)
    # Forward the test data
    test_output = lstm64.forward(test_X)
    # Calculate the loss
    loss = loss_function(test_output, test_Y)
    loss2 = RMSELoss(test_output,test_Y)
    return test_output, loss.item(),loss2.item()



In [None]:
if __name__ == '__main__':
    model_name=product+'.pkl'
    train_MSE, losses, train_RMSE = train_model(train_X, model_name)

In [66]:
test_output, test_MSE, test_RMSE = test_model(model_name, test_X)

print("Train MSE ", train_MSE,";test MSE;", test_MSE,";Train RMSE;",train_RMSE,"Test RMSE",test_RMSE)


Train MSE  2.9288774385349825e-05 ;test MSE; 0.00010366209608037025 ;Train RMSE; 0.005411910358816385 Test RMSE 0.010181458666920662


In [13]:
test_output, _, _ = test_model(model_name, test_X)
test_output_plot = test_output.cpu().detach().numpy()
test_y_plot = test_Y.cpu().detach().numpy()

In [70]:
out = pd.DataFrame(test_output_plot)
out.to_csv('TS_data_Pred//'+product+'.csv',index=False)