In [1]:
import pandas as pd
import torch.nn as nn
import torch
import numpy as np
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.autograd import Variable
import os
from sklearn.preprocessing import MinMaxScaler

In [2]:
# data
# data.to_csv('data.csv')
data = pd.read_csv('data.csv')
data = data.drop(['PE','Ratio'], axis=1)
data['Date'] = pd.to_datetime(data['Date'], format='%Y-%m-%d')
# price['Date'] = price['Date'].dt.strftime('%m/%d/%Y')
data.columns = ['', 'Date', 'Close', 'Open', 'High', 'Low']
data

Unnamed: 0,Unnamed: 1,Date,Close,Open,High,Low
0,0,1960-01-04,59.910000,59.910000,59.910000,59.910000
1,1,1960-01-05,60.389999,60.389999,60.389999,60.389999
2,2,1960-01-06,60.130001,60.130001,60.130001,60.130001
3,3,1960-01-07,59.689999,59.689999,59.689999,59.689999
4,4,1960-01-08,59.500000,59.500000,59.500000,59.500000
...,...,...,...,...,...,...
15671,15671,2022-02-11,4418.640000,4506.270000,4526.330000,4401.410000
15672,15672,2022-02-14,4401.670000,4412.610000,4426.220000,4364.840000
15673,15673,2022-02-15,4471.070000,4429.280000,4472.770000,4429.280000
15674,15674,2022-02-16,4475.010000,4455.750000,4489.550000,4429.680000


In [3]:
dataset = data[['Close', 'Open', 'High', 'Low']].values
scaler = MinMaxScaler(feature_range = (0, 1))
dataset = scaler.fit_transform(dataset)
dataset

array([[0.00162509, 0.01246953, 0.01243302, 0.01253337],
       [0.00172626, 0.01256944, 0.01253263, 0.01263379],
       [0.00167146, 0.01251532, 0.01247868, 0.01257939],
       ...,
       [0.93139433, 0.92190046, 0.92822634, 0.92661986],
       [0.93222479, 0.92740987, 0.93170866, 0.92670354],
       [0.91225371, 0.92747439, 0.92475854, 0.91501536]])

In [4]:
seq_len = 180
input_size = 4
output_size = 4

real_list = []
label_list = []
for i in range(dataset.shape[0] - seq_len - output_size + 1):
    real_list.append(dataset[i: i + seq_len, ])
    label_list.append(dataset[i + seq_len: i + seq_len + output_size, 0])
real_list = np.array(real_list)
label_list = np.array(label_list)
label_list = np.reshape(label_list, (label_list.shape[0], 1, label_list.shape[1]))
print("real_list: " , real_list.shape)
print("label_list: " , label_list.shape)

real_list:  (15493, 180, 4)
label_list:  (15493, 1, 4)


In [5]:
class SP500_Dataset(Dataset):
    def __init__(self, dataset, label_list):
        self.dataset = dataset
        self.label_list = label_list
    def __len__(self):
        return len(self.label_list)
    def __getitem__(self, idx):
        data = self.dataset[idx]
        label = self.label_list[idx]
        return {
            'data': torch.from_numpy(data).float(),
            'label': torch.from_numpy(label).float()
        }

In [6]:
class self_made_RNN(nn.Module):
    # implement RNN from scratch rather than using nn.RNN
    def __init__(self, input_size, hidden_size, output_size):
        super(self_made_RNN, self).__init__()
        
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size, hidden_size)
        self.i2o = nn.Linear(hidden_size, output_size)
        self.tanh = nn.Tanh()
        
    def forward(self, input_tensor, hidden_tensor):
        combined = torch.cat((input_tensor, hidden_tensor), 0)
        
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.tanh(output)
        return output, hidden
    
    def init_hidden(self):
        return torch.zeros(1, self.hidden_size)

# class RNN(self_made_RNN):
#     def __init__(self, input_size, output_size, hidden_size, num_layers):
#         super(RNN, self).__init__(input_size, hidden_size, output_size)
#         self.hidden_size = hidden_size
#         self.num_layers = num_layers
        
#         self.rnn = self_made_RNN.RNN(input_size = input_size, hidden_size = hidden_size, output_size = output_size)
#         self.linear = self_made_RNN.Linear(hidden_size, output_size)
    
#     def forward(self, x):
#         batch_size = x.size(0)
#         hidden = self.init_hidden(batch_size)
#         res, hidden = self.rnn(x, hidden)
#         res = self.linear(res)
#         return res[:, -1, :], hidden
        
#     def init_hidden(self, batch_size):
#         # This method generates the first hidden state of zeros which we'll use in the forward pass
#         # We'll send the tensor holding the hidden state to the device we specified earlier as well
#         hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size)
#         return hidden

In [7]:
hidden_size = 16
num_layers = 1
learning_rate = 0.001
num_epochs = 100

In [8]:
train_dataset = SP500_Dataset(dataset = real_list, label_list = label_list)
train_loader = DataLoader(dataset = train_dataset, shuffle=False)

In [9]:
model = self_made_RNN(input_size, hidden_size, output_size)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

for epoch in range(num_epochs):
    # make training sets in torch
    inputs = Variable(torch.from_numpy(real_list).float())
    hidden_tensor = model.init_hidden()
    labels = Variable(torch.from_numpy(label_list).float())
    
    # zero the parameter (weight) gradients
    optimizer.zero_grad()

    # Perform forward pass
    output, hidden = model(inputs,hidden_tensor)   
    
    # Compute loss
    loss = criterion(output.reshape(-1), labels.view(-1))
#     loss_all_1 += [loss.item()]
       
    # Backward pass
    loss.backward(retain_graph=True)
       
    #loss for validation
#     inputs_val = torch.from_numpy(X_val).float()
#     labels_val = torch.from_numpy(y_val).float()
#     output_val, hidden_val = rnn(inputs_val)
#     loss_val = criterion(output_val.reshape(-1), labels_val.view(-1))
#     loss_all_1_val += [loss_val.item()]
    
    if (epoch % 10 == 0 or epoch == num_epochs - 1):
        print("[%d/%d] Loss: %.4f" % (epoch, num_epochs, loss.item()))
    
    # Update parameters
    optimizer.step()                                

RuntimeError: Tensors must have same number of dimensions: got 3 and 2

In [None]:
prediction = scaler.inverse_transform(output.detach().numpy())
prediction

In [None]:
def calculate_prediction_error(predicted, actual):
    return abs(predicted - actual)/actual

In [None]:
actual = np.concatenate( (label_list[:, :, 0], label_list[:, :, 1], label_list[:, :, 2], label_list[:, :, 3]), axis = 1)
actual = scaler.inverse_transform(actual)
actual

In [None]:
error = calculate_prediction_error(prediction, actual)
error