In [1]:
import numpy as np 
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets

In [2]:
# NOTE current order of orderbook is ask1 ,bid1, ask2, bid2, which performs better than the ragular form.
# 股票预测很容易被local minimum卡住，需要更小的学习率

In [3]:
# X shape = ([price, volume], N, time_span, level)
# y shape = (N) binary label
i      = 5
stock  = 600030
Xtrain = np.load(f'dataset/{stock}-{i}.train.X.npy')
Ytrain = np.load(f'dataset/{stock}-{i}.train.y.npy')
Xtest  = np.load(f'dataset/{stock}-{i}.test.X.npy')
Ytest  = np.load(f'dataset/{stock}-{i}.test.y.npy')
Xtrain = np.log10(Xtrain + 1)
Xtest  = np.log10(Xtest + 1)
Xtrain.shape, Ytrain.shape, Xtest.shape, Ytest.shape

((2, 54890, 20, 20), (54890,), (2, 13908, 20, 20), (13908,))

In [4]:
Xtrain = torch.from_numpy(Xtrain).float().permute(1, 0, 2, 3)
Ytrain = torch.from_numpy(Ytrain).long()
Xtest  = torch.from_numpy(Xtest).float().permute(1, 0, 2, 3)
Ytest  = torch.from_numpy(Ytest).long()


In [5]:
trainset = torch.utils.data.TensorDataset(Xtrain, Ytrain)
testset = torch.utils.data.TensorDataset(Xtest, Ytest)

# Create data loaders.
batch_size = 128
train_dataloader = DataLoader(trainset, batch_size=batch_size)
test_dataloader = DataLoader(testset, batch_size=batch_size)

In [6]:
class LSTM(nn.Module):
    
    def __init__(self, input_size:int, output_size:int, hidden_size=64,
                 num_layers=1, dropout=0):
        super().__init__()
        self.linear  = nn.Linear(hidden_size * 2, output_size)
        self.lstm1_1 = nn.LSTM(input_size, hidden_size, num_layers,
                               dropout=dropout, batch_first=True)
        self.lstm1_2 = nn.LSTM(input_size, hidden_size, num_layers,
                               dropout=dropout, batch_first=True)
        self.__device = device
          
    def forward(self, x):
        x1, x2 = x[:,0], x[:,1]
        x1, _ = self.lstm1_1(x1)
        x1 = x1[:,-1,:]
        x2, _ = self.lstm1_2(x2)
        x2 = x2[:,-1,:]
        x = torch.cat([x1, x2], dim=1)
        x = F.relu(x)
        x = self.linear(x)
        return F.softmax(x, dim=1)

In [7]:
# device = "cuda" if torch.cuda.is_available() else "cpu"
device = 'cpu'
print(f"Using {device} device")

torch.manual_seed(0)
model = LSTM(input_size=20, output_size=2).to(device)
print(model)

Using cpu device
LSTM(
  (linear): Linear(in_features=128, out_features=2, bias=True)
  (lstm1_1): LSTM(20, 64, batch_first=True)
  (lstm1_2): LSTM(20, 64, batch_first=True)
)


In [8]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [9]:
def test(dataloader, model):
    n = len(dataloader.dataset)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= n
    correct /= n
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [10]:
epochs = 100
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=5e-5)

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    print("train ", end ='')
    test(train_dataloader, model)
    test(test_dataloader, model)
print("Done!")

acy: 63.4%, Avg loss: 0.005323 

Epoch 29
-------------------------------
loss: 0.765420  [    0/54890]
loss: 0.661468  [12800/54890]
loss: 0.665467  [25600/54890]
loss: 0.692176  [38400/54890]
loss: 0.688960  [51200/54890]
train Test Error: 
 Accuracy: 58.0%, Avg loss: 0.005360 

Test Error: 
 Accuracy: 63.4%, Avg loss: 0.005321 

Epoch 30
-------------------------------
loss: 0.767272  [    0/54890]
loss: 0.660703  [12800/54890]
loss: 0.664846  [25600/54890]
loss: 0.692236  [38400/54890]
loss: 0.688901  [51200/54890]
train Test Error: 
 Accuracy: 58.0%, Avg loss: 0.005359 

Test Error: 
 Accuracy: 63.4%, Avg loss: 0.005318 

Epoch 31
-------------------------------
loss: 0.769076  [    0/54890]
loss: 0.659966  [12800/54890]
loss: 0.664245  [25600/54890]
loss: 0.692299  [38400/54890]
loss: 0.688848  [51200/54890]
train Test Error: 
 Accuracy: 58.0%, Avg loss: 0.005357 

Test Error: 
 Accuracy: 63.4%, Avg loss: 0.005315 

Epoch 32
-------------------------------
loss: 0.770836  [    0/

KeyboardInterrupt: 