In [50]:
import yfinance as yf
import numpy as np
import torch

from torch import nn

In [70]:
from datetime import datetime
from sklearn.model_selection import train_test_split

In [71]:
START = datetime(1990,1,1)
END = datetime(2020,6,5)

UVXY = yf.Ticker('UVXY')
hist = UVXY.history(start=START, end=END)
hist.drop(['Volume', 'Dividends', 'Stock Splits'], axis=1, inplace=True)

In [72]:
def split_sequence(sequence, n_steps):
    X, y = list(), list()
    for i in range(len(sequence)):
        end_ix = i + n_steps
        if end_ix > len(sequence):
            break
        seq_x, seq_y = sequence[i:end_ix, :-1], sequence[end_ix-1, -1]
        X.append(seq_x)
        y.append(seq_y)
    
    return np.array(X), np.array(y)

In [73]:
hist['RET'] = hist['Close'].pct_change()
hist.dropna(inplace=True)
hist['y'] = hist['RET'].rolling(10).sum()
hist['y_binary'] = (hist['y'] > 0).astype(int)
hist['y'] = hist['y'].shift(-9)
hist['y_binary'] = hist['y_binary'].shift(-9)
hist.dropna(inplace=True)
hist

Unnamed: 0_level_0,Open,High,Low,Close,RET,y,y_binary
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2011-10-05,1.974600e+08,1.989000e+08,1.813800e+08,1.813800e+08,-0.118659,-0.405426,0.0
2011-10-06,1.811400e+08,1.878000e+08,1.740000e+08,1.740000e+08,-0.040688,-0.175707,0.0
2011-10-07,1.705800e+08,1.857600e+08,1.684800e+08,1.765200e+08,0.014483,-0.133360,0.0
2011-10-10,1.654800e+08,1.656000e+08,1.559400e+08,1.559400e+08,-0.116587,-0.249292,0.0
2011-10-11,1.570200e+08,1.572600e+08,1.491000e+08,1.506000e+08,-0.034244,-0.254364,0.0
...,...,...,...,...,...,...,...
2020-05-15,4.358000e+01,4.461000e+01,3.919000e+01,3.923000e+01,-0.037301,-0.176722,0.0
2020-05-18,3.529000e+01,3.643000e+01,3.466000e+01,3.544000e+01,-0.096610,-0.131081,0.0
2020-05-19,3.565000e+01,3.817000e+01,3.449000e+01,3.800000e+01,0.072235,-0.074649,0.0
2020-05-20,3.539000e+01,3.656000e+01,3.435000e+01,3.450000e+01,-0.092105,-0.211211,0.0


In [74]:
in_seq1 = np.array(hist['Open'].values.reshape(-1,1))
in_seq2 = np.array(hist['High'].values.reshape(-1,1))
in_seq3 = np.array(hist['Low'].values.reshape(-1,1))
in_seq4 = np.array(hist['Close'].values.reshape(-1,1))


In [75]:
out_seq = np.array(hist['y'].values.reshape(-1,1))
out_seq_bin = np.array(hist['y_binary'].values.reshape(-1,1))

In [76]:
dataset = np.hstack((in_seq1, in_seq2, in_seq3, in_seq4, out_seq))
dataset_binary = np.hstack((in_seq1, in_seq2, in_seq3, in_seq4, out_seq_bin))

In [77]:
class MV_LSTM(nn.Module):
    def __init__(self, n_features, seq_length, hidden_dim=20, num_layers=1):
        super(MV_LSTM, self).__init__()
        self.n_features = n_features
        self.seq_length = seq_length
        self.n_hidden = hidden_dim
        self.n_layers = num_layers

        self.lstm = nn.LSTM(input_size=self.n_features,
                            hidden_size=self.n_hidden,
                            num_layers=self.n_layers,
                            batch_first=True)
        self.linear = nn.Linear(self.n_hidden*self.seq_length, 1)

    def init_hidden(self, batch_size):
        hidden_state = torch.zeros(self.n_layers, batch_size, self.n_hidden)
        cell_state = torch.zeros(self.n_layers, batch_size, self.n_hidden)
        self.hidden = (hidden_state, cell_state)

    def forward(self, x):
        batch_size, seq_length, _ = x.size()

        lstm_out, self.hidden = self.lstm(x, self.hidden)
        x = lstm_out.contiguous().view(batch_size, -1)
        return self.linear(x)
        

In [78]:
MV_LSTM(4, 10, 20, 1)

MV_LSTM(
  (lstm): LSTM(4, 20, batch_first=True)
  (linear): Linear(in_features=200, out_features=1, bias=True)
)

In [80]:
n_features = 4
n_timesteps = 30

X, y = split_sequence(dataset, n_timesteps)
train_X, train_y, test_x, test_y = train_test_split(X, y, test_size=0.2, random_state=42)
print(train_X.shape, train_y.shape)

(1713, 30, 4)(429, 30, 4)


In [68]:
mv_net = MV_LSTM(n_features, n_timesteps)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(mv_net.parameters(), lr=1e-3)

train_episides = 500
batch_size = 16

In [69]:
mv_net.train()

for t in range(train_episides):
    for b in range(0, len(train_X), batch_size):
        optimizer.zero_grad()
        inpt = train_X[b:b+batch_size, :, :]
        target = train_y[b:b+batch_size]

        x_batch = torch.tensor(inpt, dtype=torch.float32)
        y_batch = torch.tensor(target, dtype=torch.float32)

        mv_net.init_hidden(x_batch.size(0))
        output = mv_net(x_batch)

        loss_train = criterion(output.view(-1), y_batch)
        loss_train.backward()

        optimizer.step()

    
    print("Step: () Train Loss: () Val Loss: ()".format(t, round(loss_train.item(), 4), round(loss_val.item(), 4))



step:0loss:0.008446290157735348
step:1loss:0.048511676490306854
step:2loss:0.09100346267223358
step:3loss:0.008460832759737968
step:4loss:0.030830413103103638
step:5loss:0.009046810679137707
step:6loss:0.03293280303478241
step:7loss:0.0054622734896838665
step:8loss:0.049574293196201324
step:9loss:0.017918260768055916
step:10loss:0.01324972789734602
step:11loss:0.06455705314874649
step:12loss:0.006266560405492783
step:13loss:0.03446706756949425
step:14loss:0.016926784068346024
step:15loss:0.0057577816769480705
step:16loss:0.025672750547528267
step:17loss:0.03512733802199364
step:18loss:0.033383194357156754
step:19loss:0.0104270800948143
step:20loss:0.034760456532239914
step:21loss:0.02677108719944954
step:22loss:0.0056812032125890255
step:23loss:0.03612607344985008
step:24loss:0.043954525142908096
step:25loss:0.009082377888262272
step:26loss:0.03563332185149193
step:27loss:0.0068196398206055164
step:28loss:0.05103341117501259
step:29loss:0.018935034051537514
step:30loss:0.01299994159489

KeyboardInterrupt: 

In [56]:
hist

Unnamed: 0_level_0,Open,High,Low,Close,RET,y,y_binary
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2011-10-05,1.974600e+08,1.989000e+08,1.813800e+08,1.813800e+08,-0.118659,-0.405426,0.0
2011-10-06,1.811400e+08,1.878000e+08,1.740000e+08,1.740000e+08,-0.040688,-0.175707,0.0
2011-10-07,1.705800e+08,1.857600e+08,1.684800e+08,1.765200e+08,0.014483,-0.133360,0.0
2011-10-10,1.654800e+08,1.656000e+08,1.559400e+08,1.559400e+08,-0.116587,-0.249292,0.0
2011-10-11,1.570200e+08,1.572600e+08,1.491000e+08,1.506000e+08,-0.034244,-0.254364,0.0
...,...,...,...,...,...,...,...
2020-05-15,4.358000e+01,4.461000e+01,3.919000e+01,3.923000e+01,-0.037301,-0.176722,0.0
2020-05-18,3.529000e+01,3.643000e+01,3.466000e+01,3.544000e+01,-0.096610,-0.131081,0.0
2020-05-19,3.565000e+01,3.817000e+01,3.449000e+01,3.800000e+01,0.072235,-0.074649,0.0
2020-05-20,3.539000e+01,3.656000e+01,3.435000e+01,3.450000e+01,-0.092105,-0.211211,0.0


In [None]:
f