In [5]:
#conda install pytorch torchvision torchaudio cudatoolkit=10.2 -c pytorch

import numpy as np
import pandas as pd

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} device')

Using cuda device


In [None]:
#attention network, can be used on a sequence of any legnth of word embeddings
class AttentionRnn(nn.Module):
    def __init__(self, dim_input, dim_output):
        super(AttentionRnn, self).__init__()
        self.attention = nn.RNN(dim_input, dim_output, batch_first = True)
        self.activation = nn.Sequential(nn.Sigmoid(),
            nn.Softmax(1)
        )
    def forward(self, x):
        weights, hidden = self.attention(x)
        weights = torch.squeeze(weights)
        weights = self.activation(weights)
        weighted_vector = torch.sum(x*weights[:,:,None],1)
        return weighted_vector



class NeuralNetwork(nn.Module):
    def __init__(self, k = 768):
        super(NeuralNetwork, self).__init__()
        self.k = k
        self.attention = AttentionRnn(768,1)
        self.trading_strategy = nn.Sequential(
            nn.Linear(self.k+5,1),
            nn.Tanh()
        )
    def forward(self, x):
        returns, topic_vectors = torch.tensor_split(x,[1], dim = 2) #split into word signals and price signals
        weighted_vector = self.attention(topic_vectors)
        #U,S,V = torch.pca_lowrank(weighted_vector,self.k)
        #weighted_vector = torch.matmul(weighted_vector, V[:, :self.k])
        returns = torch.squeeze(returns)
        x = torch.cat((returns, weighted_vector), dim = 1)
        return torch.squeeze(self.trading_strategy(x))

class LSTM(nn.Module):
    def __init__(self, num_layers=1):
        super(LSTM, self).__init__()
        self.lstm = nn.LSTM(768+1,1, num_layers)
        self.trading_strategy = nn.Tanh()
    def forward(self, x):
        output, hidden = self.lstm(x)
        output = torch.squeeze(output)
        output = self.trading_strategy(output)
        return output
    
    
    
#model = NeuralNetwork()
model = LSTM(5)

In [6]:
#import btc price history
btc_prices = pd.read_csv("btc_prices.csv", index_col = 0)
btc_prices.index = pd.to_datetime(btc_prices.index).strftime('%Y-%m-%d')
btc_prices['log_ret'] = np.log(btc_prices.Close/btc_prices.Close.shift(1))
btc_ret = btc_prices[["log_ret"]].dropna()

#get the right daterange
btc_ret_train = btc_ret[(btc_ret.index >= '2016-01-01') & (btc_ret.index < '2021-01-01')].sort_index()


In [4]:
btc_ret_train

Unnamed: 0_level_0,log_ret
Date,Unnamed: 1_level_1
2016-01-01,0.008711
2016-01-02,-0.002065
2016-01-03,-0.007938
2016-01-04,0.007137
2016-01-05,-0.002615
...,...
2020-12-27,-0.006251
2020-12-28,0.030458
2020-12-29,0.010198
2020-12-30,0.052625


In [7]:
topic_vectors = pd.read_csv("bitcoin/btc_2016.csv", index_col = 0)
for i in [2017,2018,2019,2020]:
    topic_vectors = topic_vectors.append(pd.read_csv(f"bitcoin/btc_{i}.csv", index_col = 0))
topic_vectors.index = pd.date_range('2016-01-01', periods=len(topic_vectors))
topic_vectors = btc_ret_train.join(topic_vectors, how= "left").dropna()

In [8]:
#get the time series nicely streamlined into the machine learning models
rolling_window = 5

X_data = np.empty([len(topic_vectors)-rolling_window,rolling_window,769])
y_series_data = np.empty([len(topic_vectors)-rolling_window,rolling_window])
for i in range(0,len(topic_vectors)-rolling_window):
    X_data[i] = np.array(topic_vectors.iloc[i:i+rolling_window,:])
    y_series_data[i] = np.array(topic_vectors.iloc[i+1:i+1+rolling_window,0])
y_data = np.array(topic_vectors.iloc[rolling_window:,0])

X = X_data[:-200]
X_tune = X_data[-200:-100]
X_test = X_data[-100:]


y = y_data[:-200]
y_tune = y_data[-200:-100]
y_test = y_data[-100:]

y_series = y_series_data[:-200]
y_series_tune = y_series_data[-200:-100]
y_series_test = y_series_data[-100:]

In [127]:
model_print = NeuralNetwork().to(device)
print(model_print)

NeuralNetwork(
  (attention): AttentionRnn(
    (attention): RNN(768, 1, batch_first=True)
    (activation): Sequential(
      (0): Sigmoid()
      (1): Softmax(dim=1)
    )
  )
  (trading_strategy): Linear(in_features=773, out_features=1, bias=True)
)


In [11]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [12]:
train_dataset = TensorDataset(torch.tensor(X).float(), torch.tensor(y_series).float())

In [13]:
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
#test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)

In [14]:
learning_rate = 1e-3
batch_size = 64
epochs = 500

loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)


for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    #test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 0.010295  [    0/ 1618]
Epoch 2
-------------------------------
loss: 0.010066  [    0/ 1618]
Epoch 3
-------------------------------
loss: 0.010838  [    0/ 1618]
Epoch 4
-------------------------------
loss: 0.009193  [    0/ 1618]
Epoch 5
-------------------------------
loss: 0.010151  [    0/ 1618]
Epoch 6
-------------------------------
loss: 0.011321  [    0/ 1618]
Epoch 7
-------------------------------
loss: 0.009777  [    0/ 1618]
Epoch 8
-------------------------------
loss: 0.009915  [    0/ 1618]
Epoch 9
-------------------------------
loss: 0.011708  [    0/ 1618]
Epoch 10
-------------------------------
loss: 0.009976  [    0/ 1618]
Epoch 11
-------------------------------
loss: 0.010426  [    0/ 1618]
Epoch 12
-------------------------------
loss: 0.009681  [    0/ 1618]
Epoch 13
-------------------------------
loss: 0.009803  [    0/ 1618]
Epoch 14
-------------------------------
loss: 0.009865  [    0/ 1618]
Epoch 15
------

Epoch 117
-------------------------------
loss: 0.009140  [    0/ 1618]
Epoch 118
-------------------------------
loss: 0.008991  [    0/ 1618]
Epoch 119
-------------------------------
loss: 0.008458  [    0/ 1618]
Epoch 120
-------------------------------
loss: 0.008300  [    0/ 1618]
Epoch 121
-------------------------------
loss: 0.010316  [    0/ 1618]
Epoch 122
-------------------------------
loss: 0.008458  [    0/ 1618]
Epoch 123
-------------------------------
loss: 0.008695  [    0/ 1618]
Epoch 124
-------------------------------
loss: 0.008091  [    0/ 1618]
Epoch 125
-------------------------------
loss: 0.008615  [    0/ 1618]
Epoch 126
-------------------------------
loss: 0.008997  [    0/ 1618]
Epoch 127
-------------------------------
loss: 0.010142  [    0/ 1618]
Epoch 128
-------------------------------
loss: 0.010806  [    0/ 1618]
Epoch 129
-------------------------------
loss: 0.008482  [    0/ 1618]
Epoch 130
-------------------------------
loss: 0.008106  [    0

Epoch 231
-------------------------------
loss: 0.007624  [    0/ 1618]
Epoch 232
-------------------------------
loss: 0.007378  [    0/ 1618]
Epoch 233
-------------------------------
loss: 0.008597  [    0/ 1618]
Epoch 234
-------------------------------
loss: 0.007109  [    0/ 1618]
Epoch 235
-------------------------------
loss: 0.008625  [    0/ 1618]
Epoch 236
-------------------------------
loss: 0.007857  [    0/ 1618]
Epoch 237
-------------------------------
loss: 0.007325  [    0/ 1618]
Epoch 238
-------------------------------
loss: 0.007841  [    0/ 1618]
Epoch 239
-------------------------------
loss: 0.007493  [    0/ 1618]
Epoch 240
-------------------------------
loss: 0.007380  [    0/ 1618]
Epoch 241
-------------------------------
loss: 0.008280  [    0/ 1618]
Epoch 242
-------------------------------
loss: 0.007148  [    0/ 1618]
Epoch 243
-------------------------------
loss: 0.006925  [    0/ 1618]
Epoch 244
-------------------------------
loss: 0.007454  [    0

Epoch 345
-------------------------------
loss: 0.006681  [    0/ 1618]
Epoch 346
-------------------------------
loss: 0.005749  [    0/ 1618]
Epoch 347
-------------------------------
loss: 0.006176  [    0/ 1618]
Epoch 348
-------------------------------
loss: 0.006441  [    0/ 1618]
Epoch 349
-------------------------------
loss: 0.006649  [    0/ 1618]
Epoch 350
-------------------------------
loss: 0.006359  [    0/ 1618]
Epoch 351
-------------------------------
loss: 0.007720  [    0/ 1618]
Epoch 352
-------------------------------
loss: 0.005965  [    0/ 1618]
Epoch 353
-------------------------------
loss: 0.005767  [    0/ 1618]
Epoch 354
-------------------------------
loss: 0.007009  [    0/ 1618]
Epoch 355
-------------------------------
loss: 0.006513  [    0/ 1618]
Epoch 356
-------------------------------
loss: 0.006005  [    0/ 1618]
Epoch 357
-------------------------------
loss: 0.006435  [    0/ 1618]
Epoch 358
-------------------------------
loss: 0.007027  [    0

Epoch 459
-------------------------------
loss: 0.005979  [    0/ 1618]
Epoch 460
-------------------------------
loss: 0.005714  [    0/ 1618]
Epoch 461
-------------------------------
loss: 0.005317  [    0/ 1618]
Epoch 462
-------------------------------
loss: 0.005806  [    0/ 1618]
Epoch 463
-------------------------------
loss: 0.005271  [    0/ 1618]
Epoch 464
-------------------------------
loss: 0.006763  [    0/ 1618]
Epoch 465
-------------------------------
loss: 0.005522  [    0/ 1618]
Epoch 466
-------------------------------
loss: 0.005209  [    0/ 1618]
Epoch 467
-------------------------------
loss: 0.006293  [    0/ 1618]
Epoch 468
-------------------------------
loss: 0.006560  [    0/ 1618]
Epoch 469
-------------------------------
loss: 0.005873  [    0/ 1618]
Epoch 470
-------------------------------
loss: 0.005565  [    0/ 1618]
Epoch 471
-------------------------------
loss: 0.005161  [    0/ 1618]
Epoch 472
-------------------------------
loss: 0.006933  [    0

KeyboardInterrupt: 

In [15]:
def accuracy(true, pred):
    temp = np.sign(true*pred)+1
    print(temp)
    return sum(temp)/len(temp)/2

In [16]:
x_test = torch.tensor(X).float()
y_value = y

In [28]:
x_test = torch.tensor(X_test).float()
y_value = y_test

In [24]:
pred =model(x_test).detach().numpy()
accuracy(y_value,pred)

ValueError: operands could not be broadcast together with shapes (1618,) (1618,5) 

In [29]:
pred = torch.squeeze(torch.tensor_split(model(x_test),[4], dim = 1)[1]).detach().numpy()
accuracy(y_value,pred)

[0. 0. 2. 0. 2. 0. 2. 2. 0. 2. 0. 0. 0. 0. 2. 2. 0. 2. 2. 2. 2. 2. 2. 0.
 2. 2. 2. 2. 2. 2. 0. 2. 0. 2. 2. 0. 2. 2. 2. 0. 0. 2. 2. 2. 0. 0. 2. 0.
 0. 2. 2. 2. 0. 0. 2. 2. 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 2. 2. 0. 2. 2.
 0. 2. 2. 0. 0. 2. 0. 0. 2. 2. 2. 2. 2. 2. 2. 2. 0. 0. 2. 0. 2. 2. 2. 0.
 2. 2. 2. 2.]


0.63

In [30]:
torch.save(model.state_dict(), "LSTM_weights.pt")

In [315]:
model.load_state_dict(torch.load("LSTM_weights.pt"))

<All keys matched successfully>