In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import pandas_ta as ta
import getData

## Getting stock price data

In [2]:
preprocess_param = {
    'win_size':22,
    'stride':1,
    'split':True,
    'number_y':1,
    'random_state':420,
    'test_size':0.2,
}

v_preprocess_param = {
    'win_size':22,
    'stride':1,
    'split':False,
    'number_y':1,
    'random_state':420,
}

In [3]:
tickers = 'BTC-USD'

prices_df = getData.loader(tickers=tickers, interval="1d", period='max', end="2023-01-01").dataframe
prices_df_val = getData.loader(tickers=tickers, interval="1d", start='2023-01-01').dataframe

datasets = getData.preprocessor(prices_df, preprocess_param=preprocess_param).dataset
val_sets = getData.preprocessor(prices_df_val, preprocess_param=v_preprocess_param).dataset

## Initialize Dataloader

In [4]:
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

class PriceHistoryDataset(Dataset):
    def __init__(self, dataset, to_predict=['Open', 'High', 'Low', 'Close']):
        y = dataset['y'][:,:,self.__map_to_indices(to_predict)]
        x = dataset['x']
        self.columns = dataset['columns']
        self.initial_price = dataset['initial price']
        self.current_date = dataset['current date']
        
        self.X = torch.from_numpy(x).float()
        self.y = torch.from_numpy(y).float()
        
    def __len__(self):
        return self.X.shape[0]
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
    
    def __map_to_indices(self, args):
        mapping = {'Open': 0, 'High': 1, 'Low': 2, 'Close': 3}
        return [mapping[arg] for arg in args]

In [5]:
column_name = datasets['columns']

to_predict = ['High', 'Low', 'Close']

train_set = PriceHistoryDataset(datasets['train'], to_predict)
test_set = PriceHistoryDataset(datasets['test'], to_predict)
val_set = PriceHistoryDataset(val_sets, to_predict)

train_loader= DataLoader(train_set, batch_size=256, shuffle=False)
test_loader = DataLoader(test_set, batch_size=256, shuffle=False)
val_loader = DataLoader(val_set, batch_size=256, shuffle=False)

## Train model

In [6]:
import torch.nn as nn
import pytorch_lightning as pl

class LSTMModel(pl.LightningModule):

    def __init__(self, hidden_size, lstm_layers, input_size=8, output_size=3, dropout=0.05):
        super(LSTMModel, self).__init__()
        
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=lstm_layers, dropout=dropout)
        
        self.out_linear = nn.Linear(hidden_size, output_size)
        
        # keep track of losses function.
        self.train_losses = []
        self.test_losses = []
        self.loss_func = nn.L1Loss()
        
        
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        lstm_out = lstm_out[:,-1:,:]
        
        output = self.out_linear(lstm_out)
        return output


    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss_func(y, y_hat)#.mean()
        self.train_losses.append(loss)
        return loss

    
    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss_func(y, y_hat)#.mean()
        self.test_losses.append(loss)
        return loss
    
    
    def on_test_epoch_end(self):
        avg_loss = torch.stack(self.test_losses).mean()
        print(f'Test Loss: {avg_loss}')
        return {'L1_loss': avg_loss}
    
    
    def on_train_epoch_end(self):
        avg_loss = torch.stack(self.train_losses).mean()
        print(f'Train Loss: {avg_loss}')
        return {'L1_loss': avg_loss}
    
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.02)


# Initialize the model and trainer
model = LSTMModel(output_size=len(to_predict), hidden_size=256, lstm_layers=6, dropout=0.0)

# checkpoint = torch.load("model\LSTM_BTC\checkpoints\epoch=999-step=9000.ckpt")
# model.load_state_dict(checkpoint['state_dict'])



In [7]:
print("Number of parameters:", sum(p.numel() for p in model.parameters()))
print("Number of layers:", len(list(model.children())))

Number of parameters: 2904835
Number of layers: 3


In [8]:
# train the model
trainer = pl.Trainer(default_root_dir="model/", max_epochs=2000)

# Train the model
trainer.fit(model, train_loader, test_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
c:\Users\dylan\AppData\Local\Programs\Python\Python310\lib\site-packages\pytorch_lightning\trainer\configuration_validator.py:72: You passed in a `val_dataloader` but have no `validation_step`. Skipping val loop.
You are using a CUDA device ('NVIDIA GeForce RTX 3080 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type   | Params
--------------------------------------
0 | lstm       | LSTM   | 2.9 M 
1 | out_linear | Linear | 771   
2 | loss_func  | L1Loss | 0     
--------------------------------------
2.9 M  

Training: |          | 0/? [00:00<?, ?it/s]

Train Loss: 0.8840090036392212
Train Loss: 0.5281427502632141
Train Loss: 0.40561819076538086
Train Loss: 0.34356728196144104
Train Loss: 0.3058818578720093
Train Loss: 0.28047439455986023
Train Loss: 0.2624101936817169
Train Loss: 0.24886280298233032
Train Loss: 0.23815207183361053
Train Loss: 0.22950105369091034
Train Loss: 0.2224210500717163
Train Loss: 0.21652688086032867
Train Loss: 0.21153706312179565
Train Loss: 0.20725451409816742
Train Loss: 0.20354041457176208
Train Loss: 0.20029059052467346
Train Loss: 0.19742253422737122
Train Loss: 0.19488702714443207
Train Loss: 0.192610964179039
Train Loss: 0.19056235253810883
Train Loss: 0.18871037662029266
Train Loss: 0.18702828884124756
Train Loss: 0.18549567461013794
Train Loss: 0.18409326672554016
Train Loss: 0.18280351161956787
Train Loss: 0.18161506950855255
Train Loss: 0.18051232397556305
Train Loss: 0.17948530614376068
Train Loss: 0.17852699756622314
Train Loss: 0.17763052880764008
Train Loss: 0.1767897605895996
Train Loss: 0.17

`Trainer.fit` stopped: `max_epochs=2000` reached.


Train Loss: 0.15208230912685394


In [9]:
result = trainer.test(model=model, dataloaders=val_loader)
print(result)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\dylan\AppData\Local\Programs\Python\Python310\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

Test Loss: 0.09540335088968277
[{}]


## Use model to predict btc Price

In [27]:
x = val_loader.dataset.X
y = val_loader.dataset.y
ref = val_loader.dataset.initial_price
date = val_loader.dataset.current_date

with torch.no_grad():
    y_hat = model(x)
    
predict_out = np.multiply( (np.array(y_hat)+1)[:,0,:].T, ref)

print({
    'losses':nn.L1Loss()(y, y_hat),
})


{'losses': tensor(0.0954)}


In [31]:
import plotly.graph_objects as go

import pandas as pd
from datetime import datetime

df = prices_df_val.iloc[200:]

fig = go.Figure(data=[
        go.Candlestick(
            x=df['Date'],
            open=df['Open'],
            high=df['High'],
            low=df['Low'],
            close=df['Close'],
            name='Actual Price'
        ),
        go.Scatter(
            x=date,
            y=predict_out[0],
            line=dict(color='red'),
            name='Pred. High'
        ),
        go.Scatter(
            x=date,
            y=predict_out[1],
            line=dict(color='blue'),
            name='Pred. Low'
        ),
        go.Scatter(
            x=date,
            y=predict_out[2],
            line=dict(color='green'),
            name='Pred. Close'
        ),
    ]).update_layout(title_text=tickers+' price predictions', title_x=0.3)

fig.show()