In [337]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import pandas_ta as ta
import getData

## Getting stock price data

In [355]:
preprocess_param = {
    'win_size':22,
    'stride':1,
    'split':True,
    'number_y':1,
    'random_state':420,
    'test_size':0.2,
}

v_preprocess_param = {
    'win_size':22,
    'stride':1,
    'split':False,
    'number_y':1,
    'random_state':420,
}

In [356]:
tickers = 'BTC-USD'

prices_df = getData.loader(tickers=tickers, interval="1d", period='max', end="2023-01-01").dataframe
prices_df_val = getData.loader(tickers=tickers, interval="1d", start='2023-01-01').dataframe

datasets = getData.preprocessor(prices_df, preprocess_param=preprocess_param).dataset
val_sets = getData.preprocessor(prices_df_val, preprocess_param=v_preprocess_param).dataset

## Initialize Dataloader

In [357]:
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

class PriceHistoryDataset(Dataset):
    def __init__(self, dataset, to_predict=['Open', 'High', 'Low', 'Close']):
        y = dataset['y'][:,:,self.__map_to_indices(to_predict)]
        x = dataset['x']
        self.columns = dataset['columns']
        self.initial_price = dataset['initial price']
        self.current_date = dataset['current date']
        
        self.X = torch.from_numpy(x).float()
        self.y = torch.from_numpy(y).float()
        
    def __len__(self):
        return self.X.shape[0]
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
    
    def __map_to_indices(self, args):
        mapping = {'Open': 0, 'High': 1, 'Low': 2, 'Close': 3}
        return [mapping[arg] for arg in args]

In [358]:
column_name = datasets['columns']

to_predict = ['Close']

train_set = PriceHistoryDataset(datasets['train'], to_predict)
test_set = PriceHistoryDataset(datasets['test'], to_predict)
val_set = PriceHistoryDataset(val_sets, to_predict)

train_loader= DataLoader(train_set, batch_size=256, shuffle=False)
test_loader = DataLoader(test_set, batch_size=256, shuffle=False)
val_loader = DataLoader(val_set, batch_size=256, shuffle=False)

## Train model

In [441]:
import torch.nn as nn
import pytorch_lightning as pl

class LSTMModel(pl.LightningModule):

    def __init__(self, hidden_size, lstm_layers, head_layers, input_size=8, output_size=3, dropout=0.05):
        super(LSTMModel, self).__init__()
        
        self.gru = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=lstm_layers)
        
        self.linears = nn.ModuleList([
            nn.Linear(hidden_size, hidden_size) for _ in range(head_layers-1)
        ])
        
        self.out_linear = nn.Linear(hidden_size, output_size)
        
        # keep track of losses function.
        self.train_losses = []
        self.test_losses = []
        self.loss_func = nn.L1Loss()
        
        
    def forward(self, x):
        lstm_out, _ = self.gru(x)
        o = lstm_out[:,-1:,:]
        
        for linear in self.linears:
            o = linear(o)
        
        output = self.out_linear(o)
        return output


    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss_func(y, y_hat)#.mean()
        self.train_losses.append(loss)
        return loss

    
    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss_func(y, y_hat)#.mean()
        self.test_losses.append(loss)
        return loss
    
    
    def on_test_epoch_end(self):
        avg_loss = torch.stack(self.test_losses).mean()
        print(f'Test Loss: {avg_loss}')
        return {'L1_loss': avg_loss}
    
    
    def on_train_epoch_end(self):
        avg_loss = torch.stack(self.train_losses).mean()
        print(f'Train Loss: {avg_loss}')
        return {'L1_loss': avg_loss}
    
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.01)


# Initialize the model and trainer
model = LSTMModel(output_size=len(to_predict), hidden_size=128, lstm_layers=5, head_layers=2, dropout=0.0)
# model = LSTMModel.load_from_checkpoint("/model/lightning_logs/vsrsion_.../checkpoints/....ckpt")

In [442]:
print("Number of parameters:", sum(p.numel() for p in model.parameters()))
print("Number of layers:", len(list(model.children())))

Number of parameters: 615681
Number of layers: 4


In [443]:
# train the model
trainer = pl.Trainer(default_root_dir="model/", max_epochs=2000)

# Train the model
trainer.fit(model, train_loader, test_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type       | Params
------------------------------------------
0 | gru        | LSTM       | 599 K 
1 | linears    | ModuleList | 16.5 K
2 | out_linear | Linear     | 129   
3 | loss_func  | L1Loss     | 0     
------------------------------------------
615 K     Trainable params
0         Non-trainable params
615 K     Total params
2.463     Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]

Train Loss: 0.2592051029205322
Train Loss: 0.22667977213859558
Train Loss: 0.21543721854686737
Train Loss: 0.20782046020030975
Train Loss: 0.4430702328681946
Train Loss: 0.41358327865600586
Train Loss: 0.39541900157928467
Train Loss: 0.3890368938446045
Train Loss: 0.3690817952156067
Train Loss: 0.3517449200153351
Train Loss: 0.33669567108154297
Train Loss: 0.32104912400245667
Train Loss: 0.30618926882743835
Train Loss: 0.2905607521533966
Train Loss: 0.2756863534450531
Train Loss: 0.2618788182735443
Train Loss: 0.24914328753948212
Train Loss: 0.23775362968444824
Train Loss: 0.22763007879257202
Train Loss: 0.21848131716251373
Train Loss: 0.21010039746761322
Train Loss: 0.2022065371274948
Train Loss: 0.1951187700033188
Train Loss: 0.1889791488647461
Train Loss: 0.1838812679052353
Train Loss: 0.1786920577287674
Train Loss: 0.17354103922843933
Train Loss: 0.16871875524520874
Train Loss: 0.16410665214061737
Train Loss: 0.15977592766284943
Train Loss: 0.15570507943630219
Train Loss: 0.1518512

`Trainer.fit` stopped: `max_epochs=2000` reached.


In [444]:
result = trainer.test(model=model, dataloaders=val_loader)
print(result)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.



Testing: |          | 0/? [00:00<?, ?it/s]

Test Loss: 0.05884575843811035
[{}]


## Use model to predict btc Price

In [445]:
x = val_loader.dataset.X
y = val_loader.dataset.y
ref = val_loader.dataset.initial_price
date = val_loader.dataset.current_date

with torch.no_grad():
    y_hat = model(x)
    
predict_out = np.multiply( (np.array(y_hat)+1).flatten(), ref)    

print({
    'predict':ref*(1+np.array(y_hat)), 
    'actual':ref*(1+np.array(y)), 
    'losses':nn.L1Loss()(y, y_hat),
    'date':date,
})


{'predict': array([[[26192.94712865, 26096.51282143, 26187.2097397 , ...,
         32764.27369493, 32655.60019463, 33027.80368398]],

       [[27136.57148337, 27036.66304395, 27130.62739985, ...,
         33944.63596847, 33832.04740809, 34217.65986114]],

       [[26826.49428941, 26727.72745804, 26820.61812621, ...,
         33556.76613467, 33445.46407228, 33826.67030816]],

       ...,

       [[36171.37419829, 36038.20241754, 36163.45110197, ...,
         45246.10378258, 45096.03018354, 45610.02777323]],

       [[35872.89242461, 35740.81956118, 35865.03470872, ...,
         44872.73844581, 44723.90323583, 45233.65938003]],

       [[34833.28445095, 34705.03910162, 34825.65445419, ...,
         43572.31203647, 43427.7901188 , 43922.77336579]]]), 'actual': array([[[26096.20569737, 26000.12756209, 26090.48949897, ...,
         32643.26162567, 32534.98950174, 32905.81828903]],

       [[26286.36310109, 26189.58486523, 26280.60525   , ...,
         32881.12600917, 32772.06492969, 33145.5

In [447]:
import plotly.graph_objects as go

import pandas as pd
from datetime import datetime

df = prices_df_val.iloc[200:]

fig = go.Figure(data=[
        go.Candlestick(
            x=df['Date'],
            open=df['Open'],
            high=df['High'],
            low=df['Low'],
            close=df['Close'],
            name='Actual Price'
        ),
        go.Scatter(
            x=date,
            y=predict_out,
            line=dict(color='blue'),
            name='Predicted Price'
            )
    ]).update_layout(title_text=tickers+' price predictions', title_x=0.3)

fig.show()