In [337]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import pandas_ta as ta
import getData

## Getting stock price data

In [355]:
preprocess_param = {
    'win_size':22,
    'stride':1,
    'split':True,
    'number_y':1,
    'random_state':420,
    'test_size':0.2,
}

v_preprocess_param = {
    'win_size':22,
    'stride':1,
    'split':False,
    'number_y':1,
    'random_state':420,
}

In [356]:
tickers = 'BTC-USD'

prices_df = getData.loader(tickers=tickers, interval="1d", period='max', end="2023-01-01").dataframe
prices_df_val = getData.loader(tickers=tickers, interval="1d", start='2023-01-01').dataframe

datasets = getData.preprocessor(prices_df, preprocess_param=preprocess_param).dataset
val_sets = getData.preprocessor(prices_df_val, preprocess_param=v_preprocess_param).dataset

## Initialize Dataloader

In [357]:
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

class PriceHistoryDataset(Dataset):
    def __init__(self, dataset, to_predict=['Open', 'High', 'Low', 'Close']):
        y = dataset['y'][:,:,self.__map_to_indices(to_predict)]
        x = dataset['x']
        self.columns = dataset['columns']
        self.initial_price = dataset['initial price']
        self.current_date = dataset['current date']
        
        self.X = torch.from_numpy(x).float()
        self.y = torch.from_numpy(y).float()
        
    def __len__(self):
        return self.X.shape[0]
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
    
    def __map_to_indices(self, args):
        mapping = {'Open': 0, 'High': 1, 'Low': 2, 'Close': 3}
        return [mapping[arg] for arg in args]

In [358]:
column_name = datasets['columns']

to_predict = ['Close']

train_set = PriceHistoryDataset(datasets['train'], to_predict)
test_set = PriceHistoryDataset(datasets['test'], to_predict)
val_set = PriceHistoryDataset(val_sets, to_predict)

train_loader= DataLoader(train_set, batch_size=256, shuffle=False)
test_loader = DataLoader(test_set, batch_size=256, shuffle=False)
val_loader = DataLoader(val_set, batch_size=256, shuffle=False)

## Train model

In [413]:
import torch.nn as nn
import pytorch_lightning as pl

class LSTMModel(pl.LightningModule):

    def __init__(self, hidden_size, lstm_layers, head_layers, input_size=8, output_size=3, dropout=0.05):
        super(LSTMModel, self).__init__()
        
        self.gru = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=lstm_layers)
        
        self.linears = nn.ModuleList([
            nn.Linear(hidden_size, hidden_size) for _ in range(head_layers-1)
        ])
        
        self.out_linear = nn.Linear(hidden_size, output_size)
        
        # keep track of losses function.
        self.train_losses = []
        self.test_losses = []
        self.loss_func = nn.L1Loss()
        
        
    def forward(self, x):
        lstm_out, _ = self.gru(x)
        o = lstm_out[:,-1:,:]
        
        for linear in self.linears:
            o = linear(o)
        
        output = self.out_linear(o)
        return output


    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss_func(y, y_hat)#.mean()
        self.train_losses.append(loss)
        return loss

    
    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss_func(y, y_hat)#.mean()
        self.test_losses.append(loss)
        return loss
    
    
    def on_test_epoch_end(self):
        avg_loss = torch.stack(self.test_losses).mean()
        print(f'Test Loss: {avg_loss}')
        return {'L1_loss': avg_loss}
    
    
    def on_train_epoch_end(self):
        avg_loss = torch.stack(self.train_losses).mean()
        print(f'Train Loss: {avg_loss}')
        return {'L1_loss': avg_loss}
    
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.01)


# Initialize the model and trainer
model = LSTMModel(output_size=len(to_predict), hidden_size=128, lstm_layers=5, head_layers=1, dropout=0.0)
# model = LSTMModel.load_from_checkpoint("/model/lightning_logs/vsrsion_.../checkpoints/....ckpt")

In [414]:
print("Number of parameters:", sum(p.numel() for p in model.parameters()))
print("Number of layers:", len(list(model.children())))

Number of parameters: 2178305
Number of layers: 4


In [415]:
# train the model
trainer = pl.Trainer(default_root_dir="model/", max_epochs=2000)

# Train the model
trainer.fit(model, train_loader, test_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type       | Params
------------------------------------------
0 | gru        | GRU        | 2.2 M 
1 | linears    | ModuleList | 0     
2 | out_linear | Linear     | 257   
3 | loss_func  | L1Loss     | 0     
------------------------------------------
2.2 M     Trainable params
0         Non-trainable params
2.2 M     Total params
8.713     Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]

Train Loss: 1.317134141921997
Train Loss: 0.8205096125602722
Train Loss: 0.627629816532135
Train Loss: 0.5258581042289734
Train Loss: 0.460901141166687
Train Loss: 0.41622135043144226
Train Loss: 0.3842693269252777
Train Loss: 0.36048194766044617
Train Loss: 0.34196341037750244
Train Loss: 0.3271113932132721
Train Loss: 0.31503671407699585
Train Loss: 0.305474191904068
Train Loss: 0.2974328398704529
Train Loss: 0.2905840277671814
Train Loss: 0.28457939624786377
Train Loss: 0.2796282172203064
Train Loss: 0.27535876631736755
Train Loss: 0.2716987431049347
Train Loss: 0.2679847478866577
Train Loss: 0.2647670805454254
Train Loss: 0.2617131173610687
Train Loss: 0.25931400060653687
Train Loss: 0.25694510340690613
Train Loss: 0.2548977732658386
Train Loss: 0.2529011368751526
Train Loss: 0.251144140958786
Train Loss: 0.24951927363872528
Train Loss: 0.24806205928325653
Train Loss: 0.24649740755558014
Train Loss: 0.24514292180538177
Train Loss: 0.24391278624534607
Train Loss: 0.24285782873630524

`Trainer.fit` stopped: `max_epochs=3000` reached.


Train Loss: 0.20320424437522888


In [416]:
result = trainer.test(model=model, dataloaders=val_loader)
print(result)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

Test Loss: 0.13442549109458923
[{}]


## Use model to predict btc Price

In [417]:
x = val_loader.dataset.X
y = val_loader.dataset.y
ref = val_loader.dataset.initial_price
date = val_loader.dataset.current_date

with torch.no_grad():
    y_hat = model(x)
    
print({
    'predict':ref*(1+np.array(y_hat)), 
    'actual':ref*(1+np.array(y)), 
    'losses':nn.L1Loss()(y, y_hat),
    'date':date,
})


{'predict': array([[[37978.48024124, 37838.65525654, 37970.16131049, ...,
         47506.57934862, 47349.00813818, 47888.68482277]],

       [[34528.58326685, 34401.45973278, 34521.02001285, ...,
         43191.1669541 , 43047.90922958, 43538.56265284]],

       [[34528.58326685, 34401.45973278, 34521.02001285, ...,
         43191.1669541 , 43047.90922958, 43538.56265284]],

       ...,

       [[34528.58326685, 34401.45973278, 34521.02001285, ...,
         43191.1669541 , 43047.90922958, 43538.56265284]],

       [[34528.58326685, 34401.45973278, 34521.02001285, ...,
         43191.1669541 , 43047.90922958, 43538.56265284]],

       [[34528.59039921, 34401.46683887, 34521.02714363, ...,
         43191.17587582, 43047.91812171, 43538.57164633]]]), 'actual': array([[[26096.20569737, 26000.12756209, 26090.48949897, ...,
         32643.26162567, 32534.98950174, 32905.81828903]],

       [[26286.36310109, 26189.58486523, 26280.60525   , ...,
         32881.12600917, 32772.06492969, 33145.5

In [422]:
predict_out = np.multiply( (np.array(y_hat)+1).flatten(), ref)

In [423]:
import plotly.graph_objects as go

import pandas as pd
from datetime import datetime

df = prices_df_val.iloc[200:]

fig = go.Figure(data=[
        go.Candlestick(
            x=df['Date'],
            open=df['Open'],
            high=df['High'],
            low=df['Low'],
            close=df['Close'],
            name='Actual Price'
        ),
        go.Scatter(
            x=date,
            y=predict_out,
            line=dict(color='blue'),
            name='Predicted Price'
            )
    ]).update_layout(title_text=tickers+' price predictions', title_x=0.3)

fig.show()