In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import pandas_ta as ta
import getData

## Getting stock price data

In [2]:
features_x = ['High_delta', 'Low_delta', 'Close_delta', 'RSI_14', 'WMA_100_delta', 'WMA_200_delta']
features_y = ['Close_delta']
number_y = 1
random_state = 420
test_size = 0.2
win_size = 22

preprocess_param = {
    'win_size':win_size,
    'stride':1,
    'split':True,
    'test_size':test_size,
    'number_y': number_y,
    'random_state':random_state,
    'features_x':features_x,
    'features_y':features_y,
    'convert_to_torch':True,
}

v_preprocess_param = {
    'win_size':win_size,
    'stride':1,
    'split':False,
    'number_y': number_y,
    'random_state':random_state,
    'features_x':features_x,
    'features_y':features_y,
    'convert_to_torch':True,
}

In [3]:
tickers = 'BTC-USD'

prices_df = getData.loader(tickers=tickers, interval="1d", period='max', end="2023-01-01").dataframe
prices_df_val = getData.loader(tickers=tickers, interval="1d", start='2023-01-01').dataframe

datasets = getData.preprocessor(prices_df, preprocess_param=preprocess_param).dataset
val_sets = getData.preprocessor(prices_df_val, preprocess_param=v_preprocess_param).dataset

## Initialize Dataloader

In [24]:
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

class PriceHistoryDataset(Dataset):
    def __init__(self, dataset, features_x=['High_delta', 'Low_delta', 'Close_delta', 'RSI_14', 'WMA_100_delta', 'WMA_200_delta'], features_y=['Close_delta'], num_y=1):
        self.dataframes = dataset
        self.features_x = features_x
        self.features_y = features_y
        self.num_y = num_y
        
        self.X, self.y = self.preprocess(dataset)
        
    def __len__(self):
        return self.X.shape[0]
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
    
    def preprocess(self, dataset):
        X, y = self.__split_X_y(dataset, num_y=self.num_y)
        X = self.__feature_select(X, self.features_x)
        y = self.__feature_select(y, self.features_y)
        X = self.__to_torch(X)
        y = self.__to_torch(y)
        return X, y
    
    def __split_X_y(self, dataset, num_y):
        X = [df.iloc[:-num_y] for df in dataset]
        y = [df.iloc[-num_y:] for df in dataset]
        return X, y
    
    def __feature_select(self, dataset, feature):
        return [df[feature] for df in dataset]
    
    def __to_torch(self, dataset):
        arr = np.stack([df.to_numpy() for df in dataset])
        return torch.from_numpy(arr).float()
    

In [25]:
features_x=['High_delta', 'Low_delta', 'Close_delta', 'RSI_14', 'WMA_100_delta', 'WMA_200_delta']
features_y=['Close_delta']
num_y=1

train_set = PriceHistoryDataset(datasets['train'], features_x, features_y, num_y)
test_set = PriceHistoryDataset(datasets['test'],  features_x, features_y, num_y)
val_set = PriceHistoryDataset(val_sets,  features_x, features_y, num_y)

train_loader= DataLoader(train_set, batch_size=256, shuffle=False)
test_loader = DataLoader(test_set, batch_size=256, shuffle=False)
val_loader = DataLoader(val_set, batch_size=256, shuffle=False)

## Train model

In [26]:
import torch.nn as nn
import pytorch_lightning as pl

class LSTMModel(pl.LightningModule):

    def __init__(self, hidden_size, lstm_layers, input_size=8, output_size=3, dropout=0.05):
        super(LSTMModel, self).__init__()
        
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=lstm_layers, dropout=dropout)
        
        self.out_linear = nn.Linear(hidden_size, output_size)
        
        # keep track of losses function.
        self.train_losses = []
        self.test_losses = []
        self.loss_func = nn.L1Loss()
        
        
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        lstm_out = lstm_out[:,-1:,:]
        
        output = self.out_linear(lstm_out)
        return output


    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss_func(y, y_hat)#.mean()
        self.train_losses.append(loss)
        return loss

    
    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss_func(y, y_hat)#.mean()
        self.test_losses.append(loss)
        return loss
    
    
    def on_test_epoch_end(self):
        avg_loss = torch.stack(self.test_losses).mean()
        print(f'Test Loss: {avg_loss}')
        return {'L1_loss': avg_loss}
    
    
    def on_train_epoch_end(self):
        avg_loss = torch.stack(self.train_losses).mean()
        print(f'Train Loss: {avg_loss}')
        return {'L1_loss': avg_loss}
    
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.02)


# Initialize the model and trainer
model = LSTMModel(input_size=len(features_x), output_size=len(features_y), hidden_size=256, lstm_layers=6, dropout=0.0)

# checkpoint = torch.load("model\LSTM_BTC\checkpoints\epoch=999-step=9000.ckpt")
# model.load_state_dict(checkpoint['state_dict'])

In [27]:
print("Number of parameters:", sum(p.numel() for p in model.parameters()))
print("Number of layers:", len(list(model.children())))

Number of parameters: 2904321
Number of layers: 3


In [29]:
# train the model
trainer = pl.Trainer(default_root_dir="model/", max_epochs=20)

# Train the model
trainer.fit(model, train_loader, test_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type   | Params
--------------------------------------
0 | lstm       | LSTM   | 2.9 M 
1 | out_linear | Linear | 257   
2 | loss_func  | L1Loss | 0     
--------------------------------------
2.9 M     Trainable params
0         Non-trainable params
2.9 M     Total params
11.617    Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]

RuntimeError: input.size(-1) must be equal to input_size. Expected 8, got 6

In [None]:
result = trainer.test(model=model, dataloaders=val_loader)
print(result)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\dylan\AppData\Local\Programs\Python\Python310\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

Test Loss: 0.09540335088968277
[{}]


## Use model to predict btc Price

In [None]:
x = val_loader.dataset.X
y = val_loader.dataset.y
ref = val_loader.dataset.initial_price
date = val_loader.dataset.current_date

with torch.no_grad():
    y_hat = model(x)
    
predict_out = np.multiply( (np.array(y_hat)+1)[:,0,:].T, ref)

print({
    'losses':nn.L1Loss()(y, y_hat),
})


{'losses': tensor(0.0954)}


In [None]:
import plotly.graph_objects as go

import pandas as pd
from datetime import datetime

df = prices_df_val.iloc[200:]

fig = go.Figure(data=[
        go.Candlestick(
            x=df['Date'],
            open=df['Open'],
            high=df['High'],
            low=df['Low'],
            close=df['Close'],
            name='Actual Price'
        ),
        go.Scatter(
            x=date,
            y=predict_out[0],
            line=dict(color='red'),
            name='Pred. High'
        ),
        go.Scatter(
            x=date,
            y=predict_out[1],
            line=dict(color='blue'),
            name='Pred. Low'
        ),
        go.Scatter(
            x=date,
            y=predict_out[2],
            line=dict(color='green'),
            name='Pred. Close'
        ),
    ]).update_layout(title_text=tickers+' price predictions', title_x=0.3)

fig.show()