In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import pandas_ta as ta
import getData

## Getting stock price data

In [2]:
tickers = 'BTC-USD'

prices_df = getData.loader(tickers=tickers, interval="1d", period='max', end="2023-01-01").dataframe
prices_df_val = getData.loader(tickers=tickers, interval="1d", start='2023-01-01').dataframe

## Initialize Dataloader

In [3]:
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader, random_split

class PriceHistoryDataset(Dataset):
    def __init__(self, df, preprocess_param):
        self.dataframe = df
        self.preprocess_param = preprocess_param
        
        preprocessor = getData.preprocessor(df, preprocess_param)
        self.X, self.y = preprocessor.dataset
        self.date = preprocessor.date
        
    def __len__(self):
        return self.X.shape[0]
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
    

In [4]:
features_x = ['High_delta', 'Low_delta', 'Close_delta', 'RSI_14', 'WMA_100_delta', 'WMA_200_delta']
features_y = ['Close_delta']
number_y = 1
win_size = 7
test_size = 0.2

preprocess_param = {
    'win_size':win_size,
    'stride':1,
    'number_y': number_y,
    'features_x':features_x,
    'features_y':features_y,
    'convert_to_torch':True,
}

v_preprocess_param = {
    'win_size':win_size,
    'stride':1,
    'number_y': number_y,
    'features_x':features_x,
    'features_y':features_y,
    'convert_to_torch':True,
}

dataset = PriceHistoryDataset(prices_df, preprocess_param)
val_set = PriceHistoryDataset(prices_df_val,  v_preprocess_param)

# split train test set
train_set, test_set = random_split(dataset, lengths=[1-test_size,  test_size])

train_loader= DataLoader(train_set, batch_size=256, shuffle=False)
test_loader = DataLoader(test_set, batch_size=256, shuffle=False)
val_loader = DataLoader(val_set, batch_size=256, shuffle=False)

## Train model

In [15]:
import torch.nn as nn
import pytorch_lightning as pl

class LSTMModel(pl.LightningModule):

    def __init__(self, hidden_size, lstm_layers, input_size=8, output_size=3, dropout=0.05):
        super(LSTMModel, self).__init__()
        
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=lstm_layers, dropout=dropout)
        
        self.out_linear = nn.Linear(hidden_size, output_size)
        
        # keep track of losses function.
        self.train_losses = []
        self.test_losses = []
        self.loss_func = nn.L1Loss()
        
        
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        lstm_out = lstm_out[:,-1:,:]
        
        output = self.out_linear(lstm_out)
        return output


    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss_func(y, y_hat)#.mean()
        self.train_losses.append(loss)
        return loss

    
    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss_func(y, y_hat)#.mean()
        self.test_losses.append(loss)
        return loss
    
    
    def on_test_epoch_end(self):
        avg_loss = torch.stack(self.test_losses).mean()
        print(f'Test Loss: {avg_loss}')
        return {'L1_loss': avg_loss}
    
    
    def on_train_epoch_end(self):
        avg_loss = torch.stack(self.train_losses).mean()
        print(f'Train Loss: {avg_loss}')
        return {'L1_loss': avg_loss}
    
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.02)


# Initialize the model and trainer
model = LSTMModel(input_size=len(features_x), output_size=len(features_y), hidden_size=64, lstm_layers=6, dropout=0.0)

# checkpoint = torch.load("model\LSTM_BTC\checkpoints\epoch=999-step=9000.ckpt")
# model.load_state_dict(checkpoint['state_dict'])

In [16]:
print("Number of parameters:", sum(p.numel() for p in model.parameters()))
print("Number of layers:", len(list(model.children())))

Number of parameters: 184897
Number of layers: 3


In [17]:
# train the model
trainer = pl.Trainer(default_root_dir="model/", max_epochs=300)

# Train the model
trainer.fit(model, train_loader, test_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

You passed in a `val_dataloader` but have no `validation_step`. Skipping val loop.

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type   | Params
--------------------------------------
0 | lstm       | LSTM   | 184 K 
1 | out_linear | Linear | 65    
2 | loss_func  | L1Loss | 0     
--------------------------------------
184 K     Trainable params
0         Non-trainable params
184 K     Total params
0.740     Total estimated model params size (MB)



The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


The number of training batches (9) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.



Training: |          | 0/? [00:00<?, ?it/s]

Train Loss: 0.25461435317993164
Train Loss: 0.14376579225063324
Train Loss: 0.10498038679361343
Train Loss: 0.08528586477041245
Train Loss: 0.07319655269384384
Train Loss: 0.06513329595327377
Train Loss: 0.059354301542043686
Train Loss: 0.05500384420156479
Train Loss: 0.051624979823827744
Train Loss: 0.048915017396211624
Train Loss: 0.04670380428433418
Train Loss: 0.044851116836071014
Train Loss: 0.0432945191860199
Train Loss: 0.0419512577354908
Train Loss: 0.040791530162096024
Train Loss: 0.039778340607881546
Train Loss: 0.03888154774904251
Train Loss: 0.038084566593170166
Train Loss: 0.03736812621355057
Train Loss: 0.03672530874609947
Train Loss: 0.03614521771669388
Train Loss: 0.035613540560007095
Train Loss: 0.035128187388181686
Train Loss: 0.03468842804431915
Train Loss: 0.03427521511912346
Train Loss: 0.03389672935009003
Train Loss: 0.033545851707458496
Train Loss: 0.033214643597602844
Train Loss: 0.03291332349181175
Train Loss: 0.03262097388505936
Train Loss: 0.03235310316085815

`Trainer.fit` stopped: `max_epochs=300` reached.


Train Loss: 0.017815107479691505


In [18]:
result = trainer.test(model=model, dataloaders=val_loader)
print(result)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.



Testing: |          | 0/? [00:00<?, ?it/s]

Test Loss: 0.01959775574505329
[{}]


## Use model to predict btc Price

In [19]:
x = val_loader.dataset.X
y = val_loader.dataset.y
date = val_loader.dataset.date

with torch.no_grad():
    y_hat = model(x)

print({
    'losses':nn.L1Loss()(y, y_hat),
})

predict_out = pd.DataFrame({
    'Date':[d.iloc[-1] for d in date],
    'y hat':np.array(y_hat).reshape(-1),
})

df = val_loader.dataset.dataframe.iloc[206:]

source_price = df['Open'].to_numpy()
p = predict_out['y hat'].to_numpy()
predict_out['Predict Close'] = (source_price * p) + source_price

{'losses': tensor(0.0196)}


In [20]:
import plotly.graph_objects as go

import pandas as pd
from datetime import datetime

df = val_loader.dataset.dataframe.iloc[221:]

date = val_loader.dataset.date
y_hat = np.array(y_hat).reshape(-1)

fig = go.Figure(data=[
        go.Candlestick(
            x=df['Date'],
            open=df['Open'],
            high=df['High'],
            low=df['Low'],
            close=df['Close'],
            name='Actual Price'
        ),
        go.Scatter(
            x=df['Date'],
            y=df['Close'],
            line=dict(color='blue'),
            name='Actual Close'
        ),
        go.Scatter(
            x=predict_out['Date'],
            y=predict_out['Predict Close'],
            line=dict(color='red'),
            name='Pred. Close'
        ),
    ]).update_layout(title_text=tickers+' price predictions', title_x=0.3)

fig.show()