In [1]:
import torch
from torch import nn

from tqdm.notebook import tqdm

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, TQDMProgressBar
from pytorch_lightning.loggers import TensorBoardLogger, CSVLogger

# Trade some float32 matmul precision for speed ('medium' setting):
# less precise, but faster.
torch.set_float32_matmul_precision('medium')

In [2]:
from Data.DataModels.datasetFromSequences import SequenceDataset
from Data.DataModels.dataModule import DataModule
from Models.modelLSTM import LSTMModel
from Models.modelMAMBA import MAMBAModel
from Models.modelTransformer import TransformerModel
from Models.moduleAdamW import AdamWModule
from Data.preprocessing import PreprocessCSVtoSequences

In [3]:
# Fix the random seed (42) through Lightning's helper so repeated runs of the
# notebook start from the same random state.
pl.seed_everything(42)

Seed set to 42


42

# Hyperparameters

In [4]:
# --- Training schedule / optimisation ---------------------------------------
N_EPOCHS = 10           # passed to the Trainer as max_epochs
BATCH_SIZE = 64         # passed to DataModule as batch_size
LEARNING_RATE = 1e-4    # passed to AdamWModule as learning_rate
SEQUENCE_LENGTH = 120   # passed to the preprocessor as sequence_length

# --- Dataset location and split ----------------------------------------------
VAL_PERC = 0.1          # validation fraction handed to the preprocessor
TEST_PERC = 0.1         # test fraction handed to the preprocessor
CSV_PATH = 'Data/CSV/Binance_BTCUSDT_2024_minute.csv'
DATE_COLUMN = 'Date'        # name of the timestamp column in the CSV
COLUMN_TO_SHIFT = 'Close'   # column handed to the preprocessor as column_to_shift

# Data

In [5]:
# Configure the CSV -> sequences preprocessor with the constants from the
# hyperparameter cell; scaling is switched on via use_scaler.
btc_data = PreprocessCSVtoSequences(
    csv_path=CSV_PATH,
    date_column=DATE_COLUMN,
    column_to_shift=COLUMN_TO_SHIFT,
    use_scaler=True,
    sequence_length=SEQUENCE_LENGTH,
    val_percentage=VAL_PERC,
    test_percentage=TEST_PERC,
)

# Run the preprocessing; the following cells read train_sequences,
# val_sequences and test_sequences from btc_data.
btc_data.preprocess()

Getting the change of price in a day...


  0%|          | 0/201460 [00:00<?, ?it/s]

Transforming date column to day_of_week, day_of_month, week_of_year and year columns...


  0%|          | 0/201460 [00:00<?, ?it/s]

Current shape after transformation of date column:  (201460, 10)
Current train and test shapes:  (161168, 10) (20146, 10)
Creating sequences from pandas DataFrame...


  0%|          | 0/161048 [00:00<?, ?it/s]

  0%|          | 0/20026 [00:00<?, ?it/s]

  0%|          | 0/20026 [00:00<?, ?it/s]

Train shape:  (120, 10)
Val shape:  (120, 10)
Test shape:  (120, 10)


# PyTorch Dataset

In [6]:
# Hand the three splits to the project's DataModule. SequenceDataset is passed
# as the dataset class and BATCH_SIZE as the batch size.
data_module = DataModule(
    train_sequences=btc_data.train_sequences,
    val_sequences=btc_data.val_sequences,
    test_sequences=btc_data.test_sequences,
    dataset=SequenceDataset,
    batch_size=BATCH_SIZE,
)
# Explicit setup call before the module is given to a Trainer
# (presumably builds the underlying datasets -- confirm in DataModule).
data_module.setup()

In [7]:
# Standalone dataset over the training sequences, used in the next cell to
# inspect a single sample.
train_dataset = SequenceDataset(btc_data.train_sequences)

In [8]:
# Sanity check on the first sample only (the loop exits after one iteration):
# print the shape of the input sequence and of the label, then the label value.
for item in train_dataset:
    for field in ('sequence', 'label'):
        print(item[field].shape)
    print(item['label'])
    break

torch.Size([120, 10])
torch.Size([])
tensor(-0.7706)


# LSTM Model

In [9]:
# Lightning module wrapping the LSTM model, trained with MSE loss.
# n_features is read from the data: the second dimension of the first element
# of the first training entry (presumably a (sequence, label) pair -- confirm).
module = AdamWModule(
    n_features=btc_data.train_sequences[0][0].shape[1],
    model=LSTMModel,
    criterion=nn.MSELoss(),
    learning_rate=LEARNING_RATE,
)

/home/agata/.local/lib/python3.10/site-packages/pytorch_lightning/utilities/parsing.py:208: Attribute 'criterion' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['criterion'])`.


In [10]:
# Names used to build the checkpoint and log paths for this run.
predictor_name = 'btc'
model_name = 'lstm'

# Keep the three checkpoints with the lowest 'val_loss'.
# NOTE(review): the filename has no {epoch}/{step} placeholder, so the three
# kept checkpoints presumably differ only by a Lightning-added version suffix
# -- confirm this is the intended naming.
checkpoint_callback = ModelCheckpoint(
    dirpath=f'Checkpoints/{predictor_name}-price/best_models',
    filename=f'{model_name}-{predictor_name}-checkpoint',
    save_top_k=3,
    monitor='val_loss',
    mode='min',
)

# TensorBoard and CSV loggers write to the same directory under the same run name.
tensorboard_logger = TensorBoardLogger(
    save_dir=f'Checkpoints/{predictor_name}-price/train_logs',
    name=f'{model_name}-{predictor_name}-logs',
)

csv_logger = CSVLogger(
    save_dir=f'Checkpoints/{predictor_name}-price/train_logs',
    name=f'{model_name}-{predictor_name}-logs',
)

# Stop training once 'val_loss' has failed to improve for 2 validation checks.
# The monitor key must match the logged metric name exactly (the same key the
# checkpoint callback uses, with no surrounding whitespace), and the callback
# only takes effect when it is registered with the Trainer below.
early_stopping_callback = EarlyStopping(monitor='val_loss', mode='min', patience=2)

trainer = pl.Trainer(
    logger=[tensorboard_logger, csv_logger],
    callbacks=[
        checkpoint_callback,
        early_stopping_callback,
        TQDMProgressBar(refresh_rate=30),
    ],
    max_epochs=N_EPOCHS,  # upper bound; early stopping may end the run sooner
    accelerator='cuda',
    devices=1,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [11]:
# Train the LSTM-based module with the data supplied by data_module
# (the Trainer above caps the run at N_EPOCHS epochs).
trainer.fit(module, data_module)

2024-06-19 21:44:17.628425: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-19 21:44:18.038535: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
/home/agata/.local/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:652: Checkpoint directory /home/agata/Desktop/Faks/Strojno_i_dubinsko_ucenje/Zavrsni/Checkpoints/btc-price/best_models exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type      | Params | Mode 
------------------------------------------------
0 | model 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


# Transformer Model

In [12]:
# Lightning module wrapping the Transformer model, trained with MSE loss.
# n_features is read from the data: the second dimension of the first element
# of the first training entry (presumably a (sequence, label) pair -- confirm).
module = AdamWModule(
    n_features=btc_data.train_sequences[0][0].shape[1],
    model=TransformerModel,
    criterion=nn.MSELoss(),
    learning_rate=LEARNING_RATE,
)

/home/agata/.local/lib/python3.10/site-packages/pytorch_lightning/utilities/parsing.py:208: Attribute 'criterion' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['criterion'])`.


In [13]:
# Names used to build the checkpoint and log paths for this run.
predictor_name = 'btc'
model_name = 'transformer'

# Keep the three checkpoints with the lowest 'val_loss'.
# NOTE(review): the filename has no {epoch}/{step} placeholder, so the three
# kept checkpoints presumably differ only by a Lightning-added version suffix
# -- confirm this is the intended naming.
checkpoint_callback = ModelCheckpoint(
    dirpath=f'Checkpoints/{predictor_name}-price/best_models',
    filename=f'{model_name}-{predictor_name}-checkpoint',
    save_top_k=3,
    monitor='val_loss',
    mode='min',
)

# TensorBoard and CSV loggers write to the same directory under the same run name.
tensorboard_logger = TensorBoardLogger(
    save_dir=f'Checkpoints/{predictor_name}-price/train_logs',
    name=f'{model_name}-{predictor_name}-logs',
)

csv_logger = CSVLogger(
    save_dir=f'Checkpoints/{predictor_name}-price/train_logs',
    name=f'{model_name}-{predictor_name}-logs',
)

# Stop training once 'val_loss' has failed to improve for 2 validation checks.
# The monitor key must match the logged metric name exactly (the same key the
# checkpoint callback uses, with no surrounding whitespace), and the callback
# only takes effect when it is registered with the Trainer below.
early_stopping_callback = EarlyStopping(monitor='val_loss', mode='min', patience=2)

trainer = pl.Trainer(
    logger=[tensorboard_logger, csv_logger],
    callbacks=[
        checkpoint_callback,
        early_stopping_callback,
        TQDMProgressBar(refresh_rate=30),
    ],
    max_epochs=N_EPOCHS,  # upper bound; early stopping may end the run sooner
    accelerator='cuda',
    devices=1,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [14]:
# Train the Transformer-based module with the data supplied by data_module
# (the Trainer above caps the run at N_EPOCHS epochs).
trainer.fit(module, data_module)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | TransformerModel | 50.9 K | train
1 | criterion | MSELoss          | 0      | train
-------------------------------------------------------
50.9 K    Trainable params
0         Non-trainable params
50.9 K    Total params
0.203     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


# MAMBA Model

In [15]:
# Lightning module wrapping the MAMBA model, trained with MSE loss.
# n_features is read from the data: the second dimension of the first element
# of the first training entry (presumably a (sequence, label) pair -- confirm).
module = AdamWModule(
    n_features=btc_data.train_sequences[0][0].shape[1],
    model=MAMBAModel,
    criterion=nn.MSELoss(),
    learning_rate=LEARNING_RATE,
)

In [16]:
# Names used to build the checkpoint and log paths for this run.
predictor_name = 'btc'
model_name = 'mamba'

# Keep the three checkpoints with the lowest 'val_loss'.
# NOTE(review): the filename has no {epoch}/{step} placeholder, so the three
# kept checkpoints presumably differ only by a Lightning-added version suffix
# -- confirm this is the intended naming.
checkpoint_callback = ModelCheckpoint(
    dirpath=f'Checkpoints/{predictor_name}-price/best_models',
    filename=f'{model_name}-{predictor_name}-checkpoint',
    save_top_k=3,
    monitor='val_loss',
    mode='min',
)

# TensorBoard and CSV loggers write to the same directory under the same run name.
tensorboard_logger = TensorBoardLogger(
    save_dir=f'Checkpoints/{predictor_name}-price/train_logs',
    name=f'{model_name}-{predictor_name}-logs',
)

csv_logger = CSVLogger(
    save_dir=f'Checkpoints/{predictor_name}-price/train_logs',
    name=f'{model_name}-{predictor_name}-logs',
)

# Stop training once 'val_loss' has failed to improve for 2 validation checks.
# The monitor key must match the logged metric name exactly (the same key the
# checkpoint callback uses, with no surrounding whitespace), and the callback
# only takes effect when it is registered with the Trainer below.
early_stopping_callback = EarlyStopping(monitor='val_loss', mode='min', patience=2)

trainer = pl.Trainer(
    logger=[tensorboard_logger, csv_logger],
    callbacks=[
        checkpoint_callback,
        early_stopping_callback,
        TQDMProgressBar(refresh_rate=30),
    ],
    max_epochs=N_EPOCHS,  # upper bound; early stopping may end the run sooner
    accelerator='cuda',
    devices=1,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [17]:
# Train the MAMBA-based module with the data supplied by data_module
# (the Trainer above caps the run at N_EPOCHS epochs).
trainer.fit(module, data_module)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type       | Params | Mode 
-------------------------------------------------
0 | model     | MAMBAModel | 163 K  | train
1 | criterion | MSELoss    | 0      | train
-------------------------------------------------
163 K     Trainable params
0         Non-trainable params
163 K     Total params
0.652     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
