In [1]:
import sys

# Put the working directory on the import path so the project packages imported
# further down can be resolved. The membership check keeps this cell idempotent:
# re-running it does not stack duplicate '.' entries on sys.path.
if '.' not in sys.path:
    sys.path.append('.')

In [2]:
# Enable IPython's autoreload extension. Mode 2 re-imports all loaded modules
# before each cell is executed, so edits to the local project packages take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import torch
import torch.optim as optim
import numpy as np

# Array display settings: rows are never wrapped (unbounded line width) and
# floats are always printed with exactly two decimals.
np.set_printoptions(
    floatmode='fixed',
    precision=2,
    linewidth=np.inf,
)

In [16]:
from datamining.mine_json_data import mineData, preparePDData
from indicators.prepare_indicators import prepareIndicators
from predictions.prepare_model_data import prepareModelDataScaleX, prepareDataLoaders
from common.datasets import TimeseriesDataset
from common.models import LSTMLinear
from common.training import fit
from common.checkpoints import loadLSTMLinearFromCheckpoint
from common.losses import R2Loss
from common.plots import printScores
from common.training import getMSEScore, getR2Score


In [9]:
# Disabled download step — presumably it produces the raw JSON read below (TODO confirm):
# mineData("../data/raw", "binance", "BTCUSDT", "1m")
minute_candles_path = "../data/raw/binance_BTCUSDT_1m.json"
df = preparePDData(minute_candles_path)

# Despite the "value_counts" label, what gets printed is df.count(),
# i.e. one count per column rather than a frequency table.
per_column_counts = df.count()
print(f"df.value_counts: {per_column_counts}")

df.value_counts: timestamp_open              3083787
price_open                  3083787
price_high                  3083787
price_low                   3083787
price_close                 3083787
volume                      3083787
timestamp_close             3083787
asset_volume_quote          3083787
trades_number               3083787
asset_volume_taker_base     3083787
asset_volume_taker_quote    3083787
dtype: int64


In [63]:

hourly_candles_path = "../data/raw/binance_BTCUSDT_1h.json"
df_1h = preparePDData(hourly_candles_path)

# The return value is discarded; the indicator columns selected below are
# expected to exist on df_1h afterwards. The meaning of 500 is not visible
# from this notebook — TODO confirm against prepareIndicators.
prepareIndicators(df_1h, 500)

# Cell output: preview of the derived indicator columns only.
indicator_preview_columns = [
    "trend_direction",
    "trend_direction_change",
    "trend_direction_consecutive",
    "trend_direction_swing",
    'rsi',
    '24h_price_diff',
]
df_1h[indicator_preview_columns]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[trend_name_change].iloc[i] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[trend_name_consecutive].iloc[i] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[trend_name].iloc[i] = df[trend_name].iloc[i] / trend_change_amount
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[trend_name_consecutive

Unnamed: 0,trend_direction,trend_direction_change,trend_direction_consecutive,trend_direction_swing
0,0.09470,1,0,0.10460
1,0.01298,0,1,0.07464
2,-0.01188,1,0,0.07216
3,0.06674,1,0,0.12516
4,0.05474,0,1,0.08906
...,...,...,...,...
51407,0.08472,0,3,0.15854
51408,-0.04618,1,0,0.13020
51409,-0.09070,0,1,0.15286
51410,-0.05428,0,2,0.18892


In [85]:
# Prediction target.
column_y = "trend_direction_change"

# Input features, grouped by origin. Concatenation keeps the original column order.
# Dropping `extra_indicator_columns` gives back the earlier nine-column feature set.
# NOTE(review): the target column is also part of the inputs — confirm that
# prepareModelDataScaleX offsets y so this is not label leakage.
candle_columns = ["price_open", "price_close", "price_high", "price_low", "volume"]
trend_columns = [
    "trend_direction",
    "trend_direction_change",
    "trend_direction_consecutive",
    "trend_direction_swing",
]
extra_indicator_columns = ["rsi", "24h_price_diff"]
column_names = candle_columns + trend_columns + extra_indicator_columns

model_data = prepareModelDataScaleX(
    df_1h,
    column_y=column_y,
    column_names=column_names,
)
train_dl, test_dl, val_dl = prepareDataLoaders(model_data)

In [112]:
# --- Training schedule ---
num_epochs = 250
patience = 15  # forwarded to fit(); presumably an early-stopping window — TODO confirm

# --- Optimizer settings ---
# NOTE(review): 0.1 is far above AdamW's default learning rate of 1e-3, and the
# recorded run shows accuracy flat at 0.54 — worth checking whether a smaller
# value trains better.
learning_rate = 0.1
weight_decay = 1e-2

# --- Model constructor arguments (also handed to fit() for checkpointing) ---
model_args = dict(
    input_size=len(column_names),  # one input per selected feature column
    hidden_size=20,
    num_layers=3,
    output_size=1,
    scaler=model_data["ss"],
)

In [113]:
# Seed the global RNGs before building the model, so that weight initialisation
# (and any shuffling that draws from these generators) is repeatable every time
# this cell is re-run. Without it, two runs of the same cell give different results.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

model = LSTMLinear(**model_args)

# NOTE(review): the materialised parameter list travels inside optimizer_args,
# which is also forwarded to fit() — confirm the checkpoint code does not
# serialise it alongside the model.
optimizer_args = {
    'params': list(model.parameters()),
    'lr': learning_rate,
    'weight_decay': weight_decay,
}
optimizer = optim.AdamW(**optimizer_args)

# BCELoss requires predictions in [0, 1] — assumes LSTMLinear ends in a
# sigmoid; TODO confirm.
loss_fn = torch.nn.BCELoss()

# Base name shared by both checkpoint files; the evaluation cell reuses it.
filename = "BTCUSDT_1h_trend"

fit(
    model=model,
    optimizer=optimizer,
    loss_fn=loss_fn,
    train_dl=train_dl,
    val_dl=val_dl,
    epochs=num_epochs,
    patience=patience,
    save_checkpoints=True,
    model_args=model_args,
    optimizer_args=optimizer_args,
    checkpoint_path=f'../data/checkpoints/checkpoint_{filename}.pkl',
    checkpoint_f1_path=f'../data/checkpoints/checkpoint_{filename}_f1.pkl',
)
print('done')

Epoch    1/250 = train loss: 0.7078, train ACC: 0.5400, val ACC: 0.5443, time_taken: 0.09406709671020508s
Epoch    2/250 = train loss: 0.6926, train ACC: 0.5400, val ACC: 0.5443, time_taken: 0.07574605941772461s
Epoch    3/250 = train loss: 0.7022, train ACC: 0.5400, val ACC: 0.5443, time_taken: 0.07775425910949707s
Epoch    4/250 = train loss: 0.6907, train ACC: 0.5400, val ACC: 0.5443, time_taken: 0.0753178596496582s
Epoch    5/250 = train loss: 0.6901, train ACC: 0.5400, val ACC: 0.5443, time_taken: 0.07761526107788086s
Epoch    6/250 = train loss: 0.6905, train ACC: 0.5400, val ACC: 0.5443, time_taken: 0.07408714294433594s
Epoch    7/250 = train loss: 0.6903, train ACC: 0.5400, val ACC: 0.5443, time_taken: 0.07557487487792969s
Epoch    8/250 = train loss: 0.6901, train ACC: 0.5400, val ACC: 0.5443, time_taken: 0.07676386833190918s
Epoch    9/250 = train loss: 0.6899, train ACC: 0.5400, val ACC: 0.5443, time_taken: 0.07601475715637207s
Epoch   10/250 = train loss: 0.6898, train ACC:

In [114]:
# Restore the two checkpoints written during training: the default one and the
# "_f1" variant.
default_checkpoint_path = f'../data/checkpoints/checkpoint_{filename}.pkl'
f1_checkpoint_path = f'../data/checkpoints/checkpoint_{filename}_f1.pkl'

best_model = loadLSTMLinearFromCheckpoint(default_checkpoint_path)
best_model_f1 = loadLSTMLinearFromCheckpoint(f1_checkpoint_path)

# Report scores for each restored model, default checkpoint first.
for restored_model in (best_model, best_model_f1):
    printScores(test_dl, val_dl, restored_model)

test R2: -0.0038, val R2: -0.0025
test R2: 0.0003, val R2: -0.0030
