## Import Libs

In [1]:
# import os
import pandas as pd
import numpy as np
import datetime as dt
import logging
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

from torch import optim
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

from sklearn.preprocessing import MinMaxScaler
from utils import SklearnWrapper

In [2]:
from config import *
from entities import *
from components import *
from strategies import *
from datasets import *

### Setting 

In [3]:
logging.basicConfig(level=logging.DEBUG)
logging.getLogger('matplotlib').setLevel(logging.WARNING)

In [4]:
symbols = ['AAPL', 'TSLA', 'MSFT']
basket = Basket(symbols=symbols)
basket.load_all_assets(freq="1d")

DEBUG:entities.basket:Initialized Asset Basket: ['AAPL', 'TSLA', 'MSFT'] with 0 assets which loaded.
INFO:entities.basket:Starting batch load for 3 symbols...
DEBUG:entities.basket:Attempting to load AAPL...
DEBUG:entities.asset:Initialized Asset: AAPL with 2724 rows.
INFO:entities.basket:Successfully loaded AAPL (2724 rows).
DEBUG:entities.basket:Attempting to load TSLA...
DEBUG:entities.asset:Initialized Asset: TSLA with 2760 rows.
INFO:entities.basket:Successfully loaded TSLA (2760 rows).
DEBUG:entities.basket:Attempting to load MSFT...
DEBUG:entities.asset:Initialized Asset: MSFT with 2724 rows.
INFO:entities.basket:Successfully loaded MSFT (2724 rows).
INFO:entities.basket:Batch load complete. Success: 3/3. Total assets in basket: 3


In [5]:
targets = ["Close", "High"]
for symbol, asset in basket.assets.items():
    asset.to_returns(log=True, columns=targets)

asset.data.head(5)

DEBUG:entities.asset:AAPL converted to Returns (log=True)
DEBUG:entities.asset:TSLA converted to Returns (log=True)
DEBUG:entities.asset:MSFT converted to Returns (log=True)


Unnamed: 0_level_0,Low,Open,Volume,Close (Log_Returns),High (Log_Returns)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-05,39.49752,39.599999,39673900,-0.009238,-0.014658
2015-01-06,38.891163,39.608523,36447900,-0.014787,0.000427
2015-01-07,38.848476,39.266934,29114100,0.012626,-0.006222
2015-01-08,39.898901,39.92452,29645200,0.028994,0.027387
2015-01-09,40.052616,40.658956,23944200,-0.008441,0.001465


In [6]:
strategy = IntersectionStrategy()
joint_df = basket.align(strategy)
joint_df.head()

DEBUG:strategies.concrete:Intersection Strategy: Aligned 3 assets. Common rows: 2723
DEBUG:entities.basket:Aligned data shape: (2723, 15)
INFO:entities.basket:Assets updated in-place to aligned index (Length: 2723)


Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL,TSLA,TSLA,TSLA,TSLA,TSLA,MSFT,MSFT,MSFT,MSFT,MSFT
Unnamed: 0_level_1,Low,Open,Volume,Close (Log_Returns),High (Log_Returns),Low,Open,Volume,Close (Log_Returns),High (Log_Returns),Low,Open,Volume,Close (Log_Returns),High (Log_Returns)
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
2015-01-05,23.391173,24.030263,257142000,-0.028576,-0.025355,13.810667,14.303333,80527500,-0.04295,-0.030702,39.49752,39.599999,39673900,-0.009238,-0.014658
2015-01-06,23.218085,23.641928,263188400,9.4e-05,-0.011292,13.614,14.004,93928500,0.005648,-0.01068,38.891163,39.608523,36447900,-0.014787,0.000427
2015-01-07,23.67743,23.788384,160423600,0.013925,0.007142,13.985333,14.223333,44526000,-0.001563,0.002704,38.848476,39.266934,29114100,0.012626,-0.006222
2015-01-08,24.121236,24.238848,237458000,0.037702,0.035856,14.000667,14.187333,51637500,-0.001566,-0.004573,39.898901,39.92452,29645200,0.028994,0.027387
2015-01-09,24.45632,25.00221,214798000,0.001072,0.009761,13.664,13.928,70024500,-0.018981,-0.018029,40.052616,40.658956,23944200,-0.008441,0.001465


In [7]:
features = ["Close (Log_Returns)", "High (Log_Returns)"]
basket_tensor = basket.to_tensor(features=features)
basket_tensor.shape

DEBUG:entities.asset:Asset: AAPL is using cuda device.
DEBUG:entities.asset:Asset: TSLA is using cuda device.
DEBUG:entities.asset:Asset: MSFT is using cuda device.


torch.Size([2723, 3, 2])

In [8]:
window = RollingWindow(size=64, stride=1)
market = Market(basket, window)
market_tensor = market.setup(features=features)

INFO:entities.market:Setting up Market environment
DEBUG:entities.asset:Asset: AAPL is using cuda device.
DEBUG:entities.asset:Asset: TSLA is using cuda device.
DEBUG:entities.asset:Asset: MSFT is using cuda device.
DEBUG:components.windowing:RollingWindow applied. Result: torch.Size([2660, 64, 3, 2])
INFO:entities.market:Market Setup Complete. Batch Shape: torch.Size([2660, 64, 3, 2]) [B, L, N, F]


In [9]:
market_tensor.shape

torch.Size([2660, 64, 3, 2])

In [10]:
sklearn_scaler = MinMaxScaler(feature_range=(-1, 1))
scaler = SklearnWrapper(sklearn_scaler)

scaler.fit(market_tensor)
norm_data = scaler.encode(market_tensor)

print(f"Norm data shape: {market_tensor.shape}")
print(f"Data {market_tensor[0,0,0,:]}")

DEBUG:utils.scaler:Scaler Fitted using: MinMaxScaler


Norm data shape: torch.Size([2660, 64, 3, 2])
Data tensor([-0.0286, -0.0254], device='cuda:0')


In [11]:
modes = ["exhaustive", "random"]
market_ds = MarketDataset(market_tensor, mode='random')
ratios = [0.8, 0.1, 0.1]
train_ds, val_ds, test_ds = create_randomize_datasets(market_ds, ratios)

print(f"Train samples: {len(train_ds)}")
print(f"Val samples: {len(val_ds)}") 
print(f"Test samples: {len(test_ds)}")

Train samples: 2128
Val samples: 266
Test samples: 266


In [12]:
train_loader = DataLoader(train_ds, batch_size=32, shuffle=False)
val_loader = DataLoader(val_ds, batch_size=32, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=32, shuffle=True)

In [13]:
x = next(iter(train_loader))
x["target"].shape

torch.Size([32, 64, 2])

In [14]:
x["context"].shape

torch.Size([32, 64, 2, 2])

In [15]:
x["target_idx"]

tensor([0, 2, 2, 1, 2, 0, 1, 2, 2, 1, 0, 0, 2, 1, 2, 0, 2, 2, 0, 1, 0, 1, 1, 0,
        0, 1, 2, 2, 0, 1, 0, 2])

In [16]:
x["window_idx"]

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 33, 34])

In [19]:
cfg = TrainConfig()
print(cfg.optimizer.lr) 
print(cfg.epochs)    

cfg = TrainConfig(
    epochs=500,
    optimizer=OptimizerConfig(lr=2e-4)
)

0.0001
100


In [None]:
# optimizer = optim.AdamW(
#     model.parameters(), 
#     **asdict(cfg.optimizer) 
# )