## Import Libs

In [1]:
# import os
import pandas as pd
import numpy as np
import datetime as dt
import logging
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

from torch import optim
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

from sklearn.preprocessing import MinMaxScaler
from torch.optim import AdamW
from utils import SklearnWrapper

In [2]:
from config import *
from entities import *
from components import *
from strategies import *
from datasets import *
from engine import Engine
from models import DiffusionTransformer
from frameworks import Diffusion

### Setting 

In [3]:
# logging.basicConfig(level=logging.DEBUG)
logging.getLogger('matplotlib').setLevel(logging.WARNING)

In [4]:
symbols = ['AAPL', 'TSLA', 'MSFT']
basket = Basket(symbols=symbols)
basket.load_all_assets(freq="1d")

In [5]:
targets = ["Close", "High"]
for symbol, asset in basket.assets.items():
    asset.to_returns(log=True, columns=targets)

asset.data.head(5)

Unnamed: 0_level_0,Low,Open,Volume,Close (Log_Returns),High (Log_Returns)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-05,39.49752,39.599999,39673900,-0.009238,-0.014658
2015-01-06,38.891163,39.608523,36447900,-0.014787,0.000427
2015-01-07,38.848476,39.266934,29114100,0.012626,-0.006222
2015-01-08,39.898901,39.92452,29645200,0.028994,0.027387
2015-01-09,40.052616,40.658956,23944200,-0.008441,0.001465


In [6]:
strategy = IntersectionStrategy()
joint_df = basket.align(strategy)
joint_df.head()

Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL,TSLA,TSLA,TSLA,TSLA,TSLA,MSFT,MSFT,MSFT,MSFT,MSFT
Unnamed: 0_level_1,Low,Open,Volume,Close (Log_Returns),High (Log_Returns),Low,Open,Volume,Close (Log_Returns),High (Log_Returns),Low,Open,Volume,Close (Log_Returns),High (Log_Returns)
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
2015-01-05,23.391173,24.030263,257142000,-0.028576,-0.025355,13.810667,14.303333,80527500,-0.04295,-0.030702,39.49752,39.599999,39673900,-0.009238,-0.014658
2015-01-06,23.218085,23.641928,263188400,9.4e-05,-0.011292,13.614,14.004,93928500,0.005648,-0.01068,38.891163,39.608523,36447900,-0.014787,0.000427
2015-01-07,23.67743,23.788384,160423600,0.013925,0.007142,13.985333,14.223333,44526000,-0.001563,0.002704,38.848476,39.266934,29114100,0.012626,-0.006222
2015-01-08,24.121236,24.238848,237458000,0.037702,0.035856,14.000667,14.187333,51637500,-0.001566,-0.004573,39.898901,39.92452,29645200,0.028994,0.027387
2015-01-09,24.45632,25.00221,214798000,0.001072,0.009761,13.664,13.928,70024500,-0.018981,-0.018029,40.052616,40.658956,23944200,-0.008441,0.001465


In [7]:
features = ["Close (Log_Returns)", "High (Log_Returns)"]
basket_tensor = basket.to_tensor(features=features)
basket_tensor.shape

torch.Size([2723, 3, 2])

In [8]:
window = RollingWindow(size=64, stride=1)
market = Market(basket, window)
market_tensor = market.setup(features=features)

In [9]:
market_tensor.shape

torch.Size([2660, 64, 3, 2])

In [10]:
sklearn_scaler = MinMaxScaler(feature_range=(-1, 1))
scaler = SklearnWrapper(sklearn_scaler)

scaler.fit(market_tensor)
norm_data = scaler.encode(market_tensor)

print(f"Norm data shape: {market_tensor.shape}")
print(f"Data {market_tensor[0,0,0,:]}")

Norm data shape: torch.Size([2660, 64, 3, 2])
Data tensor([-0.0286, -0.0254], device='cuda:0')


In [11]:
modes = ["exhaustive", "random"]
market_ds = JointMarketDataset(norm_data)
# market_ds = MarketDataset(norm_data, mode='random')
ratios = [0.8, 0.1, 0.1]
train_ds, val_ds, test_ds = create_randomize_datasets(market_ds, ratios)

print(f"Train samples: {len(train_ds)}")
print(f"Val samples: {len(val_ds)}") 
print(f"Test samples: {len(test_ds)}")

Train samples: 2128
Val samples: 266
Test samples: 266


In [12]:
train_loader = DataLoader(train_ds, batch_size=32, shuffle=False)
val_loader = DataLoader(val_ds, batch_size=32, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=32, shuffle=True)

In [13]:
x = next(iter(train_loader))
x.shape

torch.Size([32, 64, 3, 2])

In [14]:
# x["context"].shape

In [15]:
# x["target_idx"]

In [16]:
# x["window_idx"]

In [17]:
cfg = TrainConfig()
print(cfg.optimizer.lr) 
print(cfg.epochs)    

cfg = TrainConfig(
    epochs=500,
    optimizer=OptimizerConfig(lr=2e-4)
)

0.0001
100


In [18]:
# optimizer = optim.AdamW(
#     model.parameters(), 
#     **asdict(cfg.optimizer) 
# )

In [19]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
EPOCHS = 50
BATCH_SIZE = 32
LR = 1e-4

WINDOW_SIZE = x.shape[1]       # Length (L)
NUM_ASSETS = x.shape[2]         # N (จำนวนหุ้น)
NUM_FEATURES = x.shape[3]      # F (OHLCV...)
TOTAL_INPUT_DIM = NUM_ASSETS * NUM_FEATURES # C = N * F (Flatten แล้ว)

print(f"Running on: {DEVICE}")
print(f"Input Dimension (Channels): {TOTAL_INPUT_DIM}")

Running on: cuda
Input Dimension (Channels): 6


In [20]:
diffusion = Diffusion(
    noise_steps=1000,
    beta_start=1e-4,
    beta_end=0.02,
    schedule="cosine", # แนะนำ Cosine สำหรับ Time Series
    device=DEVICE
)

In [21]:
model = DiffusionTransformer(
    features_in=TOTAL_INPUT_DIM,  # รับ Input ขนาด N*F
    d_model=128,                  # ความกว้าง Model
    nhead=4,
    num_layers=4,
    max_len=WINDOW_SIZE           # รองรับความยาวสูงสุดเท่า Window
).to(DEVICE)



In [22]:
optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-6)

In [23]:
engine = Engine(
    model=model,
    diffusion=diffusion,
    train_dataloader=train_loader,
    val_dataloader=val_loader,
    optimizer=optimizer,
    device=DEVICE,
    scaler=scaler
)

In [24]:
engine.fit(epochs=EPOCHS, save_dir="./checkpoints")

Epoch 1/50: 100%|██████████| 67/67 [00:05<00:00, 11.52it/s, loss=0.1614]
Epoch 2/50: 100%|██████████| 67/67 [00:02<00:00, 26.22it/s, loss=0.1280]
Epoch 3/50: 100%|██████████| 67/67 [00:02<00:00, 26.36it/s, loss=0.0205]
Epoch 4/50: 100%|██████████| 67/67 [00:02<00:00, 26.36it/s, loss=0.0642]
Epoch 5/50: 100%|██████████| 67/67 [00:02<00:00, 26.32it/s, loss=0.1847]
Epoch 6/50: 100%|██████████| 67/67 [00:02<00:00, 26.37it/s, loss=0.1126]
Epoch 7/50: 100%|██████████| 67/67 [00:02<00:00, 26.37it/s, loss=0.0757]
Epoch 8/50: 100%|██████████| 67/67 [00:02<00:00, 26.34it/s, loss=0.2557]
Epoch 9/50: 100%|██████████| 67/67 [00:02<00:00, 26.34it/s, loss=0.1018]
Epoch 10/50: 100%|██████████| 67/67 [00:02<00:00, 26.35it/s, loss=0.0755]
Epoch 11/50: 100%|██████████| 67/67 [00:02<00:00, 26.37it/s, loss=0.0619]
Epoch 12/50: 100%|██████████| 67/67 [00:02<00:00, 26.38it/s, loss=0.0769]
Epoch 13/50: 100%|██████████| 67/67 [00:02<00:00, 26.35it/s, loss=0.0429]
Epoch 14/50: 100%|██████████| 67/67 [00:02<00:0

In [25]:
# # ดึงตัวอย่างมา 1 อัน [Length, Assets, Features]
#     sample_input = X_val[0] 
    
#     # สั่งทำนาย 5 step สุดท้าย (Forecasting)
#     prediction = engine.simulate(sample_input, steps_to_predict=5)

IndentationError: unexpected indent (4000010355.py, line 2)

In [26]:
x_test = next(iter(test_loader))
x_test.shape

torch.Size([32, 64, 3, 2])

In [27]:
x_test[0].shape

torch.Size([64, 3, 2])

In [28]:
prediction = engine.simulate(x_test, steps_to_predict=8)

In-painting: 1000it [00:11, 85.90it/s]


In [29]:
prediction.shape

(32, 64, 3, 2)

In [48]:
@torch.no_grad()
def monte_carlo_simulate(context_data, steps_to_simulate, engine,num_simulations=100):
    x_context = context_data.repeat(num_simulations, 1, 1, 1).to(engine.device)
    mc_results = engine.simulate(x_context, steps_to_simulate)
    return mc_results

In [31]:
x_test[0].shape

torch.Size([64, 3, 2])

In [43]:
monte_carlo_simulate(x_test[0], 24, 100)

NameError: name 'self' is not defined

In [38]:
x_test_1sampling = x_test[0].unsqueeze(0)

In [39]:
x_test_1sampling.shape

torch.Size([1, 64, 3, 2])

In [49]:
mc_y = monte_carlo_simulate(context_data=x_test_1sampling, steps_to_simulate=24, engine=engine,num_simulations=100)
mc_y.shape # [Sims, Time, Assets, Features]

In-painting: 1000it [00:33, 30.26it/s]


(100, 64, 3, 2)

In [92]:
mc_y_inv_scaled = scaler.decode(torch.from_numpy(mc_y))
mc_y_inv_scaled.shape

torch.Size([100, 64, 3, 2])

In [94]:
def tensor_to_long_df(tensor, asset_names=None, feature_names=None):
    # tensor shape: (100, 64, 3, 2) -> (Sims, Time, Assets, Features)
    n_sims, n_time, n_assets, n_features = tensor.shape
    
    # 1. คลี่ข้อมูลเป็น 2D [ (100*64*3), 2 ]
    flattened_data = tensor.reshape(-1, n_features).cpu().numpy()
    
    # 2. สร้าง Index ให้แต่ละแถว
    sim_ids = np.repeat(np.arange(n_sims), n_time * n_assets)
    time_steps = np.tile(np.repeat(np.arange(n_time), n_assets), n_sims)
    asset_ids = np.tile(np.arange(n_assets), n_sims * n_time)
    
    # 3. ประกอบร่างเป็น DataFrame
    df = pd.DataFrame(flattened_data, columns=feature_names or [f'feat_{i}' for i in range(n_features)])
    df['sim_id'] = sim_ids
    df['time'] = time_steps
    df['asset_id'] = asset_ids
    
    # ถ้ามีชื่อหุ้นก็ใส่แทนตัวเลขได้เลย
    if asset_names:
        asset_map = {i: name for i, name in enumerate(asset_names)}
        df['asset_name'] = df['asset_id'].map(asset_map)
        
    return df

In [95]:
type(mc_y_inv_scaled)

torch.Tensor

In [96]:
df_mc = tensor_to_long_df(mc_y_inv_scaled, asset_names=symbols, feature_names=features)

In [97]:
df_mc

Unnamed: 0,Close (Log_Returns),High (Log_Returns),sim_id,time,asset_id,asset_name
0,0.024505,0.027005,0,0,0,AAPL
1,0.142427,0.144459,0,0,1,TSLA
2,-0.010067,-0.016307,0,0,2,MSFT
3,-0.018440,-0.005926,0,1,0,AAPL
4,-0.057101,-0.040640,0,1,1,TSLA
...,...,...,...,...,...,...
19195,0.028146,0.016450,99,62,1,TSLA
19196,0.000909,-0.009991,99,62,2,MSFT
19197,0.006080,0.001437,99,63,0,AAPL
19198,0.004442,-0.007934,99,63,1,TSLA


In [98]:
worst_cases = df_mc.groupby('sim_id')['Close (Log_Returns)'].min()

In [99]:
type(worst_cases)

pandas.core.series.Series

In [104]:
worst_cases.head(20)

sim_id
0      -0.057101
1      -0.102362
2      -0.095117
3    -328.986511
4      -0.057101
5      -0.061416
6      -0.057101
7    -171.779053
8    -310.219116
9      -0.057101
10     -0.057101
11    -59.102375
12     -0.186451
13     -0.057101
14     -0.057101
15     -0.214660
16   -546.560303
17     -0.082874
18     -0.059616
19     -0.079604
Name: Close (Log_Returns), dtype: float32

In [101]:
df_mc.loc[df_mc['sim_id'] == 3, "Close (Log_Returns)"].min()

np.float32(-328.9865)

In [102]:
what = df_mc.groupby('sim_id')['Close (Log_Returns)'].max()

In [103]:
what.head(20)

sim_id
0       0.142427
1       0.142427
2       0.142427
3     213.566330
4       0.142427
5       0.142427
6       0.142427
7       0.142427
8      19.386658
9       0.142427
10      0.191378
11    457.866852
12      0.142427
13      0.142427
14      0.142427
15      0.142427
16    281.211334
17      0.142427
18      0.142427
19      0.142427
Name: Close (Log_Returns), dtype: float32

In [84]:
df_mc.loc[df_mc['sim_id'] == 3, "Close (Log_Returns)"].max()

np.float32(968.6088)