In [None]:
#https://keras.io/examples/generative/vae/
#https://keras.io/examples/generative/molecule_generation/

In [None]:
#!python3 -m pip install --upgrade pip
#!pip install pydot
#!apt-get install -y graphviz

In [13]:
import os
import random
from sklearn.preprocessing import MinMaxScaler
import joblib
import pandas as pd
import h5py
import tensorflow as tf
from IPython.display import clear_output,display, HTML
import datetime as dt
import numpy as np
#================== initialization ==================
# Run identity: the start timestamp is later passed to wandb.init as the run name.
currentTM = dt.datetime.now().strftime("%Y-%m-%dT%H%M%S")
PROJECT = "testVAEModel"

# Model / optimisation hyper-parameters.
LATENT_DIM = 128
VAE_LR = 5e-4
EPOCHS = 5
BATCH_SIZE = 32

# Data and checkpoint locations (relative to the notebook's working directory).
PARQUET_PATH = './data/OptionsEOD_STG.parquet'
SCALER_PATH = './data/scaler.gz'
H5_PATH = './data/OptTrainData/'
MODEL_PATH = "./src/models/"

# Column groups.
UNIQUE_KEYS = ['QUOTE_DATE', 'SYMBOL', 'EXPIRE_DATE']
SCALER_COL = [
    'DTE', 'INTRINSIC_VALUE', 'TOTAL_VOLUME',
    'C_BID', 'C_ASK', 'C_VOLUME',
    'P_BID', 'P_ASK', 'P_VOLUME',
]

# Feature switches: DISPLAY gates the model-diagram cells; WANDB_LOG is
# flipped to True by the W&B setup cell when that cell is executed.
DISPLAY = False
WANDB_LOG = False

In [14]:
# SECURITY: a credential-like token used to be pasted here as a comment and has
# been removed. Never commit secrets to a notebook. Authenticate through the
# WANDB_API_KEY environment variable or the interactive prompt of wandb.login().
# NOTE(review): if the removed string was a real API key, revoke/rotate it.
import wandb
from wandb.keras import WandbCallback

wandb.login()

# Tell the training cell that a W&B run is active and metrics should be logged.
WANDB_LOG = True

# Hyper-parameters recorded with the run; the encoder/decoder entries are also
# read back by the model-construction cell.
CONFIG = {    "latent_dim":LATENT_DIM,
              "learning_rate": VAE_LR,
              "epochs": EPOCHS,
              "batch_size": BATCH_SIZE,
              "architecture": "VAE",
              "dataset": "OptionsChaine",
              "encoder_dense_units":[512,256],
              "encoder_dropout_rate":0.2,
              "decoder_dense_units":[256, 512],
              "decoder_dropout_rate":0.2,
           }

# Start the run, named after the notebook start timestamp.
run = wandb.init(project=PROJECT, name=currentTM, config=CONFIG)

In [15]:
#Example

# from IPython.display import clear_output,display, HTML
# import numpy as np
# #load scaler
# scaler = MinMaxScaler()
# PartitionDate = [ d[-7:] for d in  os.listdir(PARQUET_PATH) if 'PartitionDate' in d]
# random.shuffle(PartitionDate)
# scaler = joblib.load(SCALER_PATH)


# for i,partdate in enumerate(PartitionDate) :
#     df = pd.read_parquet(PARQUET_PATH,engine='pyarrow'
#                                  , filters=[('PartitionDate', '=', partdate)]
#                                 )
#     df['P_VOLUME'] = df['P_VOLUME'].fillna(0)
#     df['C_VOLUME'] = df['C_VOLUME'].fillna(0)
#     DATA  = np.empty((0,) + (20,9) ) 
#     for opt_id in np.unique( df[["OPTIONS_ID"]].values):
#         df_filter  = df[df["OPTIONS_ID"]==opt_id]
#         if len(df_filter) == 20:
#             DATA = np.vstack((DATA ,[scaler.transform(df_filter[SCALER_COL])]))
#         else:
#             #print( len(df_filter) )
#             #display(HTML(df_filter[['STRIKE']+SCALER_COL].to_html()))
#             pass
            
#     ## Save the NumPy array to an HDF5 file
#     # with h5py.File(H5_PATH+f"{partdate}.h5", 'w') as f:
#     #     dset = f.create_dataset(f'{partdate}', data=DATA, chunks=True , compression='gzip')

#     print(f"[Processing] {partdate}, {round(((i+1)/len(PartitionDate))*100,2)}%     ",end='\r')

In [16]:
# # Save the NumPy array to an HDF5 file
# with h5py.File(H5_PATH, 'w') as f:
#     #dset = f.create_dataset('dataset', data=DATA, chunks=True, compression='gzip')
#     #test
#     dset = f.create_dataset('dataset', data=DATA, compression='gzip')

In [None]:
# =====================================================================

In [17]:
from src.model import OptionChainGenerator
from src.layer import encoder, decoder

# Build the two halves separately, then wrap them in the combined generator.
# Layer sizes and dropout rates come from CONFIG (defined in the W&B setup cell).
vae_encoder = encoder(
    latent_dim=LATENT_DIM,
    input_shape=(20, 3),
    dense_units=CONFIG["encoder_dense_units"],
    dropout_rate=CONFIG["encoder_dropout_rate"],
)
vae_decoder = decoder(
    latent_dim=LATENT_DIM,
    output_shape=(20, 1),
    dense_units=CONFIG["decoder_dense_units"],
    dropout_rate=CONFIG["decoder_dropout_rate"],
)
model = OptionChainGenerator(vae_encoder, vae_decoder)

def dummy_loss(y_true, y_pred):
    """Placeholder loss for ``model.compile``: ignores both arguments and returns 0.0.

    NOTE(review): presumably the real objective is computed inside
    ``OptionChainGenerator`` itself (the training log reports separate
    kl/optVal/vol losses while ``loss`` stays at zero) -- confirm in src.model.
    """
    placeholder = 0.0
    return placeholder
    
# Adam optimizer at the learning rate configured in VAE_LR.
vae_optimizer = tf.keras.optimizers.Adam(learning_rate=VAE_LR)
# dummy_loss (always 0.0) is supplied as the compile-time loss.
model.compile(vae_optimizer , loss=dummy_loss)

In [18]:
################## show model ######################
if DISPLAY:
    from tensorflow.keras.utils import model_to_dot
    from IPython.display import SVG, display

    def display_model(model, width=1024, height=512):
        """Draw a Keras model graph inline as SVG inside a fixed-size <div>.

        Args:
            model: model handed to ``model_to_dot`` (shapes and layer names shown).
            width: width of the wrapping <div>, in pixels.
            height: height of the wrapping <div>, in pixels.

        Relies on ``HTML`` from the notebook-level ``IPython.display`` import.
        """
        graph = model_to_dot(model, show_shapes=True, show_layer_names=True)
        # Render through the Graphviz ``dot`` program; the SVG comes back as bytes.
        svg_data = graph.create(prog='dot', format='svg').decode("utf-8")
        display(HTML(f'<div style="width:{width}px;height:{height}px;">{svg_data}</div>'))

In [19]:
## Example usage:
## Display the encoder model with reduced size
# Guarded by DISPLAY because display_model is only defined when that flag is set.
if DISPLAY :
    display_model(model.encoder, width=1024, height=512)

In [20]:
# Render the decoder graph in a wider (2500 px) container; skipped unless DISPLAY is set.
if DISPLAY :
    display_model(model.decoder, width=2500, height=512)

In [21]:
#================== loadmodel ====================

In [22]:
from tensorflow.keras.models import load_model

# Checkpoint directory for this project and the two sub-model files inside it.
model_path = MODEL_PATH+f'{PROJECT}'
encoder_file = model_path + '/' + 'encoder.keras'
decoder_file = model_path + '/' + 'decoder.keras'

# Decide on the checkpoint FILES rather than on the directory: a directory
# left behind by an interrupted first run must not send us into load_model
# on files that were never written.
have_encoder = os.path.isfile(encoder_file)
have_decoder = os.path.isfile(decoder_file)

if have_encoder and have_decoder:
    # Resume from the previously saved weights.
    model.encoder = load_model(encoder_file)
    model.decoder = load_model(decoder_file)
elif not have_encoder and not have_decoder:
    # First run: persist the freshly initialised sub-models.
    os.makedirs(model_path, exist_ok=True)
    model.encoder.save(encoder_file)
    model.decoder.save(decoder_file)
else:
    # Only one half exists; refuse to silently mix trained and untrained parts
    # or to overwrite the surviving file.
    raise FileNotFoundError(
        f"Incomplete checkpoint in {model_path}: "
        f"encoder present={have_encoder}, decoder present={have_decoder}"
    )

In [23]:
#================== train model ==================
# Partition keys are the last 7 characters of every 'PartitionDate...' entry
# under PARQUET_PATH; one HDF5 file named '<key>.h5' is read from H5_PATH for each.
PartitionDate = [d[-7:] for d in os.listdir(PARQUET_PATH) if 'PartitionDate' in d]
random.shuffle(PartitionDate)  # visit the partitions in random order

for partdate in PartitionDate:
    # Load the whole partition into memory so the file is closed before training.
    with h5py.File(H5_PATH + partdate + ".h5", 'r') as f:
        DATA = f[partdate][:]

    # Split along the last (feature) axis: the first 3 features are the model
    # input, the remaining ones are the targets.
    # NOTE(review): presumably the feature order follows SCALER_COL -- confirm
    # against the preprocessing that wrote the HDF5 files.
    X = DATA[:, :, :3]
    Y = DATA[:, :, 3:]

    # Nothing to train on: skip logging/saving instead of reusing a stale
    # (or not yet defined) `history` from a previous partition.
    if not len(X):
        continue

    tf.keras.backend.clear_session()
    # Use the configured EPOCHS so training matches what is reported to W&B.
    history = model.fit(X, Y, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.2)

    if WANDB_LOG:
        # One W&B step per partition: each metric is averaged over the epochs.
        wandb.log({
            "kl_loss": np.average(history.history['kl_loss']),
            "loss": np.average(history.history['loss']),
            "optVal_loss": np.average(history.history['optVal_loss']),
            "vol_loss": np.average(history.history['vol_loss']),
        }, commit=True)

    # Checkpoint after every trained partition so an interrupt loses little work.
    model.encoder.save(model_path+f'/'+f'encoder.keras')
    model.decoder.save(model_path+f'/'+f'decoder.keras')

if WANDB_LOG : wandb.finish()

Epoch 1/5




[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - kl_loss: 6.3055e-04 - optVal_loss: 0.6118 - total_loss: 0.6181 - vol_loss: 0.0056 - loss: 0.0000e+00 - val_kl_loss: 0.0000e+00 - val_loss: 0.0000e+00 - val_optVal_loss: 0.0000e+00 - val_total_loss: 0.0000e+00 - val_vol_loss: 0.0000e+00
Epoch 2/5
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - kl_loss: 1.1648e-04 - optVal_loss: 0.5553 - total_loss: 0.5563 - vol_loss: 8.8743e-04 - loss: 0.0000e+00 - val_kl_loss: 0.0000e+00 - val_loss: 0.0000e+00 - val_optVal_loss: 0.0000e+00 - val_total_loss: 0.0000e+00 - val_vol_loss: 0.0000e+00
Epoch 3/5
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - kl_loss: 7.8789e-05 - optVal_loss: 0.5126 - total_loss: 0.5131 - vol_loss: 4.3880e-04 - loss: 0.0000e+00 - val_kl_loss: 0.0000e+00 - val_loss: 0.0000e+00 - val_optVal_loss: 0.0000e+00 - val_total_loss: 0.0000e+00 - val_vol_loss: 0.0000e+00
Epoch 4/5
[1m35/35[0m [32m━━━━━━━

KeyboardInterrupt: 

In [None]:
# Per-epoch 'kl_loss' values recorded by the most recent model.fit call.
history.history['kl_loss']

In [None]:
# Mean of those per-epoch values -- the same aggregate the training loop logs to W&B.
np.average(  history.history['kl_loss'] )

In [None]:
# Open the '2012-05' partition file read-only for manual inspection.
# NOTE(review): this handle is never closed in the visible cells; call f.close()
# once the inspection is done.
f = h5py.File(H5_PATH+'2012-05'+".h5", 'r') 

In [None]:
# List the top-level names stored in the opened HDF5 file.
f.keys()

In [None]:
import numpy as np

def create_adjacency_matrix(options, threshold=25):
    """
    Build the adjacency matrix of an options chain.

    Two options are connected (entry 1.0) when their strike prices differ by
    at most ``threshold`` and their DTE values are equal; otherwise the entry
    is 0.0. The matrix is symmetric, and for a non-negative threshold every
    option is connected to itself.

    Args:
        options: sequence of mappings with 'strike_price' and 'dte' keys.
        threshold: largest strike-price gap that still counts as a connection.

    Returns:
        A float array of shape (len(options), len(options)).
    """
    strikes = np.array([opt['strike_price'] for opt in options])
    expiries = np.array([opt['dte'] for opt in options])

    # Pairwise comparisons through broadcasting instead of nested loops.
    strike_gap = np.abs(strikes[:, None] - strikes[None, :])
    same_expiry = expiries[:, None] == expiries[None, :]
    linked = (strike_gap <= threshold) & same_expiry

    return linked.astype(float)

# NOTE(review): this first list is dead code -- it is overwritten by the
# second `options` assignment below before it is ever used.
options = [
    {'strike_price': 100, 'dte': 30},
    {'strike_price': 105, 'dte': 30},
    {'strike_price': 110, 'dte': 30},
    {'strike_price': 115, 'dte': 30},
    {'strike_price': 120, 'dte': 30},
    {'strike_price': 0, 'dte': 0},  # value changed to 0
]

# Same kind of chain in a different order, with the all-zero entry first.
options = [
    {'strike_price': 0, 'dte': 0},
    {'strike_price': 120, 'dte': 30},
    {'strike_price': 150, 'dte': 30},
    {'strike_price': 110, 'dte': 30},
    {'strike_price': 105, 'dte': 30},
    {'strike_price': 100, 'dte': 30},  # NOTE(review): leftover "value changed to 0" comment; the zeroed entry is the first one
]


# Adjust the threshold
threshold = 5

adjacency_matrix = create_adjacency_matrix(options, threshold)
print(adjacency_matrix)

In [None]:
import numpy as np
# 3x3 integer matrix holding 1..9 in row-major order.
A = np.arange(1, 10).reshape(3, 3)

In [None]:
# A is 2-D (3x3), so it accepts at most two indices; a third one raises
# IndexError. Select the first column instead.
A[:, 0]

In [None]:
import numpy as np

# Generate a random numpy array with shape (2, 20, 6)
# (no seed is set, so the values differ on every run)
random_array = np.random.rand(2, 20, 6)

print(random_array)

In [None]:
# Rebind random_array to a smaller (2, 5, 3) sample for the indexing experiments below.
random_array = np.random.rand(2, 5, 3)

In [None]:
# Display the current contents of random_array.
random_array

In [None]:
# Take the last entry along the final axis for every position of the first two axes.
random_array[:, :, -1]

In [None]:
# Lower-case call/put bid, ask and volume column names, in a fixed order.
colList = ["c_bid", "c_ask", "c_volume", "p_bid", "p_ask", "p_volume"]

In [None]:
# Position of "c_ask" within colList (1 for the list defined in the previous cell).
colList.index("c_ask")