In [None]:
#https://keras.io/examples/generative/vae/
#https://keras.io/examples/generative/molecule_generation/

In [None]:
#!python3 -m pip install --upgrade pip
#!pip install pydot
#!apt-get install -y graphviz

In [1]:
import os
import random
from sklearn.preprocessing import MinMaxScaler
import joblib
import pandas as pd
import h5py
import tensorflow as tf
from IPython.display import clear_output,display, HTML
import datetime as dt
import numpy as np
#================== initialization ==================
# Run identity: the timestamp is used as the W&B run name, PROJECT as the
# W&B project name and as the checkpoint sub-folder under MODEL_PATH.
currentTM = f"{dt.datetime.now():%Y-%m-%dT%H%M%S}"
PROJECT = "testVAEModel"

# Training hyper-parameters.
LATENT_DIM = 128
VAE_LR = 5e-4
EPOCHS = 5
BATCH_SIZE = 32

# Input data and artefact locations (relative to the notebook's working directory).
PARQUET_PATH = './data/OptionsEOD_STG.parquet'
SCALER_PATH = './data/scaler.gz'
H5_PATH = './data/OptTrainData/'
MODEL_PATH = "./models/"

# Column groups of the source table.
UNIQUE_KEYS = ['QUOTE_DATE', 'SYMBOL', 'EXPIRE_DATE']
SCALER_COL = [
    'DTE', 'INTRINSIC_VALUE', 'TOTAL_VOLUME',
    'C_BID', 'C_ASK', 'C_VOLUME',
    'P_BID', 'P_ASK', 'P_VOLUME',
]

# Behaviour switches.
DISPLAY = False     # render the encoder/decoder graphs
WANDB_LOG = False   # send metrics to Weights & Biases
RESUME = False      # False: an existing checkpoint folder is deleted and re-created

2024-06-30 07:12:52.791238: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Credentials: wandb.login() reads the key from the WANDB_API_KEY environment
# variable (or prompts for it), so no key belongs in this notebook.
# NOTE(review): a 40-character token that looks like a W&B API key used to be
# pasted here as a comment — treat it as leaked and rotate it.
import wandb
from wandb.keras import WandbCallback
wandb.login()
# Overrides the WANDB_LOG default from the configuration cell: metric logging
# is switched on whenever this cell has been run.
WANDB_LOG = True
# Hyper-parameters recorded with the run; the *_dense_units / *_dropout_rate
# entries are also read back when the encoder and decoder are built.
CONFIG = {
    "latent_dim": LATENT_DIM,
    "learning_rate": VAE_LR,
    "epochs": EPOCHS,
    "batch_size": BATCH_SIZE,
    "architecture": "VAE",
    "dataset": "OptionsChaine",
    "encoder_dense_units": [512, 256],
    "encoder_dropout_rate": 0.2,
    "decoder_dense_units": [256, 512],
    "decoder_dropout_rate": 0.2,
}

# The timestamp from the configuration cell doubles as the run name.
run = wandb.init(project=PROJECT, name=currentTM, config=CONFIG)

[34m[1mwandb[0m: Currently logged in as: [33mwasan-sinlapa[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
#Example

# from IPython.display import clear_output,display, HTML
# import numpy as np
# #load scaler
# scaler = MinMaxScaler()
# PartitionDate = [ d[-7:] for d in  os.listdir(PARQUET_PATH) if 'PartitionDate' in d]
# random.shuffle(PartitionDate)
# scaler = joblib.load(SCALER_PATH)


# for i,partdate in enumerate(PartitionDate) :
#     df = pd.read_parquet(PARQUET_PATH,engine='pyarrow'
#                                  , filters=[('PartitionDate', '=', partdate)]
#                                 )
#     df['P_VOLUME'] = df['P_VOLUME'].fillna(0)
#     df['C_VOLUME'] = df['C_VOLUME'].fillna(0)
#     DATA  = np.empty((0,) + (20,9) ) 
#     for opt_id in np.unique( df[["OPTIONS_ID"]].values):
#         df_filter  = df[df["OPTIONS_ID"]==opt_id]
#         if len(df_filter) == 20:
#             DATA = np.vstack((DATA ,[scaler.transform(df_filter[SCALER_COL])]))
#         else:
#             #print( len(df_filter) )
#             #display(HTML(df_filter[['STRIKE']+SCALER_COL].to_html()))
#             pass
            
#     ## Save the NumPy array to an HDF5 file
#     # with h5py.File(H5_PATH+f"{partdate}.h5", 'w') as f:
#     #     dset = f.create_dataset(f'{partdate}', data=DATA, chunks=True , compression='gzip')

#     print(f"[Processing] {partdate}, {round(((i+1)/len(PartitionDate))*100,2)}%     ",end='\r')

In [4]:
# # Save the NumPy array to an HDF5 file
# with h5py.File(H5_PATH, 'w') as f:
#     #dset = f.create_dataset('dataset', data=DATA, chunks=True, compression='gzip')
#     #test
#     dset = f.create_dataset('dataset', data=DATA, compression='gzip')

In [5]:
#=====================================================================

In [13]:
from src.model import OptionChainGenerator
from src.layer import encoder, decoder

# Build the two halves separately so their settings can be read side by side,
# then wrap them in the VAE container.
# NOTE(review): the encoder takes 3 input columns while the training targets
# Y = DATA[:, :, 3:] hold more than one column — confirm in src.layer how
# output_shape=(20, 1) maps onto them.
vae_encoder = encoder(
    latent_dim=LATENT_DIM,
    input_shape=(20, 3),
    dense_units=CONFIG["encoder_dense_units"],
    dropout_rate=CONFIG["encoder_dropout_rate"],
)
vae_decoder = decoder(
    latent_dim=LATENT_DIM,
    output_shape=(20, 1),
    dense_units=CONFIG["decoder_dense_units"],
    dropout_rate=CONFIG["decoder_dropout_rate"],
)
model = OptionChainGenerator(vae_encoder, vae_decoder)

def dummy_loss(y_true, y_pred):
    """Placeholder loss: ignore both arguments and return a constant 0.0.

    Passed to ``model.compile`` only to fill its ``loss`` argument.
    NOTE(review): the training log reports kl_loss / optVal_loss / vol_loss /
    total_loss separately while ``loss`` stays at 0, so the real objective is
    presumably computed inside OptionChainGenerator — confirm in src.model.

    Args:
        y_true: Ground-truth values (unused).
        y_pred: Model predictions (unused).

    Returns:
        The float ``0.0``.
    """
    del y_true, y_pred  # intentionally unused
    return 0.0
    
# Adam with the configured learning rate drives the weight updates.
# dummy_loss always returns 0.0 and is passed only to fill compile()'s `loss` argument.
# NOTE(review): the actual loss terms presumably come from OptionChainGenerator's
# own training step — confirm in src.model.
vae_optimizer = tf.keras.optimizers.Adam(learning_rate=VAE_LR)
model.compile(vae_optimizer , loss=dummy_loss)

In [14]:
################## show model ######################
if DISPLAY :
    from tensorflow.keras.utils import model_to_dot
    from IPython.display import SVG, display

    def display_model(model, width=1024, height=512):
        """Render a Keras model graph inline as SVG inside a fixed-size <div>.

        Args:
            model: The Keras model to draw.
            width: Width of the wrapping <div>, in pixels.
            height: Height of the wrapping <div>, in pixels.

        The graph is laid out by the Graphviz ``dot`` program through
        ``model_to_dot``; HTML comes from the notebook's first import cell.
        """
        graph = model_to_dot(model, show_shapes=True, show_layer_names=True)
        svg_markup = graph.create(prog='dot', format='svg').decode("utf-8")
        display(HTML(f'<div style="width:{width}px;height:{height}px;">{svg_markup}</div>'))

In [15]:
## Example usage:
## Draw the encoder graph in a 1024x512 px container.
## Guarded by DISPLAY because display_model is only defined when DISPLAY is True.
if DISPLAY :
    display_model(model.encoder, width=1024, height=512)

In [16]:
## Draw the decoder graph in a 2500x512 px container (only when DISPLAY is True).
if DISPLAY :
    display_model(model.decoder, width=2500, height=512)

In [17]:
#================== loadmodel ====================

In [18]:
from tensorflow.keras.models import load_model
import shutil

# Checkpoints live in one folder per project: <MODEL_PATH><PROJECT>/{encoder,decoder}.keras
model_path = MODEL_PATH + PROJECT
encoder_file = model_path + '/encoder.keras'
decoder_file = model_path + '/decoder.keras'

# A fresh run (RESUME is False) discards whatever an earlier run left behind.
if not RESUME and os.path.exists(model_path):
    shutil.rmtree(model_path)

if os.path.exists(model_path):
    # Resuming: replace the freshly built networks with the saved ones.
    model.encoder = load_model(encoder_file)
    model.decoder = load_model(decoder_file)
else:
    # First run: create the folder and store the untrained networks as a starting point.
    os.makedirs(model_path)
    model.encoder.save(encoder_file)
    model.decoder.save(decoder_file)

In [19]:
#================== train model ==================
# One .h5 file per partition; the tag is the last 7 characters of every
# 'PartitionDate...' entry found under PARQUET_PATH (e.g. '2011-12').
PartitionDate = [d[-7:] for d in os.listdir(PARQUET_PATH) if 'PartitionDate' in d]
random.shuffle(PartitionDate)

# Debugging reference: partition '2022-05' showed the NaN problem,
# '2011-12' trained normally.

# Metrics averaged over the epochs of each fit and sent to W&B.
LOGGED_METRICS = ("kl_loss", "loss", "total_loss", "optVal_loss", "vol_loss")

STOP_MODEL = False
for partdate in PartitionDate:
    # [:] reads the whole dataset into memory, so the file can be closed
    # before training starts.
    with h5py.File(H5_PATH + partdate + ".h5", 'r') as f:
        DATA = f[partdate][:]
    X = DATA[:, :, :3]  # first three columns of the last axis are the inputs
    Y = DATA[:, :, 3:]  # the remaining columns are the targets

    if not len(X):
        # Nothing to train on. Skipping also keeps the logging below from
        # reading an undefined (or stale) `history`.
        continue

    tf.keras.backend.clear_session()
    history = model.fit(X, Y, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.2)

    # A NaN KL term marks a broken run: log it, then stop BEFORE saving so the
    # last good checkpoint is not overwritten. (The previous check was inverted
    # and aborted on the first healthy partition instead.)
    if np.isnan(np.average(history.history['kl_loss'])):
        print(f"[Stop] kl_loss is NaN for partition {partdate}; checkpoints left untouched")
        STOP_MODEL = True

    if WANDB_LOG:
        wandb.log(
            {metric: np.average(history.history[metric]) for metric in LOGGED_METRICS},
            commit=True,
        )

    if STOP_MODEL:
        break

    model.encoder.save(model_path + '/encoder.keras')
    model.decoder.save(model_path + '/decoder.keras')

if WANDB_LOG:
    wandb.finish()

Epoch 1/5
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 28ms/step - kl_loss: 0.0591 - optVal_loss: 22.3750 - total_loss: 48.0816 - vol_loss: 25.6471 - loss: 0.0000e+00 - val_kl_loss: 0.0000e+00 - val_loss: 0.0000e+00 - val_optVal_loss: 0.0000e+00 - val_total_loss: 0.0000e+00 - val_vol_loss: 0.0000e+00
Epoch 2/5
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - kl_loss: 0.1399 - optVal_loss: 0.7675 - total_loss: 1.0219 - vol_loss: 0.1145 - loss: 0.0000e+00 - val_kl_loss: 0.0000e+00 - val_loss: 0.0000e+00 - val_optVal_loss: 0.0000e+00 - val_total_loss: 0.0000e+00 - val_vol_loss: 0.0000e+00
Epoch 3/5
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - kl_loss: 0.1052 - optVal_loss: 0.6750 - total_loss: 0.8031 - vol_loss: 0.0231 - loss: 0.0000e+00 - val_kl_loss: 0.0000e+00 - val_loss: 0.0000e+00 - val_optVal_loss: 0.0000e+00 - val_total_loss: 0.0000e+00 - val_vol_loss: 0.0000e+00
Epoch 4/5
[1m12/12[0m [32m━━━━━━━━━━━━━━

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
loss,▁▁▁

0,1
kl_loss,
loss,0.0
optVal_loss,
total_loss,
vol_loss,


In [None]:
#====================================================

In [None]:
#nan problem
#partdate = '2022-05'
#normal 
#partdate = '2011-12'

In [93]:
# Partition under inspection: '2022-05' is the one noted as having the NaN problem.
# Switch to '2011-12' to compare against a partition that trains normally.
partdate = '2022-05'
with h5py.File(f"{H5_PATH}{partdate}.h5", 'r') as f:
    DATA = f[partdate][:]  # [:] pulls the whole dataset into memory

# Split the last axis: columns 0-2 are the model inputs, columns 3+ the targets.
X, Y = DATA[:, :, :3], DATA[:, :, 3:]

In [94]:
# Report every (sample i, column c) of Y whose column sums to exactly zero.
# The outer loop walks over all samples; it used to be bounded by the number
# of columns, which silently limited the scan to the first few samples.
for i, sample in enumerate(Y):
    for c, column in enumerate(np.transpose(sample)):
        if np.sum(column) == 0:
            print(f"i:{i},c:{c}")
            

i:2,c:2
i:2,c:5


In [95]:
# Report every (sample i, column c) of X whose column sums to exactly zero.
# The outer loop walks over all samples; it used to be bounded by the number
# of columns, which silently limited the scan to the first few samples.
for i, sample in enumerate(X):
    for c, column in enumerate(np.transpose(sample)):
        if np.sum(column) == 0:
            print(f"i:{i},c:{c}")

In [106]:
# Column 2 of sample 2 in X (transpose the sample, then take row 2).
X[2].T[2]

array([1.80951984e-05, 1.80951984e-05, 1.80951984e-05, 1.80951984e-05,
       1.80951984e-05, 1.80951984e-05, 1.80951984e-05, 1.80951984e-05,
       1.80951984e-05, 1.80951984e-05, 1.80951984e-05, 1.80951984e-05,
       1.80951984e-05, 1.80951984e-05, 1.80951984e-05, 1.80951984e-05,
       1.80951984e-05, 1.80951984e-05, 1.80951984e-05, 1.80951984e-05])

In [99]:
# Column 2 of sample 2 in Y — one of the columns the zero-column scan reported.
Y[2].T[2]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0.])

In [96]:
# Column 5 of sample 2 in Y — the other column the zero-column scan reported.
Y[2].T[5]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0.])

In [98]:
# Sample 2 of Y, row by row (the sample index reported by the zero-column scan).
Y[2]

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.00288613, 0.00308608, 0.        , 0.05272295, 0.00033615,
        0.        ],
       [0.00278002, 0.00298476, 0.        , 0.05335353, 0.00034005,
        0.        ],
       [0.0026824 , 0.0033816 , 0.        , 0.05398411, 0.00034275,
        0.        ],
       [0.00258054, 0.0027779 , 0.        , 0.05462288, 0.0003479 ,
        0.        ],
       [0.00248292, 0.0026808 , 0.        , 0.05526165, 0.00035195,
        0.        ],
       [0.00213913, 0.00306497, 0.        , 0.0559168 , 0.00035615,
        0.        ],
       [0.00230466, 0

In [97]:
# Sample 2 of Y, transposed so that each output row is one column of the sample.
Y[2].T

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.00288613, 0.00278002, 0.0026824 , 0.00258054, 0.00248292,
        0.00213913, 0.00230466, 0.00210093, 0.00188447, 0.00204575,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.00308608, 0.00298476, 0.0033816 , 0.0027779 , 0.0026808 ,
        0.00306497, 0.00248237, 0.00239794, 0.00277367, 0.00222063,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.05272295, 0.05335353, 0.05398411, 0.05462288, 0.05526165,
        0.0559168 , 0.05656375, 0.05723528, 0

In [20]:
# Echo the partition tag currently bound to `partdate`.
partdate

'2011-12'

In [None]:
# Metric values recorded by the most recent model.fit call.
history.history

In [None]:
# Tenth row from the end of sample 2 in Y.
Y[2][-10]

In [None]:
# For every sample of X whose entries add up to exactly zero, print that sum.
for sample in X:
    total = np.sum(sample)
    if total == 0:
        print(total)

In [None]:
# Number of samples in the loaded partition (length of X's first axis).
len(X)

In [None]:
# For every sample of Y whose entries add up to exactly zero, print that sum.
for sample in Y:
    total = np.sum(sample)
    if total == 0:
        print(total)

In [None]:
# Average kl_loss of the most recent fit — the same quantity the training loop
# passes to np.isnan.
np.average(  history.history['kl_loss'] )

In [None]:
# Echo the partition tag currently bound to `partdate`.
partdate

In [None]:
# Sample 5 of Y, row by row.
Y[5]

In [None]:
# Display the input array of the loaded partition.
# NOTE(review): this shows the whole array; X[:5] would give a shorter preview.
X

In [None]:
f = h5py.File(H5_PATH+'2012-05'+".h5", 'r') 

In [None]:
f.keys()

In [None]:
import numpy as np

def create_adjacency_matrix(options, threshold=25):
    """
    Build an adjacency matrix for an options chain from strike-price proximity and DTE.

    Two options are connected when their strike prices differ by at most
    ``threshold`` and their DTE values are equal. Every option is also compared
    with itself, so for a non-negative threshold the diagonal is set as well.

    Args:
        options: Indexable sequence of mappings with 'strike_price' and 'dte' keys.
        threshold: Largest strike-price difference that still links two options.

    Returns:
        A symmetric (n, n) float array holding 1 for connected pairs and 0 otherwise.
    """
    size = len(options)
    adjacency = np.zeros((size, size))

    for row in range(size):
        first = options[row]
        # Start at `row`: each unordered pair is visited once and mirrored below.
        for col in range(row, size):
            second = options[col]
            if not abs(first['strike_price'] - second['strike_price']) <= threshold:
                continue
            if not first['dte'] == second['dte']:
                continue
            adjacency[row, col] = adjacency[col, row] = 1

    return adjacency

# Two sample chains. Each contains one zero-filled placeholder row
# (strike 0, DTE 0); they differ in ordering and in one strike value.
options_placeholder_last = [
    {'strike_price': 100, 'dte': 30},
    {'strike_price': 105, 'dte': 30},
    {'strike_price': 110, 'dte': 30},
    {'strike_price': 115, 'dte': 30},
    {'strike_price': 120, 'dte': 30},
    {'strike_price': 0, 'dte': 0},  # values changed to 0
]

options_placeholder_first = [
    {'strike_price': 0, 'dte': 0},  # values changed to 0
    {'strike_price': 120, 'dte': 30},
    {'strike_price': 150, 'dte': 30},
    {'strike_price': 110, 'dte': 30},
    {'strike_price': 105, 'dte': 30},
    {'strike_price': 100, 'dte': 30},
]

# The chain actually used below. The first list used to be assigned to
# `options` as well and was immediately overwritten.
options = options_placeholder_first

# Adjust the threshold (tighter than the function's default of 25).
threshold = 5

adjacency_matrix = create_adjacency_matrix(options, threshold)
print(adjacency_matrix)

In [None]:
import numpy as np
# 3x3 integer matrix holding 1..9 in row-major order.
A = np.arange(1, 10).reshape(3, 3)

In [None]:
# First column of A. A is 2-D, so it takes two indices; the previous
# three-index form A[:,:,0] raised IndexError (too many indices).
A[:, 0]

In [None]:
import numpy as np

# Draw a (2, 20, 6) array of uniform samples from [0, 1).
# No seed is set, so the values differ on every run.
random_array = np.random.random_sample((2, 20, 6))

print(random_array)

In [None]:
# Smaller (2, 5, 3) array of uniform [0, 1) samples for trying out slicing.
random_array = np.random.random_sample((2, 5, 3))

In [None]:
# Display the array created in the previous cell.
random_array

In [None]:
# Last entry along the final axis for every (sample, row) pair of the 3-D array.
random_array[..., -1]

In [None]:
# Column names in positional order; list.index() then maps a name to its position.
# NOTE(review): presumably mirrors the six target columns of Y — confirm.
colList = [
    "c_bid", "c_ask", "c_volume",
    "p_bid", "p_ask", "p_volume",
]

In [None]:
# Position of "c_ask" within colList.
colList.index("c_ask")