In [None]:
#https://keras.io/examples/generative/wgan_gp/
#https://keras.io/examples/generative/wgan-graphs/

In [None]:
import os,shutil,random
from sklearn.preprocessing import MinMaxScaler
import joblib
from IPython.display import clear_output
import pandas as pd
import h5py
import tensorflow as tf
from IPython.display import clear_output,display, HTML
from sklearn.model_selection import train_test_split
import datetime as dt
import numpy as np
from tensorboard.plugins.hparams import api as hp
from src.wgangp.utils import Scaler
%load_ext tensorboard

In [None]:
#================== initialization ==================
# Best settings found so far (kept for reference):
#   LR = 1e-4, EPOCHS = 10, "discriminator_extra_steps": 1
#   LR = 1e-4, EPOCHS = 15, "discriminator_extra_steps": 1

# Read the clock once so the W&B run name (currentTM) and the TensorBoard
# directory (log_dir) always describe the same instant.
_RUN_STARTED = dt.datetime.now()
currentTM = _RUN_STARTED.strftime("%Y-%m-%dT%H%M%S")
PROJECT = "wgangpModel"
LATENT_DIM = 16
LR = 1e-4
EPOCHS = 5
BATCH_SIZE = 32

PARQUET_PATH = './data/OptionsEOD_STG.parquet'

UNIQUE_KEYS = ['QUOTE_DATE', 'SYMBOL', 'EXPIRE_DATE']
# Feature columns, in the order they appear along the last axis of the data.
SCALER_COL = [
    'UNDERLYING_LAST', 'STRIKE', 'STRIKE_DISTANCE', 'INTRINSIC_VALUE', 'DTE',
    'TOTAL_VOLUME', 'C_VEGA', 'P_VEGA',
    'C_BID', 'C_ASK', 'C_VOLUME',
    'P_BID', 'P_ASK', 'P_VOLUME',
]

MODEL_PATH = "./models/"
H5_PATH = './data/OptTrainData/'
# Length of the second axis of every sample; the same 16 is used by the
# reshape(-1, 16, 1) calls further down in this notebook.
SEQ_LEN = 16
# Despite the name this is an empty (0, SEQ_LEN, n_columns) array, used as the
# initial value of the carry-over buffer in the training cell.
STACK_DATA_SHAPE = np.empty((0, SEQ_LEN, len(SCALER_COL)))

WANDB_LOG = True
RESUME = False
SUMMARY = True
# NOTE(review): absolute container path; consider making it configurable.
log_dir = f"/app/logs/{PROJECT}/" + _RUN_STARTED.strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
# Scaler = joblib.load(SCALER_PATH )
# Scaler_Price = joblib.load(SCALER_PRICE_PATH )

# Allowed values: 'map', 'summary' or None.
DISPLAY = 'summary'


In [None]:
###===== load Scaler
# Column groups. Each group name ('QUOTE', 'VEGA', 'VOLUME') is handed to
# Scaler below in place of its member columns.
QUOTE_COL = ['C_BID', 'C_ASK', 'P_BID', 'P_ASK']
VEGA_COL = ["C_VEGA", "P_VEGA"]
VOLUME_COL = ["TOTAL_VOLUME", "C_VOLUME", "P_VOLUME"]

_GROUPED_COL = QUOTE_COL + VEGA_COL + VOLUME_COL


def _positions_in_scaler_col(keep):
    """Return the indices of SCALER_COL (in SCALER_COL order) whose name satisfies ``keep``."""
    return [pos for pos, name in enumerate(SCALER_COL) if keep(name)]


SCALER_QUOTE_COL_INDEX = _positions_in_scaler_col(lambda name: name in QUOTE_COL)
SCALER_VEGA_COL_INDEX = _positions_in_scaler_col(lambda name: name in VEGA_COL)
SCALER_VOLUME_COL_INDEX = _positions_in_scaler_col(lambda name: name in VOLUME_COL)
SCALER_OTHER_COL_INDEX = _positions_in_scaler_col(lambda name: name not in _GROUPED_COL)

# Ungrouped columns keep their own name; grouped ones are represented by the
# group name only.
_UNGROUPED_COL = [name for name in SCALER_COL if name not in _GROUPED_COL]
SCALER = Scaler(_UNGROUPED_COL + ['QUOTE', 'VEGA', 'VOLUME'], path="./data/scaler/")

In [None]:
## SCALER_COL order, for reference:
## ['UNDERLYING_LAST','STRIKE','STRIKE_DISTANCE','INTRINSIC_VALUE','DTE','TOTAL_VOLUME','C_VEGA','P_VEGA','C_BID','C_ASK','C_VOLUME','P_BID','P_ASK','P_VOLUME']
# Positions are collected by walking SCALER_COL, so they come out in SCALER_COL
# order regardless of the order the names are listed in here.
_X_NAMES = ['DTE', 'INTRINSIC_VALUE', 'C_VEGA', 'P_VEGA']
_Y_NAMES = ['C_BID', 'C_ASK', 'P_BID', 'P_ASK']
select_x = [pos for pos, name in enumerate(SCALER_COL) if name in _X_NAMES]
select_y = [pos for pos, name in enumerate(SCALER_COL) if name in _Y_NAMES]

In [None]:
import wandb
from wandb.integration.keras import WandbCallback

# Hyper-parameters for this run; the same dict is sent to W&B as the run config.
CONFIG = dict(
    learning_rate=LR,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    architecture="wgangp",
    dataset="OptionsChaine",
    generator_dense_units=[128, 64, 32],
    generator_dropout_rate=0.2,
    discriminator_dense_units=[32, 64, 128],
    discriminator_dropout_rate=0.2,
    use_bias=False,
    use_dropout=True,
    use_bn=True,
    transform=True,
    discriminator_extra_steps=1,
    x_col=select_x,
    y_col=select_y,
    # NOTE(review): this is a one-element tuple, not a float; confirm that
    # OptionChainGenerator expects a tuple here.
    gp_weight=(10.0,),
    gp_cap=20,
)

# Free-text description attached to the W&B run.
notes = f"""
DenseLayer model
additional_con_loss_p_1/c_1 * 0.01
discriminator_extra_steps : {CONFIG['discriminator_extra_steps']}
gp_weight:{CONFIG['gp_weight']}
LR:{LR}
gp cap : {CONFIG['gp_cap']}
reduce_mean(ASK-BID) 
"""

if WANDB_LOG:
    wandb.login()
    _wandb_kwargs = dict(project=PROJECT, name=currentTM, config=CONFIG, notes=notes)
    run = wandb.init(**_wandb_kwargs)

In [None]:
#=====================================================================

In [None]:
from src.wgangp.model import OptionChainGenerator
from src.wgangp.layers import generatorDense as generator
from src.wgangp.layers import discriminatorDense as discriminator

In [None]:


# Build the WGAN-GP wrapper from its two sub-networks.
#
# Each network reads its *own* CONFIG keys, so the sizes logged to W&B under
# "generator_*" / "discriminator_*" describe the network they are named after.
#
# The generator is fed the select_x columns and its output is later reshaped
# with len(select_y) (see the fit() and predict cells), so input_dim follows
# select_x and output_dim follows select_y. The discriminator receives the
# generator output, hence len(select_y).
_shared_layer_options = dict(
    use_bias=CONFIG["use_bias"],
    use_dropout=CONFIG["use_dropout"],
    use_bn=CONFIG["use_bn"],
)

model = OptionChainGenerator(
    discriminator=discriminator(
        input_shape=(16, len(select_y)),
        dense_units=CONFIG["discriminator_dense_units"],
        dropout_rate=CONFIG["discriminator_dropout_rate"],
        **_shared_layer_options,
    ),
    generator=generator(
        input_dim=(16, len(select_x)),
        output_dim=(16, len(select_y)),
        dense_units=CONFIG["generator_dense_units"],
        dropout_rate=CONFIG["generator_dropout_rate"],
        **_shared_layer_options,
    ),
    discriminator_extra_steps=CONFIG["discriminator_extra_steps"],
    output_col=[SCALER_COL[i] for i in select_y],
    scaler=SCALER,
    gp_weight=CONFIG["gp_weight"],
    gp_cap=CONFIG["gp_cap"],
)


def _make_adam():
    """Return a fresh Adam optimizer with the settings shared by both networks."""
    return tf.optimizers.Adam(learning_rate=LR, beta_1=0.5, beta_2=0.9, clipvalue=10.0)


# Each network needs its own optimizer instance (separate slot variables).
model.compile(d_optimizer=_make_adam(), g_optimizer=_make_adam())

In [None]:
################## show model ######################
# The graph renderer is only imported and defined when the 'map' view is requested.
if DISPLAY == 'map':
    from tensorflow.keras.utils import model_to_dot
    from IPython.display import SVG, display

    def display_model(model, width=1024, height=512):
        """Show ``model`` as an SVG graph wrapped in a fixed-size HTML <div>.

        Args:
            model: Keras model passed to ``model_to_dot``.
            width: Width of the wrapping <div>, in pixels.
            height: Height of the wrapping <div>, in pixels.
        """
        graph = model_to_dot(model, show_shapes=True, show_layer_names=True)
        svg_markup = graph.create(prog='dot', format='svg').decode("utf-8")
        display(HTML(f'<div style="width:{width}px;height:{height}px;">{svg_markup}</div>'))

In [None]:
## Example usage:
## Show the generator either as a graph ('map') or as a text summary.
if DISPLAY == 'map':
    # The width must be passed as an explicit value; the same size is used for
    # the discriminator graph.
    display_model(model.generator, width=2500, height=512)
elif DISPLAY == 'summary':
    model.generator.summary()

In [None]:
# Show the discriminator as a text summary or as a graph, depending on DISPLAY.
if DISPLAY == 'summary':
    model.discriminator.summary()
elif DISPLAY == 'map':
    display_model(model.discriminator, width=2500, height=512)

In [None]:
#================== loadmodel ====================

In [None]:
from tensorflow.keras.models import load_model

model_path = MODEL_PATH + f'{PROJECT}'
_generator_file = model_path + '/' + 'generator.keras'
_discriminator_file = model_path + '/' + 'discriminator.keras'

# A run that is not resuming starts from an empty checkpoint directory.
if not RESUME and os.path.exists(model_path):
    shutil.rmtree(model_path)

if os.path.exists(model_path):
    # Resuming: replace the freshly built networks with the saved ones.
    model.generator = load_model(_generator_file)
    model.discriminator = load_model(_discriminator_file)
else:
    # First run: create the directory and store the initial networks.
    os.makedirs(model_path)
    model.generator.save(_generator_file)
    model.discriminator.save(_discriminator_file)

In [None]:
# ================== train model ==================
# Partition names are the .h5 file names without their extension. Anything else
# in the directory (hidden files, checkpoints, ...) is ignored instead of being
# truncated by three characters and failing on open.
PartitionDate = [d[:-3] for d in os.listdir(H5_PATH) if d.endswith(".h5")]
random.shuffle(PartitionDate)
# Rows carried over from partitions that were too small to train on alone.
STACK_DATA = STACK_DATA_SHAPE
# Set when a loss becomes NaN; ends training before the weights are saved.
STOP_MODEL = False
# Minimum number of samples required before fit() is called.
MIN_TRAIN_ROWS = 64

# Slice PartitionDate[:] to limit the range of partitions used.
for partdate in PartitionDate[:]:
    clear_output(wait=False)
    with h5py.File(H5_PATH + partdate + ".h5", 'r') as f:
        DATA = f[partdate][:]
    data_shape = DATA.shape

    if CONFIG['transform']:
        # Scale one column at a time: flatten it to (n, 1) for the scaler and
        # restore the (samples, 16, 1) layout afterwards.
        for i, c in enumerate(SCALER_COL):
            DATA[:, :, [i]] = SCALER.groupTransform(
                c,
                DATA[:, :, [i]].reshape(-1, 1),
                QUOTE_COL=QUOTE_COL,
                VEGA_COL=VEGA_COL,
                VOLUME_COL=VOLUME_COL,
            ).reshape(-1, 16, 1)

    # Add whatever is still waiting from earlier, too-small partitions.
    DATA = np.vstack((DATA, STACK_DATA))

    if len(DATA) < MIN_TRAIN_ROWS:
        # DATA already contains the previous carry-over, so it *is* the new
        # carry-over. Stacking it onto STACK_DATA again would duplicate rows.
        STACK_DATA = DATA
    else:
        STACK_DATA = np.empty((0,) + data_shape[1:])
        X = DATA[:, :, select_x]  # model inputs
        Y = DATA[:, :, select_y]  # model targets
        x_train, x_val, y_train, y_val = train_test_split(X, Y, test_size=0.2, random_state=42)
        # No reshuffle of PartitionDate here: the loop iterates over a copy, so
        # shuffling the list mid-loop cannot change the visiting order.
        tf.keras.backend.clear_session()
        history = model.fit(
            x_train,
            y_train,
            epochs=CONFIG['epochs'],
            batch_size=BATCH_SIZE,
            validation_data=(x_val, y_val),
            callbacks=[tensorboard_callback],
        )
        if np.isnan(np.average(history.history['generator_loss'])) or np.isnan(
            np.average(history.history['discriminator_loss'])
        ):
            STOP_MODEL = True

        if WANDB_LOG:
            # One W&B step per partition: the mean of every metric over its epochs.
            LogVal = {k: np.average(values) for k, values in history.history.items()}
            wandb.log(LogVal, commit=True)

    if STOP_MODEL:
        # Leave before saving so the last good checkpoint is kept.
        break

    model.generator.save(model_path + f'/' + f'generator.keras')
    model.discriminator.save(model_path + f'/' + f'discriminator.keras')

if WANDB_LOG:
    wandb.finish()

In [None]:
#====================== GET ZERO Val. ========================

In [None]:
# Push a near-zero value (1e-8) through every column's scaler to see what
# "zero" looks like in scaled space. The last axis is sized from SCALER_COL so
# the cell keeps working when the column list changes.
D_ZERO = np.full((1, 16, len(SCALER_COL)), 1.00e-08)
for i, c in enumerate(SCALER_COL):
    D_ZERO[:, :, [i]] = SCALER.groupTransform(
        c,
        D_ZERO[:, :, [i]].reshape(-1, 1),
        QUOTE_COL=QUOTE_COL,
        VEGA_COL=VEGA_COL,
        VOLUME_COL=VOLUME_COL,
    ).reshape(-1, 16, 1)
a = pd.DataFrame(D_ZERO.reshape(16, len(SCALER_COL)), columns=SCALER_COL)
# Every row holds the same values, so the first one is enough.
a[:1]

In [None]:
#====================== CheckData =======================

In [None]:
# Load one random partition and scale it, keeping an untouched copy for comparison.
# Only .h5 files are considered; other directory entries are skipped.
PartitionDate = [d[:-3] for d in os.listdir(H5_PATH) if d.endswith(".h5")]
random.shuffle(PartitionDate)
for partdate in PartitionDate[:1]:
    with h5py.File(H5_PATH + partdate + ".h5", 'r') as f:
        DATA = f[partdate][:]
    # Independent copy of the raw values (avoids reading the dataset twice).
    DATA_ORIGINAL = DATA.copy()
    data_shape = DATA.shape
    print(f"CONFIG['transform'] : {CONFIG['transform']}")
    # NOTE(review): scaling is applied unconditionally here, whereas the
    # training cell only scales when CONFIG['transform'] is true.
    for i, c in enumerate(SCALER_COL):
        DATA[:, :, [i]] = SCALER.groupTransform(
            c,
            DATA[:, :, [i]].reshape(-1, 1),
            QUOTE_COL=QUOTE_COL,
            VEGA_COL=VEGA_COL,
            VOLUME_COL=VOLUME_COL,
        ).reshape(-1, 16, 1)


In [None]:
col_x = [SCALER_COL[i] for i in select_x]
col_y = [SCALER_COL[i] for i in select_y]
DATA.shape

In [None]:
DATA[:,:,SCALER_QUOTE_COL_INDEX][0]

In [None]:
DATA_ORIGINAL[:,:,SCALER_QUOTE_COL_INDEX][0]

In [None]:
# Undo the QUOTE scaling on a copy, so DATA stays scaled and re-running this
# cell does not apply the inverse transform a second time.
DATA_QUOTE_INV = DATA.copy()
for i in SCALER_QUOTE_COL_INDEX:
    DATA_QUOTE_INV[:, :, i] = SCALER()['QUOTE'].inverse_transform(DATA[:, :, i])
# First sample, quote columns only; compare with DATA_ORIGINAL.
DATA_QUOTE_INV[:, :, SCALER_QUOTE_COL_INDEX][0]

In [None]:
#======================== predict =========================

In [None]:
# Load one random partition as prediction input, keeping an untouched copy.
# Only .h5 files are considered; other directory entries are skipped.
PartitionDate = [d[:-3] for d in os.listdir(H5_PATH) if d.endswith(".h5")]
random.shuffle(PartitionDate)
for partdate in PartitionDate[:1]:
    with h5py.File(H5_PATH + partdate + ".h5", 'r') as f:
        DATA = f[partdate][:]
    # Independent copy of the raw values (avoids reading the dataset twice).
    DATA_ORIGINAL = DATA.copy()
    data_shape = DATA.shape
    print(f"CONFIG['transform'] : {CONFIG['transform']}")
    # NOTE(review): scaling is applied unconditionally here, whereas the
    # training cell only scales when CONFIG['transform'] is true.
    for i, c in enumerate(SCALER_COL):
        DATA[:, :, [i]] = SCALER.groupTransform(
            c,
            DATA[:, :, [i]].reshape(-1, 1),
            QUOTE_COL=QUOTE_COL,
            VEGA_COL=VEGA_COL,
            VOLUME_COL=VOLUME_COL,
        ).reshape(-1, 16, 1)


In [None]:
col_x = [SCALER_COL[i] for i in select_x]
col_y = [SCALER_COL[i] for i in select_y]

In [None]:
# Split the loaded partition into generator inputs and real targets.
X = DATA[:, :, select_x]
Y_real = DATA[:, :, select_y]
# To inspect the training split instead, use:
#   X = x_train
#   Y_real = y_train

# First sample of X as a table, one row per position along the second axis.
_first_x = X[:1].reshape(16, len(select_x))
dfX = pd.DataFrame(_first_x, columns=col_x)
#print(dfX)

In [None]:
# Run both networks in inference mode. The flag is passed explicitly to the
# generator as well, so both calls state the same mode.
genVal = model.generator(X, training=False)
disVal = model.discriminator(genVal, training=False)

In [None]:
genVal.shape

In [None]:
#========================== _LOSS_ANALYSIS ================================

In [None]:
-tf.reduce_mean(disVal)

In [None]:
# Map each target column name to its channel of the generator output.
# Channel k of genVal corresponds to the k-th entry of select_y.
data_dict = {
    SCALER_COL[col_pos]: genVal[:, :, channel]
    for channel, col_pos in enumerate(select_y)
}

In [None]:
# Mean of the positive part of (C_ASK - C_BID) over the generated batch.
# NOTE(review): this is non-zero whenever ask is above bid; confirm that this
# is the intended direction of the penalty.
_call_ask_minus_bid = data_dict["C_ASK"] - data_dict["C_BID"]
additional_con_loss_c_1 = tf.reduce_mean(tf.maximum(_call_ask_minus_bid, 0.0))
additional_con_loss_c_1

In [None]:
#===

In [None]:
def _rising_step_penalty(values, floor):
    """Sum the positive (next - current) steps among the entries of ``values`` above ``floor``.

    The mask has the same shape as ``values``, so ``tf.boolean_mask`` returns the
    kept entries as one flat sequence. Steps are therefore taken along that
    flattened sequence.
    NOTE(review): this also compares the last kept entry of one sample with the
    first kept entry of the next; confirm that is intended.
    """
    kept = tf.boolean_mask(values, tf.greater(values, floor))
    shifted = tf.roll(kept, shift=-1, axis=0)
    # Drop the last element: after the roll it wraps around to the first entry.
    return tf.reduce_sum(tf.maximum(shifted[:-1] - kept[:-1], 0.0))


# Average of the call-side penalties for bid and ask.
additional_con_loss_c_2 = tf.reduce_mean(
    [_rising_step_penalty(data_dict[name], -4.999975e-14) for name in ("C_BID", "C_ASK")]
)
additional_con_loss_c_2

In [None]:
# Put-side counterpart: sums the positive (current - next) steps, i.e. the
# places where the flattened, masked series falls.
# The loop variables p, v and mask are inspected by the cells that follow.
_put_terms = []
for p, v in (("P_BID", -4.999975e-14), ("P_ASK", -4.999975e-14)):
    # Keep only the entries above the threshold v.
    mask = tf.greater(data_dict[p], v)
    filtered_tensor = tf.boolean_mask(data_dict[p], mask)
    filtered_tensor_roll = tf.roll(filtered_tensor, shift=-1, axis=0)
    # The last element is dropped because the roll wraps it to the front.
    _step_down = filtered_tensor[:-1] - filtered_tensor_roll[:-1]
    difference = tf.maximum(_step_down, 0.0)
    _put_terms.append(tf.reduce_sum(difference))
additional_con_loss_p_2 = tf.reduce_mean(_put_terms)
additional_con_loss_p_2

In [None]:
v

In [None]:
data_dict[p][0]

In [None]:
mask[0]

In [None]:
 tf.greater(data_dict[p][0], -4.999975e-14)

In [None]:
#==============

In [None]:
dfY = pd.DataFrame(
    genVal.numpy()[:1].reshape(16, len(select_y)), 
    columns=col_y)


In [None]:
npgenVal = genVal.numpy()
for i in range(genVal.shape[-1]):
    npgenVal[:,:,i] = SCALER()['QUOTE'].inverse_transform(npgenVal[:,:,i] )

In [None]:
select_y

In [None]:
pd.DataFrame(npgenVal[0],columns=col_y)

In [None]:
# Inverse-transform the real targets into a separate array. A plain assignment
# would alias Y_real, so the loop would overwrite the scaled values and a
# re-run of this cell would apply the inverse transform twice.
Y_invT = Y_real.copy()
for i in range(Y_invT.shape[-1]):
    Y_invT[:, :, i] = SCALER()['QUOTE'].inverse_transform(Y_real[:, :, i])
pd.DataFrame(Y_invT[0], columns=col_y)

In [None]:
pd.DataFrame(DATA_ORIGINAL[:,:,select_y][0],columns=col_y)

In [None]:
SUMDF = pd.concat([dfX, dfY],axis=1)

In [None]:
# Rebuild a frame with every SCALER_COL column, in SCALER_COL order.
# Columns missing from SUMDF are recorded in rm_col and filled with 1e-8.
rm_col = [name for name in SCALER_COL if name not in SUMDF.columns]
resultDF = pd.DataFrame([])
for name in SCALER_COL:
    resultDF[name] = [1e-8] * 16 if name in rm_col else SUMDF[name]


In [None]:
# Map the assembled frame back through the scaler, drop the placeholder columns
# and tidy the result for display.
# NOTE(review): `Scaler` is the class imported from src.wgangp.utils, while the
# instance used everywhere else in this notebook is `SCALER`. Confirm that
# calling inverse_transform on the class is intended (the initialization cell
# has a commented-out `Scaler = joblib.load(...)`).
if CONFIG['transform'] :
    decode_transformed=Scaler.inverse_transform(
        resultDF
    )
    decode_transformed=pd.DataFrame(  
        decode_transformed
    ,columns=SCALER_COL).drop(columns=rm_col)
    # Rows whose DTE is below 1e-8 are overwritten with 0 in every column.
    decode_transformed.loc[decode_transformed['DTE'] < 1e-8] = 0
    decode_transformed[col_y] = decode_transformed[col_y].round(2)
# NOTE(review): decode_transformed is only assigned inside the branch above.
decode_transformed

In [None]:
#========================

In [None]:
tdVal = tf.constant( resultDF[['C_BID','C_ASK']].values )

In [None]:
mask = tf.greater(tdVal[:,0], -0.502343)

In [None]:
filtered_tensor = tf.boolean_mask(tdVal[:,0], mask)
filtered_tensor

In [None]:
filtered_tensor_roll = tf.roll(filtered_tensor, shift=-1, axis=0)
filtered_tensor_roll

In [None]:
difference = tf.maximum(
                filtered_tensor_roll[:-1] - filtered_tensor[:-1], 0.0
            )
difference

In [None]:
#======================= real data ==============================

In [None]:
# Same decoding as for the generated values, applied to the first real sample.
# NOTE(review): `Scaler` is the class imported from src.wgangp.utils, not the
# `SCALER` instance; confirm this call is intended.
# NOTE(review): by this point the cell above has overwritten every column of
# rows with DTE < 1e-8 in the generated frame; here the threshold is <= 1e-8.
if CONFIG['transform'] :
    realdecode_transformed=Scaler.inverse_transform(
        DATA[0]
    )
    realdecode_transformed=pd.DataFrame(  
        realdecode_transformed
    ,columns=SCALER_COL).drop(columns=rm_col)
    # Rows whose DTE is at or below 1e-8 are overwritten with 0 in every column.
    realdecode_transformed.loc[realdecode_transformed['DTE'] <= 1e-8] = 0
    

In [None]:
realdecode_transformed

In [None]:
9.999994e-09

In [None]:
#======================= _compute_loss ==============================

In [None]:
#generated_data = [c_bid, c_ask, c_volume, p_bid, p_ask, p_volume]
# NOTE(review): decode_data, z_mean and log_var are not assigned by any earlier
# cell visible in this notebook, so this cell fails on a fresh kernel. It looks
# like a leftover from a different experiment; confirm whether it is still needed.
colList = ["c_bid", "c_ask", "c_volume", "p_bid", "p_ask", "p_volume"]
# Everything after the first three entries of decode_data.
generated_data = decode_data[3:]
# Self-assignment: has no effect beyond requiring z_mean to exist already.
z_mean    = z_mean
z_log_var = log_var
# First sample only, columns from index 3 onwards.
Y_real    = DATA[:, :, 3:][:1]

In [None]:
z_mean

In [None]:
for  col,genData in zip(colList,generated_data):
    print( colList.index(col),col )

In [None]:
subtract_genData = genData - tf.cast(tf.expand_dims(Y_real[:, :, colList.index(col)], axis=-1)
        , tf.float32) 

In [None]:
reconstruction_values_total = []
reconstruction_values_total.append( tf.reduce_mean( tf.square(subtract_genData)   ) )

In [None]:
log_var = tf.clip_by_value(log_var, -1.0, 1.0)
kl_loss = -0.5 * tf.reduce_sum(1 + log_var - tf.square(z_mean) - tf.exp(log_var), axis=-1)

In [None]:
tf.reduce_mean(reconstruction_values_total + kl_loss)

In [None]:
#========== Keras VAE original

In [None]:
Y_real[0][0]

In [None]:
 tf.concat(decode_data, axis=-1).numpy()[0][0]

In [None]:
Y_real[0] -  tf.concat(decode_data, axis=-1).numpy()

In [None]:
features_loss = tf.reduce_mean(
    tf.reduce_sum(
        tf.keras.losses.mean_squared_error(Y_real, tf.concat(decode_data, axis=-1)),
        axis=(1),
    )
)

In [None]:
features_loss

In [None]:
# features_loss = tf.reduce_mean(
#     tf.reduce_sum(
#         keras.losses.categorical_crossentropy(features_real, features_gen),
#         axis=(1),
#     )
# )
# kl_loss = -0.5 * tf.reduce_sum(
#     1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), 1
# )
# kl_loss = tf.reduce_mean(kl_loss)

# property_loss = tf.reduce_mean(
#     keras.losses.binary_crossentropy(qed_true, qed_pred)
# )

# graph_loss = self._gradient_penalty(graph_real, graph_generated)

# return kl_loss + property_loss + graph_loss + adjacency_loss + features_loss

In [None]:
#======================= inverse_transform ========================

In [None]:
#add 0
decode_data = [tf.zeros([1, 32, 1])]*3 + decode_data

In [None]:
# Flatten every tensor in decode_data to 1-D, stack them as rows and transpose,
# so each tensor becomes one column of the array passed to inverse_transform.
# NOTE(review): `Scaler` is the class imported from src.wgangp.utils, not the
# `SCALER` instance used elsewhere in this notebook; confirm this call is intended.
invert_decode = Scaler.inverse_transform(
    np.array([d.numpy().reshape(-1) for d in decode_data]).transpose()
    ) 

In [None]:
pd.DataFrame(
    invert_decode[:,3:], 
    columns=SCALER_COL[3:])

In [None]:
#=====================================================================

In [None]:
DATA.reshape(-1,data_shape[-1])

In [None]:
data_shape

In [None]:
import numpy as np
matrix = np.array([
[1,2,3,],
[4,5,6],
[7,8,9]
])

In [None]:
matrix = tf.convert_to_tensor(matrix)
    
# Shift matrix to compare each row with the next row
matrix_next = tf.roll(matrix, shift=-1, axis=0)

# Ignore the last row for comparison as it rolls over to the first row
matrix = matrix[:-1]
matrix_next = matrix_next[:-1]

# Compute the difference between each row and the next row
difference = matrix_next - matrix

In [None]:
matrix_next

In [None]:
matrix

In [None]:
difference

In [None]:
import json


In [None]:
json.dumps( """ WHERE 
Count_Date < LEFT( DATEADD( month,2,CAST('$timestamp' AS date) ),7)+'-01')
AND Count_Date > LEFT( DATEADD( year,-2,CAST('$timestamp' AS date) ),4)+'-01-01' 
""")