In [1]:
# -*- coding: utf-8 -*-
from io import BytesIO
from tensorflow.python.lib.io import file_io
from tqdm.auto import tqdm
from configuration import Config
from Simulation.model import LuxuryDiceSimulationMdn as Model
from Simulation.loss import MdnLoss

import os
import numpy as np
import tensorflow as tf
import warnings
warnings.filterwarnings("ignore")

In [2]:
#
# Configuration Loading
# ----------------------------------------------------------------------------------------------------------------------
# The YAML file is resolved relative to the current working directory, so the
# notebook has to be launched from the directory containing "Simulation/".
config_path = os.path.join(os.getcwd(), "Simulation/config.yaml")
config = Config(config_path)

# Restrict TensorFlow to the first GPU reported by the runtime; when no GPU is
# listed the device visibility is left untouched.
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
if gpus:
    tf.config.experimental.set_visible_devices(devices=gpus[0], device_type='GPU')

---------------------------------- APP CONFIG ----------------------------------
data: 
  data_path: gs://bin_for_aiops/GambleMaster/LuxuryDice/luxury_dice.npz
  time_length: 15
  train_test_split: 0.8
train: 
  batch_size: 64
  epoch: 1000
test: 
  batch_size: 128
model: 
  loss: MDN
  metrics: MDN
  alpha: 3
optimizer: 
  method: adam
  learning_rate: 1e-4
weights: 
  simulation: weights/best_luxury_dice_simulation_model.tf
--------------------------------------------------------------------------------


2023-12-13 08:51:15.842665: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-12-13 08:51:15.854497: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-12-13 08:51:15.854732: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [3]:
if __name__ == "__main__":
    # End-to-end training script: load the dataset, split it chronologically
    # (no shuffling) into train/test parts, build the model and optimizer, then
    # run a manual GradientTape training loop with a per-epoch evaluation pass.

    #
    # Data Preparation
    # ----------------------------------------------------------------------------------------------------------------------
    # The .npz archive is read fully into memory through TensorFlow's file_io
    # and handed to NumPy as an in-memory buffer.
    f = BytesIO(file_io.read_file_to_string(config.data.data_path, binary_mode=True))
    data = np.load(f)
    x, time_encode, y = data["record"], data["time_code"], data["y"]
    x = tf.cast(x, tf.float32)
    time_encode = tf.cast(time_encode, tf.float32)
    y = tf.cast(y, tf.float32)

    # First `k` samples form the training set, the remainder the test set.
    k = int(config.data.train_test_split * x.shape[0])
    x_train, x_test = x[:k, :, :], x[k:, :, :]
    time_encode_train, time_encode_test = time_encode[:k, :], time_encode[k:, :]
    y_train, y_test = y[:k, :], y[k:, :]

    training_data = tf.data.Dataset.from_tensor_slices((x_train, time_encode_train, y_train))
    training_batch = training_data.batch(config.train.batch_size)
    testing_data = tf.data.Dataset.from_tensor_slices((x_test, time_encode_test, y_test))
    # Evaluation batches are sized by config.test.batch_size, independently of
    # the batch size used for training.
    testing_batch = testing_data.batch(config.test.batch_size)

    #
    # Create model (BetSimulation)
    # ----------------------------------------------------------------------------------------------------------------------
    model = Model()

    # Any optimizer method other than 'sgd' falls back to Adam.
    # The learning rate is cast with float(); presumably the configured value
    # may be loaded as a string (e.g. "1e-4") -- TODO confirm against Config.
    if config.optimizer.method == 'sgd':
        optimizer = tf.keras.optimizers.SGD(learning_rate=float(config.optimizer.learning_rate))
    else:
        optimizer = tf.keras.optimizers.Adam(learning_rate=float(config.optimizer.learning_rate))

    #
    # Loss
    # ----------------------------------------------------------------------------------------------------------------------
    mdn_loss = MdnLoss(reduce=False)

    def _total_mdn_loss(mu, sigma, target):
        """Return the scalar loss used for both training and evaluation.

        Column 0 is scored with `mdn_loss(..., "Normal")` and all remaining
        columns with `mdn_loss(..., "Beta")`; each part is averaged with
        `reduce_mean` and the two means are added without any weighting
        (config.model.alpha is not applied here).
        """
        normal_term = tf.math.reduce_mean(mdn_loss(mu[:, :1], sigma[:, :1], target[:, :1], "Normal"))
        beta_term = tf.math.reduce_mean(mdn_loss(mu[:, 1:], sigma[:, 1:], target[:, 1:], "Beta"))
        return normal_term + beta_term

    #
    # Train Model
    # ----------------------------------------------------------------------------------------------------------------------
    # NOTE(review): nothing in this block writes the model to
    # config.weights.simulation -- confirm whether checkpointing is expected.
    train_losses = []
    test_losses = []
    best_train_loss = float("inf")
    best_valid_loss = float("inf")
    for e in range(config.train.epoch):
        train_loss_cache = []
        test_loss_cache = []

        # Batch variables carry a `_batch` suffix so the full-dataset tensors
        # `x` / `y` defined above are not overwritten by the loop.
        for x_batch, time_batch, y_batch in tqdm(training_batch, desc="Training"):
            with tf.GradientTape() as tape:
                y_mu, y_sigma = model(x_batch, time_batch, training=True)
                train_loss = _total_mdn_loss(y_mu, y_sigma, y_batch)

            gradients = tape.gradient(train_loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))
            train_loss_cache.append(train_loss.numpy())

        # Evaluation pass: forward only, no gradient tape and no weight update.
        for x_batch, time_batch, y_batch in tqdm(testing_batch, desc="Testing"):
            y_mu, y_sigma = model(x_batch, time_batch)
            test_loss = _total_mdn_loss(y_mu, y_sigma, y_batch)
            test_loss_cache.append(test_loss.numpy())

        # Epoch losses are the unweighted mean of the per-batch losses.
        train_loss_epoch = np.mean(train_loss_cache)
        test_loss_epoch = np.mean(test_loss_cache)
        train_losses.append(train_loss_epoch)
        test_losses.append(test_loss_epoch)

        # Keep the running minima up to date so they can be inspected after
        # (or when interrupting) the loop.
        best_train_loss = min(best_train_loss, train_loss_epoch)
        best_valid_loss = min(best_valid_loss, test_loss_epoch)

        print('Epoch: {}/{}\ttrain_loss: {:.6f}\ttest_loss: {:.6f}'.
              format(e + 1, config.train.epoch, train_loss_epoch, test_loss_epoch))


KeyboardInterrupt

