In [35]:
import os

# Order CUDA devices by PCI bus ID and expose only device index 2 to this process.
# These are set before `import tensorflow` below — presumably so that they take
# effect before TensorFlow initialises CUDA (TODO confirm the GPU index for your host).
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"  
os.environ["CUDA_VISIBLE_DEVICES"]="2"

import warnings
warnings.filterwarnings('ignore')
from typing import Dict, List, Text
from collections import namedtuple

import tensorflow as tf
import tensorflow_data_validation as tfdv
import tensorflow_transform as tft
from tensorflow.keras.models import Sequential
import re

from tensorflow.keras.layers import (Dense, Dropout, Embedding, Lambda, Activation, BatchNormalization, Input) 

import numpy as np
import pandas as pd
from tqdm import tqdm
from train import build_callbacks

# Notebook display configuration: inline matplotlib figures rendered at retina resolution.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# NOTE(review): re-running this cell prints "The autoreload extension is already
# loaded" (see the output below); `%reload_ext autoreload` would avoid that message.
%load_ext autoreload
# NOTE(review): per the IPython docs a bare `%autoreload` performs a one-off reload;
# `%autoreload 2` is the mode that reloads modules before every cell — confirm intent.
%autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Locate the output of the TFX Transform component for pipeline `name`.
# `tr_id` selects the numbered sub-directory (presumably the artifact/run id — TODO confirm).
name = 'transform'
tr_dir = f'./tfx/pipelines/{name}/Transform'
tr_id = 4

# NOTE: `shema_dir` (sic) holds the path of the schema *file*, not a directory.
# The name is kept as-is because other cells may reference this notebook global.
shema_dir = os.path.join(tr_dir, f'post_transform_schema/{tr_id}/schema.pbtxt')
# Load the post-transform schema from its text protobuf and render it as a table.
schema = tfdv.load_schema_text(shema_dir)
tfdv.display_schema(schema)

Unnamed: 0_level_0,Type,Presence,Valency,Domain
Feature name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
'big_columns',FLOAT,required,,-
'categorical',INT,required,,-
'medium_columns',FLOAT,required,,-
'small_columns',FLOAT,required,,-
'target',FLOAT,required,,-
'z_normal',FLOAT,required,,-


In [5]:
# Directory of the transform graph written for the same `tr_id` as the schema above.
output_dir = os.path.join(tr_dir, f'transform_graph/{tr_id}/')
tf_transform_output = tft.TFTransformOutput(output_dir)
# Feature spec of the *transformed* examples; it is passed to `tf.io.parse_example`
# when the TFRecord files are read further down.
feature_spec = tf_transform_output.transformed_feature_spec()
# Last expression of the cell: display the spec.
feature_spec

{'big_columns': FixedLenFeature(shape=[10], dtype=tf.float32, default_value=None),
 'categorical': FixedLenFeature(shape=[1], dtype=tf.int64, default_value=None),
 'medium_columns': FixedLenFeature(shape=[40], dtype=tf.float32, default_value=None),
 'small_columns': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 'target': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 'z_normal': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None)}

In [6]:
# Both splits are stored as a single gzip-compressed shard with the same file name.
shard_name = 'transformed_examples-00000-of-00001.gz'

train_file_pattern = os.path.join(
    tr_dir, f'transformed_examples/{tr_id}/Split-train/{shard_name}')
val_file_pattern = os.path.join(
    tr_dir, f'transformed_examples/{tr_id}/Split-eval/{shard_name}')

# Short alias used by the dataset pipelines built below.
AUTOTUNE = tf.data.AUTOTUNE

def read_records(file_pattern, feature_spec, batch=50):
    """Read gzip-compressed TFRecord files into batches of parsed examples.

    Args:
        file_pattern: Filename(s) handed directly to ``tf.data.TFRecordDataset``.
            NOTE(review): despite the name, nothing here expands glob patterns.
        feature_spec: Mapping of feature name to parsing spec, forwarded to
            ``tf.io.parse_example``.
        batch: Number of serialized records per batch.

    Returns:
        A ``tf.data.Dataset`` yielding dicts of batched feature tensors. The
        final batch may be smaller than ``batch``.
    """
    return (
        tf.data.TFRecordDataset(file_pattern, compression_type='GZIP')
        # Batch the serialized protos first so parsing is vectorised: one
        # `parse_example` call per batch instead of one call per record.
        .batch(batch)
        .map(
            lambda serialized: tf.io.parse_example(serialized, feature_spec),
            # Parse batches in parallel; element order is still preserved.
            num_parallel_calls=tf.data.AUTOTUNE,
        )
        .prefetch(buffer_size=tf.data.AUTOTUNE)
    )

def split(x):
    """Separate a parsed example dict into ``(features, scaled_target)``.

    The input mapping is left untouched. The returned feature dict holds every
    entry except ``'target'``, and the target is divided by 100.
    """
    features = {key: value for key, value in x.items() if key != 'target'}
    return features, x['target'] / 100.0

# Parsed, batched records -> (features, scaled target) pairs, prefetched for training.
train_dataset = (
    read_records(train_file_pattern, feature_spec)
    .map(split)
    .prefetch(buffer_size=AUTOTUNE)
)
# Same pipeline applied to the evaluation split.
val_dataset = (
    read_records(val_file_pattern, feature_spec)
    .map(split)
    .prefetch(buffer_size=AUTOTUNE)
)

In [19]:
# Pull the first validation batch into the notebook globals `x` (feature dict)
# and `y` (scaled target). `x` is used further down to run a first forward pass
# through the model.
for x, y  in val_dataset:
    break
# y  (uncomment to display the target batch)

## Model dev cells

In [20]:
def batch_block_builder(input_size, block_size):
    """Build a Dense -> BatchNormalization -> ReLU block as a functional Keras model.

    Args:
        input_size: Width of the block's input vector.
        block_size: Number of units in the Dense layer (the block's output width).

    Returns:
        A ``tf.keras.models.Model`` mapping ``(batch, input_size)`` inputs to
        ``(batch, block_size)`` activations.
    """
    block_input = Input(shape=(input_size,))

    # Layers are created in the same order they are applied.
    stack = (Dense(block_size), BatchNormalization(), Activation('relu'))

    hidden = block_input
    for layer in stack:
        hidden = layer(hidden)

    return tf.keras.models.Model(inputs=block_input, outputs=hidden)


class CustomModel(tf.keras.models.Model):
    """Multi-branch regression model over the transformed feature groups.

    Each feature group gets its own tower of Dense/BatchNorm/ReLU blocks:

    * ``big_columns``    : 10 -> 20 -> 20 -> 5
    * ``medium_columns`` : 40 -> 80 -> 80 -> 20
    * ``small_columns``  : 1 -> 5 -> 5 -> 1
    * ``z_normal``       : 1 -> 5 -> 5 -> 1
    * ``categorical``    : Embedding(2, 5), first position only, then 5 -> 5 -> 5 -> 1

    The tower outputs are concatenated on the last axis and reduced by
    ``pred_submodel`` (Dense(10) -> BatchNorm -> ReLU -> Dense(1)).
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # NOTE: sub-models are created in a fixed order; keep it stable, since
        # auto-generated layer names follow creation order.
        self.big_dense = tf.keras.models.Sequential([
            batch_block_builder(10, 20),
            batch_block_builder(20, 20),
            batch_block_builder(20, 5),
            ])
        self.medium_dense = tf.keras.models.Sequential([
            batch_block_builder(40, 80),
            batch_block_builder(80, 80),
            batch_block_builder(80, 20),
            ])
        self.small_dense = tf.keras.models.Sequential([
            batch_block_builder(1, 5),
            batch_block_builder(5, 5),
            batch_block_builder(5, 1),
            ])
        self.z_norm_dense = tf.keras.models.Sequential([
            batch_block_builder(1, 5),
            batch_block_builder(5, 5),
            batch_block_builder(5, 1),
            ])
        self.cat_embedding = tf.keras.models.Sequential([
            # Two-entry vocabulary embedded into 5 dimensions.
            Embedding(2, 5, name='cats'),
            # Keep only the embedding at position 0: (batch, 1, 5) -> (batch, 5).
            Lambda(lambda x: x[:, 0, :]),
            batch_block_builder(5, 5),
            batch_block_builder(5, 5),
            batch_block_builder(5, 1),
        ])

        self.pred_submodel = tf.keras.models.Sequential([
            Dense(10, name='reducer'),
            BatchNormalization(),
            Activation('relu'),
            Dense(1, name='preds'),
            ])

    def call(self, x: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        """Run the forward pass.

        Args:
            x: Feature dict with keys ``'z_normal'``, ``'small_columns'``,
                ``'medium_columns'``, ``'big_columns'`` and ``'categorical'``.
            training: Keras training flag. It is forwarded explicitly to every
                sub-model so BatchNormalization uses batch statistics while
                training and moving averages at inference. (The parameter was
                previously misspelled ``trainig``, so the flag never reached
                this method.)

        Returns:
            Predictions produced by the final ``Dense(1)`` layer, one per example.
        """
        features = [
            self.z_norm_dense(x['z_normal'], training=training),
            self.small_dense(x['small_columns'], training=training),
            self.medium_dense(x['medium_columns'], training=training),
            self.big_dense(tf.cast(x['big_columns'], tf.float32), training=training),
            self.cat_embedding(x['categorical'], training=training),
        ]

        # Concatenate the per-group representations along the feature axis.
        features = tf.concat(features, axis=-1)
        preds = self.pred_submodel(features, training=training)

        return preds
    
model = CustomModel()
# Forward pass on the sample batch `x` (taken from `val_dataset` in an earlier
# cell) so the subclassed model has been called once before `summary()` is requested.
model(x)

model.summary()

Model: "custom_model_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_15 (Sequential)  (None, 5)                 925       
                                                                 
 sequential_16 (Sequential)  (None, 20)                12100     
                                                                 
 sequential_17 (Sequential)  (None, 1)                 90        
                                                                 
 sequential_18 (Sequential)  (None, 1)                 90        
                                                                 
 sequential_19 (Sequential)  (None, 1)                 120       
                                                                 
 sequential_20 (Sequential)  (50, 1)                   341       
                                                                 
Total params: 13666 (53.38 KB)
Trainable params: 131

2024-07-18 09:49:20.536333: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


In [21]:
# Reset Keras global state before building the model that will be trained.
tf.keras.backend.clear_session()

model_name = 'model_batch_norm_true_ref_x6'

## Build different model
model = CustomModel()

# Adam with an exponentially decaying learning rate: 4e-3, scaled by 0.98 every 10000 steps.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=4e-3,
    decay_steps=10000,
    decay_rate=0.98,
)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

# MSE loss; MAE and MAPE are tracked as metrics.
tracked_metrics = [
    tf.keras.metrics.MeanAbsoluteError(),
    tf.keras.metrics.MeanAbsolutePercentageError(),
]
model.compile(
    loss='mean_squared_error',
    optimizer=optimizer,
    metrics=tracked_metrics,
)

# Callbacks come from the project-local `train` module and monitor validation MAPE.
callbacks = build_callbacks(
    model_name,
    monitor='val_mean_absolute_percentage_error',
    mode='min',
)

# Epoch window for this run.
initial = 0
total = 20
model.fit(
    train_dataset,
    validation_data=val_dataset,
    initial_epoch=initial,
    epochs=total,
    verbose=1,
    callbacks=callbacks,
)

Epoch 1/20


2024-07-18 09:49:36.246795: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0xbcd2920 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-07-18 09:49:36.246849: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3090, Compute Capability 8.6
2024-07-18 09:49:36.255691: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-07-18 09:49:36.277300: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8907
2024-07-18 09:49:36.462910: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


   1440/Unknown - 55s 26ms/step - loss: 0.0620 - mean_absolute_error: 0.1855 - mean_absolute_percentage_error: 318.6889

2024-07-18 09:50:25.668304: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 4961231836824512645
2024-07-18 09:50:25.668375: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 11925116166579186899
2024-07-18 09:50:25.668397: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 8118860245551523550



Epoch 1: val_mean_absolute_percentage_error improved from inf to 316.00253, saving model to ./checkpoints/model_batch_norm_true_ref_x6_epoch-01_val_mean_absolute_percentage_error-316.00253.h5
Epoch 2/20
Epoch 2: val_mean_absolute_percentage_error improved from 316.00253 to 61.79787, saving model to ./checkpoints/model_batch_norm_true_ref_x6_epoch-02_val_mean_absolute_percentage_error-61.79787.h5
Epoch 3/20
Epoch 3: val_mean_absolute_percentage_error improved from 61.79787 to 37.51743, saving model to ./checkpoints/model_batch_norm_true_ref_x6_epoch-03_val_mean_absolute_percentage_error-37.51743.h5
Epoch 4/20
Epoch 4: val_mean_absolute_percentage_error improved from 37.51743 to 21.22722, saving model to ./checkpoints/model_batch_norm_true_ref_x6_epoch-04_val_mean_absolute_percentage_error-21.22722.h5
Epoch 5/20
Epoch 5: val_mean_absolute_percentage_error did not improve from 21.22722
Epoch 6/20
Epoch 6: val_mean_absolute_percentage_error did not improve from 21.22722
Epoch 7/20
Epoch 7

<keras.src.callbacks.History at 0x7fbbe467bbb0>

In [22]:
# NOTE(review): this restores a checkpoint named `model_batch_norm_true_x5` (epoch 13),
# whereas the training cell above saves under `model_batch_norm_true_ref_x6`.
# Confirm this is the intended checkpoint; the training run in this notebook is
# not shown producing this file.
model.load_weights('./checkpoints/model_batch_norm_true_x5_epoch-13_val_mean_absolute_percentage_error-16.05021.h5')

## Check outputs by hand

In [24]:
# Take one validation batch: `x1` is the feature dict, `y` the scaled target.
first_val_batch = val_dataset.take(1)
for x1, y in first_val_batch:
    pass

# Mean absolute error on that batch, multiplied by 100 to undo the target scaling.
abs_error_scaled = tf.abs(model(x1) - y) * 100
tf.reduce_mean(abs_error_scaled)

<tf.Tensor: shape=(), dtype=float32, numpy=1.5244832>

In [25]:
# Side-by-side predictions and targets for the sampled batch, both multiplied by 100.
preds_column = model(x1).numpy()[:, 0] * 100
trues_column = y.numpy()[:, 0] * 100
pd.DataFrame({'preds': preds_column, "trues": trues_column})

Unnamed: 0,preds,trues
0,21.20504,20.439486
1,50.885403,50.238983
2,57.757652,55.97337
3,95.274605,98.506592
4,61.422546,60.348804
5,57.260883,55.468987
6,39.243706,40.220413
7,16.639652,14.36792
8,48.936169,47.718895
9,95.373772,98.101608


## Save model

In [None]:
# Export the current `model` object (with whatever weights it holds at this point)
# to the `./custom_model` directory via `tf.saved_model.save`.
tf.saved_model.save(model, './custom_model')