<a href="https://colab.research.google.com/github/Jersae/Google-TimeSeries-workshop/blob/main/TSSD_TF_Keras_Preprocessing_Using_Adapt().ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import tensorflow as tf

from tensorflow.keras.layers.experimental import preprocessing

In [2]:
# Record the TensorFlow version this notebook was run with.
print(tf.__version__)

2.4.1


## Using Adapt

In [3]:
# Toy matrix: three rows of three values each.
data = np.array(
    [
        [0.1, 0.2, 0.3],
        [0.8, 0.9, 1.0],
        [1.5, 1.6, 1.7],
    ]
)

# adapt() lets the layer learn its statistics from `data`;
# calling the layer afterwards applies them to the same array.
norm_layer = preprocessing.Normalization()
norm_layer.adapt(data)
normalized_data = norm_layer(data)

In [4]:
# Overall mean and standard deviation of the raw toy data, across all entries.
np.mean(data), np.std(data)

(0.8999999999999999, 0.5773502691896257)

In [5]:
# Pull the tensor back into NumPy once, then report its mean and std.
normalized_values = normalized_data.numpy()
print("Features mean: %.2f" % normalized_values.mean())
print("Features std: %.2f" % normalized_values.std())

Features mean: 0.00
Features std: 1.00


### Our Original data transformed

In [6]:
# Display the normalized tensor produced from the toy `data` array.
normalized_data

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[-1.2247449, -1.2247449, -1.2247449],
       [ 0.       ,  0.       ,  0.       ],
       [ 1.2247449,  1.224745 ,  1.224745 ]], dtype=float32)>

## Let's look at doing it for a full dataset

In [7]:
# Download CIFAR-10 as (train, test) pairs of image and label arrays.
# (tf.keras.datasets.mnist.load_data() is the MNIST alternative.)
cifar10 = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [8]:
# Flatten every image into one feature vector per sample.
num_samples = x_train.shape[0]
x_train = x_train.reshape(num_samples, -1)

classes = 10  # number of output units used by the classifier below
input_shape = x_train.shape[1:]  # per-sample shape, batch dimension dropped
input_shape

(3072,)

In [9]:
# Create a Normalization layer and set its internal state using the training data.
# adapt() is run on the flattened x_train here, so the statistics are learned
# before the layer is wired into a model.
normalizer = preprocessing.Normalization(name="Basic_Normalization")
normalizer.adapt(x_train)

In [10]:
# Peek at the first flattened training sample (raw uint8 pixel values).
x_train[0]

array([ 59,  62,  63, ..., 123,  92,  72], dtype=uint8)

In [11]:
normalizer(x_train[0])

<tf.Tensor: shape=(3072, 3072), dtype=float32, numpy=
array([[-0.976824  , -1.0569063 , -0.9142917 , ..., -1.041793  ,
        -1.0670669 , -0.83814293],
       [-0.9359588 , -1.0157582 , -0.877001  , ..., -0.99558634,
        -1.0191808 , -0.7927413 ],
       [-0.9223371 , -1.0020422 , -0.86457074, ..., -0.9801841 ,
        -1.0032188 , -0.7776074 ],
       ...,
       [-0.10503357, -0.17907873, -0.11875609, ..., -0.05604991,
        -0.04549665,  0.130426  ],
       [-0.52730703, -0.6042765 , -0.50409365, ..., -0.53351927,
        -0.54031974, -0.33872458],
       [-0.79974157, -0.8785976 , -0.75269854, ..., -0.841564  ,
        -0.85956043, -0.64140236]], dtype=float32)>

In [12]:

# Build a model whose first layer is the already-adapted normalization layer
inputs = tf.keras.Input(shape=input_shape)
normalized_inputs = normalizer(inputs)
outputs = tf.keras.layers.Dense(classes, activation="softmax")(normalized_inputs)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Configure training (optimizer and loss); the actual fitting happens in the next cell
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
)


In [13]:
# Fit on the flattened training set; no epochs or batch_size are passed, so Keras' defaults apply.
model.fit(x_train, y_train)



<tensorflow.python.keras.callbacks.History at 0x7ffa59f6a650>

In [14]:
# Inspect the adapted layer: its parameter count and its config.
# NOTE(review): 6145 = 2 * 3072 + 1, presumably a per-feature mean and variance plus one count — confirm.
normalizer.count_params(), normalizer.get_config()

(6145,
 {'axis': (-1,),
  'batch_input_shape': (None, None),
  'dtype': 'float32',
  'name': 'Basic_Normalization',
  'trainable': True})

In [15]:
# Persist the whole model to a single HDF5 file in the working directory.
model.save('./basic_model.h5',save_format='h5')

In [16]:
# List the working directory to confirm basic_model.h5 was written.
ls

basic_model.h5  sample_data/


## Restarting and loading the saved model

https://www.tensorflow.org/tutorials/keras/save_and_load

In [17]:
import tensorflow as tf

from tensorflow.keras.layers.experimental import preprocessing

In [18]:
# Reload the model from the HDF5 file written earlier in the notebook.
model = tf.keras.models.load_model('./basic_model.h5')

In [19]:
# The reloaded model still contains the Basic_Normalization layer: its adapted
# parameters were saved with the model and are reported as non-trainable.
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 3072)]            0         
_________________________________________________________________
Basic_Normalization (Normali (None, 3072)              6145      
_________________________________________________________________
dense (Dense)                (None, 10)                30730     
Total params: 36,875
Trainable params: 30,730
Non-trainable params: 6,145
_________________________________________________________________


## Categorical Lookups

In [20]:
# Define some toy data: six distinct animal names held in a string tensor.
data = tf.constant(["horse", "dog", "cat", "bee", "bird", "ant"])

In [21]:
# Use StringLookup to build an index of the feature values:
# adapt() builds the lookup vocabulary from the strings in `data`.
indexer = preprocessing.StringLookup()
indexer.adapt(data)

In [22]:
# Use CategoryEncoding to turn the integer indices into one-hot vectors.
# The encoder is adapted on the indices StringLookup assigns to the toy data.
encoder = preprocessing.CategoryEncoding(output_mode="binary")
category_indices = indexer(data)
encoder.adapt(category_indices)

In [23]:
# Convert new test data (which includes unknown feature values).
# Index 0 is reserved for missing values (specified as the empty string "");
# index 1 is reserved for out-of-vocabulary values (not seen during adapt()).
test_data = tf.constant(
    ["ant", "bee", "cat", "dog", "bird", "elephant", "", "whale"]
)
test_indices = indexer(test_data)
encoded_data = encoder(test_indices)
print(encoded_data)

tf.Tensor(
[[0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0.]], shape=(8, 8), dtype=float32)
