In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# Bucket raw ages into integer categories using two cut points (18 and 50),
# which yields three bins numbered 0, 1 and 2.
# As the output shows, a value equal to a boundary (18., 50.) lands in the upper bin.
discretization = tf.keras.layers.Discretization(bin_boundaries=[18., 50.])
age = tf.constant(
    [[11.], [22.], [33.], [92.], [18.], [50.]]
)

age_categories = discretization(age)
age_categories

<tf.Tensor: shape=(6, 1), dtype=int64, numpy=
array([[0],
       [1],
       [1],
       [2],
       [1],
       [2]])>

In [3]:
# Same bucketing idea with three cut points (18, 35, 65) -> four categories 0..3.
discretize_layer = tf.keras.layers.Discretization(
    bin_boundaries=[18., 35., 65.]
)
age = tf.constant(
    [[10.], [88.], [77.], [16.], [45.], [25.]]
)
age_categories = discretize_layer(age)
print(age_categories)

tf.Tensor(
[[0]
 [3]
 [3]
 [0]
 [2]
 [1]], shape=(6, 1), dtype=int64)


In [6]:
# One-hot encode the integer age bins with the utility function instead of a layer.
# Note: despite its name, `onehot_layer_v2` holds the encoded array, not a Keras layer.
onehot_layer_v2 = tf.keras.utils.to_categorical(
    age_categories,
    num_classes=4,  # four bins produced by the three-boundary discretization
)
onehot_layer_v2

array([[1., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [1., 0., 0., 0.],
       [0., 0., 1., 0.],
       [0., 1., 0., 0.]], dtype=float32)

In [4]:
# Encode the age bins with a CategoryEncoding layer sized for 4 categories.
# Each input row carries a single category id, so each output row has exactly one 1.
onehot_layer = tf.keras.layers.CategoryEncoding(num_tokens=4)
age_onehot = onehot_layer(age_categories)
print(age_onehot)

tf.Tensor(
[[1. 0. 0. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [1. 0. 0. 0.]
 [0. 0. 1. 0.]
 [0. 1. 0. 0.]], shape=(6, 4), dtype=float32)


In [None]:
# Feed two category ids per row to the 4-token layer defined in an earlier cell:
# every row of the result now has two positions set (one per id), so the
# information about which column an id came from is lost.
two_age_categories = np.array([[3, 1], [3, 0], [2, 1]])
multi_hot_encoded = onehot_layer(two_age_categories)
print(multi_hot_encoded)

tf.Tensor(
[[0. 1. 0. 1.]
 [1. 0. 0. 1.]
 [0. 1. 1. 0.]], shape=(3, 4), dtype=float32)


In [None]:
# Encode two categorical columns side by side without mixing them up:
# the ids of the second column are shifted by 4 so that column 0 uses slots 0-3
# and column 1 uses slots 4-7 of an 8-wide encoding.
two_age_categories = np.array([[3, 1], [3, 0], [2, 1]])
shifted_categories = two_age_categories + np.array([0, 4])
onehot_layer = tf.keras.layers.CategoryEncoding(num_tokens=8)  # 4 slots per column
print(onehot_layer(shifted_categories))

tf.Tensor(
[[0. 0. 0. 1. 0. 1. 0. 0.]
 [0. 0. 0. 1. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 1. 0. 0.]], shape=(3, 8), dtype=float32)


In [8]:
# Learn a string -> integer id mapping from the city names, then look up a batch.
# "Brugge" does not occur in `cities`; in the output it is the entry mapped to 0.
cities = ["Brussel", "Aalst", "Gent", "Kortrijk", "Gent"]
str_lookup_layer = tf.keras.layers.StringLookup()
str_lookup_layer.adapt(cities)

city_queries = [["Kortrijk"], ["Aalst"], ["Brussel"], ["Brugge"], ["Gent"]]
print(str_lookup_layer(city_queries))

tf.Tensor(
[[2]
 [4]
 [3]
 [0]
 [1]], shape=(5, 1), dtype=int64)


In [None]:
# Same lookup, but emitting one-hot vectors directly instead of integer ids.
# Relies on `cities` from the earlier StringLookup cell.
str_lookup_layer = tf.keras.layers.StringLookup(output_mode="one_hot")
str_lookup_layer.adapt(cities)

one_hot_queries = [["Aalst"], ["Kortrijk"], ["Gent"], ["Aalst"]]
print(str_lookup_layer(one_hot_queries))

tf.Tensor(
[[0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1.]], shape=(4, 5), dtype=float32)


In [None]:
# Reserve five out-of-vocabulary indices instead of one.
# In the output the known cities start at id 5, while the unknown ones
# ("Hasselt", "Brugge") fall into the reserved range (both get id 1 here).
# Relies on `cities` from the earlier StringLookup cell.
str_lookup_layer = tf.keras.layers.StringLookup(num_oov_indices=5)
str_lookup_layer.adapt(cities)

oov_queries = [["Gent"], ["Hasselt"], ["Brugge"], ["Kortrijk"], ["Brussel"]]
print(str_lookup_layer(oov_queries))

tf.Tensor(
[[5]
 [1]
 [1]
 [6]
 [7]], shape=(5, 1), dtype=int64)


In [None]:
# Build a word-level vocabulary from a few short sentences and vectorize new text.
# In the output, the shorter sentence is padded with 0 and the unseen word "Tja" becomes 1.
train_data = [
    "Amai dat weet ik niet",
    "Amai, amai, amai.",
    "Dat weet je niet",
    "Weet je dat niet? Amai",
]
text_vec_layer = tf.keras.layers.TextVectorization()
text_vec_layer.adapt(train_data)

sample_sentences = ["Je weet dat", "Tja, ik weet dat niet"]
print(text_vec_layer(sample_sentences))



tf.Tensor(
[[6 3 5 0 0]
 [1 7 3 5 4]], shape=(2, 5), dtype=int64)


In [None]:
# Vectorize two more sentences with the layer adapted in the previous cell.
# Words that were not in the training sentences ("zij", "Neen", "wist") appear as 1 in the output.
print(text_vec_layer(["Weet zij dat? Amai", "Neen, ik wist dat"]))

tf.Tensor(
[[3 1 5 2]
 [1 7 1 5]], shape=(2, 4), dtype=int64)


------
Code from the theory slides

In [None]:
# Adapt a TextVectorization layer (default integer output) on the slide example
# and encode two new sentences; the cell displays the resulting id tensor.
train_data = [
    "To be",
    "!(to be)",
    "That's the question",
    "Be, be, be. ",
]
text_vec_layer = tf.keras.layers.TextVectorization()
text_vec_layer.adapt(train_data)
text_vec_layer(["Be good!", "Question: be or be?"])

<tf.Tensor: shape=(2, 4), dtype=int64, numpy=
array([[2, 1, 0, 0],
       [6, 2, 1, 2]])>

In [None]:
# Show the learned vocabulary; list position corresponds to the integer id
# (e.g. 'be' is at index 2, matching the 2s in the previous output).
text_vec_layer.get_vocabulary()

['', '[UNK]', 'be', 'to', 'the', 'thats', 'question']

In [None]:
# Same training sentences, but the layer now emits TF-IDF weights:
# each sentence becomes one fixed-width row with a column per vocabulary entry
# (6 columns here) rather than a sequence of word ids.
train_data = [
    "To be",
    "!(to be)",
    "That's the question",
    "Be, be, be. ",
]
text_vec_layer = tf.keras.layers.TextVectorization(output_mode="tf_idf")
text_vec_layer.adapt(train_data)
text_vec_layer(["Be good!", "Question: be or be?"])

<tf.Tensor: shape=(2, 6), dtype=float32, numpy=
array([[0.96725637, 0.6931472 , 0.        , 0.        , 0.        ,
        0.        ],
       [0.96725637, 1.3862944 , 0.        , 0.        , 0.        ,
        1.0986123 ]], dtype=float32)>

In [None]:
# Vocabulary of the tf_idf layer: unlike the integer-mode layer above there is
# no '' entry, so the list lines up with the 6 output columns.
text_vec_layer.get_vocabulary()

['[UNK]', 'be', 'to', 'the', 'thats', 'question']

In [None]:
# Probe sentence that hits every column exactly once: "unk" is not in the
# vocabulary (so it counts toward the '[UNK]' column) and the other five words
# are the five known tokens, which exposes the per-token weight of each column.
text_vec_layer(["unk be to thats the question"])

<tf.Tensor: shape=(1, 6), dtype=float32, numpy=
array([[0.96725637, 0.6931472 , 0.84729785, 1.0986123 , 1.0986123 ,
        1.0986123 ]], dtype=float32)>