In [1]:
import tensorflow as tf

In [2]:
# Static lookup table: each known category string maps to its position in
# `vocab`; strings outside the vocabulary are assigned to one of the
# `num_of_oov` extra out-of-vocabulary buckets that follow the known ids.
num_of_oov = 2
vocab = ["<1H OCEAN", "INLAND", "NEAR OCEAN", "NEAR BAY", "ISLAND"]
indices = tf.range(len(vocab), dtype=tf.int64)
table_init = tf.lookup.KeyValueTensorInitializer(keys=vocab, values=indices)
table = tf.lookup.StaticVocabularyTable(
    initializer=table_init,
    num_oov_buckets=num_of_oov,
)

In [4]:
# one-hot vectors, step 1: turn the raw category strings into integer ids.
# "DESERT" is not part of the vocabulary, so it falls into an OOV bucket.
categories = tf.constant(["NEAR OCEAN", "NEAR OCEAN", "ISLAND", "DESERT"])
cat_indices = table.lookup(keys=categories)
cat_indices

<tf.Tensor: shape=(4,), dtype=int64, numpy=array([2, 2, 4, 5])>

In [5]:
# One column per possible id: the known vocabulary entries plus the OOV buckets.
cat_one_hot = tf.one_hot(
    indices=cat_indices,
    depth=num_of_oov + len(vocab),
)
cat_one_hot

<tf.Tensor: shape=(4, 7), dtype=float32, numpy=
array([[0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0.]], dtype=float32)>

In [6]:
# embedding
# The matrix holds one randomly initialised row of size `embedding_dim` per
# possible category id (vocabulary entries + OOV buckets).
embedding_dim = 2
embed_init = tf.random.uniform(shape=[num_of_oov + len(vocab), embedding_dim])
embedding_matrix = tf.Variable(initial_value=embed_init)
embedding_matrix

<tf.Variable 'Variable:0' shape=(7, 2) dtype=float32, numpy=
array([[0.17164636, 0.8210461 ],
       [0.07313848, 0.12004721],
       [0.409997  , 0.73943245],
       [0.4075874 , 0.5736309 ],
       [0.95277846, 0.56499803],
       [0.7779032 , 0.5198419 ],
       [0.7662859 , 0.78984904]], dtype=float32)>

In [7]:
# Encode the categories as ids, then gather the matching rows of the
# embedding matrix (one row per input string).
categories = tf.constant(["NEAR OCEAN", "NEAR OCEAN", "ISLAND", "DESERT"])
cat_indices = table.lookup(keys=categories)
tf.nn.embedding_lookup(params=embedding_matrix, ids=cat_indices)

<tf.Tensor: shape=(4, 2), dtype=float32, numpy=
array([[0.409997  , 0.73943245],
       [0.409997  , 0.73943245],
       [0.95277846, 0.56499803],
       [0.7779032 , 0.5198419 ]], dtype=float32)>

In [8]:
# keras embedding layers
# The layer creates its own embedding matrix, sized to cover every id
# (vocabulary entries + OOV buckets).
embedding = tf.keras.layers.Embedding(
    input_dim=num_of_oov + len(vocab),
    output_dim=embedding_dim,
)
embedding(cat_indices)

<tf.Tensor: shape=(4, 2), dtype=float32, numpy=
array([[-0.03035202,  0.03076302],
       [-0.03035202,  0.03076302],
       [ 0.02639463, -0.01720481],
       [-0.02153989, -0.02428501]], dtype=float32)>

In [13]:
# full-model implementation of embedding
# Two inputs: 8 numeric features per sample and one raw category string.
# The string is converted to an id by the lookup table, embedded, and then
# concatenated with the numeric features before the final Dense layer.
# NOTE: `categories` and `cat_indices` are rebound here to symbolic Keras
# tensors; they no longer refer to the eager tensors from the earlier cells.
regular_inputs = tf.keras.layers.Input(shape=[8])
categories = tf.keras.layers.Input(shape=[], dtype=tf.string)
cat_indices = tf.keras.layers.Lambda(
    lambda cat: table.lookup(cat),
    output_shape=lambda s: s,  # the lookup keeps the input shape
)(categories)
# input_dim must cover every id the table can emit: len(vocab) known ids plus
# num_of_oov OOV buckets. A hard-coded 6 left the last OOV bucket (id 6)
# outside the embedding's valid index range.
cat_embedding = tf.keras.layers.Embedding(
    input_dim=len(vocab) + num_of_oov,
    output_dim=embedding_dim,
)(cat_indices)
encoded_inputs = tf.keras.layers.Concatenate()([regular_inputs, cat_embedding])
output = tf.keras.layers.Dense(1)(encoded_inputs)

model = tf.keras.models.Model(inputs=[regular_inputs, categories], outputs=[output])

In [18]:
import tensorflow_datasets as tfds