In [2]:
import tensorflow as tf
import numpy as np
import re

In [14]:
text = "Quo usque tandem abutere, Catilina, patientia nostra? Quamdiu etiam furor iste tuus nos eludet? Quem ad finem sese effrenata iactabit audacia?"

# Naive tokenization: strip the punctuation marks that occur in the text,
# lowercase everything and treat every whitespace-separated word as a token.
# split() with no argument (instead of split(' ')) never produces empty tokens
# when the text contains repeated, leading or trailing whitespace.
tokenized = re.sub('[,?.]', '', text).lower().split()

# Vocabulary: every distinct token mapped to its position in sorted order.
# sorted(set(...)) yields the same ordering as np.unique, but the keys stay
# plain Python str objects instead of numpy.str_ scalars.
vocab = {word: idx for idx, word in enumerate(sorted(set(tokenized)))}

In [58]:
# Display the word -> id mapping.
vocab

{'abutere': 0,
 'ad': 1,
 'audacia': 2,
 'catilina': 3,
 'effrenata': 4,
 'eludet': 5,
 'etiam': 6,
 'finem': 7,
 'furor': 8,
 'iactabit': 9,
 'iste': 10,
 'nos': 11,
 'nostra': 12,
 'patientia': 13,
 'quamdiu': 14,
 'quem': 15,
 'quo': 16,
 'sese': 17,
 'tandem': 18,
 'tuus': 19,
 'usque': 20}

In [59]:
# Next we define the embedding size (the dimension of each word vector, 50 in our case) and the vocabulary length.

In [60]:
# Dimension of each embedding vector.
EMBED_SIZE = 50
# Number of distinct words in the vocabulary (a dict's length is its key count).
VOCAB_LEN = len(vocab)

print(VOCAB_LEN)

21


In [68]:
# We now need the ids of the words we want to embed. As an example, we take
# "abutere" and "patientia".
# words_ids holds the ids of some words of the vocabulary; a vocabulary is a
# map from words (tokens) to ids.

words_ids = tf.constant([vocab[word] for word in ("abutere", "patientia")])

In [69]:
# Embedding table: a variable with VOCAB_LEN rows and EMBED_SIZE columns,
# filled with uniformly distributed random values.
embeddings = tf.Variable(
    initial_value=tf.random_uniform(shape=[VOCAB_LEN, EMBED_SIZE])
)
# Pick the rows of the table that correspond to the requested word ids.
embed = tf.nn.embedding_lookup(params=embeddings, ids=words_ids)

with tf.Session() as sess:
    # Variables must be initialized before any tensor depending on them is evaluated.
    sess.run(tf.global_variables_initializer())
    # Evaluate the lookup and print one embedding vector per requested word.
    print(sess.run(fetches=embed))

[[0.94159853 0.47791886 0.6757828  0.7451253  0.67886007 0.67146075
  0.14859271 0.42767048 0.9956533  0.6282997  0.5976262  0.02285182
  0.94600546 0.9259826  0.5596858  0.74445415 0.9257264  0.00419676
  0.1787237  0.72174835 0.25012636 0.5517918  0.7234329  0.8347409
  0.25397623 0.5264813  0.83136034 0.04862249 0.8449614  0.03566229
  0.5097456  0.7491182  0.26026833 0.8288218  0.18477559 0.82419276
  0.8491255  0.3262117  0.60015523 0.6737932  0.38296258 0.41750813
  0.1519301  0.595335   0.4984163  0.6633427  0.37720454 0.3542471
  0.8745059  0.38002026]
 [0.67347205 0.7986989  0.96490455 0.74781907 0.16274679 0.57981133
  0.13682342 0.8484597  0.6320871  0.44620407 0.7793801  0.95069027
  0.19091463 0.5246465  0.19539988 0.17891133 0.21647453 0.5682733
  0.5725316  0.71901083 0.8867103  0.14238453 0.17147994 0.9695312
  0.5529585  0.26986146 0.67527103 0.73019993 0.8661417  0.52650523
  0.7738441  0.5870944  0.73371124 0.83653367 0.5228075  0.0424813
  0.10963345 0.9285134  0.68

In [70]:
# Same lookup using the Keras Embedding layer.
# (tensorflow is already imported as tf in the first cell, so it is not
# imported again here.)
embeddings = tf.keras.layers.Embedding(input_dim=VOCAB_LEN, output_dim=EMBED_SIZE)
# Calling the layer on the word ids yields one EMBED_SIZE-dimensional vector per id.
embed = embeddings(words_ids)

with tf.Session() as sess:
    # Initialize the variables in the graph before evaluating the embeddings.
    sess.run(tf.global_variables_initializer())
    print(sess.run(embed))

[[ 0.00303479 -0.02560792 -0.00341245  0.02672969 -0.02263589  0.00113025
   0.03898313  0.00759194  0.03284452  0.03830725  0.0154817   0.00705725
  -0.01198811 -0.04363779 -0.03399888 -0.0059553   0.00060582  0.02232868
   0.03099075 -0.04960848  0.04739812 -0.00901444  0.04343739 -0.02876884
  -0.00928445 -0.01474331 -0.03673764 -0.00927904  0.02522433 -0.00591274
   0.04068582  0.0291919   0.04751973  0.02993834  0.00733216  0.02753672
   0.02186326 -0.02682384 -0.02130427  0.0350327  -0.0122747  -0.00767615
  -0.03839944  0.04837588  0.00409646 -0.03049152 -0.03555983  0.0379563
  -0.04907681 -0.00277138]
 [-0.03026829 -0.00636082 -0.02106096  0.00889608  0.03470941  0.01518322
   0.03276154 -0.04161562 -0.01552086  0.00311099  0.01026328  0.04158516
  -0.02565833  0.00613364  0.00676235 -0.01882094  0.03703779  0.0442528
  -0.0229293  -0.02409744  0.03482821  0.00021745 -0.03824664 -0.0464901
   0.00405698  0.00394703  0.0169821   0.02284623  0.01580958 -0.04783583
   0.02679542 

In [71]:
# Same lookup using the TensorFlow contrib layers helper.
# Note: as the warning printed by this cell states, the contrib module will
# not be included in TensorFlow 2.0.

embed = tf.contrib.layers.embed_sequence(
    ids=words_ids,
    vocab_size=VOCAB_LEN,
    embed_dim=EMBED_SIZE,
)

with tf.Session() as sess:
    # Initialize the variables in the graph before evaluating the embeddings.
    sess.run(tf.global_variables_initializer())
    print(sess.run(fetches=embed))


The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

[[-1.60786927e-01 -1.75933480e-01 -8.23026001e-02 -1.92231983e-01
  -2.39409119e-01 -1.46477431e-01  1.53587371e-01  1.55653387e-01
  -7.78972208e-02  1.20996445e-01 -2.74368942e-01  1.66701645e-01
   2.82338828e-01 -1.49694175e-01  1.44037008e-02  1.23237461e-01
   1.99615955e-04 -2.34681368e-01 -1.78386301e-01  2.66641170e-01
   9.37233269e-02  2.49281377e-01 -1.17825568e-01  1.73913747e-01
  -2.60450423e-01  1.93151921e-01 -2.54842252e-01  1.86541617e-01
   1.90132916e-01  3.91776860e-02  6.50626719e-02  2.72292644e-01
   2.06192672e-01 -1.86209768e-01 -7.54587352e-02 -2.16853917e-01
  -8.09297413e-02 -9.94452238e-02  4.15234