In [3]:
pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.19.0-cp312-cp312-win_amd64.whl.metadata (4.1 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Using cached absl_py-2.2.2-py3-none-any.whl.metadata (2.6 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Using cached astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Using cached flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Using cached gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Using cached google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Using cached libclang-18.1.1-py2.py3-none-win_amd64.whl.metadata (5.3 kB)
Collecting opt-einsum>=2.3.2 (from tensorflow)
  Using cached opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collecting termcolor>=1.1.0 (from tensorflow)
  Using cached termc

In [5]:
from tensorflow.keras.preprocessing.text import one_hot

In [6]:
# Sentences: the small example corpus that the following cells encode and embed
sent = [
    "the glass of milk",
    "the glass of juice",
    "the cup of tea",
    "I am a good boy",
    "I am a good developer",
    "understand the meaning of words",
    "your videos are good",
]

In [7]:
# Vocabulary size: passed to `one_hot` as the size of the index space and to the
# Embedding layer as the number of rows in its lookup table
vocab_size = 10_000

In [11]:
# Integer ("one hot") representation of every sentence.
# Classical one-hot encoding turns a categorical value into a 0/1 indicator vector with one
# position per category. Keras' `one_hot` helper is different despite its name: it splits the
# text into words and hashes each word to an integer index in the range [1, vocab_size),
# returning one list of indices per sentence. Two different words may collide on the same
# index, and the default hash function is not guaranteed to be stable across interpreter
# sessions, so the exact numbers can differ between runs.

one_hot_repr = []
for sentence in sent:
    one_hot_repr.append(one_hot(sentence, vocab_size))
one_hot_repr

[[9764, 3077, 6690, 3333],
 [9764, 3077, 6690, 1912],
 [9764, 8200, 6690, 9444],
 [9330, 3516, 7459, 1662, 907],
 [9330, 3516, 7459, 1662, 4564],
 [3169, 9764, 2669, 6690, 1588],
 [9671, 7598, 6353, 1662]]

In [14]:
# Word Embedding Representation
# ==============================
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
import numpy as np

In [17]:
# The encoded sentences do not all contain the same number of word indices, so they are
# padded with pad_sequences to one common, fixed length.
sent_length = 8

# padding='pre' inserts the zeros on the left (before the first word index); with
# padding='post' they would be appended on the right instead. maxlen=sent_length makes
# every row exactly sent_length entries long.
embedded_docs = pad_sequences(
    one_hot_repr,
    maxlen=sent_length,
    padding='pre',
)

# Every row of embedded_docs now has the same length
print(embedded_docs)

[[   0    0    0    0 9764 3077 6690 3333]
 [   0    0    0    0 9764 3077 6690 1912]
 [   0    0    0    0 9764 8200 6690 9444]
 [   0    0    0 9330 3516 7459 1662  907]
 [   0    0    0 9330 3516 7459 1662 4564]
 [   0    0    0 3169 9764 2669 6690 1588]
 [   0    0    0    0 9671 7598 6353 1662]]


In [16]:
# Feature representation: number of values in the vector produced for each word index
dim: int = 10

In [18]:
# Single-layer model: an Embedding lookup table with `vocab_size` rows of `dim` values each,
# mapping every word index of a padded sentence to its vector.
model = Sequential()
# The `input_length` argument is deprecated in Keras 3 (the Keras bundled with recent
# TensorFlow releases), where it is ignored with a warning, so the layer is created without it.
model.add(Embedding(input_dim=vocab_size, output_dim=dim))
# Declare the fixed sequence length explicitly instead; this also creates the weights up front,
# so the model can be inspected before any data is passed through it.
model.build(input_shape=(None, sent_length))
# Optimizer and loss are given by keyword for readability; same settings as before.
model.compile(optimizer='adam', loss='mse')



In [19]:
# Forward pass through the embedding layer: each word index in the padded batch is replaced
# by its `dim`-valued vector, giving one (sent_length x dim) matrix per sentence.
embedding_vectors = model.predict(embedded_docs)
embedding_vectors

1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 523ms/step


array([[[-0.01636187, -0.01171   ,  0.0414528 , -0.00465622,
          0.03513094, -0.02040564, -0.02864522,  0.04363995,
          0.04895959, -0.03355552],
        [-0.01636187, -0.01171   ,  0.0414528 , -0.00465622,
          0.03513094, -0.02040564, -0.02864522,  0.04363995,
          0.04895959, -0.03355552],
        [-0.01636187, -0.01171   ,  0.0414528 , -0.00465622,
          0.03513094, -0.02040564, -0.02864522,  0.04363995,
          0.04895959, -0.03355552],
        [-0.01636187, -0.01171   ,  0.0414528 , -0.00465622,
          0.03513094, -0.02040564, -0.02864522,  0.04363995,
          0.04895959, -0.03355552],
        [ 0.04349371,  0.03057522, -0.03666326,  0.03167571,
          0.00975544, -0.04143808,  0.02863218,  0.04057642,
          0.04427404,  0.0011586 ],
        [ 0.03280786, -0.0428683 , -0.04680184,  0.02224845,
         -0.04694275, -0.04357157, -0.03090893, -0.01182027,
         -0.03756322, -0.03776542],
        [ 0.00413704,  0.04685182, -0.03035638, -0.0

In [20]:
# Print Keras' textual summary of the model built above.
model.summary()