# In [None]:
# 🔹 Step 2: ✅ Setup Environment & Libraries
from tensorflow.keras.preprocessing.text import one_hot
# 🔹 Step 3: 📝 Prepare Sentences (Input Data)
### sentences
# Toy corpus: seven short sentences that the following steps encode as
# integer sequences and pass through an embedding layer.
sent = [
    "the glass of milk",
    "the glass of juice",
    "the cup of tea",
    "I am a good boy",
    "I am a good developer",
    "understand the meaning of words",
    "your videos are good",
]

# •	→ These are the example sentences used throughout this file.
# •	→ Each sentence is later converted into a numeric (vector) format.

# Notebook-style display of the corpus.
sent
# ['the glass of milk',
#  'the glass of juice',
#  'the cup of tea',
#  'I am a good boy',
#  'I am a good developer',
#  'understand the meaning of words',
#  'your videos are good']

# 🔹 Step 4: 📏 Vocabulary Size
vocab_size = 10000
# •	→ Size of the index space the words are hashed into (10,000 slots).
# •	→ It is also the number of rows the embedding layer needs later on.

# 🔹 Step 5: 🔢 One Hot Encoding (Index-Based)
one_hot_representation = [
    one_hot(sentence, vocab_size) for sentence in sent
]

# What the code above does
# •	→ The list comprehension calls one_hot once per *sentence* (not per word).
# •	→ one_hot takes two arguments: the sentence text and the vocabulary size.
# •	→ Despite its name, one_hot does not build one-hot vectors: it splits the
#  	  sentence into words and returns a list with one integer index per word.
# •	→ The same word gets the same index within a run (e.g. 'the' → 6086 in the
#  	  output recorded below), which is what the embedding lookup relies on.
# •	→ NOTE: the indices come from hashing, so they may differ between runs and
#  	  two different words can occasionally share an index — see the Keras
#  	  one_hot / hashing_trick documentation.

# Notebook-style display of the encoded sentences.
one_hot_representation
# [[6086, 1649, 3854, 2592],
#  [6086, 1649, 3854, 7355],
#  [6086, 3942, 3854, 9267],
#  [3828, 1193, 872, 5319, 2893],
#  [3828, 1193, 872, 5319, 6764],
#  [391, 6086, 7592, 3854, 464],
#  [1672, 6762, 789, 5319]]


## word Embedding Representation
# Step 6: 📏 Embedding Layer
from tensorflow.keras.layers import Embedding # 📏 Embedding Layer
#from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import pad_sequences # 📏 Pad Sequences Layer is used to pad the sequences to the same length.
from tensorflow.keras.models import Sequential # 📏 Sequential Model is used to create a sequential model. 
import numpy as np

# 🔹 Step 7: ⏩ Pad Sequences (Equal Length)
sent_length = 8
embedded_docs = pad_sequences(
    one_hot_representation,
    maxlen=sent_length,
    padding="pre",
)
# •	→ Makes every sentence the same length (8 indices); otherwise the
#  	  sequences cannot be stacked into one array to train an RNN model.
# •	→ padding='pre' adds zeros at the start of sentences shorter than 8.
# •	→ maxlen=8 sets the length every sequence is brought to.
# •	→ Example ('the glass of milk', 4 words → 4 leading zeros):
# o	[0, 0, 0, 0, 6086, 1649, 3854, 2592]

print(embedded_docs)
# [[   0    0    0    0 6086 1649 3854 2592]
#  [   0    0    0    0 6086 1649 3854 7355]
#  [   0    0    0    0 6086 3942 3854 9267]
#  [   0    0    0 3828 1193  872 5319 2893]
#  [   0    0    0 3828 1193  872 5319 6764]
#  [   0    0    0  391 6086 7592 3854  464]
#  [   0    0    0    0 1672 6762  789 5319]]


# 🔹 Step 8: ⚙️ Setup Embedding Layer
embedding_vector_features = 10

model = Sequential()
# `input_length` is deprecated on Embedding in current Keras releases and no
# longer defines the model's input shape. That is why the summary recorded
# below this block shows Output Shape "?" and "0 (unbuilt)". The layer is
# therefore created without it and the input shape is given explicitly via
# build(): (batch, sent_length), batch size left open.
model.add(Embedding(vocab_size, embedding_vector_features))
model.build(input_shape=(None, sent_length))
model.compile(optimizer='adam', loss='mse')
# •	→ Embedding layer takes:
# o	vocab_size: number of distinct indices it can look up (rows of the table)
# o	embedding_vector_features: size of the dense vector (features) per index
# •	→ model.build(...) fixes the sequence length to sent_length so the layer's
#  	  weights exist before summary() is called.
# •	→ NOTE: with the model built, summary() should report an output shape of
#  	  (None, 8, 10) and vocab_size * embedding_vector_features = 100,000
#  	  parameters; the summary recorded below was captured from the unbuilt model.

# 🔹 Step 9: 📊 Model Summary
model.summary()
# Model: "sequential_1"
# ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
# ┃ Layer (type)                    ┃ Output Shape           ┃ Param #       ┃
# ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
# │ embedding_1 (Embedding)         │ ?                      │ 0 (unbuilt)   │
# └─────────────────────────────────┴────────────────────────┴───────────────┘

# 🔹 Step 10: 🧪 Predict Using Embedding
# Run the whole padded batch through the embedding layer and show the result.
sentence_embeddings = model.predict(embedded_docs)
print(sentence_embeddings)
# •	→ Now each sentence is represented as a matrix of shape (8, 10):
# o	8 → positions per sentence (after padding)
# o	10 → features per position
# •	→ The padding index 0 always maps to the same vector, which is why the
#  	  leading rows of every sentence are identical in the output.
# •	→ Example: model.predict(embedded_docs[0].reshape(1, -1)) shows the vectors for sentence 'the glass of milk'

# Output: 
# [1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 196ms/step
# [[[-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [ 0.04040735  0.02275534 -0.02276136 -0.01484396  0.01204512
#     0.03833229 -0.01339642 -0.00392485 -0.03046179  0.04022882]
#   [-0.03019124 -0.01279249 -0.03905613 -0.02459447 -0.03395279
#     0.00775326 -0.03828407 -0.04277759  0.02639576  0.04670269]
#   [-0.00419534  0.04367883  0.01065139 -0.02015371 -0.04413842
#    -0.01546397  0.01556754  0.01885882  0.00462196 -0.03924478]
#   [-0.01765973 -0.02059596  0.01124931 -0.04790077 -0.03543241
#     0.03486873  0.02037734  0.01445598 -0.04581702 -0.00621518]]

#  [[-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [ 0.04040735  0.02275534 -0.02276136 -0.01484396  0.01204512
#     0.03833229 -0.01339642 -0.00392485 -0.03046179  0.04022882]
#   [-0.03019124 -0.01279249 -0.03905613 -0.02459447 -0.03395279
#     0.00775326 -0.03828407 -0.04277759  0.02639576  0.04670269]
#   [-0.00419534  0.04367883  0.01065139 -0.02015371 -0.04413842
#    -0.01546397  0.01556754  0.01885882  0.00462196 -0.03924478]
#   [-0.02429505 -0.018147   -0.03645756 -0.03517072 -0.00346982
#    -0.00747031 -0.04896731  0.04250869 -0.03297199 -0.03667396]]

#  [[-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [ 0.04040735  0.02275534 -0.02276136 -0.01484396  0.01204512
#     0.03833229 -0.01339642 -0.00392485 -0.03046179  0.04022882]
#   [-0.0497164   0.00153299  0.02029312  0.02193828 -0.01244128
#     0.00724491 -0.00549623 -0.00725592 -0.00728454  0.01878544]
#   [-0.00419534  0.04367883  0.01065139 -0.02015371 -0.04413842
#    -0.01546397  0.01556754  0.01885882  0.00462196 -0.03924478]
#   [-0.04448075 -0.01344184 -0.02768246 -0.03871102  0.02572754
#    -0.00365255  0.02883886  0.01475826 -0.01304816  0.01194626]]

#  [[-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.02810692  0.04585162  0.00783378  0.04826948  0.03797236
#    -0.01758863 -0.00021721 -0.01458552 -0.04710324  0.04270247]
#   [-0.02526909  0.00819306  0.01378684 -0.00490857  0.03530004
#    -0.00541176  0.01337293  0.02394569 -0.04992829 -0.02327907]
#   [ 0.03897044  0.00175347 -0.01567962 -0.03710892 -0.00812767
#    -0.0126673  -0.00261028  0.04230764  0.04169044  0.02090956]
#   [-0.0216205   0.01036555  0.02149669  0.04637566  0.04399173
#    -0.00486566 -0.00116234 -0.01853008 -0.00501232  0.00057198]
#   [-0.01953138  0.04612256 -0.0066989  -0.03013592 -0.00503711
#     0.04142947  0.03435178 -0.04601353 -0.01902003 -0.02458649]]

#  [[-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.02810692  0.04585162  0.00783378  0.04826948  0.03797236
#    -0.01758863 -0.00021721 -0.01458552 -0.04710324  0.04270247]
#   [-0.02526909  0.00819306  0.01378684 -0.00490857  0.03530004
#    -0.00541176  0.01337293  0.02394569 -0.04992829 -0.02327907]
#   [ 0.03897044  0.00175347 -0.01567962 -0.03710892 -0.00812767
#    -0.0126673  -0.00261028  0.04230764  0.04169044  0.02090956]
#   [-0.0216205   0.01036555  0.02149669  0.04637566  0.04399173
#    -0.00486566 -0.00116234 -0.01853008 -0.00501232  0.00057198]
#   [-0.01512501 -0.00613271  0.02041062  0.03420749 -0.02175893
#    -0.01719315 -0.0423243  -0.011119   -0.01446674  0.04440813]]

#  [[-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.0269259   0.01682637 -0.02316555 -0.02261239 -0.03732441
#    -0.03082137 -0.03901649  0.01009659  0.03570462  0.04694809]
#   [ 0.04040735  0.02275534 -0.02276136 -0.01484396  0.01204512
#     0.03833229 -0.01339642 -0.00392485 -0.03046179  0.04022882]
#   [-0.02979877  0.02704749 -0.04209245 -0.04393542  0.04377755
#    -0.00807125  0.04097413 -0.01018187  0.03306102 -0.02885369]
#   [-0.00419534  0.04367883  0.01065139 -0.02015371 -0.04413842
#    -0.01546397  0.01556754  0.01885882  0.00462196 -0.03924478]
#   [ 0.00940046  0.01597072 -0.04192784 -0.04229158  0.03902156
#     0.02196207  0.04559286  0.0264777   0.03492266  0.00921279]]

#  [[-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.03569493 -0.03568484 -0.01413916  0.03283079  0.02087626
#    -0.02224137 -0.00520598 -0.03330121  0.0161608  -0.01954903]
#   [-0.01064968 -0.0231376   0.03079159 -0.02931199 -0.03796649
#     0.03909513  0.00260478  0.02335879  0.03717966 -0.03719858]
#   [ 0.04222336 -0.02897574  0.04786757  0.04115654  0.01940531
#    -0.02504121  0.02561492 -0.03015587  0.03786654 -0.03226081]
#   [-0.01725054 -0.04744424  0.01454699 -0.03942645  0.04848105
#    -0.04392425  0.01487465 -0.00022218  0.02037709 -0.04943595]
#   [-0.0216205   0.01036555  0.02149669  0.04637566  0.04399173
#    -0.00486566 -0.00116234 -0.01853008 -0.00501232  0.00057198]]]


# 🔹 Step 11: 📍 What’s Happening?
# •	→ The sentence 'the glass of milk' is encoded as [6086, 1649, 3854, 2592]
# •	→ Padded to [0, 0, 0, 0, 6086, 1649, 3854, 2592]
# •	→ Each of these numbers is now converted into a 10-dim vector by the embedding layer
# •	→ For example (values from the Step 10 output recorded above):
# •	6086 → [0.0404, 0.0228, -0.0228, ..., 0.0402]  → vector of size 10


# 🔹 Step 12: 📌 Why Embedding is Better?
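# •	→ Embedding gives you:
# o	Dense vectors (more efficient than one-hot)
# o	Vectors that can capture semantic relationships once the layer is trained
#  	(the layer is not trained in the code shown here, so the vectors printed
#  	in this file are just its random initial values)
# •	→ Example (for trained embeddings):
# o	Similar words like king and queen are close in vector space
# o	Words from the same category (like apple and mango) also cluster together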

# First padded sentence ('the glass of milk') as a 1-D array of 8 indices.
embedded_docs[0]
# array([   0,    0,    0,    0, 6086, 1649, 3854, 2592], dtype=int32)

# Slice with [:1] to keep the batch axis, so predict() receives a (1, 8)
# array holding only that first sentence.
model.predict(embedded_docs[:1])
# array([[[-0.02139236,  0.04156153, -0.00822502, -0.02136725,
#           0.00672828,  0.04571738, -0.00378443, -0.0127629 ,
#          -0.01274241,  0.0490505 ],
#         [-0.02139236,  0.04156153, -0.00822502, -0.02136725,
#           0.00672828,  0.04571738, -0.00378443, -0.0127629 ,
#          -0.01274241,  0.0490505 ],
#         [-0.02139236,  0.04156153, -0.00822502, -0.02136725,
#           0.00672828,  0.04571738, -0.00378443, -0.0127629 ,
#          -0.01274241,  0.0490505 ],
#         [-0.02139236,  0.04156153, -0.00822502, -0.02136725,
#           0.00672828,  0.04571738, -0.00378443, -0.0127629 ,
#          -0.01274241,  0.0490505 ],
#         [ 0.04731965,  0.01597354,  0.04082378,  0.0327131 ,
#           0.00288612,  0.04806567,  0.04102972,  0.03775961,
#          -0.02092441,  0.02246917],
#         [-0.00382866,  0.02513489, -0.01869538,  0.0055298 ,
#           0.02396252, -0.0181849 ,  0.04099594,  0.03989681,
#           0.01045933,  0.00703907],
#         [-0.00116382,  0.04169172,  0.00152386, -0.03997656,
#          -0.02834749, -0.00111879,  0.00158717, -0.00380088,
#          -0.03648036, -0.02725489],
#         [ 0.04764913,  0.04994391,  0.0279489 , -0.01208482,
#          -0.03168216, -0.03444691,  0.0311343 ,  0.02132369,
#           0.01532625, -0.0248673 ]]], dtype=float32)



# Raw cell output captured by the notebook export, kept as comments so the
# file remains valid Python. The prediction dump is cut off mid-value in the
# export.
#
# [[   0    0    0    0 6086 1649 3854 2592]
#  [   0    0    0    0 6086 1649 3854 7355]
#  [   0    0    0    0 6086 3942 3854 9267]
#  [   0    0    0 3828 1193  872 5319 2893]
#  [   0    0    0 3828 1193  872 5319 6764]
#  [   0    0    0  391 6086 7592 3854  464]
#  [   0    0    0    0 1672 6762  789 5319]]




# 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 85ms/step
# [[[-4.23709303e-03 -3.46774943e-02 -2.14374065e-02  3.88695113e-02
#     2.40114816e-02  1.75497644e-02  2.93568633e-02 -3.54238264e-02
#     7.68346712e-03 -3.11110504e-02]
#   [-4.23709303e-03 -3.46774943e-02 -2.14374065e-02  3.88695113e-02
#     2.40114816e-02  1.75497644e-02  2.93568633e-02 -3.54238264e-02
#     7.68346712e-03 -3.11110504e-02]
#   [-4.23709303e-03 -3.46774943e-02 -2.14374065e-02  3.88695113e-02
#     2.40114816e-02  1.75497644e-02  2.93568633e-02 -3.54238264e-02
#     7.68346712e-03 -3.11110504e-02]
#   [-4.23709303e-03 -3.46774943e-02 -2.14374065e-02  3.88695113e-02
#     2.40114816e-02  1.75497644e-02  2.93568633e-02 -3.54238264e-02
#     7.68346712e-03 -3.11110504e-02]
#   [ 2.57819034e-02  2.84010656e-02 -3.00602671e-02  3.12190130e-03
#     2.30179168e-02  1.68727748e-02  2.47445814e-02  7.81860203e-03
#     9.73502547e-03  7.07067177e-03]
#   [-3.84193063e-02 -8.83370638e-03  1.62816979e-02 -3.47054489e-02
#    -2.04754   ... (output truncated in the export)

# array([   0,    0,    0,    0, 6086, 1649, 3854, 2592], dtype=int32)