In [91]:
import tensorflow as tf

In [92]:
print(tf.__version__)

2.19.0


In [93]:
from tensorflow.keras.preprocessing.text import one_hot

In [94]:
# Sentences
sent = ["The cat sat on the mat",
       "The dog sat on the log",
       "The cat chased the dog",
       "The dog chased the cat",
       "The cat is on the mat",
       "The dog is on the log"]

In [95]:
print("Sentences:", sent)

Sentences: ['The cat sat on the mat', 'The dog sat on the log', 'The cat chased the dog', 'The dog chased the cat', 'The cat is on the mat', 'The dog is on the log']


In [None]:
vocab_size = 500
embedding_dim = 8
encoded_docs = [one_hot(d, vocab_size) for d in sent]
print("Encoded documents:", encoded_docs)         

Encoded documents: [[263, 124, 118, 448, 263, 118], [263, 186, 118, 448, 263, 487], [263, 124, 367, 263, 186], [263, 186, 367, 263, 124], [263, 124, 52, 448, 263, 118], [263, 186, 52, 448, 263, 487]]


In [97]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [118]:
sequence_length = 6
# Pad the encoded_docs to the desired sequence length
encoded_docs = pad_sequences(encoded_docs, maxlen=sequence_length, padding='post', truncating='post')

# Check shape and type
print("Encoded documents:", encoded_docs)
print("Shape:", encoded_docs.shape)      # Should be (6, 6)
print("Type:", type(encoded_docs))       # Must be <class 'numpy.ndarray'>
print("Dtype:", encoded_docs.dtype)      # Should be int32 or int64

# Define the model
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=6))
model.compile('adam', 'mse')
model.build(input_shape=(None, sequence_length))  # None is the batch size

model.build(input_shape=(None, 6))  # None is the batch size
print("Model input shape:", model.input_shape)

# Summarize the model
model.summary()

import numpy as np
predictions = model.predict(encoded_docs)
print("Predictions :", predictions)


Encoded documents: [[263 124 118 448 263 118]
 [263 186 118 448 263 487]
 [263 124 367 263 186   0]
 [263 186 367 263 124   0]
 [263 124  52 448 263 118]
 [263 186  52 448 263 487]]
Shape: (6, 6)
Type: <class 'numpy.ndarray'>
Dtype: int32
Model input shape: (None, 6)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step
Predictions : [[[ 0.01931331  0.00428424 -0.01305189 -0.02390587 -0.007769
    0.02829227  0.02104032  0.01189115]
  [ 0.01083471  0.04049388  0.04018494  0.01642385  0.042056
    0.02672154 -0.00908234 -0.01454563]
  [-0.03027741  0.01186644  0.00740627  0.01894921  0.02543484
    0.00685992  0.03083852 -0.00233655]
  [-0.03340769  0.04343922 -0.00359309 -0.04478893  0.00782824
   -0.02284473  0.0221597  -0.04920521]
  [ 0.01931331  0.00428424 -0.01305189 -0.02390587 -0.007769
    0.02829227  0.02104032  0.01189115]
  [-0.03027741  0.01186644  0.00740627  0.01894921  0.02543484
    0.00685992  0.03083852 -0.00233655]]

 [[ 0.01931331  0.00428424 -0.01305189 -0.02390587 -0.007769
    0.02829227  0.02104032  0.01189115]
  [-0.01853503 -0.00414215 -0.01859139  0.00373037  0.00402814
    0.02712656  0.0422035  -0.03378342]
  [-0.03027741  0.01186644  0.00740627  0.01894921  0.02543484
    0.00685992  0.03083852 -0.00

In [116]:
# Pad the sequences
max_length = 10
padded_docs = pad_sequences(encoded_docs, maxlen=max_length, padding='pre')
print("Padded documents:\n", padded_docs)
print(model.predict(padded_docs))

Padded documents:
 [[  0   0   0   0 263 124 118 448 263 118]
 [  0   0   0   0 263 186 118 448 263 487]
 [  0   0   0   0 263 124 367 263 186   0]
 [  0   0   0   0 263 186 367 263 124   0]
 [  0   0   0   0 263 124  52 448 263 118]
 [  0   0   0   0 263 186  52 448 263 487]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[[[-0.04672387  0.0252513   0.04160093  0.03644267 -0.04437103
    0.02982745  0.04926676 -0.00516806]
  [-0.04672387  0.0252513   0.04160093  0.03644267 -0.04437103
    0.02982745  0.04926676 -0.00516806]
  [-0.04672387  0.0252513   0.04160093  0.03644267 -0.04437103
    0.02982745  0.04926676 -0.00516806]
  [-0.04672387  0.0252513   0.04160093  0.03644267 -0.04437103
    0.02982745  0.04926676 -0.00516806]
  [-0.03248789  0.02002846  0.02507769 -0.00597906  0.00212635
    0.00443172 -0.00149425  0.01508865]
  [ 0.04093403  0.02504433 -0.04828462  0.03210217  0.01914693
    0.0165066  -0.03343976  0.04481992]
  [ 0.03956303  0.04497666  0.02

In [117]:
# Get the weights of the embedding layer
weights = model.layers[0].get_weights()[0]
print("Weights of the embedding layer:", weights)

# Get the embedding for a specific word
word_index = 1  # Example word index
embedding_vector = weights[word_index]
print(f"Embedding vector for word index {word_index}:", embedding_vector)

# Get the embedding for a specific sentence
sentence_index = 0  # Example sentence index
sentence_embedding = model.predict(padded_docs[sentence_index].reshape(1, -1))
print(f"Embedding for sentence index {sentence_index}:", sentence_embedding)

Weights of the embedding layer: [[-0.04672387  0.0252513   0.04160093 ...  0.02982745  0.04926676
  -0.00516806]
 [-0.02158055 -0.03347833  0.02351132 ... -0.03240325  0.03982097
   0.04199858]
 [-0.03907311 -0.03378366  0.00565916 ...  0.00543913 -0.02293836
  -0.00706404]
 ...
 [-0.04957249  0.00115796  0.01973024 ... -0.03969352  0.04025329
   0.00548764]
 [ 0.00519142 -0.00477131  0.00737673 ...  0.00783898 -0.04526198
   0.01504571]
 [-0.0418165   0.00328505  0.00252652 ...  0.03386971 -0.02408072
  -0.02200813]]
Embedding vector for word index 1: [-0.02158055 -0.03347833  0.02351132  0.00551087 -0.02174786 -0.03240325
  0.03982097  0.04199858]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
Embedding for sentence index 0: [[[-0.04672387  0.0252513   0.04160093  0.03644267 -0.04437103
    0.02982745  0.04926676 -0.00516806]
  [-0.04672387  0.0252513   0.04160093  0.03644267 -0.04437103
    0.02982745  0.04926676 -0.00516806]
  [-0.04672387  0.0252513   0.04