<a href="https://colab.research.google.com/github/MalayathiGeetha/Deep_Learning_Practice_Notes/blob/main/Word_embedding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Embedding

In [2]:
reviews = [
    "absolutely fantastic experience",
    "very friendly staff",
    "delicious food and quick service",
    "loved the ambiance and taste",
    "great value for money",
    "worst experience ever",
    "food was cold and tasteless",
    "rude staff and poor hygiene",
    "not worth the price",
    "disappointed and won’t return"
]

# Labels (1 = positive, 0 = negative)
labels = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]


In [4]:
from tensorflow.keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer()
tokenizer.fit_on_texts(reviews)
sequences = tokenizer.texts_to_sequences(reviews)


In [5]:
word_index = tokenizer.word_index
print("Word Index:", word_index)


Word Index: {'and': 1, 'experience': 2, 'staff': 3, 'food': 4, 'the': 5, 'absolutely': 6, 'fantastic': 7, 'very': 8, 'friendly': 9, 'delicious': 10, 'quick': 11, 'service': 12, 'loved': 13, 'ambiance': 14, 'taste': 15, 'great': 16, 'value': 17, 'for': 18, 'money': 19, 'worst': 20, 'ever': 21, 'was': 22, 'cold': 23, 'tasteless': 24, 'rude': 25, 'poor': 26, 'hygiene': 27, 'not': 28, 'worth': 29, 'price': 30, 'disappointed': 31, 'won’t': 32, 'return': 33}


In [6]:
max_length = max(len(seq) for seq in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')


In [7]:
vocab_size = len(word_index) + 1  # +1 for padding token
embedding_dim = 8

In [14]:
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length),
    Flatten(),
    Dense(1, activation='sigmoid')  # For binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()



In [15]:
model.fit(padded_sequences, labels, epochs=30, verbose=1)

embedding_weights = model.layers[0].get_weights()[0]


ValueError: Unrecognized data type: x=[[ 6  7  2  0  0]
 [ 8  9  3  0  0]
 [10  4  1 11 12]
 [13  5 14  1 15]
 [16 17 18 19  0]
 [20  2 21  0  0]
 [ 4 22 23  1 24]
 [25  3  1 26 27]
 [28 29  5 30  0]
 [31  1 32 33  0]] (of type <class 'numpy.ndarray'>)

In [20]:


embedding_weights = model.layers[0].get_weights()[0]

In [16]:
model.build(input_shape=(None, max_length))
model.summary()

In [21]:
print("\nLearned Word Embeddings:")
for word, i in word_index.items():
    print(f"{word}: {embedding_weights[i]}")


Learned Word Embeddings:
and: [ 0.02316845  0.02979591  0.00024863  0.01708931  0.00147251  0.00187419
 -0.02282012  0.04314139]
experience: [ 0.02968713 -0.0218168  -0.00776379  0.02370464 -0.00552142 -0.03413116
 -0.04325489 -0.0014476 ]
staff: [ 0.01244812  0.01715245 -0.0091222  -0.00051929 -0.01794585 -0.04314342
  0.01130069  0.00566475]
food: [-0.03468909  0.03811287  0.01992008 -0.02670354  0.03544556  0.0258537
 -0.01583255 -0.04969509]
the: [-0.02415225  0.02289635  0.00108383  0.03852072 -0.01889491  0.01653096
 -0.00095236  0.01223149]
absolutely: [ 0.01946357 -0.00661602  0.00103092 -0.04541202  0.03880651 -0.0137952
 -0.04942509  0.00476427]
fantastic: [ 0.03070721  0.03845284  0.02335927 -0.03795042 -0.01704081  0.00676942
 -0.00112553  0.02374328]
very: [-0.01588633 -0.00177567  0.03433576 -0.02651417 -0.04200776 -0.03527144
  0.04777905  0.01144081]
friendly: [-0.00411688 -0.03231661  0.04946626  0.00250409 -0.03459234  0.00213515
 -0.04141949  0.04731667]
delicious: 