In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, TimeDistributed

texts = [
    "I love this coding",
    "She runs fast",
    "The dog barked loudly",
    "He is very happy",
    "We enjoy coding daily"
]

pos_sequences = [
    ["PRON", "VERB", "DET", "NOUN"],
    ["PRON", "VERB", "ADV"],
    ["DET", "NOUN", "VERB", "ADV"],
    ["PRON", "VERB", "ADV", "ADJ"],
    ["PRON", "VERB", "NOUN", "ADV"]
]

word_tokenizer = Tokenizer()
word_tokenizer.fit_on_texts(texts)
word_sequences = word_tokenizer.texts_to_sequences(texts)
vocab_size = len(word_tokenizer.word_index) + 1

pos_tokenizer = Tokenizer()
pos_tokenizer.fit_on_texts(pos_sequences)
pos_encoded = pos_tokenizer.texts_to_sequences(pos_sequences)
num_pos_tags = len(pos_tokenizer.word_index) + 1

max_length = max(len(seq) for seq in word_sequences)
X = pad_sequences(word_sequences, maxlen=max_length, padding='post')
y = pad_sequences(pos_encoded, maxlen=max_length, padding='post')

y = tf.keras.utils.to_categorical(y, num_classes=num_pos_tags)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

embedding_dim = 50
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length))
model.add(LSTM(units=64, return_sequences=True))
model.add(TimeDistributed(Dense(num_pos_tags, activation='softmax')))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

model.fit(X_train, y_train, epochs=10, batch_size=2, validation_data=(X_test, y_test), verbose=1)

y_pred = model.predict(X_test)
y_pred_tags = np.argmax(y_pred, axis=-1)
y_test_tags = np.argmax(y_test, axis=-1)

index_to_pos = {v: k for k, v in pos_tokenizer.word_index.items()}
index_to_pos[0] = 'PAD'

print("\nExample Prediction:")
test_sample = X_test[0]
pred_tags = y_pred_tags[0]
true_tags = y_test_tags[0]
words = [word_tokenizer.index_word.get(idx, 'PAD') for idx in test_sample]
print("Words:", words)
print("True POS:", [index_to_pos[idx] for idx in true_tags])
print("Pred POS:", [index_to_pos[idx] for idx in pred_tags])






Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 701ms/step - accuracy: 0.1667 - loss: 1.9441 - val_accuracy: 0.0000e+00 - val_loss: 1.9473
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 65ms/step - accuracy: 0.2500 - loss: 1.9382 - val_accuracy: 0.2500 - val_loss: 1.9469
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step - accuracy: 0.3750 - loss: 1.9313 - val_accuracy: 0.2500 - val_loss: 1.9467
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - accuracy: 0.4167 - loss: 1.9242 - val_accuracy: 0.2500 - val_loss: 1.9463
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 0.3750 - loss: 1.9178 - val_accuracy: 0.2500 - val_loss: 1.9461
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - accuracy: 0.4583 - loss: 1.9116 - val_accuracy: 0.2500 - val_loss: 1.9460
Epoch 7/10
[1m2/2[0m [32m━━━━━━━━━━━━