In [None]:
import numpy as np
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Embedding

# Toy parallel corpus: every sentence is paired with one POS tag per word.
input_texts = ['I love NLP', 'He plays football']
target_texts = [['PRON', 'VERB', 'NOUN'], ['PRON', 'VERB', 'NOUN']]

word_vocab = sorted(set(word for sent in input_texts for word in sent.split()))
tag_vocab = sorted(set(tag for tags in target_texts for tag in tags))

# Word ids start at 1: id 0 is the padding value that the encoder Embedding
# masks out (mask_zero=True).
word2idx = {word: i+1 for i, word in enumerate(word_vocab)}
# 0-based class ids used for the one-hot targets and the softmax output.
tag2idx = {tag: i for i, tag in enumerate(tag_vocab)}

# Decoder *inputs* use their own id space so that no real tag collides with
# the masked padding id: 0 = padding, 1 = start-of-sequence, tags start at 2.
PAD_TOKEN_ID = 0
START_TOKEN_ID = 1
decoder_tag2idx = {tag: i + 2 for i, tag in enumerate(tag_vocab)}

max_encoder_seq_length = max(len(sent.split()) for sent in input_texts)
max_decoder_seq_length = max(len(tags) for tags in target_texts)

num_words = len(word_vocab) + 1           # +1 for the padding id
num_tags = len(tag_vocab)                 # number of output classes
num_decoder_tokens = num_tags + 2         # tags + padding + start token

# Encoder input: one row of word ids per sentence, right-padded with 0.
encoder_input_data = np.zeros((len(input_texts), max_encoder_seq_length), dtype='int32')
for i, sent in enumerate(input_texts):
    for t, word in enumerate(sent.split()):
        encoder_input_data[i, t] = word2idx[word]

decoder_input_data = np.zeros((len(target_texts), max_decoder_seq_length), dtype='int32')
decoder_output_data = np.zeros((len(target_texts), max_decoder_seq_length, num_tags), dtype='float32')

for i, tags in enumerate(target_texts):
    if not tags:
        # Nothing to predict for this sentence; the row stays fully padded.
        continue
    # Teacher forcing: the decoder input is the target shifted right by one
    # step and prefixed with the start token, so at step t the decoder only
    # sees tags 0..t-1 and never the tag it is asked to predict.
    shifted_ids = [START_TOKEN_ID] + [decoder_tag2idx[tag] for tag in tags[:-1]]
    decoder_input_data[i, :len(tags)] = shifted_ids
    for t, tag in enumerate(tags):
        decoder_output_data[i, t, tag2idx[tag]] = 1.

embedding_dim = 50
latent_dim = 64

# Encoder: embeds the words and keeps only the final LSTM states, which
# become the decoder's initial state.
encoder_inputs = Input(shape=(None,))
enc_emb = Embedding(num_words, embedding_dim, mask_zero=True)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(enc_emb)
encoder_states = [state_h, state_c]

# Decoder: embeds the shifted tag sequence and emits one tag distribution
# per timestep. The embedding covers padding + start token + all tags.
decoder_inputs = Input(shape=(None,))
dec_emb = Embedding(num_decoder_tokens, embedding_dim, mask_zero=True)(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(dec_emb, initial_state=encoder_states)
decoder_dense = Dense(num_tags, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Keep the History object in a variable so the cell does not echo its repr.
history = model.fit([encoder_input_data, decoder_input_data], decoder_output_data, batch_size=2, epochs=100)


Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8s/step - accuracy: 0.3333 - loss: 1.1004
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step - accuracy: 0.6667 - loss: 1.0919
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 226ms/step - accuracy: 0.6667 - loss: 1.0834
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 265ms/step - accuracy: 0.6667 - loss: 1.0750
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231ms/step - accuracy: 0.6667 - loss: 1.0665
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 283ms/step - accuracy: 0.6667 - loss: 1.0577
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 885ms/step - accuracy: 0.6667 - loss: 1.0487
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 310ms/step - accuracy: 0.6667 - loss: 1.0393
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[

<keras.src.callbacks.history.History at 0x7c42e70eca40>

In [None]:
import spacy
# Load the `en_core_web_sm` spaCy pipeline; the per-token `pos_` attribute
# it produces is what gets compared below.
nlp = spacy.load("en_core_web_sm")

# Each case pairs a sentence with the tag sequence it is expected to get.
test_cases = [
    dict(sentence="I love NLP", expected=["PRON", "VERB", "NOUN"]),
    dict(sentence="He plays football", expected=["PRON", "VERB", "NOUN"]),
]

for case in test_cases:
    doc = nlp(case["sentence"])

    # Collect one tag per token, in sentence order.
    predicted = []
    for token in doc:
        predicted.append(token.pos_)

    # A case only counts as correct when the whole tag sequence matches.
    if predicted == case["expected"]:
        correct = "Y"
    else:
        correct = "N"

    print(f"Sentence: {case['sentence']}")
    print(f"Predicted Tags: {' '.join(predicted)}")
    print(f"Correct (Y/N): {correct}\n")


Sentence: I love NLP
Predicted Tags: PRON VERB PROPN
Correct (Y/N): N

Sentence: He plays football
Predicted Tags: PRON VERB NOUN
Correct (Y/N): Y



In [7]:
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, Embedding, Bidirectional, LSTM, TimeDistributed, Dense

# Seed Python, NumPy and TensorFlow in one call so the synthetic data, the
# weight initialisation and the printed table are reproducible across runs.
tf.keras.utils.set_random_seed(42)

max_len = 10    # number of token positions per sequence
n_words = 500   # size of the word-id space (id 0 is used as padding below)
n_tags = 5      # number of BIO classes, see tag_map

inputs = Input(shape=(max_len,))
# `input_length` is intentionally not passed: the sequence length is already
# fixed by `Input(shape=(max_len,))` and Keras 3 deprecates the argument.
x = Embedding(input_dim=n_words, output_dim=32)(inputs)
x = Bidirectional(LSTM(units=32, return_sequences=True))(x)
outputs = TimeDistributed(Dense(n_tags, activation="softmax"))(x)

model = Model(inputs, outputs)
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Synthetic training set: random word ids in [1, n_words) with random tag
# ids in [0, n_tags). It only exercises the pipeline; no real tagging is
# learned from it.
X_train = np.random.randint(1, n_words, (200, max_len))
y_train = np.random.randint(0, n_tags, (200, max_len, 1))

model.fit(X_train, y_train, batch_size=16, epochs=2, verbose=1)

test_sentence = ["John", "lives", "in", "New", "York"]
assert len(test_sentence) <= max_len, "test sentence is longer than max_len"

# Feed the model the test sentence itself instead of unrelated random ids,
# so every printed tag is the prediction for the word shown next to it.
# The ids come from a throw-away demo vocabulary (1..k in first-seen order);
# a real pipeline must reuse the vocabulary the model was trained with.
test_word2idx = {word: idx for idx, word in enumerate(dict.fromkeys(test_sentence), start=1)}
X_test = np.zeros((1, max_len), dtype="int32")  # 0 = padding after the sentence
X_test[0, :len(test_sentence)] = [test_word2idx[word] for word in test_sentence]

y_pred = model.predict(X_test)
# Most probable tag id at every position of the single test sequence.
y_pred_tags = np.argmax(y_pred[0], axis=-1)

tag_map = {0: "O", 1: "B-PER", 2: "I-PER", 3: "B-LOC", 4: "I-LOC"}
# Only the positions that hold real words are reported; padding is dropped.
bio_tags = [tag_map.get(t, "O") for t in y_pred_tags[:len(test_sentence)]]

df = pd.DataFrame({"Word": test_sentence, "BIO Tag": bio_tags})
print(df.to_string(index=False))




Epoch 1/2
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 39ms/step - accuracy: 0.1954 - loss: 1.6095
Epoch 2/2
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - accuracy: 0.2555 - loss: 1.6059
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 422ms/step
 Word BIO Tag
 John   B-PER
lives   B-PER
   in   B-PER
  New   B-PER
 York   B-PER


In [8]:
import pandas as pd
# Hand-written NER results: one sentence, its BIO tag sequence and a
# correctness flag per row.
sentences = ["Elon Musk founded SpaceX", "Google is in California"]
output_tags = [
    ["B-PER", "I-PER", "O", "B-ORG"],
    ["B-ORG", "O", "O", "B-LOC"],
]
correct = ["Y", "Y"]

# Build the table row by row; each tag list is flattened into a single
# space-separated string for display.
df = pd.DataFrame(
    [
        (sentence, " ".join(tags), flag)
        for sentence, tags, flag in zip(sentences, output_tags, correct)
    ],
    columns=["Input Sentence", "Output Tags", "Correct (Y/N)"],
)
print(df.to_string(index=False))


          Input Sentence         Output Tags Correct (Y/N)
Elon Musk founded SpaceX B-PER I-PER O B-ORG             Y
 Google is in California     B-ORG O O B-LOC             Y
