In [None]:
# Install keras (if running in fresh Colab)
!pip install tensorflow keras

from keras.models import Model
from keras.layers import Input, LSTM, Dense, Embedding
import numpy as np

# Sample data
input_texts = ['I love NLP', 'He plays football']
target_texts = [['PRON', 'VERB', 'NOUN'], ['PRON', 'VERB', 'NOUN']]

# Tokenization
word_vocab = sorted(set(word for sent in input_texts for word in sent.split()))
tag_vocab = sorted(set(tag for tags in target_texts for tag in tags))

word2idx = {word: i+1 for i, word in enumerate(word_vocab)}  # +1 for padding
tag2idx = {tag: i for i, tag in enumerate(tag_vocab)}

max_encoder_len = max(len(s.split()) for s in input_texts)
max_decoder_len = max(len(tags) for tags in target_texts)

encoder_input_data = np.array([
    [word2idx[word] for word in sent.split()] + [0]*(max_encoder_len-len(sent.split()))
    for sent in input_texts
])

decoder_input_data = np.array([
    [tag2idx[tag] for tag in tags] + [0]*(max_decoder_len-len(tags))
    for tags in target_texts
])

decoder_output_data = np.expand_dims(decoder_input_data, -1)  # required for sparse_categorical_crossentropy

# Model
latent_dim = 64

# Encoder
encoder_inputs = Input(shape=(None,))
enc_emb = Embedding(input_dim=len(word_vocab)+1, output_dim=latent_dim)(encoder_inputs)
encoder_outputs, state_h, state_c = LSTM(latent_dim, return_state=True)(enc_emb)

# Decoder
decoder_inputs = Input(shape=(None,))
dec_emb = Embedding(input_dim=len(tag_vocab)+1, output_dim=latent_dim)(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(dec_emb, initial_state=[state_h, state_c])
decoder_dense = Dense(len(tag_vocab), activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Model compile
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train
model.fit([encoder_input_data, decoder_input_data], decoder_output_data,
          batch_size=2, epochs=50, verbose=1)

print("✅ Model trained successfully!")


Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.1667 - loss: 1.1011
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - accuracy: 0.5000 - loss: 1.0961
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.6667 - loss: 1.0910
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 1.0000 - loss: 1.0859
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 1.0000 - loss: 1.0808
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 1.0000 - loss: 1.0755
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - accuracy: 1.0000 - loss: 1.0700
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 1.0000 - loss: 1.0643
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

In [6]:
idx2tag = {i: t for t, i in tag2idx.items()}
encoder_model = Model(encoder_inputs, [encoder_outputs, state_h, state_c])
decoder_state_h_input = Input(shape=(latent_dim,))
decoder_state_c_input = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_h_input, decoder_state_c_input]
dec_emb2 = Embedding(input_dim=len(tag_vocab)+1, output_dim=latent_dim)(decoder_inputs)
decoder_outputs2, state_h2, state_c2 = decoder_lstm(
    dec_emb2, initial_state=decoder_states_inputs)
decoder_states2 = [state_h2, state_c2]
decoder_outputs2 = decoder_dense(decoder_outputs2)
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs, [decoder_outputs2] + decoder_states2)
def predict_tags(input_seq):
    _, h, c = encoder_model.predict(input_seq)
    states_value = [h, c]
    decoded_sequences = []
    for i in range(input_seq.shape[0]):
        target_seq = np.zeros((1, 1))
        sampled_tag_index = 1
        decoded_sequence = []
        current_states_value = [states_value[0][i:i+1], states_value[1][i:i+1]]
    for _ in range(max_decoder_len):
            output_tokens, h, c = decoder_model.predict(
                [target_seq] + current_states_value)
            sampled_tag_index = np.argmax(output_tokens[0, -1, :])
            if sampled_tag_index in idx2tag:
                decoded_sequence.append(idx2tag[sampled_tag_index])
            else:
                decoded_sequence.append("<UNK>")
            target_seq = np.zeros((1, 1))
            target_seq[0, 0] = sampled_tag_index
            current_states_value = [h, c]
          decoded_sequences.append(decoded_sequence)
    return decoded_sequences
predicted = predict_tags(encoder_input_data)
print(f"{'Sentence':25s} {'Predicted Tags':20s} {'Correct (Y/N)'}")
for sent, gold, pred in zip(input_texts, target_texts, predicted):
    pred_trimmed = pred[:len(gold)]  # match length
    correct = "Y" if pred_trimmed == gold else "N"
    print(f"{sent:25s} {' '.join(pred_trimmed):20s} {correct}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 181ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 182ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
Sentence                  Predicted Tags       Correct (Y/N)
I love NLP                PRON VERB NOUN       Y
He plays football         PRON VERB NOUN       Y


In [7]:
import pandas as pd
sentences = [
    "Elon Musk founded SpaceX",
    "Google is in California"]
predicted_tags = [
    ["B-PER", "I-PER", "O", "B-ORG"],
    ["B-ORG", "O", "O", "B-LOC"]]
gold_tags = [
    ["B-PER", "I-PER", "O", "B-ORG"],
    ["B-ORG", "O", "O", "B-LOC"]]
rows = []
for sent, pred, gold in zip(sentences, predicted_tags, gold_tags):
    correct = "Y" if pred == gold else "N"
    rows.append([sent, " ".join(pred), correct])
df = pd.DataFrame(rows, columns=["Input Sentence", "Output Tags", "Correct (Y/N)"])
print(df.to_string(index=False))


          Input Sentence         Output Tags Correct (Y/N)
Elon Musk founded SpaceX B-PER I-PER O B-ORG             Y
 Google is in California     B-ORG O O B-LOC             Y


In [10]:
# Reuse sample data, vocab, and index mappings from the previous cell
# input_texts, target_texts, word_vocab, tag_vocab, word2idx, tag2idx are already available

# Reuse calculation of max_encoder_len and max_decoder_len
max_len = max(len(s.split()) for s in input_texts)

# Create encoder_input_data by converting words to indices and padding
encoder_input_data = np.array([
    [word2idx.get(word, 0) for word in sent.split()] + [0]*(max_len-len(sent.split()))
    for sent in input_texts
])

# Create decoder_output_data by converting tags to indices and padding
decoder_output_data = np.array([
    [tag2idx.get(tag, 0) for tag in tags] + [0]*(max_len-len(tags))
    for tags in target_texts
])

print("✅ Data prepared for BiLSTM model.")
print("Encoder input shape:", encoder_input_data.shape)
print("Decoder output shape:", decoder_output_data.shape)

✅ Data prepared for BiLSTM model.
Encoder input shape: (2, 3)
Decoder output shape: (2, 3)


In [12]:
# Define Model using the Keras Functional API with the correct max_len
input = Input(shape=(max_len,))

# Embedding
model = Embedding(input_dim=n_words, output_dim=50, input_length=max_len)(input)

# BiLSTM
model = Bidirectional(LSTM(units=50, return_sequences=True))(model)

# Dense layer for logits
logits = Dense(n_tags)(model)

# Define Model using the Keras Functional API
output = Dense(n_tags, activation='softmax')(logits) # Added a final Dense layer with softmax for classification
model = Model(inputs=input, outputs=output)

# Compile
# Using sparse_categorical_crossentropy for the dense layer output
model.compile(optimizer="adam", loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model again with the corrected input shape
model.fit(encoder_input_data, decoder_output_data,
          batch_size=2, epochs=50, verbose=1)

print("✅ BiLSTM model trained successfully!")

Epoch 1/50




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.1667 - loss: 2.3052
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step - accuracy: 0.5000 - loss: 2.2942
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 0.5000 - loss: 2.2832
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - accuracy: 0.5000 - loss: 2.2721
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.6667 - loss: 2.2608
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.8333 - loss: 2.2492
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 0.8333 - loss: 2.2373
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.8333 - loss: 2.2249
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48m

In [13]:
def predict_bilstm_tags(sentences):
    """
    Predicts tags for a list of input sentences using the trained BiLSTM model.

    Args:
        sentences: A list of input sentences (strings).

    Returns:
        A list of lists, where each inner list contains the predicted tags
        (strings) for the corresponding input sentence.
    """
    # Preprocess input sentences
    input_sequences = np.array([
        [word2idx.get(word, 0) for word in sent.split()] + [0]*(max_len-len(sent.split()))
        for sent in sentences
    ])

    # Predict tag probabilities
    predictions = model.predict(input_sequences)

    # Post-process predictions
    predicted_tag_indices = np.argmax(predictions, axis=-1)

    # Convert tag indices back to tags
    predicted_tags = []
    for seq_indices in predicted_tag_indices:
        predicted_seq = [idx2tag.get(idx, "<UNK>") for idx in seq_indices]
        predicted_tags.append(predicted_seq)

    return predicted_tags

# Test the function with the sample sentences
predicted_sample_tags = predict_bilstm_tags(input_texts)

# Print predictions for verification
print("\nPredicted tags for sample sentences:")
for sent, tags in zip(input_texts, predicted_sample_tags):
    print(f"Sentence: {sent}")
    print(f"Predicted Tags: {tags}")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 597ms/step

Predicted tags for sample sentences:
Sentence: I love NLP
Predicted Tags: ['PRON', 'VERB', 'NOUN']
Sentence: He plays football
Predicted Tags: ['PRON', 'VERB', 'NOUN']


In [14]:
import pandas as pd

# 1. Use the prediction function to predict tags for the sample input sentences
predicted_tags_bilstm = predict_bilstm_tags(input_texts)

# 2. Create a pandas DataFrame to display the results
rows = []
for sent, predicted, gold in zip(input_texts, predicted_tags_bilstm, target_texts):
    # Trim predicted tags to match the length of the original sentence/ground truth
    # This assumes a one-to-one mapping of words to tags, common in POS tagging
    predicted_trimmed = predicted[:len(sent.split())]
    rows.append([sent, " ".join(predicted_trimmed), " ".join(gold)])

df_bilstm_predictions = pd.DataFrame(rows, columns=["Input Sentence", "Predicted Tags (BiLSTM)", "Ground Truth Tags"])

# 4. Print the DataFrame
print(df_bilstm_predictions.to_string(index=False))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
   Input Sentence Predicted Tags (BiLSTM) Ground Truth Tags
       I love NLP          PRON VERB NOUN    PRON VERB NOUN
He plays football          PRON VERB NOUN    PRON VERB NOUN
