Build a Bi-LSTM model for Named Entity Recognition (NER) using TensorFlow 2 on a small manually defined dataset. The model predicts tags like PER (person), LOC (location), or O (other).

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers

In [2]:
sentences = [["john", "lives", "in", "new", "york"],
             ["alice", "is", "from", "paris"],
             ["bob", "visited", "london", "last", "year"]]

labels = [["PER", "O", "O", "LOC", "LOC"],
          ["PER", "O", "O", "LOC"],
          ["PER", "O", "LOC", "O", "O"]]

In [3]:
#Build vocabularies

word_tokenizer=tf.keras.preprocessing.text.Tokenizer(lower=True,oov_token='UNK')
word_tokenizer.fit_on_texts(sentences)
X=word_tokenizer.texts_to_sequences(sentences)
word_index=word_tokenizer.word_index
vocab_size=len(word_index)+1

tag_tokenizer=tf.keras.preprocessing.text.Tokenizer(lower=True,oov_token='UNK')
tag_tokenizer.fit_on_texts(labels)
y=tag_tokenizer.texts_to_sequences(labels)
tag_index=tag_tokenizer.word_index
num_tags=len(tag_index)+1

In [4]:
#Pad sequences
max_len=max(len(s) for s in X)
X=tf.keras.preprocessing.sequence.pad_sequences(X,maxlen=max_len,padding='post')
y=tf.keras.preprocessing.sequence.pad_sequences(y,maxlen=max_len,padding='post')

In [5]:
#Convert labels to categorical
y_cat=tf.keras.utils.to_categorical(y,num_classes=num_tags)

In [6]:
#Build Bi-LSTM model
model=Sequential([
    layers.Embedding(input_dim=vocab_size,output_dim=64,input_length=max_len),
    layers.Bidirectional(layers.LSTM(64,return_sequences=True)),
    layers.TimeDistributed(layers.Dense(num_tags,activation='softmax')) #one output per token
])

In [7]:
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

In [8]:
model.fit(X,y_cat,epochs=50,verbose=0)

<keras.src.callbacks.history.History at 0x2d1303c9d30>

In [13]:
#Predict on new sentence
test_sentence=["alice", "is", "from", "paris"]
test_seq=word_tokenizer.texts_to_sequences([test_sentence])
test_seq=tf.keras.preprocessing.sequence.pad_sequences(test_seq,maxlen=max_len,padding='post')

pred=model.predict(test_seq)[0]
pred_tags=[list(tag_index.keys())[np.argmax(p) - 1] if np.argmax(p) > 0 else "PAD" for p in pred]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step


In [14]:
for word, tag in zip(test_sentence, pred_tags):
    print(f"{word} → {tag}")


alice → per
is → o
from → o
paris → loc
