<a href="https://colab.research.google.com/github/Rehan6541/AI/blob/main/NER_using_GRU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, TimeDistributed, Bidirectional
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import numpy as np

In [None]:
#Sample traning setences(tokenized manually)
#There are example sentences for tranining.Each Setence is tokenized(split into words).
train_sentences = [
    ["John","lives","in","New","York"],
    ["Alice","is","from","Paris"],
    ["Berlin","is","the","capital","of","Germany"]
]

In [None]:
#Corresponding NER tags for training data
#Corresponding NER (Named Entity Recognition) tags for each word
#B-PER:Beginning of a person entity.
#B-LOC:Beginning of a location entity
#O: Outside,meaning no entity
train_ner_tags = [
    ['B-PER','O','O','B-LOC','B-LOC'],
    ['B-PER','O','O','B-LOC'],
    ['B-PER','O','O','O','O','B-LOC']
]

# Vocabulary and tag mapping (for tokenization)
vocab = { "John":1,"lives":2,"in":3,"New":4,"York":5,"Alice":6,"is":7,"from":8,"Paris":9,"Berlin":10,"the":11,"capital":12,"of":13,"Germany":14}
tags = {"O":0,"B-LOC":1,"B-PER":2}

#The vocab dictionary maps rach word in the trainning sentence to a unique integer.
#This is needed because the nural network dosent operate on words but on numeric values.
#The tags dictionary maps each NER tag to an integer (0 for O , 1 for B-LOC, 2 for B-PER).

#Tokenize the setence and labels
tokenized_train_sentences = [[vocab[word] for word in sentence] for sentence in train_sentences]
tokenized_train_ner_tags = [[tags[tag] for tag in ner] for ner in train_ner_tags]

In [None]:
#This converts the training senteneces and NER tags into lists of integers, using the mapping from
#Senetence : ["John","lives","in","New","York"] becomes [1,2,3,4,5].


#Extend vocabulary to include new tezt words
vocab.update({
    "Marry":15,"visited":16,"London":17,"Tom":18,"moved":19,"to":20,"statue":21
    })
#Here, we extend the vocabulary to include additional words from the test senteneces.
#For example,"Marry" is mapped to 15,"London" to 17, etc.

#Tokenize the test sentences
test_sentences = [
    ["Marry","visited","London"],
    ["Tom","moved","to","Berlin"],
    ["The","statue","is","in","Paris"]
]



In [None]:
tokenized_test_sentences = [[vocab.get(word,0) for word in sentence] for sentence in test_sentences]
#The 0 in vocab.get(word,0) serves as the default value returned
#if a word is not found in the vocab dictionary.Here's How it works:

#Vocab.get(word,0) looks up word in the vocab dictionary.
#If the word exists in vocab, it returns its corresponding value(likely a token or index).
#If the word does not exist in vocab, it returns - as a fallback.


In [None]:
#Parameters
vocab_size = len(vocab) + 1 #Updates vocab_size to account for all words in the vocab
embedding_dim = 64  #Dimension of enbedding vectors
n_tags = len(tags)  #Number of entity tags
max_len = 6 #Max sentence length(after padding)
#vocab_size : Total number of uniqu words in the vocabulary (plus 1 for padding).
#Embedding_dim : The number of NER tags (O, B-LOC,B-PER).
#max_len:Maximum sentence length for paddingg.
#Padding training and test sequences
tokenized_train_sentences = pad_sequences(tokenized_train_sentences,maxlen=max_len,padding = 'post')
tokenized_train_ner_tags = pad_sequences(tokenized_train_ner_tags,maxlen = max_len,padding="post")
tokenized_test_sentences = pad_sequences(tokenized_test_sentences,maxlen = max_len,padding="post")

#Split the training data(train/test split)
X_train, X_test, y_train, y_test = train_test_split(tokenized_train_sentences, tokenized_train_ner_tags, test_size = 0.2)





In [None]:
#Model Creation
model = Sequential()

#Embedding layer adjusted to the correct vocab_size
model.add(Embedding(input_dim = vocab_size, output_dim = embedding_dim, input_length = max_len))

model.add(Bidirectional(GRU(units = 64, return_sequences = True))) #Bidirectiional GRU
model.add(TimeDistributed(Dense(n_tags, activation = "softmax")))
#Optimizer = tf.keras.optimizers.Adam(learning_rate = 0.001)

#Compile model
model.compile(optimizer = "adam", loss = "sparse_categorical_crossentropy",metrics = ["accuracy"])

#TRain the model
history = model.fit(X_train, y_train, batch_size = 32, epochs = 10, validation_split = 0.2)

#Evaluate the model on test data
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy}")



Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 11s/step - accuracy: 0.3333 - loss: 1.1029 - val_accuracy: 0.6667 - val_loss: 1.0951
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 0.8333 - loss: 1.0857 - val_accuracy: 0.5000 - val_loss: 1.0916
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 0.8333 - loss: 1.0688 - val_accuracy: 0.5000 - val_loss: 1.0882
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - accuracy: 0.6667 - loss: 1.0520 - val_accuracy: 0.5000 - val_loss: 1.0849
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 142ms/step - accuracy: 0.6667 - loss: 1.0353 - val_accuracy: 0.5000 - val_loss: 1.0815
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step - accuracy: 0.6667 - loss: 1.0184 - val_accuracy: 0.5000 - val_loss: 1.0781
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━

In [None]:
#Predict on new test seentences
predictions = model.predict(tokenized_test_sentences)

#Decoding the predictions back to NER tags
reverse_tags = {i: tag for tag, i in tags.items()}

'''
Purpose of reverse_tags:
Orgiginal tags ditionary: This typically maps tags(e.g labels, classes)
to unique numerical ID's or indices.
Example:tags = {'NOUN':1,'VERB': 2, 'ADJ': 3}
Reversed reverse_tags dictionary:The reverse dictionary swaps the roles,
mapping the numerical ID's or indices back to their corresponding tags.
'''

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 587ms/step


"\nPurpose of reverse_tags:\nOrgiginal tags ditionary: This typically maps tags(e.g labels, classes)\nto unique numerical ID's or indices.\nExample:tags = {'NOUN':1,'VERB': 2, 'ADJ': 3}\nReversed reverse_tags dictionary:The reverse dictionary swaps the roles,\nmapping the numerical ID's or indices back to their corresponding tags.\n"

In [None]:
def decode_predictions(preds, max_len):
  decoded_preds = []
#This list will store the decoded predictions for all sentences.
#Iterating over each prediction (pred) for every token in each sentence
  for pred in preds:
    decoded_sentence = [reverse_tags[np.argmax(p)] for p in pred]
#pred represents the list of predicted probabilities for a token.
#For each token p, we apply np.argmax(p) to get the index of the highest probability.
#This tells us which NER tag has the highest probability.
#For example, if the probabilities for a token are [0.1, 0.8, 0.1],
#then np.argmax(p) returns 1 (because 0.8 is the highest value),
# which corresponds to the tag B-LOC (from reverse_tags).
#We use reverse_tags[np.argmav(n\l t lank un the artual tag (like "O", "B-LOC", or "B-PER")
#based on the index returned by np.argmax(p)
    decoded_preds.append(decoded_sentence)
  return decoded_preds
#Suppose the preds for one sentence look like this (simplified to one token per sentence for clai
#preds = [
#       [[0.2, 0.7, 0.1], [0.9, 0.05, 0.05], [0.1, 0.2, 0.7]] # Probabilities for three tokens

#The model output probabilities for three tokens (one for each possible NER tag):

#For token 1: [0.2, 0.7, 0.1] > highest probability is at index 1 + B-LOC.
#For token 2: [0.9, 0.05, 0.05] + highest probability is at index 0 + 0.
#For token 3: [0.1, 0.2, 0.7] + highest probability is at index 2 + B-PER.
#After decoding, you get
#decoded_sentence=['B-LOC','O','B-PER']

#Decoding predictions
decoded_predictions = decode_predictions(predictions, max_len)

#Show test sentences with predicted tags
for sentence,pred_tags in zip(test_sentences,decoded_predictions):
  print(f"Sentence: {' '.join(sentence)}")
  print(f"Predicted NER Tags: {pred_tags}")
  print()

Sentence: Marry visited London
Predicted NER Tags: ['O', 'O', 'O', 'O', 'O', 'O']

Sentence: Tom moved to Berlin
Predicted NER Tags: ['O', 'O', 'O', 'O', 'O', 'O']

Sentence: The statue is in Paris
Predicted NER Tags: ['O', 'O', 'O', 'O', 'O', 'O']

