# Named Entity Recognition Assignment
NER is a subtask of information extraction that locates and classifies named entities in a text. The named entities could be organizations, persons, locations, times, etc. In this assignment, you will train a named entity recognition system and test it on a test data. \
Let's get started

In [1]:
import os 
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch
from torch import nn
from utils_2 import load_data_file, get_vocab, get_tags
import random as rnd

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\xAbdoMo\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
vocab = get_vocab(['processed_input/train_vocab.txt', 'processed_input/dev_vocab.txt', 'processed_input/test_vocab.txt'])

input_name = "pizza_orders"
labels_name = "pizza_orders_labels"

tags = get_tags(f'processed_input/train_{input_name}_tags.txt')

print(len(vocab))
print(tags)

448
{'TOPPING_S': 0, 'COMPLEX_TOPPING_S': 1, 'NOT_STYLE_S': 2, 'STYLE': 3, 'NUMBER': 4, 'TOPPING': 5, 'SIZE': 6, 'NONE': 7, 'NOT_TOPPING': 8, 'NOT_TOPPING_S': 9, 'NUMBER_S': 10, 'NOT_COMPLEX_TOPPING_S': 11, 'STYLE_S': 12, 'NOT_COMPLEX_TOPPING': 13, 'COMPLEX_TOPPING': 14, 'SIZE_S': 15, 'NOT_STYLE': 16}


In [3]:
t_sentences, t_labels, t_size = load_data_file(vocab, tags, f'processed_input/train_{input_name}.txt', f'processed_input/train_{labels_name}.txt')

dev_sentences, dev_labels, dev_size =  load_data_file(vocab, tags, f'processed_input/dev_{input_name}.txt', f'processed_input/dev_{labels_name}.txt')

test_sentences, test_labels, test_size =  load_data_file(vocab, tags, f'processed_input/test_{input_name}.txt', f'processed_input/test_{labels_name}.txt')

In [4]:
print('The number of outputs is tag_map', len(tags))
print('The vocab size is', len(vocab))
print('The training size is', t_size)
print('The validation size is', dev_size)
print('The testing size is', test_size)

The number of outputs is tag_map 17
The vocab size is 448
The training size is 2493488
The validation size is 367
The testing size is 1420


In [5]:
from ner import NER, NERDataset

In [6]:
import random


def train(model, train_dataset, batch_size=512, epochs=10, learning_rate=0.01, skip_prop=0.0):
  model.train()   # switch to train mode
  train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True)
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(), learning_rate)
  use_cuda = torch.cuda.is_available()
  device = torch.device("cuda:0" if use_cuda else "cpu")
  if use_cuda:
    model = model.to(device)
    criterion = criterion.cuda(device)
    pass

  for epoch_num in range(epochs):
    total_acc_train = 0
    total_loss_train = 0
  
    for train_input, train_label in tqdm(train_dataloader):
      if skip_prop > random.random():
        continue    # skip this batch
      train_input = train_input.to(device)
      train_label = train_label.to(device)
      output = model.forward(train_input)
      batch_loss = criterion(output.view(-1, output.shape[-1]), train_label.view(-1))
      total_loss_train += batch_loss
      acc = (torch.argmax(output, dim=-1) == train_label).sum().item()
      total_acc_train += acc
      optimizer.zero_grad()
      batch_loss.backward()
      optimizer.step()
      
    epoch_loss = total_loss_train / len(train_dataset)
  
    sample_count = len(train_dataset)
    seq_length = train_dataset[0][0].shape[0]
    epoch_acc = total_acc_train / (sample_count * seq_length)
  
  
    print(
        f'Epochs: {epoch_num + 1} | Train Loss: {epoch_loss} \
        | Train Accuracy: {epoch_acc}\n')

In [7]:
train_dataset = NERDataset(t_sentences, t_labels, vocab['<pad>'], tags["NONE"])
val_dataset = NERDataset(dev_sentences, dev_labels, vocab['<pad>'], tags["NONE"])
test_dataset = NERDataset(test_sentences, test_labels, vocab['<pad>'], tags["NONE"])

In [8]:
model = NER(embedding_dim=95, hidden_size=1200, n_classes=len(tags), vocab_size=len(vocab), num_layers=2, dropout=0.5)
print(model)

NER(
  (embedding): Embedding(448, 95)
  (dropout): Dropout(p=0.5, inplace=False)
  (lstm): LSTM(95, 1200, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
  (linear): Linear(in_features=2400, out_features=17, bias=True)
)


In [42]:
model = model.to(torch.device("cuda:0"))
train(model, train_dataset, epochs=10, batch_size=64, skip_prop=0.9)
# train(model, val_dataset, epochs=15, batch_size=32)
# train(model, test_dataset, epochs=15, batch_size=32)

100%|██████████| 38961/38961 [05:49<00:00, 111.39it/s]


Epochs: 1 | Train Loss: 2.127582411048934e-05         | Train Accuracy: 0.09961727355033438



100%|██████████| 38961/38961 [05:50<00:00, 111.03it/s]


Epochs: 2 | Train Loss: 1.8349768652115017e-05         | Train Accuracy: 0.09977566708619756



100%|██████████| 38961/38961 [05:46<00:00, 112.54it/s]


Epochs: 3 | Train Loss: 1.884028642962221e-05         | Train Accuracy: 0.09850065068538147



100%|██████████| 38961/38961 [05:47<00:00, 112.06it/s]


Epochs: 4 | Train Loss: 2.108085209329147e-05         | Train Accuracy: 0.09933889127733253



 43%|████▎     | 16871/38961 [02:31<03:18, 111.12it/s]


KeyboardInterrupt: 

In [59]:
# train(model, val_dataset, epochs=5, batch_size=32)
train(model, test_dataset, epochs=5, batch_size=32)

100%|██████████| 45/45 [00:03<00:00, 12.95it/s]


Epochs: 1 | Train Loss: 0.02269425429403782         | Train Accuracy: 0.9716255868544601



100%|██████████| 45/45 [00:03<00:00, 14.13it/s]


Epochs: 2 | Train Loss: 0.01942363940179348         | Train Accuracy: 0.9728579812206573



100%|██████████| 45/45 [00:03<00:00, 14.09it/s]


Epochs: 3 | Train Loss: 0.016794899478554726         | Train Accuracy: 0.9742957746478873



100%|██████████| 45/45 [00:03<00:00, 14.13it/s]


Epochs: 4 | Train Loss: 0.01656949706375599         | Train Accuracy: 0.9762910798122065



100%|██████████| 45/45 [00:03<00:00, 14.13it/s]

Epochs: 5 | Train Loss: 0.014965523965656757         | Train Accuracy: 0.9770246478873239






In [11]:
def evaluate(model, test_dataset, batch_size=64):
  test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size,shuffle=False)
  use_cuda = torch.cuda.is_available()
  device = torch.device("cuda" if use_cuda else "cpu")
  if use_cuda:
    model = model.cuda()

  total_acc_test = 0
  
  with torch.no_grad():
    for test_input, test_label in tqdm(test_dataloader):
      test_input = test_input.to(device)
      test_label = test_label.to(device)
      output = model.forward(test_input)

      # Check if entire sequence matches by comparing all positions
      sequence_matches = (torch.argmax(output, dim=-1) == test_label).all(dim=-1)
      acc = sequence_matches.sum().item()
      total_acc_test += acc
    
    total_acc_test /= len(test_dataset)
  print(f'\nTest Accuracy: {total_acc_test}')

In [60]:
model.eval()
evaluate(model, test_dataset)
evaluate(model, val_dataset)

100%|██████████| 23/23 [00:01<00:00, 20.07it/s]



Test Accuracy: 0.8380281690140845


100%|██████████| 6/6 [00:00<00:00, 31.30it/s]


Test Accuracy: 0.8583106267029973





In [68]:
inv_order_tags = {}
for tag, value in tags.items():
  inv_order_tags[value] = tag

from utils_2 import tokenize, preprocess_tokens, project_tokens 
def test_sample(sample, model):
  s = tokenize(sample)
  s = preprocess_tokens(s, 0)
  print(s)
  s = project_tokens(s, vocab)
  x_tensor = torch.tensor(s)
  device = torch.device("cuda:0")
  with torch.no_grad():
    output = model.forward(x_tensor.to(device))
    output = torch.argmax(output, dim=-1).to("cpu")
    print([inv_order_tags[x.item()] for x in output])

model.eval()
test_sample("two large pizzas and peperoni with extra olives and chicken and extra osama", model)

['<num>', 'larg', 'pizza', 'and', 'peperoni', 'with', 'extra', 'oliv', 'and', 'chicken', 'and', 'extra', 'osama']
Word: osama not in vocab
isPersonalPronoun: False
isNumber: False
isTopping: False
isQuantity: False
['NUMBER_S', 'SIZE_S', 'NONE', 'NONE', 'TOPPING_S', 'NONE', 'COMPLEX_TOPPING_S', 'COMPLEX_TOPPING', 'NONE', 'TOPPING_S', 'NONE', 'COMPLEX_TOPPING_S', 'NONE']


In [62]:
from model_io import save_model_state, load_model_state

save_model_state(model, "models/pizza_order_e95_h1200_l2_d0.5_r2_x83.8pth")


In [27]:
model_load_test = NER(
  embedding_dim=95, hidden_size=1200, n_classes=len(tags), vocab_size=len(vocab), num_layers=2, dropout=0.5
)

load_model_state(model_load_test, "models/order_boundary_e95_h1200_l2_d0.5_r1_x91.4.pth", torch.device("cuda:0"))
test_sample("two large pizzas with peperoni", model_load_test)

['<pron>', 'would', 'like', '<num>', 'larg', 'pizza', 'with', 'everyth', 'and', '<num>', 'larg', 'pizza', 'with', 'mushroom', 'and', 'extra', 'chees', 'and', '<num>', 'larg', 'coke']
['NONE', 'NONE', 'NONE', 'PIZZAORDER_S', 'PIZZAORDER', 'PIZZAORDER', 'PIZZAORDER', 'PIZZAORDER', 'NONE', 'PIZZAORDER_S', 'PIZZAORDER', 'PIZZAORDER', 'PIZZAORDER', 'PIZZAORDER', 'PIZZAORDER', 'PIZZAORDER', 'PIZZAORDER', 'NONE', 'DRINKORDER_S', 'DRINKORDER', 'DRINKORDER']
