In [1]:
from tqdm import tqdm
import torch
from torch import nn
from utils import load_data_file, orders_balancer, pizza_orders_balancer, randomizer_balancer
import pickle

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\xAbdoMo\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
vocab = pickle.load(open('data/vocab.pkl', 'rb'))

input_name = "orders"
labels_name = "orders_labels"

tags = pickle.load(open(f'data/tags_{input_name}.pkl', 'rb'))

print(len(vocab), len(tags))

678 5


In [3]:
t_sentences, t_labels, t_size = load_data_file(vocab, tags, f'processed_input/train_{input_name}.txt', f'processed_input/train_{labels_name}.txt', orders_balancer)
dev_sentences, dev_labels, dev_size =  load_data_file(vocab, tags, f'processed_input/dev_{input_name}.txt', f'processed_input/dev_{labels_name}.txt', orders_balancer)
test_sentences, test_labels, test_size =  load_data_file(vocab, tags, f'processed_input/test_{input_name}.txt', f'processed_input/test_{labels_name}.txt', orders_balancer)

Sentences loader: 100%|██████████| 2456446/2456446 [00:31<00:00, 79052.77it/s] 
Labels loader: 100%|██████████| 2456446/2456446 [00:05<00:00, 423816.69it/s]
Group tokens and labels: 100%|██████████| 2456446/2456446 [01:07<00:00, 36405.41it/s]
Order balancer - preload: 2456446it [00:02, 839174.55it/s] 
Order balancer - finalize: 100%|██████████| 6950148/6950148 [01:06<00:00, 103795.40it/s]
UnGroup tokens and labels: 6950148it [00:39, 176385.90it/s]
Projector: 100%|██████████| 6950148/6950148 [14:35<00:00, 7933.99it/s] 
Sentences loader: 100%|██████████| 348/348 [00:00<00:00, 30230.47it/s]
Labels loader: 100%|██████████| 348/348 [00:00<00:00, 138654.68it/s]
Group tokens and labels: 100%|██████████| 348/348 [00:00<00:00, 31076.19it/s]
Order balancer - preload: 348it [00:00, 348025.22it/s]
Order balancer - finalize: 100%|██████████| 1214/1214 [00:00<00:00, 173683.70it/s]
UnGroup tokens and labels: 1214it [00:00, 165611.30it/s]
Projector: 100%|██████████| 1214/1214 [00:00<00:00, 7255.79it/s

In [4]:
print('The training size is', t_size)
print('The validation size is', dev_size)
print('The testing size is', test_size)

The training size is 6950148
The validation size is 1214
The testing size is 4678


In [5]:
from ner import NER, NERDataset

In [6]:
import random


def train(model, train_dataset, batch_size=512, epochs=10, learning_rate=0.01, skip_prop=0.0):
  model.train()   # switch to train mode
  train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True)
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(), learning_rate)
  use_cuda = torch.cuda.is_available()
  device = torch.device("cuda:0" if use_cuda else "cpu")
  if use_cuda:
    model = model.to(device)
    criterion = criterion.cuda(device)
    pass

  for epoch_num in range(epochs):
    total_acc_train = 0
    total_loss_train = 0
  
    for train_input, train_label in tqdm(train_dataloader):
      if skip_prop > random.random():
        continue    # skip this batch
      train_input = train_input.to(device)
      train_label = train_label.to(device)
      output = model.forward(train_input)
      batch_loss = criterion(output.view(-1, output.shape[-1]), train_label.view(-1))
      total_loss_train += batch_loss
      acc = (torch.argmax(output, dim=-1) == train_label).sum().item()
      total_acc_train += acc
      optimizer.zero_grad()
      batch_loss.backward()
      optimizer.step()
      
    epoch_loss = total_loss_train / len(train_dataset)
  
    sample_count = len(train_dataset)
    seq_length = train_dataset[0][0].shape[0]
    epoch_acc = total_acc_train / (sample_count * seq_length)
  
  
    print(
        f'Epochs: {epoch_num + 1} | Train Loss: {epoch_loss} \
        | Train Accuracy: {epoch_acc}\n')

In [7]:
train_dataset = NERDataset(t_sentences, t_labels, vocab['<pad>'], tags["NONE"])
val_dataset = NERDataset(dev_sentences, dev_labels, vocab['<pad>'], tags["NONE"])
test_dataset = NERDataset(test_sentences, test_labels, vocab['<pad>'], tags["NONE"])

In [22]:
model = NER(embedding_dim=95, hidden_size=400, n_classes=len(tags), vocab_size=len(vocab), num_layers=2, dropout=0.5)
print(model)

NER(
  (embedding): Embedding(678, 95)
  (dropout): Dropout(p=0.5, inplace=False)
  (lstm): LSTM(95, 200, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
  (linear): Linear(in_features=400, out_features=5, bias=True)
)


In [None]:
model = model.to(torch.device("cuda:0"))
train(model, train_dataset, epochs=10, batch_size=512, skip_prop=0.9)
train(model, val_dataset, epochs=15, batch_size=32)
train(model, test_dataset, epochs=15, batch_size=32)

100%|██████████| 13575/13575 [03:25<00:00, 65.93it/s] 


Epochs: 1 | Train Loss: 1.9072357417826424e-06         | Train Accuracy: 0.09369917302480466



100%|██████████| 13575/13575 [02:38<00:00, 85.89it/s] 


Epochs: 2 | Train Loss: 3.6042004580849607e-07         | Train Accuracy: 0.10248525355143516



 55%|█████▍    | 7400/13575 [01:29<01:24, 72.91it/s] 

In [18]:
# train(model, val_dataset, epochs=10, batch_size=32)
# train(model, test_dataset, epochs=10, batch_size=16)

100%|██████████| 38/38 [00:00<00:00, 65.07it/s]


Epochs: 1 | Train Loss: 0.00173646688926965         | Train Accuracy: 0.9822275473016824



100%|██████████| 38/38 [00:00<00:00, 73.57it/s]


Epochs: 2 | Train Loss: 0.001637222827412188         | Train Accuracy: 0.9837002645898857



100%|██████████| 38/38 [00:00<00:00, 75.32it/s]


Epochs: 3 | Train Loss: 0.0015276501653715968         | Train Accuracy: 0.9841745294793071



100%|██████████| 38/38 [00:00<00:00, 72.26it/s]


Epochs: 4 | Train Loss: 0.0013340378645807505         | Train Accuracy: 0.985946782487145



100%|██████████| 38/38 [00:00<00:00, 77.87it/s]


Epochs: 5 | Train Loss: 0.001319974777288735         | Train Accuracy: 0.9862712795167491



100%|██████████| 38/38 [00:00<00:00, 70.23it/s]


Epochs: 6 | Train Loss: 0.001308902632445097         | Train Accuracy: 0.9859218211771754



100%|██████████| 38/38 [00:00<00:00, 77.43it/s]


Epochs: 7 | Train Loss: 0.0011909209424629807         | Train Accuracy: 0.9872697319155309



100%|██████████| 38/38 [00:00<00:00, 77.89it/s]


Epochs: 8 | Train Loss: 0.0012863392475992441         | Train Accuracy: 0.985946782487145



100%|██████████| 38/38 [00:00<00:00, 77.13it/s]


Epochs: 9 | Train Loss: 0.0012899547582492232         | Train Accuracy: 0.9862962408267186



100%|██████████| 38/38 [00:00<00:00, 74.02it/s]


Epochs: 10 | Train Loss: 0.0011822456726804376         | Train Accuracy: 0.9876191902551046



100%|██████████| 293/293 [00:03<00:00, 75.94it/s]


Epochs: 1 | Train Loss: 0.0033554816618561745         | Train Accuracy: 0.9835016059898488



100%|██████████| 293/293 [00:03<00:00, 76.17it/s]


Epochs: 2 | Train Loss: 0.0036906879395246506         | Train Accuracy: 0.9810843994255709



100%|██████████| 293/293 [00:03<00:00, 73.56it/s]


Epochs: 3 | Train Loss: 0.0038934459444135427         | Train Accuracy: 0.9798620931583736



100%|██████████| 293/293 [00:03<00:00, 75.72it/s]


Epochs: 4 | Train Loss: 0.004223140887916088         | Train Accuracy: 0.978436982712314



100%|██████████| 293/293 [00:04<00:00, 73.12it/s]


Epochs: 5 | Train Loss: 0.004465234465897083         | Train Accuracy: 0.9767816621172756



100%|██████████| 293/293 [00:03<00:00, 76.08it/s]


Epochs: 6 | Train Loss: 0.0051994649693369865         | Train Accuracy: 0.9729119391368216



100%|██████████| 293/293 [00:04<00:00, 66.53it/s]


Epochs: 7 | Train Loss: 0.005260578356683254         | Train Accuracy: 0.9727475033161224



100%|██████████| 293/293 [00:05<00:00, 52.03it/s]


Epochs: 8 | Train Loss: 0.005705736577510834         | Train Accuracy: 0.9696725534690477



100%|██████████| 293/293 [00:04<00:00, 59.56it/s]


Epochs: 9 | Train Loss: 0.005961867049336433         | Train Accuracy: 0.9679788645158461



100%|██████████| 293/293 [00:03<00:00, 74.99it/s]

Epochs: 10 | Train Loss: 0.006278303451836109         | Train Accuracy: 0.9664879797415069






In [10]:
def evaluate(model, test_dataset, batch_size=64):
  test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size,shuffle=False)
  use_cuda = torch.cuda.is_available()
  device = torch.device("cuda" if use_cuda else "cpu")
  if use_cuda:
    model = model.cuda()

  total_acc_test = 0
  
  with torch.no_grad():
    for test_input, test_label in tqdm(test_dataloader):
      test_input = test_input.to(device)
      test_label = test_label.to(device)
      output = model.forward(test_input)

      # Check if entire sequence matches by comparing all positions
      sequence_matches = (torch.argmax(output, dim=-1) == test_label).all(dim=-1)
      acc = sequence_matches.sum().item()
      total_acc_test += acc
    
    total_acc_test /= len(test_dataset)
  print(f'\nTest Accuracy: {total_acc_test}')

In [19]:
model.eval()
evaluate(model, test_dataset)
evaluate(model, val_dataset)

100%|██████████| 74/74 [00:00<00:00, 156.67it/s]



Test Accuracy: 0.6633176571184267


100%|██████████| 19/19 [00:00<00:00, 193.04it/s]


Test Accuracy: 0.6359143327841845





In [14]:
inv_order_tags = {}
for tag, value in tags.items():
  inv_order_tags[value] = tag

from utils import tokenize, preprocess_tokens, project_tokens 
def test_sample(sample, model):
  s = tokenize(sample)
  s = preprocess_tokens(s)
  print(s)
  s = project_tokens(s, vocab)
  x_tensor = torch.tensor(s)
  device = torch.device("cuda:0")
  with torch.no_grad():
    output = model.forward(x_tensor.to(device))
    output = torch.argmax(output, dim=-1).to("cpu")
    print([inv_order_tags[x.item()] for x in output])

In [None]:
model.eval()
model = model.to(torch.device("cuda:0"))
test_sample("I'd like to order two pizza and four drinks please also add 10 peperoni pizzas also a peperoni pie with no chess", model)

In [17]:
from model_io import save_model_state

save_model_state(model, "models/order_boundary_e95_h300_l2_d0.5_rg0_x83.7.pth")
model.to(torch.device("cuda:0"))

NER(
  (embedding): Embedding(678, 95)
  (dropout): Dropout(p=0.5, inplace=False)
  (lstm): LSTM(95, 300, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
  (linear): Linear(in_features=600, out_features=5, bias=True)
)

In [22]:
from model_io import load_model_state

model_load_test = NER(
  embedding_dim=95, hidden_size=600, n_classes=len(tags), vocab_size=len(vocab), num_layers=2, dropout=0.5
)

load_model_state(model_load_test, "models/complex_e95_h600_l2_d0.5_cv0_x1.pth")
model_load_test.to(torch.device("cuda:0"))
model_load_test.eval()
test_sample("extra love", model_load_test)

['extra', 'love']
['QUANTITY_S', 'NONE']


  state_dict = torch.load(path, device)
