In [None]:
from tqdm import tqdm
import torch
from torch import nn
from utils import load_data_file, orders_balancer, pizza_orders_balancer, randomizer_balancer
import pickle

In [None]:
# NOTE: pickle.load can execute arbitrary code; only load files you produced yourself.
# Context managers close each file handle as soon as its content has been read.
with open('data/vocab.pkl', 'rb') as vocab_file:
  vocab = pickle.load(vocab_file)

# Name stems used to build the processed input / label file paths and the tag file path.
input_name = "orders"
labels_name = "orders_labels"

with open(f'data/tags_{input_name}.pkl', 'rb') as tags_file:
  tags = pickle.load(tags_file)

# Sanity check: number of entries in the vocabulary and in the tag set.
print(len(vocab), len(tags))

In [None]:
# Load the three splits. Every call shares the same vocabulary, tag set and balancer;
# only the file prefix (train / dev / test) changes.
# NOTE(review): orders_balancer is also passed for the dev and test splits — confirm
# that balancing evaluation data is intended.
t_sentences, t_labels, t_size = load_data_file(
  vocab,
  tags,
  f'processed_input/train_{input_name}.txt',
  f'processed_input/train_{labels_name}.txt',
  orders_balancer,
)
dev_sentences, dev_labels, dev_size = load_data_file(
  vocab,
  tags,
  f'processed_input/dev_{input_name}.txt',
  f'processed_input/dev_{labels_name}.txt',
  orders_balancer,
)
test_sentences, test_labels, test_size = load_data_file(
  vocab,
  tags,
  f'processed_input/test_{input_name}.txt',
  f'processed_input/test_{labels_name}.txt',
  orders_balancer,
)

In [None]:
# Report the size value returned by load_data_file for each split.
for size_message, split_size in (
  ('The training size is', t_size),
  ('The validation size is', dev_size),
  ('The testing size is', test_size),
):
  print(size_message, split_size)

In [None]:
from ner import NER, NERDataset

In [None]:
import random


def train(model, train_dataset, batch_size=512, epochs=10, learning_rate=0.01, skip_prop=0.0):
  """Train `model` in place on `train_dataset` with Adam and cross-entropy loss.

  Args:
    model: torch.nn.Module. Calling it on a batch of inputs must return scores
      whose last dimension indexes the classes.
    train_dataset: dataset yielding (input, label) pairs; it is wrapped in a
      shuffling DataLoader.
    batch_size: number of samples per batch.
    epochs: number of passes over the dataset.
    learning_rate: learning rate handed to Adam.
    skip_prop: a batch is skipped whenever `skip_prop > random.random()`, so
      roughly this fraction of batches is dropped in every epoch.

  Returns:
    None. One summary line (mean batch loss and per-position accuracy) is
    printed per epoch. Both figures cover only the batches that were actually
    processed, so skipped batches do not deflate them.
  """
  model.train()   # switch to train mode
  train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True)
  criterion = nn.CrossEntropyLoss()
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  # Module.to() is a no-op when the module already lives on `device`.
  model = model.to(device)
  criterion = criterion.to(device)
  optimizer = torch.optim.Adam(model.parameters(), learning_rate)

  for epoch_num in range(epochs):
    loss_sum = 0.0        # sum of per-batch mean losses, kept as a plain float
    batches_seen = 0      # batches that were not skipped
    correct_positions = 0
    scored_positions = 0  # label positions contained in the processed batches

    for train_input, train_label in tqdm(train_dataloader):
      if skip_prop > random.random():
        continue    # skip this batch
      train_input = train_input.to(device)
      train_label = train_label.to(device)

      optimizer.zero_grad()
      # Call the module itself (not .forward) so registered hooks run.
      output = model(train_input)
      batch_loss = criterion(output.reshape(-1, output.shape[-1]), train_label.reshape(-1))
      batch_loss.backward()
      optimizer.step()

      # .item() detaches the value; accumulating the tensor itself would keep
      # autograd history alive and print as `tensor(..., grad_fn=...)`.
      loss_sum += batch_loss.item()
      batches_seen += 1
      correct_positions += (torch.argmax(output, dim=-1) == train_label).sum().item()
      scored_positions += train_label.numel()

    # CrossEntropyLoss averages inside each batch, so the epoch figure is the
    # mean over batches rather than over samples. Guard the all-skipped case.
    epoch_loss = loss_sum / batches_seen if batches_seen else 0.0
    epoch_acc = correct_positions / scored_positions if scored_positions else 0.0

    print(f'Epochs: {epoch_num + 1} | Train Loss: {epoch_loss} | Train Accuracy: {epoch_acc}\n')

In [None]:
# Look up the two ids once; they are passed unchanged to every split's dataset.
# (Presumably NERDataset uses them to pad sentences and labels — confirm in ner.py.)
pad_token_id = vocab['<pad>']
none_tag_id = tags["NONE"]

train_dataset = NERDataset(t_sentences, t_labels, pad_token_id, none_tag_id)
val_dataset = NERDataset(dev_sentences, dev_labels, pad_token_id, none_tag_id)
test_dataset = NERDataset(test_sentences, test_labels, pad_token_id, none_tag_id)

In [None]:
# Build the tagger; the vocabulary and tag-set sizes come from the loaded pickles.
model = NER(
  vocab_size=len(vocab),
  n_classes=len(tags),
  embedding_dim=95,
  hidden_size=200,
  num_layers=2,
  dropout=0.5,
)
print(model)

In [None]:
# Use the first CUDA device when one is available; otherwise stay on the CPU
# instead of raising on a hard-coded "cuda:0" move.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Stage 1: main run. With skip_prop=0.9, train() skips a batch whenever
# 0.9 > random.random().
train(model, train_dataset, epochs=10, batch_size=512, skip_prop=0.9)

# Stage 2: continue training on the dev and test splits.
# NOTE(review): the evaluation cell later in this notebook scores this same model on
# test_dataset and val_dataset, so those accuracies are measured on data the model
# was trained on — confirm this is intentional (e.g. a final fit on all data).
train(model, val_dataset, epochs=15, batch_size=32)
train(model, test_dataset, epochs=15, batch_size=32)

 26%|██▋       | 3570/13575 [00:27<01:01, 162.36it/s]

In [None]:
# train(model, val_dataset, epochs=10, batch_size=32)
# train(model, test_dataset, epochs=10, batch_size=16)

In [None]:
def evaluate(model, test_dataset, batch_size=64):
  """Measure exact-sequence accuracy of `model` on `test_dataset`.

  A sample counts as correct only when the arg-max prediction equals the label
  at every position of the sequence.

  Args:
    model: torch.nn.Module whose output has the classes in its last dimension.
    test_dataset: dataset yielding (input, label) pairs.
    batch_size: number of samples per batch.

  Returns:
    The fraction of fully correct sequences as a float, or 0.0 for an empty
    dataset. The value is also printed.
  """
  test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size, shuffle=False)
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  model = model.to(device)

  # Evaluate in eval mode (dropout off) even if the caller forgot to switch,
  # then restore whatever mode the model was in.
  was_training = model.training
  model.eval()
  matched_sequences = 0
  try:
    with torch.no_grad():
      for test_input, test_label in tqdm(test_dataloader):
        test_input = test_input.to(device)
        test_label = test_label.to(device)
        # Call the module itself (not .forward) so registered hooks run.
        output = model(test_input)

        # Check if entire sequence matches by comparing all positions
        sequence_matches = (torch.argmax(output, dim=-1) == test_label).all(dim=-1)
        matched_sequences += sequence_matches.sum().item()
  finally:
    model.train(was_training)

  sample_count = len(test_dataset)
  total_acc_test = matched_sequences / sample_count if sample_count else 0.0
  print(f'\nTest Accuracy: {total_acc_test}')
  return total_acc_test

In [None]:
# Put the model in inference mode, then score the test split followed by the dev split.
model.eval()
for split_dataset in (test_dataset, val_dataset):
  evaluate(model, split_dataset)

In [None]:
# Reverse lookup for `tags`: maps each tag's value back to its name.
inv_order_tags = {tag_id: tag_name for tag_name, tag_id in tags.items()}

from utils import tokenize, preprocess_tokens, project_tokens 
def test_sample(sample, model):
  s = tokenize(sample)
  s = preprocess_tokens(s)
  print(s)
  s = project_tokens(s, vocab)
  x_tensor = torch.tensor(s)
  device = torch.device("cuda:0")
  with torch.no_grad():
    output = model.forward(x_tensor.to(device))
    output = torch.argmax(output, dim=-1).to("cpu")
    print([inv_order_tags[x.item()] for x in output])

In [None]:
model.eval()
# Move to the first CUDA device when one is available; otherwise leave the model on
# the CPU instead of raising on a hard-coded "cuda:0" move.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
test_sample("I'd like to order two pizza and four drinks please also add 10 peperoni pizzas also a peperoni pie with no chess", model)

In [None]:
from model_io import save_model_state

# NOTE(review): the file name advertises "h300", but the model built earlier in this
# notebook uses hidden_size=200 — confirm the name matches the saved weights.
save_model_state(model, "models/order_boundary_e95_h300_l2_d0.5_rg0_x83.7.pth")
# Put the model back on the first CUDA device when one exists; otherwise keep it on
# the CPU instead of raising on a hard-coded "cuda:0" move.
model.to(torch.device("cuda:0" if torch.cuda.is_available() else "cpu"))

In [None]:
from model_io import load_model_state

# Rebuild the architecture before loading weights; hidden_size=600 matches the
# "h600" in the checkpoint file name below.
model_load_test = NER(
  embedding_dim=95, hidden_size=600, n_classes=len(tags), vocab_size=len(vocab), num_layers=2, dropout=0.5
)

load_model_state(model_load_test, "models/complex_e95_h600_l2_d0.5_cv0_x1.pth")
# Move to the first CUDA device when one is available; otherwise stay on the CPU
# instead of raising on a hard-coded "cuda:0" move.
model_load_test.to(torch.device("cuda:0" if torch.cuda.is_available() else "cpu"))
model_load_test.eval()
test_sample("extra love", model_load_test)