In [1]:
from tqdm import tqdm
import torch
from torch import nn
from utils_2 import load_data_file, orders_balancer, pizza_orders_balancer, randomizer_balancer
import pickle

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\xAbdoMo\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [3]:
vocab = pickle.load(open('data/vocab.pkl', 'rb'))

input_name = "complex_topping"
labels_name = "complex_topping_labels"

tags = pickle.load(open(f'data/tags_{input_name}.pkl', 'rb'))

print(len(vocab), len(tags))

461 5


In [4]:
t_sentences, t_labels, t_size = load_data_file(vocab, tags, f'processed_input/train_{input_name}.txt', f'processed_input/train_{labels_name}.txt', randomizer_balancer(3))
dev_sentences, dev_labels, dev_size =  load_data_file(vocab, tags, f'processed_input/dev_{input_name}.txt', f'processed_input/dev_{labels_name}.txt', randomizer_balancer(3))
test_sentences, test_labels, test_size =  load_data_file(vocab, tags, f'processed_input/test_{input_name}.txt', f'processed_input/test_{labels_name}.txt', randomizer_balancer(2))

Sentences loader: 100%|██████████| 439054/439054 [00:03<00:00, 122407.14it/s]
Labels loader: 100%|██████████| 439054/439054 [00:00<00:00, 749267.00it/s]
Randomizer balancer - preload: 439054it [00:04, 89333.08it/s] 
Randomizer balancer - finalize: 100%|██████████| 1317162/1317162 [00:00<00:00, 5897160.44it/s]
Projector: 100%|██████████| 1317162/1317162 [00:48<00:00, 27206.18it/s]
Sentences loader: 100%|██████████| 85/85 [00:00<00:00, 85046.72it/s]
Labels loader: 100%|██████████| 85/85 [00:00<?, ?it/s]
Randomizer balancer - preload: 85it [00:00, 84985.90it/s]
Randomizer balancer - finalize: 100%|██████████| 255/255 [00:00<00:00, 255261.94it/s]
Projector: 100%|██████████| 255/255 [00:00<00:00, 46220.72it/s]
Sentences loader: 100%|██████████| 311/311 [00:00<00:00, 155289.11it/s]
Labels loader: 100%|██████████| 311/311 [00:00<?, ?it/s]
Randomizer balancer - preload: 311it [00:00, 204873.34it/s]
Randomizer balancer - finalize: 100%|██████████| 622/622 [00:00<?, ?it/s]
Projector: 100%|██████

In [5]:
print('The training size is', t_size)
print('The validation size is', dev_size)
print('The testing size is', test_size)

The training size is 1317162
The validation size is 255
The testing size is 622


In [6]:
from ner import NER, NERDataset

In [7]:
import random


def train(model, train_dataset, batch_size=512, epochs=10, learning_rate=0.01, skip_prop=0.0):
  model.train()   # switch to train mode
  train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True)
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(), learning_rate)
  use_cuda = torch.cuda.is_available()
  device = torch.device("cuda:0" if use_cuda else "cpu")
  if use_cuda:
    model = model.to(device)
    criterion = criterion.cuda(device)
    pass

  for epoch_num in range(epochs):
    total_acc_train = 0
    total_loss_train = 0
  
    for train_input, train_label in tqdm(train_dataloader):
      if skip_prop > random.random():
        continue    # skip this batch
      train_input = train_input.to(device)
      train_label = train_label.to(device)
      output = model.forward(train_input)
      batch_loss = criterion(output.view(-1, output.shape[-1]), train_label.view(-1))
      total_loss_train += batch_loss
      acc = (torch.argmax(output, dim=-1) == train_label).sum().item()
      total_acc_train += acc
      optimizer.zero_grad()
      batch_loss.backward()
      optimizer.step()
      
    epoch_loss = total_loss_train / len(train_dataset)
  
    sample_count = len(train_dataset)
    seq_length = train_dataset[0][0].shape[0]
    epoch_acc = total_acc_train / (sample_count * seq_length)
  
  
    print(
        f'Epochs: {epoch_num + 1} | Train Loss: {epoch_loss} \
        | Train Accuracy: {epoch_acc}\n')

In [8]:
train_dataset = NERDataset(t_sentences, t_labels, vocab['<pad>'], tags["NONE"])
val_dataset = NERDataset(dev_sentences, dev_labels, vocab['<pad>'], tags["NONE"])
test_dataset = NERDataset(test_sentences, test_labels, vocab['<pad>'], tags["NONE"])

In [9]:
model = NER(embedding_dim=95, hidden_size=600, n_classes=len(tags), vocab_size=len(vocab), num_layers=2, dropout=0.5)
print(model)

NER(
  (embedding): Embedding(461, 95)
  (dropout): Dropout(p=0.5, inplace=False)
  (lstm): LSTM(95, 600, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
  (linear): Linear(in_features=1200, out_features=5, bias=True)
)


In [10]:
model = model.to(torch.device("cuda:0"))
train(model, train_dataset, epochs=10, batch_size=512, skip_prop=0.9)
train(model, val_dataset, epochs=15, batch_size=32)
train(model, test_dataset, epochs=15, batch_size=32)

100%|██████████| 2573/2573 [00:25<00:00, 101.70it/s]


Epochs: 1 | Train Loss: 1.2624144801520742e-05         | Train Accuracy: 0.09926616847434104



100%|██████████| 2573/2573 [00:24<00:00, 103.35it/s]


Epochs: 2 | Train Loss: 2.2513106934241023e-09         | Train Accuracy: 0.0991222036469318



100%|██████████| 2573/2573 [00:25<00:00, 99.90it/s] 


Epochs: 3 | Train Loss: 1.187060449936439e-09         | Train Accuracy: 0.10479045098476877



100%|██████████| 2573/2573 [00:27<00:00, 94.09it/s] 


Epochs: 4 | Train Loss: 6.981829558938557e-10         | Train Accuracy: 0.10806263770136096



100%|██████████| 2573/2573 [00:26<00:00, 96.26it/s] 


Epochs: 5 | Train Loss: 3.867783870958874e-10         | Train Accuracy: 0.100514591219607



100%|██████████| 2573/2573 [00:27<00:00, 94.08it/s] 


Epochs: 6 | Train Loss: 2.9015201352677877e-10         | Train Accuracy: 0.10556788003298



100%|██████████| 2573/2573 [00:28<00:00, 91.69it/s] 


Epochs: 7 | Train Loss: 2.1907753389172058e-10         | Train Accuracy: 0.10495292150851604



100%|██████████| 2573/2573 [00:25<00:00, 100.56it/s]


Epochs: 8 | Train Loss: 1.575521657581902e-10         | Train Accuracy: 0.10184320531567112



100%|██████████| 2573/2573 [00:26<00:00, 96.99it/s] 


Epochs: 9 | Train Loss: 1.240770958199633e-10         | Train Accuracy: 0.09601248745408689



100%|██████████| 2573/2573 [00:26<00:00, 96.55it/s] 


Epochs: 10 | Train Loss: 1.0545012779639151e-10         | Train Accuracy: 0.0987334891228262



100%|██████████| 8/8 [00:00<00:00, 68.46it/s]


Epochs: 1 | Train Loss: 0.0001649462792556733         | Train Accuracy: 0.9992156862745099



100%|██████████| 8/8 [00:00<00:00, 73.55it/s]


Epochs: 2 | Train Loss: 0.0005526450695469975         | Train Accuracy: 0.9976470588235294



100%|██████████| 8/8 [00:00<00:00, 71.29it/s]


Epochs: 3 | Train Loss: 8.901629917090759e-05         | Train Accuracy: 0.9992156862745099



100%|██████████| 8/8 [00:00<00:00, 72.70it/s]


Epochs: 4 | Train Loss: 6.580874469364062e-06         | Train Accuracy: 1.0



100%|██████████| 8/8 [00:00<00:00, 68.58it/s]


Epochs: 5 | Train Loss: 1.3891845185298735e-07         | Train Accuracy: 1.0



100%|██████████| 8/8 [00:00<00:00, 62.79it/s]


Epochs: 6 | Train Loss: 1.5679614762120764e-07         | Train Accuracy: 1.0



100%|██████████| 8/8 [00:00<00:00, 76.94it/s]


Epochs: 7 | Train Loss: 1.6664046142977895e-07         | Train Accuracy: 1.0



100%|██████████| 8/8 [00:00<00:00, 82.62it/s]


Epochs: 8 | Train Loss: 1.7953787345703631e-09         | Train Accuracy: 1.0



100%|██████████| 8/8 [00:00<00:00, 78.43it/s]


Epochs: 9 | Train Loss: 5.635292787964374e-10         | Train Accuracy: 1.0



100%|██████████| 8/8 [00:00<00:00, 83.22it/s]


Epochs: 10 | Train Loss: 1.0053777810270503e-09         | Train Accuracy: 1.0



100%|██████████| 8/8 [00:00<00:00, 88.39it/s]


Epochs: 11 | Train Loss: 1.0730439869988118e-09         | Train Accuracy: 1.0



100%|██████████| 8/8 [00:00<00:00, 78.53it/s]


Epochs: 12 | Train Loss: 4.696549260607696e-10         | Train Accuracy: 1.0



100%|██████████| 8/8 [00:00<00:00, 81.35it/s]


Epochs: 13 | Train Loss: 9.484470986365068e-10         | Train Accuracy: 1.0



100%|██████████| 8/8 [00:00<00:00, 79.30it/s]


Epochs: 14 | Train Loss: 1.01413866193667e-09         | Train Accuracy: 1.0



100%|██████████| 8/8 [00:00<00:00, 83.21it/s]


Epochs: 15 | Train Loss: 2.761565975895053e-10         | Train Accuracy: 1.0



100%|██████████| 20/20 [00:00<00:00, 86.30it/s]


Epochs: 1 | Train Loss: 0.0005595750990323722         | Train Accuracy: 0.9971864951768489



100%|██████████| 20/20 [00:00<00:00, 85.10it/s]


Epochs: 2 | Train Loss: 0.0008941511041484773         | Train Accuracy: 0.997588424437299



100%|██████████| 20/20 [00:00<00:00, 88.51it/s]


Epochs: 3 | Train Loss: 3.322603151900694e-05         | Train Accuracy: 0.9995980707395499



100%|██████████| 20/20 [00:00<00:00, 88.50it/s]


Epochs: 4 | Train Loss: 2.2679946596326772e-06         | Train Accuracy: 1.0



100%|██████████| 20/20 [00:00<00:00, 88.47it/s]


Epochs: 5 | Train Loss: 6.738087421354066e-08         | Train Accuracy: 1.0



100%|██████████| 20/20 [00:00<00:00, 87.17it/s]


Epochs: 6 | Train Loss: 2.2751772021933903e-08         | Train Accuracy: 1.0



100%|██████████| 20/20 [00:00<00:00, 89.02it/s]


Epochs: 7 | Train Loss: 1.5196812341855548e-07         | Train Accuracy: 1.0



100%|██████████| 20/20 [00:00<00:00, 83.34it/s]


Epochs: 8 | Train Loss: 4.1275946216501325e-08         | Train Accuracy: 1.0



100%|██████████| 20/20 [00:00<00:00, 81.30it/s]


Epochs: 9 | Train Loss: 9.708602810576394e-09         | Train Accuracy: 1.0



100%|██████████| 20/20 [00:00<00:00, 83.45it/s]


Epochs: 10 | Train Loss: 4.446482293474219e-09         | Train Accuracy: 1.0



100%|██████████| 20/20 [00:00<00:00, 87.10it/s]


Epochs: 11 | Train Loss: 3.4400500226183794e-09         | Train Accuracy: 1.0



100%|██████████| 20/20 [00:00<00:00, 87.64it/s]


Epochs: 12 | Train Loss: 1.5022147970000788e-09         | Train Accuracy: 1.0



100%|██████████| 20/20 [00:00<00:00, 74.36it/s]


Epochs: 13 | Train Loss: 1.1379410747025531e-09         | Train Accuracy: 1.0



100%|██████████| 20/20 [00:00<00:00, 74.04it/s]


Epochs: 14 | Train Loss: 9.696019542815293e-10         | Train Accuracy: 1.0



100%|██████████| 20/20 [00:00<00:00, 90.04it/s]

Epochs: 15 | Train Loss: 1.1274647881975852e-09         | Train Accuracy: 1.0






In [44]:
train(model, val_dataset, epochs=5, batch_size=32)
train(model, test_dataset, epochs=5, batch_size=16)

100%|██████████| 147/147 [00:04<00:00, 32.61it/s]


Epochs: 1 | Train Loss: 0.0007481177453882992         | Train Accuracy: 0.9980377325396564



100%|██████████| 147/147 [00:04<00:00, 32.55it/s]


Epochs: 2 | Train Loss: 0.001016600290313363         | Train Accuracy: 0.9973690268688131



100%|██████████| 147/147 [00:04<00:00, 32.64it/s]


Epochs: 3 | Train Loss: 0.0009267125278711319         | Train Accuracy: 0.9977307856743513



100%|██████████| 147/147 [00:04<00:00, 32.99it/s]


Epochs: 4 | Train Loss: 0.0007141839014366269         | Train Accuracy: 0.9980925444798895



100%|██████████| 147/147 [00:04<00:00, 32.64it/s]

Epochs: 5 | Train Loss: 0.0011853700270876288         | Train Accuracy: 0.9972155534361605






In [11]:
def evaluate(model, test_dataset, batch_size=64):
  test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size,shuffle=False)
  use_cuda = torch.cuda.is_available()
  device = torch.device("cuda" if use_cuda else "cpu")
  if use_cuda:
    model = model.cuda()

  total_acc_test = 0
  
  with torch.no_grad():
    for test_input, test_label in tqdm(test_dataloader):
      test_input = test_input.to(device)
      test_label = test_label.to(device)
      output = model.forward(test_input)

      # Check if entire sequence matches by comparing all positions
      sequence_matches = (torch.argmax(output, dim=-1) == test_label).all(dim=-1)
      acc = sequence_matches.sum().item()
      total_acc_test += acc
    
    total_acc_test /= len(test_dataset)
  print(f'\nTest Accuracy: {total_acc_test}')

In [12]:
model.eval()
evaluate(model, test_dataset)
evaluate(model, val_dataset)

100%|██████████| 10/10 [00:00<00:00, 232.52it/s]



Test Accuracy: 1.0


100%|██████████| 4/4 [00:00<00:00, 307.69it/s]


Test Accuracy: 1.0





In [14]:
inv_order_tags = {}
for tag, value in tags.items():
  inv_order_tags[value] = tag

from utils_2 import tokenize, preprocess_tokens, project_tokens 
def test_sample(sample, model):
  s = tokenize(sample)
  s = preprocess_tokens(s, 0)
  print(s)
  s = project_tokens(s, vocab)
  x_tensor = torch.tensor(s)
  device = torch.device("cuda:0")
  with torch.no_grad():
    output = model.forward(x_tensor.to(device))
    output = torch.argmax(output, dim=-1).to("cpu")
    print([inv_order_tags[x.item()] for x in output])

In [20]:
model.eval()
model = model.to(torch.device("cuda:0"))
test_sample("extra love", model)

['extra', 'love']
['QUANTITY_S', 'NONE']


In [21]:
from model_io import save_model_state

save_model_state(model, "models/complex_e95_h600_l2_d0.5_cv0_x1.pth")
model.to(torch.device("cuda:0"))

NER(
  (embedding): Embedding(461, 95)
  (dropout): Dropout(p=0.5, inplace=False)
  (lstm): LSTM(95, 600, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
  (linear): Linear(in_features=1200, out_features=5, bias=True)
)

In [22]:
from model_io import load_model_state

model_load_test = NER(
  embedding_dim=95, hidden_size=600, n_classes=len(tags), vocab_size=len(vocab), num_layers=2, dropout=0.5
)

load_model_state(model_load_test, "models/complex_e95_h600_l2_d0.5_cv0_x1.pth")
model_load_test.to(torch.device("cuda:0"))
model_load_test.eval()
test_sample("extra love", model_load_test)

['extra', 'love']
['QUANTITY_S', 'NONE']


  state_dict = torch.load(path, device)
