In [1]:
import json
import re
from collections import defaultdict

ENTITY_KEYS = {
    "NUMBER", "SIZE", "STYLE", "TOPPING", "COMPLEX_TOPPING", "QUANTITY",
    "VOLUME", "DRINKTYPE", "CONTAINERTYPE"
}
ORDER_KEYS = {"PIZZAORDER", "DRINKORDER"}

def tokenize(s):
    tokens = re.findall(r'\(|\)|[^\s()]+', s)
    return tokens

def parse_tokens(tokens):
    stack = []
    current_list = []
    for token in tokens:
        if token == '(':
            stack.append(current_list)
            current_list = []
        elif token == ')':
            finished = current_list
            current_list = stack.pop()
            current_list.append(finished)
        else:
            current_list.append(token)
    return current_list
def extract_orders(structure, order_index=1):

    results = []

    if not isinstance(structure, list) or len(structure) == 0:
        return results, order_index

    first = structure[0]
    if isinstance(first, list):
        for elem in structure:
            sub_results, order_index = extract_orders(elem, order_index)
            results.extend(sub_results)
        return results, order_index

    if isinstance(first, str) and first in ORDER_KEYS:
        order_type = "PIZZAORDER" if first == "PIZZAORDER" else "DRINKORDER"
        current_order_sequence = order_index
        order_index += 1
        content_tokens = []
        for elem in structure[1:]:
            content_tokens.extend(collect_tokens(elem))
        for tok in content_tokens:
            results.append((tok, order_type, current_order_sequence))

        return results, order_index
    else:
        for elem in structure:
            sub_results, order_index = extract_orders(elem, order_index)
            results.extend(sub_results)
        return results, order_index
def collect_tokens(node):
    collected = []
    if isinstance(node, list):
        for sub in node:
            sub_tokens = collect_tokens(sub)
            collected.extend(sub_tokens)
    else:
        if node not in ["(", ")"] and not is_structural_key(node):
            collected.append(node)
    return collected

def is_structural_key(token):
    return token in [
        "ORDER","PIZZAORDER","DRINKORDER","NUMBER","SIZE","STYLE","TOPPING",
        "COMPLEX_TOPPING","QUANTITY","VOLUME","DRINKTYPE","CONTAINERTYPE","NOT"
    ]


In [2]:

def label_input(input_text, top):
    tokens = tokenize(top)
    parsed = parse_tokens(tokens)

    order_info, _ = extract_orders(parsed)
    label_dict = defaultdict(list)
    for tok, lbl, num in order_info:
        label_dict[tok.lower()].append((lbl, num))

    input_tokens = input_text.split()
    labeled_input = []

    used_labels = defaultdict(int)

    for token in input_tokens:
        token_lower = token.lower()
        if token_lower in label_dict:

            label_index = used_labels[token_lower]
            if label_index < len(label_dict[token_lower]):
                token_label, sequence_number = label_dict[token_lower][label_index]
                used_labels[token_lower] += 1 
            else:
                token_label, sequence_number = 'O', None  
        else:
            token_label, sequence_number = 'O', None  

        labeled_input.append((token, token_label, sequence_number))
    return labeled_input

top = "(ORDER i need (PIZZAORDER (NUMBER a ) (SIZE medium ) (TOPPING ham ) and (TOPPING pineapple ) pizza ) and (DRINKORDER (NUMBER a ) (VOLUME small ) (DRINKTYPE iced tea ) ) )"

input_text = "i need a medium ham and pineapple pizza and a small iced tea"
input_label_sequence = label_input(input_text, top)
print(input_label_sequence)

[('i', 'O', None), ('need', 'O', None), ('a', 'PIZZAORDER', 1), ('medium', 'PIZZAORDER', 1), ('ham', 'PIZZAORDER', 1), ('and', 'PIZZAORDER', 1), ('pineapple', 'PIZZAORDER', 1), ('pizza', 'PIZZAORDER', 1), ('and', 'O', None), ('a', 'DRINKORDER', 2), ('small', 'DRINKORDER', 2), ('iced', 'DRINKORDER', 2), ('tea', 'DRINKORDER', 2)]


In [3]:

def transform_to_labels(input_array):
    labeled_numbers = []

    for _, label, sequence in input_array:
        # Compute the numerical label
        if label == 'O' and sequence is None:
            numerical_label = 0  # Neutral/irrelevant
        elif label == 'PIZZAORDER':
            numerical_label = 10 + sequence  # Unique range for pizza orders
        elif label == 'DRINKORDER':
            numerical_label = 20 + sequence  # Unique range for drink orders
        else:
            numerical_label = 0  # Default fallback

        labeled_numbers.append(numerical_label)

    return labeled_numbers

transform_to_labels(input_label_sequence)

[0, 0, 11, 11, 11, 11, 11, 11, 0, 22, 22, 22, 22]

In [4]:
def create_training_data(input_file: str, output_file: str):

    with open(input_file, 'r') as infile, open(output_file, 'w') as outfile:
        for line in infile:
            record = json.loads(line)
            src = record["dev.SRC"]
            top = record["dev.TOP"]

            labeled_input = label_input(src, top)
            numerical_labels = transform_to_labels(labeled_input)

            training_instance = {
                "text": src,
                "labels": numerical_labels
            }

            outfile.write(json.dumps(training_instance) + "\n")

# File paths
input_file = "../dataset/PIZZA_dev.json"
output_file = "../dataset/dev_data_model1.json"

# Generate the training data
#create_training_data(input_file,output_file)

In [5]:
import pandas as pd
train_path = '../dataset/PIZZA_train_model2.json'
dev_path = "../dataset/PIZZA_dev_model2.json"
df = pd.read_json(train_path, lines=True)
dev = pd.read_json(dev_path, lines=True)
df.describe()

Unnamed: 0,text,labels
count,2456446,2456446
unique,2456446,14327
top,can i have a large bbq pulled pork,"[19, 19, 5, 19, 19, 13, 19, 5, 1, 2]"
freq,1,16250


In [6]:
dev.describe()

Unnamed: 0,text,labels
count,348,348
unique,348,298
top,i want to order two medium pizzas with sausage...,"[19, 19, 5, 3, 13, 19, 13, 19, 19, 19, 19, 19]"
freq,1,7


In [7]:
X_train = df['text']
y_train = df['labels']
X_test = dev['text']
y_test = dev['labels']

In [8]:
X_train[0], y_train[0]

('can i have a large bbq pulled pork', [19, 19, 19, 5, 3, 13, 14, 14])

In [9]:
import re
from tensorflow.keras.preprocessing.sequence import pad_sequences

ENTITY_KEYS = {
    "NUMBER", "SIZE", "STYLE", "TOPPING", "COMPLEX_TOPPING", "QUANTITY",
    "VOLUME", "DRINKTYPE", "CONTAINERTYPE"
}

# Define a mapping for entity keys to numerical labels
LABEL_MAP = {'B-DRINKTYPE': 1, 'I-DRINKTYPE': 2, 'B-SIZE': 3, 'I-SIZE': 4, 'B-NUMBER': 5, 'I-NUMBER': 6, 'B-CONTAINERTYPE': 7, 'I-CONTAINERTYPE': 8, 'B-COMPLEX_TOPPING': 9, 'I-COMPLEX_TOPPING': 10, 'B-VOLUME': 11, 'I-VOLUME': 12, 'B-TOPPING': 13, 'I-TOPPING': 14, 'B-QUANTITY': 15, 'I-QUANTITY': 16, 'B-STYLE': 17, 'I-STYLE': 18, 'O': 19}

vocab = {"<PAD>": 0, "<SOS>": 1, "<EOS>": 2, "<UNK>": 3}  # Special tokens

def tokenize_output(output):
    """
    Tokenizes the structured output into meaningful tokens.
    Example:
        Input: "(ORDER (PIZZAORDER (NUMBER a ) (SIZE large ) (TOPPING bbq pulled pork ) ) )"
        Output: ["(ORDER", "(PIZZAORDER", "(NUMBER", "a", "(SIZE", "large", "(TOPPING", "bbq", "pulled", "pork", ")", ")", ")", ")"]
    """
    tokens = re.findall(r"\(|\)|\w+|[^\s()]+", output)
    return tokens

def build_vocab(outputs, index):
    """
    Builds a vocabulary from tokenized outputs.
    """
    i = index
    for output in outputs:
        tokens = tokenize_output(output)
        for token in tokens:
            if token not in vocab:
                vocab[token] = i
                i += 1
    return vocab, i
def encode_outputs(outputs, vocab):
    encoded = []
    for output in outputs:
        tokens = tokenize_output(output)  # Tokenize the output
        sequence = [vocab.get(token, vocab.get("<UNK>", 0)) for token in tokens] # [vocab.get("<SOS>", 0)] + \+ \[vocab.get("<EOS>", 0)]
        encoded.append(sequence)
    return encoded

def pad_sequences_to_fixed_length(sequences, max_len):
    """
    Pads sequences to a fixed length.
    """
    return pad_sequences(sequences, maxlen=max_len, padding="post", value=0)

def decode_sequence(sequence):
    """
    Decodes a sequence of integers back into their ENTITY_KEYS.
    """
    output = []
    # remove all padding tokens 
    sequence = [i for i in sequence if i != 0]
    for i in sequence:
        # use the LABEL_MAP to decode the integer
        output.append(list(LABEL_MAP.keys())[list(LABEL_MAP.values()).index(i)])
    return output

decode_sequence([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])



['B-DRINKTYPE',
 'I-DRINKTYPE',
 'B-SIZE',
 'I-SIZE',
 'B-NUMBER',
 'I-NUMBER',
 'B-CONTAINERTYPE',
 'I-CONTAINERTYPE',
 'B-COMPLEX_TOPPING',
 'I-COMPLEX_TOPPING',
 'B-VOLUME',
 'I-VOLUME']

In [10]:


def prepare_data(
    X_train, y_train, X_test, y_test, max_len_1=20, max_len_2 = 20
):

    index = 4
    X_vocab, index = build_vocab(X_train,index)  # Build vocabulary from training outputs
    X_train_encoded = encode_outputs(X_train, X_vocab)  # Encode training outputs
    X_test_encoded = encode_outputs(X_test, X_vocab)  # Encode testing outputs
    X_train_processed = pad_sequences_to_fixed_length(X_train_encoded, max_len_1)
    X_test_processed = pad_sequences_to_fixed_length(X_test_encoded, max_len_1)

    y_train_processed = pad_sequences_to_fixed_length(y_train, max_len_2)
    y_test_processed = pad_sequences_to_fixed_length(y_test, max_len_2)


    return (
        X_train_processed,
        X_test_processed,
        y_train_processed,
        y_test_processed,
        X_vocab,
    )


In [11]:
X_train_processed, X_test_processed, y_train_processed, y_test_processed, vocab  = prepare_data( X_train, y_train, X_test, y_test, max_len_1=40, max_len_2=40)

In [12]:
X_train_processed

array([[ 4,  5,  6, ...,  0,  0,  0],
       [ 8, 12, 13, ...,  0,  0,  0],
       [ 5, 19, 20, ...,  0,  0,  0],
       ...,
       [ 5, 19, 20, ...,  0,  0,  0],
       [ 5, 19, 20, ...,  0,  0,  0],
       [ 5, 19, 20, ...,  0,  0,  0]])

In [13]:
y_train_processed[1]

array([ 3, 19, 19, 13, 14, 19, 19, 15, 13,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0])

In [14]:
vocab

{'<PAD>': 0,
 '<SOS>': 1,
 '<EOS>': 2,
 '<UNK>': 3,
 'can': 4,
 'i': 5,
 'have': 6,
 'a': 7,
 'large': 8,
 'bbq': 9,
 'pulled': 10,
 'pork': 11,
 'pie': 12,
 'with': 13,
 'green': 14,
 'pepper': 15,
 'and': 16,
 'extra': 17,
 'peperonni': 18,
 "'d": 19,
 'like': 20,
 'vegetarian': 21,
 'pizza': 22,
 'party': 23,
 'size': 24,
 'stuffed': 25,
 'crust': 26,
 'american': 27,
 'cheese': 28,
 'mushroom': 29,
 'one': 30,
 'personal': 31,
 'sized': 32,
 'artichoke': 33,
 'banana': 34,
 'peppperonis': 35,
 'low': 36,
 'fat': 37,
 'want': 38,
 'regular': 39,
 'without': 40,
 'any': 41,
 'fried': 42,
 'onions': 43,
 'little': 44,
 'bit': 45,
 'of': 46,
 'high': 47,
 'rise': 48,
 'dough': 49,
 'lot': 50,
 'olive': 51,
 'pesto': 52,
 'sauce': 53,
 'peperonis': 54,
 'yellow': 55,
 'meatball': 56,
 '-': 57,
 'bean': 58,
 'big': 59,
 'meat': 60,
 'mushrooms': 61,
 'pecorino': 62,
 'balsamic': 63,
 'glaze': 64,
 'black': 65,
 'chicken': 66,
 'mozzarella': 67,
 'italian': 68,
 'sausage': 69,
 'olives': 

In [15]:
X_test_processed

array([[  5,  38,   3, ...,   0,   0,   0],
       [214,  94, 207, ...,   0,   0,   0],
       [  5,  74,   3, ...,   0,   0,   0],
       ...,
       [  3,   7,  22, ...,   0,   0,   0],
       [  3,   7,   3, ...,   0,   0,   0],
       [  3,   5,  74, ...,   0,   0,   0]])

In [16]:
y_test_processed

array([[19, 19, 19, ...,  0,  0,  0],
       [ 5,  3, 19, ...,  0,  0,  0],
       [19, 19, 19, ...,  0,  0,  0],
       ...,
       [19,  5, 19, ...,  0,  0,  0],
       [19, 19, 19, ...,  0,  0,  0],
       [19, 19, 19, ...,  0,  0,  0]])

In [17]:
X_test_processed.shape, y_test_processed.shape


((348, 40), (348, 40))

In [18]:
X_train_processed.shape, y_train_processed.shape

((2456446, 40), (2456446, 40))

In [19]:
import torch
from torch.utils.data import Dataset, DataLoader

class SequenceDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = torch.tensor(inputs, dtype=torch.long) 
        self.targets = torch.tensor(targets, dtype=torch.long) 

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return {
            "src_input_ids": self.inputs[idx],
            "tgt_input_ids": self.targets[idx],
        }



train_dataset = SequenceDataset(X_train_processed, y_train_processed)
test_dataset = SequenceDataset(X_test_processed, y_test_processed)

batch_size = 128  # Adjust based GPU ;-;  memory
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [20]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import BatchNorm1d
class BiLSTMModel(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim, num_layers=3, dropout=0.5):
        super(BiLSTMModel, self).__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim, padding_idx=0)
        
        # Bidirectional LSTM
        self.bilstm_1 = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            bidirectional=True,
            batch_first=True,
            dropout=dropout,
        )

        # Batch normalization
        #self.batchnorm_1 = BatchNorm1d(hidden_dim * 2)

        # Fully connected layers
        self.fc2 = nn.Linear(hidden_dim * 2, output_dim)


    def forward(self, x):
        # Embedding layer
        embedded = self.embedding(x)

        # BiLSTM layer
        lstm_out, _ = self.bilstm_1(embedded)

        # # # Batch normalization
        # lstm_out = lstm_out.permute(0, 2, 1)
        # lstm_out = self.batchnorm_1(lstm_out)
        # lstm_out = lstm_out.permute(0, 2, 1)

        output = self.fc2(lstm_out)
        return output


In [21]:
len(vocab)

307

In [22]:

input_dim = len(vocab)
embedding_dim = 128
hidden_dim = 128  
output_dim = y_train_processed.shape[1]  
num_layers = 2  
dropout = 0.2 

device = torch.device("cuda") #"cuda" if torch.cuda.is_available() else
model = BiLSTMModel(input_dim, embedding_dim, hidden_dim, output_dim, num_layers, dropout).to(device)

In [23]:
def evaluate_model_with_accuracy(model, dataloader, criterion, device):
    model.eval()
    epoch_loss = 0
    total_tokens = 0
    correct_tokens = 0

    with torch.no_grad():
        for batch in dataloader:
            src = batch["src_input_ids"].to(device)
            tgt = batch["tgt_input_ids"].to(device)

            output = model(src)
            output_dim = output.shape[-1]

            # Flatten outputs and targets
            output = output.view(-1, output_dim)  # Shape: (batch_size * seq_len, output_dim)
            tgt = tgt.view(-1)  # Shape: (batch_size * seq_len)

            # # Apply mask to remove padding tokens
            # mask = tgt != 0  # Mask to ignore padding indices
            # output = output[mask]  # Filter model outputs
            # tgt = tgt[mask]  # Filter targets

            # Compute loss
            loss = criterion(output, tgt)
            epoch_loss += loss.item()

            # Calculate accuracy
            predictions = output.argmax(dim=1)  # Get the index of the max log-probability
            correct_tokens += (predictions == tgt).sum().item()
            total_tokens += tgt.size(0)

    accuracy = correct_tokens / total_tokens if total_tokens > 0 else 0
    return epoch_loss / len(dataloader), accuracy


In [24]:
import torch.optim as optim
from tqdm import tqdm

criterion = nn.CrossEntropyLoss()  # Use for multi-class classification ignore_index=0 for padding
optimizer = optim.Adam(model.parameters(), lr=0.0071, weight_decay=1e-5)

for epoch in range(20):  # Number of epochs
    model.train()
    epoch_loss = 0
    total_batches = len(train_dataloader)
    progress_bar = tqdm(train_dataloader, desc="Training Progress", unit="batch", leave=True)

    for batch_idx, batch in enumerate(progress_bar):  # Assuming a DataLoader is used
        src = batch["src_input_ids"].to(device)  # Input tokens
        tgt = batch["tgt_input_ids"].to(device)  # Target tokens
        # print(src)
        # print(tgt)

        optimizer.zero_grad()
        output = model(src)  # Forward pass
        output_dim = output.shape[-1]

        # Flatten outputs and targets for loss computation
        output = output.view(-1, output_dim)  # Shape: (batch_size * seq_len, output_dim)
        tgt = tgt.view(-1)  # Shape: (batch_size * seq_len)

        # Apply mask to remove padding tokens
        # mask = tgt != 0  # Mask to ignore padding indices
        # output = output[mask]  # Filter model outputs
        # tgt = tgt[mask]  # Filter targets
        # print(output.shape)
        # print(tgt.shape)

        # Compute loss
        loss = criterion(output, tgt)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        avg_loss = epoch_loss / (batch_idx + 1)
        progress_bar.set_description(f"Training Progress: Batch {batch_idx + 1}/{total_batches}, Avg Loss: {avg_loss:.8f}")

    val_loss, accuracy = evaluate_model_with_accuracy(model, test_dataloader, criterion, device)
    print(f"Epoch {epoch + 1}, Loss: {epoch_loss / len(train_dataloader):.8f}, Val Loss: {val_loss:.8f}, Accuracy: {accuracy * 100:.4f}%")


Training Progress: Batch 19191/19191, Avg Loss: 0.00325971: 100%|██████████| 19191/19191 [05:55<00:00, 53.93batch/s]


Epoch 1, Loss: 0.00325971, Val Loss: 1.35810190, Accuracy: 85.3376%


Training Progress: Batch 19191/19191, Avg Loss: 0.00470291: 100%|██████████| 19191/19191 [05:41<00:00, 56.24batch/s]


Epoch 2, Loss: 0.00470291, Val Loss: 2.28432643, Accuracy: 80.9195%


Training Progress: Batch 19191/19191, Avg Loss: 0.00193132: 100%|██████████| 19191/19191 [05:41<00:00, 56.22batch/s]


Epoch 3, Loss: 0.00193132, Val Loss: 1.17015807, Accuracy: 87.0905%


Training Progress: Batch 19191/19191, Avg Loss: 0.00121775: 100%|██████████| 19191/19191 [05:46<00:00, 55.43batch/s]


Epoch 4, Loss: 0.00121775, Val Loss: 1.13598228, Accuracy: 87.2486%


Training Progress: Batch 19191/19191, Avg Loss: 0.00168327: 100%|██████████| 19191/19191 [05:51<00:00, 54.66batch/s]


Epoch 5, Loss: 0.00168327, Val Loss: 2.07814384, Accuracy: 83.1537%


Training Progress: Batch 19191/19191, Avg Loss: 0.00167709: 100%|██████████| 19191/19191 [05:46<00:00, 55.35batch/s]


Epoch 6, Loss: 0.00167709, Val Loss: 1.33062317, Accuracy: 84.7557%


Training Progress: Batch 165/19191, Avg Loss: 0.00050351:   1%|          | 164/19191 [00:03<06:02, 52.50batch/s]


KeyboardInterrupt: 

In [25]:
def save_model(model, path):
    torch.save(model.state_dict(), path)
    print(f"Model saved to {path}")

def load_model(model, path):
    model.load_state_dict(torch.load(path))
    print(f"Model loaded from {path}")
    return model

In [26]:
save_model(model, "../weights/Bilstm_model2.pt")

Model saved to ../weights/Bilstm_model2.pt


### Testing Real Output sequence

In [27]:
test_dataset.__getitem__(1)["src_input_ids"]

tensor([214,  94, 207,  13, 161,  16, 138,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0])

In [28]:

with torch.no_grad():

    output = model( test_dataset.__getitem__(1)["src_input_ids"].to(device))
    predictions = output.argmax(dim=1)
    # print the sentence from the test set
    print(X_test[1])
predictions

five medium pizzas with tomatoes and ham


tensor([ 5,  3, 19, 19, 13, 19, 13,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0], device='cuda:0')

In [29]:
tgt= test_dataset.__getitem__(1)["tgt_input_ids"].to(device)
tgt

tensor([ 5,  3, 19, 19, 13, 19, 13,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0], device='cuda:0')

In [30]:

sequence = predictions.cpu().tolist()
pred_sequence= decode_sequence(sequence)
pred_sequence

['B-NUMBER', 'B-SIZE', 'O', 'O', 'B-TOPPING', 'O', 'B-TOPPING']

In [31]:
sequence = tgt.cpu().tolist()
tgt_sequence = decode_sequence(sequence)
tgt_sequence

['B-NUMBER', 'B-SIZE', 'O', 'O', 'B-TOPPING', 'O', 'B-TOPPING']

In [32]:
pred_sequence == tgt_sequence

True

In [33]:
def evaluate_model_with_sequence_accuracy(model, dataloader, device):
    model.eval()
    total_sequences = 0
    correct_sequences = 0

    with torch.no_grad():
        for batch in dataloader:
            src = batch["src_input_ids"].to(device)
            tgt = batch["tgt_input_ids"].to(device)

            output = model(src)
            output = output.argmax(dim=-1)


            for pred, tgt_seq in zip(output, tgt):
                if torch.equal(pred, tgt_seq): 
                    correct_sequences += 1
                else:
                    print("Predicted:", pred)
                    print("Target:", tgt_seq)
                    print(X_test[total_sequences])
                total_sequences += 1

    print(f"Correct {correct_sequences}, Total {total_sequences}")
    sequence_accuracy = correct_sequences / total_sequences if total_sequences > 0 else 0
    return sequence_accuracy * 100



In [34]:
evaluate_model_with_sequence_accuracy(model, test_dataloader, device)

Predicted: tensor([19, 19,  5, 19,  5,  3, 19, 19, 13, 19, 13, 14, 19,  5,  3, 19, 19, 13,
        19, 15, 13, 19,  5,  3, 19, 19, 13, 19, 13,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0], device='cuda:0')
Target: tensor([19, 19, 19, 19,  5,  3, 19, 19, 13, 19, 13, 14, 19,  5,  3, 19, 19, 13,
        19, 15, 13, 19,  5,  3, 19, 19, 13, 19, 13,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0], device='cuda:0')
i want to order two medium pizzas with sausage and black olives and two medium pizzas with pepperoni and extra cheese and three large pizzas with pepperoni and sausage
Predicted: tensor([19, 19, 13, 19,  5,  3, 17, 19, 19, 15, 13, 14,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0], device='cuda:0')
Target: tensor([19, 19, 19, 19,  5,  3, 17, 19, 19, 15, 13, 14,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0], dev

14.942528735632186