In [1]:
import json
import re
from collections import defaultdict


ORDER_KEYS = {"PIZZAORDER", "DRINKORDER"}

def tokenize(s):
    tokens = re.findall(r'\(|\)|[^\s()]+', s)
    return tokens

def parse_tokens(tokens):
    stack = []
    current_list = []
    for token in tokens:
        if token == '(':
            stack.append(current_list)
            current_list = []
        elif token == ')':
            finished = current_list
            current_list = stack.pop()
            current_list.append(finished)
        else:
            current_list.append(token)
    return current_list
def extract_orders(structure, order_index=1):

    results = []

    if not isinstance(structure, list) or len(structure) == 0:
        return results, order_index

    first = structure[0]
    if isinstance(first, list):
        for elem in structure:
            sub_results, order_index = extract_orders(elem, order_index)
            results.extend(sub_results)
        return results, order_index

    if isinstance(first, str) and first in ORDER_KEYS:
        order_type = "PIZZAORDER" if first == "PIZZAORDER" else "DRINKORDER"
        current_order_sequence = order_index
        order_index += 1
        content_tokens = []
        for elem in structure[1:]:
            content_tokens.extend(collect_tokens(elem))
        for tok in content_tokens:
            results.append((tok, order_type, current_order_sequence))

        return results, order_index
    else:
        for elem in structure:
            sub_results, order_index = extract_orders(elem, order_index)
            results.extend(sub_results)
        return results, order_index
def collect_tokens(node):
    collected = []
    if isinstance(node, list):
        for sub in node:
            sub_tokens = collect_tokens(sub)
            collected.extend(sub_tokens)
    else:
        if node not in ["(", ")"] and not is_structural_key(node):
            collected.append(node)
    return collected

def is_structural_key(token):
    return token in [
        "ORDER","PIZZAORDER","DRINKORDER","NUMBER","SIZE","STYLE","TOPPING",
        "COMPLEX_TOPPING","QUANTITY","VOLUME","DRINKTYPE","CONTAINERTYPE","NOT"
    ]


In [None]:

def label_input(input_text, top):
    tokens = tokenize(top)
    parsed = parse_tokens(tokens)

    order_info, _ = extract_orders(parsed)
    label_dict = defaultdict(list)
    for tok, lbl, num in order_info:
        label_dict[tok.lower()].append((lbl, num))

    input_tokens = input_text.split()
    labeled_input = []

    used_labels = defaultdict(int)

    for token in input_tokens:
        token_lower = token.lower()
        if token_lower in label_dict:

            label_index = used_labels[token_lower]
            if label_index < len(label_dict[token_lower]):
                token_label, sequence_number = label_dict[token_lower][label_index]
                used_labels[token_lower] += 1 
            else:
                token_label, sequence_number = 'O', None  
        else:
            token_label, sequence_number = 'O', None  

        labeled_input.append((token, token_label, sequence_number))
    return labeled_input

top = "(ORDER i need (PIZZAORDER (NUMBER a ) (SIZE medium ) (TOPPING ham ) and (TOPPING pineapple ) pizza ) and (DRINKORDER (NUMBER a ) (VOLUME small ) (DRINKTYPE iced tea ) ) )"

input_text = "i need a medium ham and pineapple pizza and a small iced tea"
input_label_sequence = label_input(input_text, top)
print(input_label_sequence)

In [None]:

def transform_to_labels(input_array):
    labeled_numbers = []

    for _, label, sequence in input_array:
        # Compute the numerical label
        if label == 'O' and sequence is None:
            numerical_label = 0  # Neutral/irrelevant
        elif label == 'PIZZAORDER':
            numerical_label = 10 + sequence  # Unique range for pizza orders
        elif label == 'DRINKORDER':
            numerical_label = 20 + sequence  # Unique range for drink orders
        else:
            numerical_label = 0  # Default fallback

        labeled_numbers.append(numerical_label)

    return labeled_numbers

transform_to_labels(input_label_sequence)

In [None]:
def create_training_data(input_file: str, output_file: str):

    with open(input_file, 'r') as infile, open(output_file, 'w') as outfile:
        for line in infile:
            record = json.loads(line)
            src = record["dev.SRC"]
            top = record["dev.TOP"]

            labeled_input = label_input(src, top)
            numerical_labels = transform_to_labels(labeled_input)

            training_instance = {
                "text": src,
                "labels": numerical_labels
            }

            outfile.write(json.dumps(training_instance) + "\n")

# File paths
input_file = "../dataset/PIZZA_dev.json"
output_file = "../dataset/dev_data_model1.json"

# Generate the training data
create_training_data(input_file,output_file)

In [1]:
import pandas as pd
train_path = '../dataset2/PIZZA_train_model2.json'
dev_path = "../dataset2/PIZZA_dev_model2.json"
df = pd.read_json(train_path, lines=True)
dev = pd.read_json(dev_path, lines=True)
df.describe()

Unnamed: 0,text,labels
count,1871720,1871720
unique,1800845,26042
top,i need four pies with pickle and broccoli,"[21, 21, 5, 21, 21, 11, 21, 5, 21, 21, 11]"
freq,7,12319


In [2]:
dev.describe()

Unnamed: 0,text,labels
count,1357,1357
unique,1357,1000
top,i want a pizza with pesto and mushrooms but no...,"[21, 21, 5, 3, 11, 21, 11, 21, 21, 21, 13]"
freq,1,33


In [3]:
X_train = df['text']
y_train = df['labels']
X_test = dev['text']
y_test = dev['labels']

In [4]:
X_train[0], y_train[0]

('large pie with green pepper and with extra ham',
 [3, 21, 21, 11, 12, 21, 21, 19, 11])

In [5]:
import re
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Define a mapping for entity keys to numerical labels
LABEL_MAP = {
    'B-DRINKTYPE': 1, 'I-DRINKTYPE': 2,
    'B-SIZE': 3, 'I-SIZE': 4,  # Treats SIZE and VOLUME as the same
    'B-NUMBER': 5, 'I-NUMBER': 6,
    'B-CONTAINERTYPE': 7, 'I-CONTAINERTYPE': 8,
    'B-COMPLEX_TOPPING': 9, 'I-COMPLEX_TOPPING': 10,
    'B-TOPPING': 11, 'I-TOPPING': 12,
    'B-NEG_TOPPING': 13, 'I-NEG_TOPPING': 14,
    'B-NEG_STYLE': 15, 'I-NEG_STYLE': 16,
    'B-STYLE': 17, 'I-STYLE': 18,
    'B-QUANTITY': 19, 'I-QUANTITY': 20,
    'O': 21
}
vocab = {"<PAD>": 0, "<SOS>": 1, "<EOS>": 2, "<UNK>": 3}  # Special tokens

def tokenize_output(output):
    """
    Tokenizes the structured output into meaningful tokens.
    Example:
        Input: "(ORDER (PIZZAORDER (NUMBER a ) (SIZE large ) (TOPPING bbq pulled pork ) ) )"
        Output: ["(ORDER", "(PIZZAORDER", "(NUMBER", "a", "(SIZE", "large", "(TOPPING", "bbq", "pulled", "pork", ")", ")", ")", ")"]
    """
    tokens = re.findall(r"\(|\)|\w+|[^\s()]+", output)
    return tokens

def build_vocab(outputs, index):
    """
    Builds a vocabulary from tokenized outputs.
    """
    i = index
    for output in outputs:
        tokens = tokenize_output(output)
        for token in tokens:
            if token not in vocab:
                vocab[token] = i
                i += 1
    return vocab, i
def encode_outputs(outputs, vocab):
    encoded = []
    for output in outputs:
        tokens = tokenize_output(output)  # Tokenize the output
        sequence = [vocab.get(token, vocab.get("<UNK>", 0)) for token in tokens] # [vocab.get("<SOS>", 0)] + \+ \[vocab.get("<EOS>", 0)]
        encoded.append(sequence)
    return encoded

def pad_sequences_to_fixed_length(sequences, max_len):
    """
    Pads sequences to a fixed length.
    """
    return pad_sequences(sequences, maxlen=max_len, padding="post", value=0)

def vocab_decode_sequence(sequence, vocab):
    """
    Decodes a sequence of integers back into the structured output string.
    """
    inv_vocab = {v: k for k, v in vocab.items()}  # Reverse the vocabulary
    tokens = [inv_vocab[idx] for idx in sequence if idx in inv_vocab and idx not in { vocab["<SOS>"], vocab["<EOS>"],vocab["<PAD>"]} ] 
    output = " ".join(tokens)
    output = output.replace(" ( ", " (").replace("( ", "(") #.replace(" )", ")")
    print(output)
    return output

def decode_sequence(sequence):
    """
    Decodes a sequence of integers back into their ENTITY_KEYS.
    """
    output = []
    # remove all padding tokens 
    sequence = [i for i in sequence if i != 0]
    for i in sequence:
        # use the LABEL_MAP to decode the integer
        output.append(list(LABEL_MAP.keys())[list(LABEL_MAP.values()).index(i)])
    return output

decode_sequence([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])



['B-DRINKTYPE',
 'I-DRINKTYPE',
 'B-SIZE',
 'I-SIZE',
 'B-NUMBER',
 'I-NUMBER',
 'B-CONTAINERTYPE',
 'I-CONTAINERTYPE',
 'B-COMPLEX_TOPPING',
 'I-COMPLEX_TOPPING',
 'B-TOPPING',
 'I-TOPPING']

In [6]:
def prepare_data(
    X_train, y_train, X_test, y_test, max_len_1=20, max_len_2 = 20
):

    index = 4
    X_vocab, index = build_vocab(X_train,index)  # Build vocabulary from training outputs
    X_train_encoded = encode_outputs(X_train, X_vocab)  # Encode training outputs
    X_test_encoded = encode_outputs(X_test, X_vocab)  # Encode testing outputs
    X_train_processed = pad_sequences_to_fixed_length(X_train_encoded, max_len_1)
    X_test_processed = pad_sequences_to_fixed_length(X_test_encoded, max_len_1)

    y_train_processed = pad_sequences_to_fixed_length(y_train, max_len_2)
    y_test_processed = pad_sequences_to_fixed_length(y_test, max_len_2)


    return (
        X_train_processed,
        X_test_processed,
        y_train_processed,
        y_test_processed,
        X_vocab,
    )


In [7]:
X_train_processed, X_test_processed, y_train_processed, y_test_processed, vocab  = prepare_data( X_train, y_train, X_test, y_test, max_len_1=40, max_len_2=40)

In [8]:
print(vocab_decode_sequence(X_train_processed[3], vocab))
print(decode_sequence(y_train_processed[3]))

i want a stuffed crust pizza with american cheese and a little bit of sausage
i want a stuffed crust pizza with american cheese and a little bit of sausage
['O', 'O', 'B-NUMBER', 'B-STYLE', 'I-STYLE', 'O', 'O', 'B-TOPPING', 'I-TOPPING', 'O', 'B-QUANTITY', 'I-QUANTITY', 'I-QUANTITY', 'I-QUANTITY', 'B-TOPPING']


In [9]:
y_train_processed[1]

array([ 3,  4, 17, 18, 21, 21, 11, 21, 21, 11,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0])

In [10]:
vocab

{'<PAD>': 0,
 '<SOS>': 1,
 '<EOS>': 2,
 '<UNK>': 3,
 'large': 4,
 'pie': 5,
 'with': 6,
 'green': 7,
 'pepper': 8,
 'and': 9,
 'extra': 10,
 'ham': 11,
 'party': 12,
 'size': 13,
 'stuffed': 14,
 'crust': 15,
 'artichokes': 16,
 'mushroom': 17,
 'i': 18,
 'want': 19,
 'one': 20,
 'regular': 21,
 'pizza': 22,
 'without': 23,
 'any': 24,
 'basil': 25,
 'a': 26,
 'american': 27,
 'cheese': 28,
 'little': 29,
 'bit': 30,
 'of': 31,
 'sausage': 32,
 "'d": 33,
 'like': 34,
 'sized': 35,
 'high': 36,
 'rise': 37,
 'dough': 38,
 'lot': 39,
 'banana': 40,
 'chicken': 41,
 'black': 42,
 'olives': 43,
 'sauce': 44,
 'broccoli': 45,
 'peperonni': 46,
 'italian': 47,
 'can': 48,
 'have': 49,
 'flatbread': 50,
 'style': 51,
 'lunch': 52,
 '-': 53,
 'blue': 54,
 'need': 55,
 'caramelized': 56,
 'onions': 57,
 'combination': 58,
 'eggplant': 59,
 'pecorino': 60,
 'New': 61,
 'York': 62,
 'artichoke': 63,
 'spinach': 64,
 'Neapolitan': 65,
 'bacon': 66,
 'tofu': 67,
 'grilled': 68,
 'mozzarella': 69,
 

In [11]:
X_test_processed

array([[ 18,  19,  26, ...,   0,   0,   0],
       [ 18, 513,  34, ...,   0,   0,   0],
       [ 48,  18, 512, ...,   0,   0,   0],
       ...,
       [ 18, 509, 519, ...,   0,   0,   0],
       [ 18, 597,  20, ...,   0,   0,   0],
       [ 18,  33,  34, ...,   0,   0,   0]])

In [12]:
y_test_processed

array([[21, 21,  5, ...,  0,  0,  0],
       [21, 21, 21, ...,  0,  0,  0],
       [21, 21, 21, ...,  0,  0,  0],
       ...,
       [21, 21, 21, ...,  0,  0,  0],
       [21, 21,  5, ...,  0,  0,  0],
       [21, 21, 21, ...,  0,  0,  0]])

In [13]:
X_test_processed.shape, y_test_processed.shape


((1357, 40), (1357, 40))

In [14]:
X_train_processed.shape, y_train_processed.shape

((1871720, 40), (1871720, 40))

In [44]:
import torch
from torch.utils.data import Dataset, DataLoader

class SequenceDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = torch.tensor(inputs, dtype=torch.long) 
        self.targets = torch.tensor(targets, dtype=torch.long) 

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return {
            "src_input_ids": self.inputs[idx],
            "tgt_input_ids": self.targets[idx],
        }



train_dataset = SequenceDataset(X_train_processed, y_train_processed)
test_dataset = SequenceDataset(X_test_processed, y_test_processed)

batch_size = 128  # Adjust based GPU ;-;  memory
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [45]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import BatchNorm1d
class BiLSTMModel(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim, num_layers=3, dropout=0.3):
        super(BiLSTMModel, self).__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim, padding_idx=0)
        
        # Bidirectional LSTM
        self.bilstm_1 = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            bidirectional=True,
            batch_first=True,
            dropout=dropout,
        )

        # Batch normalization
        #self.batchnorm_1 = BatchNorm1d(hidden_dim * 2)

        # Fully connected layers
        self.fc2 = nn.Linear(hidden_dim * 2, output_dim)


    def forward(self, x):
        # Embedding layer
        embedded = self.embedding(x)

        # BiLSTM layer
        lstm_out, _ = self.bilstm_1(embedded)

        # # # Batch normalization
        # lstm_out = lstm_out.permute(0, 2, 1)
        # lstm_out = self.batchnorm_1(lstm_out)
        # lstm_out = lstm_out.permute(0, 2, 1)

        output = self.fc2(lstm_out)
        return output


In [46]:
len(vocab)

607

In [47]:

input_dim = len(vocab)
embedding_dim = 128
hidden_dim = 128  
output_dim = y_train_processed.shape[1]  
num_layers = 2  
dropout = 0.3

device = torch.device("cuda") #"cuda" if torch.cuda.is_available() else
model = BiLSTMModel(input_dim, embedding_dim, hidden_dim, output_dim, num_layers, dropout).to(device)

In [48]:
def evaluate_model_with_accuracy(model, dataloader, criterion, device):
    model.eval()
    epoch_loss = 0
    total_tokens = 0
    correct_tokens = 0

    with torch.no_grad():
        for batch in dataloader:
            src = batch["src_input_ids"].to(device)
            tgt = batch["tgt_input_ids"].to(device)

            output = model(src)
            output_dim = output.shape[-1]

            # Flatten outputs and targets
            output = output.view(-1, output_dim)  # Shape: (batch_size * seq_len, output_dim)
            tgt = tgt.view(-1)  # Shape: (batch_size * seq_len)

            # Compute loss
            loss = criterion(output, tgt)
            epoch_loss += loss.item()

            # Calculate accuracy
            predictions = output.argmax(dim=1)  # Get the index of the max log-probability
            valid_indices = tgt != 0
            correct_tokens += (predictions[valid_indices] == tgt[valid_indices]).sum().item()
            total_tokens += valid_indices.sum().item()

    accuracy = correct_tokens / total_tokens if total_tokens > 0 else 0
    return epoch_loss / len(dataloader), accuracy


In [49]:
import torch.optim as optim
from tqdm import tqdm

criterion = nn.CrossEntropyLoss(ignore_index=0)  # Use for multi-class classification ignore_index=0 for padding
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)

for epoch in range(20):  # Number of epochs
    model.train()
    epoch_loss = 0
    total_batches = len(train_dataloader)
    progress_bar = tqdm(train_dataloader, desc="Training Progress", unit="batch", leave=True)

    for batch_idx, batch in enumerate(progress_bar):  # Assuming a DataLoader is used
        src = batch["src_input_ids"].to(device)  # Input tokens
        tgt = batch["tgt_input_ids"].to(device)  # Target tokens

        optimizer.zero_grad()
        output = model(src)  # Forward pass
        output_dim = output.shape[-1]

        # Flatten outputs and targets for loss computation
        output = output.view(-1, output_dim)  # Shape: (batch_size * seq_len, output_dim)
        tgt = tgt.view(-1)  # Shape: (batch_size * seq_len)

        # Compute loss
        loss = criterion(output, tgt)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        avg_loss = epoch_loss / (batch_idx + 1)
        progress_bar.set_description(f"Training Progress: Batch {batch_idx + 1}/{total_batches}, Avg Loss: {avg_loss:.8f}")

    val_loss, accuracy = evaluate_model_with_accuracy(model, test_dataloader, criterion, device)
    print(f"Epoch {epoch + 1}, Loss: {epoch_loss / len(train_dataloader):.8f}, Val Loss: {val_loss:.8f}, Accuracy: {accuracy * 100:.4f}%")


Training Progress: Batch 14623/14623, Avg Loss: 0.01788165: 100%|██████████| 14623/14623 [04:12<00:00, 57.83batch/s]


tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
Epoch 1, Loss: 0.01788165, Val Loss: 0.97196724, Accuracy: 79.6316%


Training Progress: Batch 14623/14623, Avg Loss: 0.00600864: 100%|██████████| 14623/14623 [04:51<00:00, 50.12batch/s]


tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
Epoch 2, Loss: 0.00600864, Val Loss: 0.64639501, Accuracy: 85.6751%


Training Progress: Batch 14623/14623, Avg Loss: 0.00583692: 100%|██████████| 14623/14623 [05:49<00:00, 41.87batch/s]


tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
Epoch 3, Loss: 0.00583692, Val Loss: 0.62110287, Accuracy: 86.7782%


Training Progress: Batch 14623/14623, Avg Loss: 0.00560855: 100%|██████████| 14623/14623 [04:13<00:00, 57.59batch/s]


tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
Epoch 4, Loss: 0.00560855, Val Loss: 0.56132104, Accuracy: 88.3247%


Training Progress: Batch 14623/14623, Avg Loss: 0.00547356: 100%|██████████| 14623/14623 [04:10<00:00, 58.43batch/s]


tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
Epoch 5, Loss: 0.00547356, Val Loss: 0.71832724, Accuracy: 83.8699%


Training Progress: Batch 14623/14623, Avg Loss: 0.00541348: 100%|██████████| 14623/14623 [04:11<00:00, 58.05batch/s]


tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
Epoch 6, Loss: 0.00541348, Val Loss: 0.55772397, Accuracy: 87.5224%


Training Progress: Batch 14623/14623, Avg Loss: 0.00536128: 100%|██████████| 14623/14623 [04:10<00:00, 58.35batch/s]


tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
Epoch 7, Loss: 0.00536128, Val Loss: 0.68629105, Accuracy: 87.4644%


Training Progress: Batch 14623/14623, Avg Loss: 0.00528897: 100%|██████████| 14623/14623 [04:25<00:00, 55.02batch/s]


tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
tensor([ True,  True,  True,  ..., False, False, False], device='cuda:0')
Epoch 8, Loss: 0.00528897, Val Loss: 0.59851568, Accuracy: 87.7441%


Training Progress: Batch 774/14623, Avg Loss: 0.00563199:   5%|▌         | 774/14623 [00:14<04:12, 54.83batch/s]


KeyboardInterrupt: 

In [50]:
def save_model(model, path):
    torch.save(model.state_dict(), path)
    print(f"Model saved to {path}")

def load_model(model, path):
    model.load_state_dict(torch.load(path))
    print(f"Model loaded from {path}")
    return model

In [51]:
save_model(model, "../weights/Bilstm_model2.pt")

Model saved to ../weights/Bilstm_model2.pt


### Testing Real Output sequence

In [52]:
test_dataset.__getitem__(1)["src_input_ids"]

tensor([ 18, 513,  34, 507, 514, 258, 128, 170, 252,   6,  10,  28,   9,  72,
        140,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0])

In [53]:

with torch.no_grad():

    output = model( test_dataset.__getitem__(1)["src_input_ids"].to(device))
    predictions = output.argmax(dim=1)
    # print the sentence from the test set
    print(X_test[1])
predictions

i would like to try two medium tuna pizzas with extra cheese and no pesto


tensor([21, 21, 21, 21, 21,  5,  3, 11, 21, 21, 11, 12, 21, 21, 13, 14, 14, 14,
        21, 21, 21, 13, 14, 14, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
        21, 21, 21, 21], device='cuda:0')

In [54]:
tgt= test_dataset.__getitem__(1)["tgt_input_ids"].to(device)
tgt

tensor([21, 21, 21, 21, 21,  5,  3, 11, 21, 21, 19, 11, 21, 21, 13,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0], device='cuda:0')

In [55]:

sequence = predictions.cpu().tolist()
pred_sequence= decode_sequence(sequence)
pred_sequence

['O',
 'O',
 'O',
 'O',
 'O',
 'B-NUMBER',
 'B-SIZE',
 'B-TOPPING',
 'O',
 'O',
 'B-TOPPING',
 'I-TOPPING',
 'O',
 'O',
 'B-NEG_TOPPING',
 'I-NEG_TOPPING',
 'I-NEG_TOPPING',
 'I-NEG_TOPPING',
 'O',
 'O',
 'O',
 'B-NEG_TOPPING',
 'I-NEG_TOPPING',
 'I-NEG_TOPPING',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O']

In [56]:
sequence = tgt.cpu().tolist()
tgt_sequence = decode_sequence(sequence)
tgt_sequence

['O',
 'O',
 'O',
 'O',
 'O',
 'B-NUMBER',
 'B-SIZE',
 'B-TOPPING',
 'O',
 'O',
 'B-QUANTITY',
 'B-TOPPING',
 'O',
 'O',
 'B-NEG_TOPPING']

In [57]:
pred_sequence == tgt_sequence

False

In [58]:
def evaluate_model_with_sequence_accuracy(model, dataloader, device):
    model.eval()
    total_sequences = 0
    correct_sequences = 0

    with torch.no_grad():
        for batch in dataloader:
            src = batch["src_input_ids"].to(device)
            tgt = batch["tgt_input_ids"].to(device)

            # Get model predictions
            output = model(src)
            predictions = output.argmax(dim=-1)  # Shape: (batch_size, seq_len)

            # Compare sequences, token-by-token
            for pred, tgt_seq in zip(predictions, tgt):
                valid_mask = tgt_seq != 0  # Ignore padding in comparison
                pred = pred[valid_mask]
                tgt_seq = tgt_seq[valid_mask]

                if torch.equal(pred, tgt_seq):  # Compare sequences
                    correct_sequences += 1
                else:
                    print(X_test[total_sequences])
                    print(f"Predicted: {pred}")
                    print(f"Target: {tgt_seq}")

                total_sequences += 1

    print(f"Correct {correct_sequences}, Total {total_sequences}")
    sequence_accuracy = correct_sequences / total_sequences if total_sequences > 0 else 0
    return sequence_accuracy * 100


In [59]:
evaluate_model_with_sequence_accuracy(model, test_dataloader, device)

i would like to try two medium tuna pizzas with extra cheese and no pesto
Predicted: tensor([21, 21, 21, 21, 21,  5,  3, 11, 21, 21, 11, 12, 21, 21, 13],
       device='cuda:0')
Target: tensor([21, 21, 21, 21, 21,  5,  3, 11, 21, 21, 19, 11, 21, 21, 13],
       device='cuda:0')
get me two pepsis a coke and five large fantas
Predicted: tensor([21, 21,  5,  1,  2,  1, 21,  5,  3,  1], device='cuda:0')
Target: tensor([21, 21,  5,  1,  5,  1, 21,  5,  3,  1], device='cuda:0')
i want one medium pizza along with sausage mushrooms but hold ham please
Predicted: tensor([21, 21,  5,  3, 21, 21, 21, 11, 11, 21, 21, 21, 21], device='cuda:0')
Target: tensor([21, 21,  5,  3, 21, 21, 21, 11, 11, 21, 21, 13, 21], device='cuda:0')
i'll go for one pepsi six large diet cokes and a medium fanta
Predicted: tensor([21, 21, 21,  6,  1, 21, 21,  1,  2, 21,  5,  3,  1], device='cuda:0')
Target: tensor([21, 21, 21,  5,  1,  5,  3,  1,  2, 21,  5,  3,  1], device='cuda:0')
i would like one pie with sausage oliv

32.20338983050847