### Read Data

In [4]:
import pandas as pd
train_path = '../dataset/PIZZA_train.json'
test_path = '../dataset/PIZZA_dev.json'
df = pd.read_json(train_path, lines=True)
dev = pd.read_json(test_path, lines=True)

In [31]:
df.describe()

Unnamed: 0,train.SRC,train.EXR,train.TOP,train.TOP-DECOUPLED
count,2456446,2456446,2456446,2456446
unique,2456446,694346,2456446,1425035
top,can i have a large bbq pulled pork,(ORDER (PIZZAORDER (NUMBER 1 ) (SIZE PARTY_SIZ...,(ORDER can i have (PIZZAORDER (NUMBER a ) (SIZ...,(ORDER (PIZZAORDER (NUMBER three ) (NOT (TOPPI...
freq,1,1999,1,167


In [32]:
X_train = df['train.SRC']
y_train = df['train.TOP']
X_test = dev['dev.SRC']
y_test = dev['dev.TOP']
print(X_train[476368])
print(y_train[476368])
print(dev['dev.SRC'][0])

i want three pies with parmesan cheese and without any sauce
(ORDER i want (PIZZAORDER (NUMBER three ) pies with (TOPPING parmesan cheese ) and without any (NOT (TOPPING sauce ) ) ) )
i want to order two medium pizzas with sausage and black olives and two medium pizzas with pepperoni and extra cheese and three large pizzas with pepperoni and sausage


In [33]:
X_train

0                         can i have a large bbq pulled pork
1          large pie with green pepper and with extra pep...
2                          i'd like a large vegetarian pizza
3          party size stuffed crust pie with american che...
4                    can i have one personal sized artichoke
                                 ...                        
2456441    i'd like a pizza with arugula ricotta cheese a...
2456442    i'd like a pizza with yellow peppers fried oni...
2456443    i'd like a pizza with olives roasted tomatoes ...
2456444    i'd like a pizza with mozzarella jalapeno and ...
2456445    i'd like a pizza with hot pepper pecorino chee...
Name: train.SRC, Length: 2456446, dtype: object

In [34]:
max_str_1 = len(max(X_train, key=len))
max_str_2 = len(y_train[y_train.str.len().idxmax()])
max_str_1, max_str_2

(133, 335)

### Calculate Vocabulary

In [35]:
import re
from tensorflow.keras.preprocessing.sequence import pad_sequences

def tokenize_output(output):
    """
    Tokenizes the structured output into meaningful tokens.
    Example:
        Input: "(ORDER (PIZZAORDER (NUMBER a ) (SIZE large ) (TOPPING bbq pulled pork ) ) )"
        Output: ["(ORDER", "(PIZZAORDER", "(NUMBER", "a", "(SIZE", "large", "(TOPPING", "bbq", "pulled", "pork", ")", ")", ")", ")"]
    """
    tokens = re.findall(r"\(|\)|\w+|[^\s()]+", output)
    return tokens

def build_vocab(outputs):
    """
    Builds a vocabulary from tokenized outputs.
    """
    vocab = {"<PAD>": 0, "<SOS>": 1, "<EOS>": 2}  # Special tokens
    i = 3
    for output in outputs:
        tokens = tokenize_output(output)
        for token in tokens:
            if token not in vocab:
                vocab[token] = i
                i += 1
    return vocab
def encode_outputs(outputs, vocab):
    """
    Encodes tokenized outputs into sequences of integers.
    """
    encoded = []
    for output in outputs:
        tokens = tokenize_output(output)
        sequence = [vocab["<SOS>"]] + [vocab[token] for token in tokens if token in vocab] + [vocab["<EOS>"]]
        encoded.append(sequence)
    return encoded

def pad_sequences_to_fixed_length(sequences, max_len):
    """
    Pads sequences to a fixed length.
    """
    return pad_sequences(sequences, maxlen=max_len, padding="post", value=0)

def decode_sequence(sequence, vocab):
    """
    Decodes a sequence of integers back into the structured output string.
    """
    inv_vocab = {v: k for k, v in vocab.items()}  # Reverse the vocabulary
    tokens = [inv_vocab[idx] for idx in sequence if idx in inv_vocab and idx not in {vocab["<SOS>"], vocab["<EOS>"], vocab["<PAD>"]}]

    output = " ".join(tokens)
    output = output.replace(" ( ", " (").replace("( ", "(") #.replace(" )", ")")
    return output

def decode_sequence_2(sequence, vocab):
    """
    Decodes a sequence of integers back into the structured output string.
    """
    inv_vocab = {v: k for k, v in vocab.items()}  # Reverse the vocabulary
    # sequence = sequence.cpu().tolist()  # Convert tensor to a list of integers
    tokens = [inv_vocab.get(idx, "") for idx in sequence if idx > 0]  # Ignore unknown and put empty char
    return "".join(tokens)



In [20]:


def prepare_data(
    X_train, y_train, X_test, y_test, max_len_1=20, max_len_2 = 20
):

    X_vocab = build_vocab(X_train)  # Build vocabulary from training outputs
    X_train_encoded = encode_outputs(X_train, X_vocab)  # Encode training outputs
    X_test_encoded = encode_outputs(X_test, X_vocab)  # Encode testing outputs
    X_train_processed = pad_sequences_to_fixed_length(X_train_encoded, max_len_1)
    X_test_processed = pad_sequences_to_fixed_length(X_test_encoded, max_len_1)

    vocab = build_vocab(y_train)  # Build vocabulary from training outputs
    y_train_encoded = encode_outputs(y_train, vocab)  # Encode training outputs
    y_test_encoded = encode_outputs(y_test, vocab)  # Encode testing outputs
    y_train_processed = pad_sequences_to_fixed_length(y_train_encoded, max_len_2)
    y_test_processed = pad_sequences_to_fixed_length(y_test_encoded, max_len_2)


    return (
        X_train_processed,
        X_test_processed,
        y_train_processed,
        y_test_processed,
        X_vocab,
        vocab,  # Return vocabulary for decoding
    )


In [21]:
X_train_processed, X_test_processed, y_train_processed, y_test_processed, X_vocab, vocab = prepare_data( X_train, y_train, X_test, y_test,max_len_1=250, max_len_2=250)

In [188]:
del df
del dev
del X_train
del X_test
del y_train
del y_test

In [23]:
X_vocab

{'<PAD>': 0,
 '<SOS>': 1,
 '<EOS>': 2,
 'can': 3,
 'i': 4,
 'have': 5,
 'a': 6,
 'large': 7,
 'bbq': 8,
 'pulled': 9,
 'pork': 10,
 'pie': 11,
 'with': 12,
 'green': 13,
 'pepper': 14,
 'and': 15,
 'extra': 16,
 'peperonni': 17,
 "'d": 18,
 'like': 19,
 'vegetarian': 20,
 'pizza': 21,
 'party': 22,
 'size': 23,
 'stuffed': 24,
 'crust': 25,
 'american': 26,
 'cheese': 27,
 'mushroom': 28,
 'one': 29,
 'personal': 30,
 'sized': 31,
 'artichoke': 32,
 'banana': 33,
 'peppperonis': 34,
 'low': 35,
 'fat': 36,
 'want': 37,
 'regular': 38,
 'without': 39,
 'any': 40,
 'fried': 41,
 'onions': 42,
 'little': 43,
 'bit': 44,
 'of': 45,
 'high': 46,
 'rise': 47,
 'dough': 48,
 'lot': 49,
 'olive': 50,
 'pesto': 51,
 'sauce': 52,
 'peperonis': 53,
 'yellow': 54,
 'meatball': 55,
 '-': 56,
 'bean': 57,
 'big': 58,
 'meat': 59,
 'mushrooms': 60,
 'pecorino': 61,
 'balsamic': 62,
 'glaze': 63,
 'black': 64,
 'chicken': 65,
 'mozzarella': 66,
 'italian': 67,
 'sausage': 68,
 'olives': 69,
 'pestos':

In [24]:
vocab

{'<PAD>': 0,
 '<SOS>': 1,
 '<EOS>': 2,
 '(': 3,
 'ORDER': 4,
 'can': 5,
 'i': 6,
 'have': 7,
 'PIZZAORDER': 8,
 'NUMBER': 9,
 'a': 10,
 ')': 11,
 'SIZE': 12,
 'large': 13,
 'TOPPING': 14,
 'bbq': 15,
 'pulled': 16,
 'pork': 17,
 'pie': 18,
 'with': 19,
 'green': 20,
 'pepper': 21,
 'and': 22,
 'COMPLEX_TOPPING': 23,
 'QUANTITY': 24,
 'extra': 25,
 'peperonni': 26,
 "'d": 27,
 'like': 28,
 'STYLE': 29,
 'vegetarian': 30,
 'pizza': 31,
 'party': 32,
 'size': 33,
 'stuffed': 34,
 'crust': 35,
 'american': 36,
 'cheese': 37,
 'mushroom': 38,
 'one': 39,
 'personal': 40,
 'sized': 41,
 'artichoke': 42,
 'banana': 43,
 'peppperonis': 44,
 'low': 45,
 'fat': 46,
 'want': 47,
 'regular': 48,
 'without': 49,
 'any': 50,
 'NOT': 51,
 'fried': 52,
 'onions': 53,
 'little': 54,
 'bit': 55,
 'of': 56,
 'high': 57,
 'rise': 58,
 'dough': 59,
 'lot': 60,
 'olive': 61,
 'pesto': 62,
 'sauce': 63,
 'peperonis': 64,
 'yellow': 65,
 'meatball': 66,
 '-': 67,
 'bean': 68,
 'big': 69,
 'meat': 70,
 'mushro

### Save SRC as input

In [None]:
src_data = [entry for entry in X_train ]
with open("../dataset/src_data.txt", "w") as src_file:
    src_file.write("\n".join(src_data))

### Turn TOP-DECOUPLED into json tree

In [2]:
import re
import json

def parse_order(input_text):
    def parse_element(element):
        #print(f"Parsing element: {element}")
        
        # Helper function to parse individual elements
        matches = re.findall(r'\b(PIZZAORDER|DRINKORDER)\b|(?:\b([A-Z]+)\s([^()]+)\b)', element)
        parts = [match[0] or f"{match[1]} {match[2]}" for match in matches]

        #print(f"Parts found: {parts}")

        if parts[0] == 'PIZZAORDER':

            pizza = {
                'NUMBER': None, 
                'SIZE': None, 
                'STYLE': None, 
                'AllTopping': []
            }
            for token in parts[1:]:

                #print(f"Processing token: {token}") 

                if token.startswith('NUMBER'):
                    pizza['NUMBER'] = ' '.join(token.split()[1:])
                elif token.startswith('SIZE'):
                    pizza['SIZE'] = ' '.join(token.split()[1:])
                elif token.startswith('STYLE'):
                    pizza['STYLE'] = ' '.join(token.split()[1:])
                elif token.startswith('TOPPING'):
                    pizza['AllTopping'].append({
                        'NOT': False,
                        'Quantity': None,
                        'Topping': ' '.join(token.split()[1:])
                    })
                elif token.startswith('NOT'):
                    pizza['AllTopping'].append({
                        'NOT': True,
                        'Quantity': None,
                        'Topping': ' '.join(token.split()[1:])
                    })
                elif token.startswith('COMPLEX_TOPPING'):
                    toppings = re.findall(r'\(([^()]+)\)', token)
                    for t in toppings:
                        t_tokens = t.split()
                        pizza['AllTopping'].append({
                            'NOT': False,
                            'Quantity': t_tokens[1] if len(t_tokens) > 2 else None,
                            'Topping': t_tokens[-1]# ' '.join(token.split()[-1:])
                        })
                
            return pizza
        
        elif parts[0] == 'DRINKORDER':
            drink = {
                'NUMBER': None, 
                'VOLUME': None, 
                'DRINKTYPE': None, 
                'CONTAINERTYPE': None
            }
            for token in parts[1:]:
                
                #print(f"Processing drink token: {token}")  

                if token.startswith('NUMBER'):
                    drink['NUMBER'] = token.split()[1]
                elif token.startswith('VOLUME'):
                    drink['VOLUME'] = ' '.join(token.split()[1:])
                elif token.startswith('DRINKTYPE'):
                    drink['DRINKTYPE'] = ' '.join(token.split()[1:])
                elif token.startswith('CONTAINERTYPE'):
                    drink['CONTAINERTYPE'] = ' '.join(token.split()[1:])
                
            return drink
        
        return None

    # Initialize the base structure
    order = {"ORDER": {"PIZZAORDER": [], "DRINKORDER": []}}
    

    #print(f"Full input text: {input_text}")
    

    order_elements = [x.group() for x in  re.finditer(r'\((?:PIZZAORDER|DRINKORDER)(?:[^()]*|\((?:[^()]*|\([^()]*\))*\))*\)', input_text)]
    
    #print(f"Found order elements: {order_elements}")  # Debug print
    
    for element in order_elements:
        parsed = parse_element(element)
        if parsed:
            if 'SIZE' in parsed:
                order['ORDER']['PIZZAORDER'].append(parsed)
            elif 'VOLUME' in parsed:
                order['ORDER']['DRINKORDER'].append(parsed)
    
    return order

# Example usage
input_text = '(ORDER (PIZZAORDER (NUMBER one) (SIZE large) (STYLE thin crust) (TOPPING cheese) (TOPPING pepperoni) ) (PIZZAORDER (NUMBER two) (SIZE medium) (STYLE deep dish) (NOT (TOPPING mushrooms) ) (COMPLEX_TOPPING (QUANTITY extra) (TOPPING olives) ) ) (DRINKORDER (NUMBER five) (VOLUME one liter) (DRINKTYPE lemon ice tea) (CONTAINERTYPE bottles) )(DRINKORDER (NUMBER three) (VOLUME two liters) (DRINKTYPE cola) (CONTAINERTYPE cans) ) (DRINKORDER (NUMBER three) (VOLUME two liters) (DRINKTYPE cola) (CONTAINERTYPE cans) ) )'

result = parse_order(input_text)
print(json.dumps(result, indent=2))

{
  "ORDER": {
    "PIZZAORDER": [
      {
        "NUMBER": "one",
        "SIZE": "large",
        "STYLE": "thin crust",
        "AllTopping": [
          {
            "NOT": false,
            "Quantity": null,
            "Topping": "cheese"
          },
          {
            "NOT": false,
            "Quantity": null,
            "Topping": "pepperoni"
          }
        ]
      },
      {
        "NUMBER": "two",
        "SIZE": "medium",
        "STYLE": "deep dish",
        "AllTopping": [
          {
            "NOT": false,
            "Quantity": null,
            "Topping": "mushrooms"
          },
          {
            "NOT": false,
            "Quantity": null,
            "Topping": "olives"
          }
        ]
      }
    ],
    "DRINKORDER": [
      {
        "NUMBER": "five",
        "VOLUME": "one liter",
        "DRINKTYPE": "lemon ice tea",
        "CONTAINERTYPE": "bottles"
      },
      {
        "NUMBER": "three",
        "VOLUME": "two liters",
     

In [8]:
input_text = '(ORDER (PIZZAORDER (NUMBER fourteen ) (TOPPING garlic powder ) ) (DRINKORDER (NUMBER 6 ) (CONTAINERTYPE cans) (DRINKTYPE diet ice teas ) ) )'

result = parse_order(input_text)
print(json.dumps(result))

{"ORDER": {"PIZZAORDER": [{"NUMBER": "fourteen", "SIZE": null, "STYLE": null, "AllTopping": [{"NOT": false, "Quantity": null, "Topping": "garlic powder"}]}], "DRINKORDER": [{"NUMBER": "6", "VOLUME": null, "DRINKTYPE": "diet ice teas", "CONTAINERTYPE": "cans"}]}}


In [7]:
input_text = '(ORDER (PIZZAORDER (TOPPING garlic powder ) (NUMBER fourteen ) ) (DRINKORDER (NUMBER 6 ) (DRINKTYPE diet ice teas ) ) )'

result = parse_order(input_text)
print(json.dumps(result))

{"ORDER": {"PIZZAORDER": [{"NUMBER": "fourteen", "SIZE": null, "STYLE": null, "AllTopping": [{"NOT": false, "Quantity": null, "Topping": "garlic powder"}]}], "DRINKORDER": [{"NUMBER": "6", "VOLUME": null, "DRINKTYPE": "diet ice teas", "CONTAINERTYPE": null}]}}


### Save TOP-DECOUPLED

In [6]:

# Extract parsed data from the DataFrame
parsed_data = []
for _, row in df.iterrows():
    if "train.TOP-DECOUPLED" in row:
        parsed_entry = parse_order(row["train.TOP-DECOUPLED"])
        parsed_data.append(parsed_entry)

# Save the parsed data to a file
output_path = "../dataset/parsed_order_data.json"
with open(output_path, "w") as parsed_file:
    json.dump(parsed_data, parsed_file)

In [None]:
del parsed_data

In [None]:
del src_data

In [13]:
del df
del dev

In [None]:

del X_test_processed
del X_train_processed
del y_test_processed
del y_train_processed

### Load train.TOP-DECOUPLED

In [None]:
import json

# Step 1: Open the JSON file containing multiple objects
with open('../dataset/parsed_order_data.json', 'r') as file:
    # Step 2: Read each JSON object (assuming each JSON object is on a new line)
    for line in file:
        data = json.loads(line.strip())  # Parse the JSON object

In [None]:
data

### Turn TOP into TOP-DECOUPLED

In [6]:
import re
def clean_text(text):
    """
    Remove special characters and unnecessary symbols from text.
    """
    #stop_words = set(stopwords.words('english')) # takes much time
    stop_words = [
    "an", "the", "and", "or", "but", "if", "in",  "at", 
    "by", "from", "to", "of", "for", "this", "that", "those", "these", 
    "can", "could", "would", "should", "will", "might", "may", "i", "you", 
    "we", "he", "she", "it", "they", "is", "are", "was", "were", "be", 
    "been", "have", "has", "had", "please","'", "d",
    ]#### i'd with without no "on", "with", "without",    "a", 
    custom_remove = [
    r"please",
    r"thank\s?you", 
    r"kindly", 
    r"just", 
    r"really",
    r"actually",
    r"like",
    r"want",
    ]
    # Remove special characters
    text = re.sub(r"[^\w\s]", " ", text)  # Remove punctuation and special characters
    
    # Remove extra whitespace
    text = re.sub(r"\s+", " ", text).strip()
    
    # Remove stopwords
    if stop_words:
        text = " ".join([word for word in text.split() if word.lower() not in stop_words])
    
    # Remove custom characters or substrings
    if custom_remove:
        for pattern in custom_remove:
            text = re.sub(pattern, "", text)
    return text


In [23]:
TOP = df['train.TOP'].apply(clean_text)

In [24]:
TOP.iloc[:, 1]

IndexingError: Too many indexers

### Ensure Decoding works right

In [23]:
total_sequences = 0
correct_sequences = 0

for src,tgt in zip(y_test_processed, y_test):
    #print(src)

    predicted_sequences = decode_sequence(src, vocab) 
    #print(predicted_sequences)
    # print(predicted_sequences)
    # print(target_sequences)
    # Calculate sequence accuracy
    if predicted_sequences == tgt:
        # print(pred)
        # print(tgt)
        correct_sequences += 1
    total_sequences += 1
print(f"Correct {correct_sequences}, Total {total_sequences}")
sequence_accuracy = correct_sequences / total_sequences if total_sequences > 0 else 0
sequence_accuracy * 100

Correct 348, Total 348


100.0

In [24]:
len(vocab)

183

In [25]:
input_dim = X_train_processed.shape[1]
input_dim

250

In [26]:
output_dim = y_train_processed.shape[1] 
output_dim

250

In [27]:
import torch.optim as optim

INPUT_DIM = input_dim
OUTPUT_DIM = output_dim
ENC_EMB_DIM = 64
DEC_EMB_DIM = 64
HID_DIM = 128
N_LAYERS = 2
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

encoder = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
decoder = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)
model = Seq2Seq(encoder, decoder, device).to(device)

optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss()


In [28]:
import torch
from torch.utils.data import Dataset, DataLoader

class SequenceDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = torch.tensor(inputs, dtype=torch.long) 
        self.targets = torch.tensor(targets, dtype=torch.long) 

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return {
            "src_input_ids": self.inputs[idx],
            "tgt_input_ids": self.targets[idx],
        }

train_dataset = SequenceDataset(X_train_processed, y_train_processed)
test_dataset = SequenceDataset(X_test_processed, y_test_processed)

batch_size = 128  # Adjust based GPU ;-;  memory
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [29]:
def evaluate_model_with_accuracy(model, dataloader, criterion, device):
    model.eval()
    epoch_loss = 0
    total_tokens = 0
    correct_tokens = 0

    with torch.no_grad():
        for batch in dataloader:
            src = batch["src_input_ids"].to(device)
            tgt = batch["tgt_input_ids"].to(device)

            output = model(src,tgt, teacher_forcing_ratio=0)
            output_dim = output.shape[-1]
            output = output[:, 1:].reshape(-1, output_dim)  # Reshape for token comparison
            tgt = tgt[:, 1:].reshape(-1)
            # Apply mask to remove padding tokens
            mask = tgt != 0  # Mask to ignore padding indices
            output = output[mask]  # Filter model outputs
            tgt = tgt[mask]  # Filter targets
            # Calculate loss
            loss = criterion(output, tgt)
            epoch_loss += loss.item()

            # Calculate accuracy
            predictions = output.argmax(dim=1)  # Get the index of the max log-probability
            correct_tokens += (predictions == tgt).sum().item()
            total_tokens += tgt.size(0)

    accuracy = correct_tokens / total_tokens if total_tokens > 0 else 0
    return epoch_loss / len(dataloader), accuracy

In [30]:
import torch.optim as optim
from tqdm import tqdm

criterion = nn.CrossEntropyLoss(ignore_index=0)  # Use for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)

# Training loop
for epoch in range(2):  # Number of epochs
    model.train()
    epoch_loss = 0
    total_batches = len(train_dataloader)
    progress_bar = tqdm(train_dataloader,desc="Training Progress", unit="batch", leave=True)
    for  batch_idx, batch in enumerate(progress_bar): # Assuming a DataLoader is used for batches
        src = batch["src_input_ids"].to(device)  # Input tokens
        tgt = batch["tgt_input_ids"].to(device)  # Target tokens

        optimizer.zero_grad()
        output = model(src,tgt)  # Forward pass


        # Flatten the model output
        output = output.view(-1, output_dim) 
        tgt = tgt.view(-1)
        
        # Apply mask to remove padding tokens
        mask = tgt != 0  # Mask to ignore padding indices
        output = output[mask]  # Filter model outputs
        tgt = tgt[mask]  # Filter targets
        
        loss = criterion(output, tgt)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()

        epoch_loss += loss.item()
        avg_loss = epoch_loss / (batch_idx + 1)
        progress_bar.set_description(f"Training Progress: Batch {batch_idx + 1}/{total_batches}, Avg Loss: {avg_loss:.4f}")
    val_loss, accuracy = evaluate_model_with_accuracy(model, test_dataloader, criterion, device)
    print(f"Epoch {epoch + 1}, Loss: {epoch_loss / len(train_dataloader):.4f}, Val Loss: {val_loss / len(test_dataloader):.4f}, Accuracy: {accuracy * 100:.4f}%")



Training Progress: Batch 504/3722, Avg Loss: 1.7514:  14%|█▎        | 504/3722 [03:09<20:09,  2.66batch/s]


KeyboardInterrupt: 

In [None]:
evaluate_model_with_accuracy(model, test_dataloader, criterion, device)

(0.6574204663435618, 0.9081516933840772)

### Saving the model

In [None]:
def save_model(model, path):
    torch.save(model.state_dict(), path)
    print(f"Model saved to {path}")

def load_model(model, path):
    model.load_state_dict(torch.load(path))
    print(f"Model loaded from {path}")
    return model

In [None]:
save_model(model, "../weights/transformer_lstm.pt")

Model saved to ../weights/transformer_lstm.pt


### Testing Real Output sequence

In [None]:
# model = BiLSTMModel(input_dim, embedding_dim, hidden_dim, output_dim, num_layers, dropout).to(device)
# model = load_model(model,"../weights/Bilstm.pt")
model.eval()
epoch_loss = 0
total_tokens = 0
correct_tokens = 0

with torch.no_grad():
    for batch in test_dataloader:
        src = batch["src_input_ids"].to(device)
        tgt = batch["tgt_input_ids"].to(device)

        output = model(src,tgt, teacher_forcing_ratio=0)
        output_dim = output.shape[-1]
        output = output[:, 1:].reshape(-1, output_dim)  # Reshape for token comparison
        tgt = tgt[:, 1:].reshape(-1)

        # Calculate loss
        loss = criterion(output, tgt)
        epoch_loss += loss.item()

        # Calculate accuracy
        predictions = output.argmax(dim=1)  # Get the index of the max log-probability
        break
len(predictions)

19136

In [None]:
tgt= test_dataset.__getitem__(0)["tgt_input_ids"].to(device)
tgt

tensor([  1,   3,   4,   3,   5,   3,   6, 105,   8,   3,   9,  21,   8,   3,
         15,   3,  16,  17,   8,   3,  13,  32,   8,   8,   3,  13,  25,   8,
          8,   3,   5,   3,   6, 105,   8,   3,   9,  21,   8,   3,  13,  57,
          8,   3,  13,  81,   8,   8,   3,   5,   3,   6, 104,   8,   3,   9,
         23,   8,   3,  13,  25,   8,   3,  13,  81,   8,   8,   8,   2,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  

In [None]:

sequence = predictions[:300].cpu().tolist()
pred_sequence= decode_sequence(sequence,vocab)
pred_sequence

'('

In [None]:
sequence = tgt.cpu().tolist()
tgt_sequence= decode_sequence(sequence,vocab)
tgt_sequence

'(ORDER (PIZZAORDER (NUMBER 2 ) (SIZE MEDIUM ) (COMPLEX_TOPPING (QUANTITY EXTRA ) (TOPPING CHEESE ) ) (TOPPING PEPPERONI ) ) (PIZZAORDER (NUMBER 2 ) (SIZE MEDIUM ) (TOPPING OLIVES ) (TOPPING SAUSAGE ) ) (PIZZAORDER (NUMBER 3 ) (SIZE LARGE ) (TOPPING PEPPERONI ) (TOPPING SAUSAGE ) ) ) (ORDER (PIZZAORDER (NUMBER 5 ) (SIZE MEDIUM ) (TOPPING HAM ) (TOPPING TOMATOES ) ) ) (ORDER (PIZZAORDER (NUMBER 1 ) (SIZE LARGE ) (STYLE VEGETARIAN ) (COMPLEX_TOPPING (QUANTITY EXTRA ) (TOPPING BANANA_PEPPERS ) ) ) ) (ORDER (PIZZAORDER (NUMBER 1 ) (SIZE LARGE ) (TOPPING ONIONS ) (TOPPING PEPPERS ) ) ) (ORDER (PIZZAORDER (NOT (TOPPING OLIVES ) ) (NUMBER 1 ) (TOPPING HAM ) (TOPPING PESTO ) ) ) (ORDER (DRINKORDER (DRINKTYPE COKE ) (NUMBER 6 ) (SIZE LARGE ) ) (PIZZAORDER (NUMBER 1 ) (SIZE LARGE ) (TOPPING BACON ) (TOPPING HAM ) (TOPPING OLIVES ) (TOPPING ONIONS ) ) (PIZZAORDER (NUMBER 1 ) (SIZE MEDIUM ) (TOPPING ONIONS ) (TOPPING SAUSAGE ) ) ) (ORDER (PIZZAORDER (NOT (TOPPING PINEAPPLE ) ) (NUMBER 1 ) (SIZE ME

In [None]:
pred_sequence == tgt_sequence

False

In [None]:
def evaluate_model_with_sequence_accuracy(model, dataloader, device):
    model.eval()
    epoch_loss = 0
    total_sequences = 0
    correct_sequences = 0

    with torch.no_grad():
        for batch in dataloader:
            src = batch["src_input_ids"].to(device)
            src_lengths = (batch["src_input_ids"] != 0).sum(dim=1).to(device)
            tgt = batch["tgt_input_ids"].to(device)

            output = model(src,tgt, teacher_forcing_ratio=0)  # No teacher forcing during evaluation
            output_dim = output.shape[-1]
            output = output.argmax(dim=-1)  # Get the predicted tokens

            # Decode sequences for comparison
            predicted_sequences = [decode_sequence(seq.cpu().tolist(), vocab) for seq in output]
            target_sequences = [decode_sequence(seq.cpu().tolist(), vocab) for seq in tgt]
            print(predicted_sequences)
            print(target_sequences)
            # Calculate sequence accuracy
            for pred, tgt in zip(predicted_sequences, target_sequences):
                if pred == tgt:
                    correct_sequences += 1
                total_sequences += 1

    sequence_accuracy = correct_sequences / total_sequences if total_sequences > 0 else 0
    return epoch_loss / len(dataloader), sequence_accuracy


In [None]:
evaluate_model_with_sequence_accuracy(model, test_dataloader, device)

['', '(ORDER (PIZZAORDER (NUMBER 1 ) ((PIZZAORDER ) (TOPPING () TOPPING ) (TOPPING ) ) ) (TOPPING ) ) ) (((TOPPING ) () (TOPPING ) ) (TOPPING ) ) ) (((TOPPING ) () (TOPPING ) ) (TOPPING ) ) ) )', '(ORDER (PIZZAORDER (NUMBER 1 ) ((PIZZAORDER ) (TOPPING () TOPPING ) (TOPPING ) ) ) (TOPPING ) ) ) (((TOPPING ) () (TOPPING ) ) (TOPPING ) ) ) (((TOPPING ) () (TOPPING ) ) (TOPPING ) ) ) )', '(ORDER (PIZZAORDER (NUMBER 1 ) ((PIZZAORDER ) (TOPPING () TOPPING ) (TOPPING ) ) ) (TOPPING ) ) ) (((TOPPING ) () (TOPPING ) ) (TOPPING ) ) ) (((TOPPING ) () (TOPPING ) ) (TOPPING ) ) ) )', '(ORDER (PIZZAORDER (NUMBER 1 ) ((PIZZAORDER ) (TOPPING () TOPPING ) (TOPPING ) ) ) (TOPPING ) ) ) (((TOPPING ) () (TOPPING ) ) (TOPPING ) ) ) (((TOPPING ) () (TOPPING ) ) (TOPPING ) ) ) )', '(ORDER (PIZZAORDER (NUMBER 1 ) ((PIZZAORDER ) (TOPPING () TOPPING ) (TOPPING ) ) ) (TOPPING ) ) ) (((TOPPING ) () (TOPPING ) ) (TOPPING ) ) ) (((TOPPING ) () (TOPPING ) ) (TOPPING ) ) ) )', '(ORDER (PIZZAORDER (NUMBER 1 ) (TOPPING

(0.0, 0.0)