In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BiLSTMModel(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim, num_layers=3, dropout=0.5):
        super(BiLSTMModel, self).__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)#nn.Linear(input_dim, embedding_dim) # 
        
        # Bidirectional LSTM layers
        self.bilstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            bidirectional=True,
            batch_first=True,
            dropout=dropout,
        )
        
        # Fully connected layers
        self.fc1 = nn.Linear(hidden_dim * 2, 512)
        self.fc2 = nn.Linear(512, 512)
        self.fc_out = nn.Linear(512, output_dim)

        # Dropout layer
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # Embedding layer
        embedded = self.embedding(x)  # (batch_size, seq_len, embedding_dim)

        # BiLSTM layers
        lstm_out, _ = self.bilstm(embedded)  # (batch_size, seq_len, hidden_dim * 2)

        # TimeDistributed fully connected layers
        output = self.fc1(lstm_out)  # (batch_size, seq_len, 512)
        output = F.relu(output)
        output = self.dropout(output)

        output = self.fc2(output)  # (batch_size, seq_len, 512)
        output = F.relu(output)
        output = self.dropout(output)

        output = self.fc_out(output)  # (batch_size, seq_len, output_dim)
        return F.log_softmax(output, dim=-1)



In [2]:
import pandas as pd
train_path = '../dataset/PIZZA_train.json'
test_path = '../dataset/PIZZA_dev.json'
df = pd.read_json(train_path, lines=True)
dev = pd.read_json(test_path, lines=True)

In [3]:
unique_patterns = df['train.EXR'].value_counts()
unique_patterns.describe()
# Subset the DataFrame for rows with unique values
unique_related_dataset = df[df['train.EXR'].isin(unique_patterns.index[:5000])] # reduce to 5000 patterns

# Describe the resulting dataset
unique_related_dataset.describe()

Unnamed: 0,train.SRC,train.EXR,train.TOP,train.TOP-DECOUPLED
count,720617,720617,720617,720617
unique,720617,5000,720617,136030
top,can i have a large bbq pulled pork,(ORDER (PIZZAORDER (NUMBER 1 ) (SIZE PARTY_SIZ...,(ORDER can i have (PIZZAORDER (NUMBER a ) (SIZ...,(ORDER (PIZZAORDER (NUMBER three ) (NOT (TOPPI...
freq,1,1999,1,167


In [4]:
unique_related_dataset =unique_related_dataset.reset_index(drop=True)
unique_related_dataset

Unnamed: 0,train.SRC,train.EXR,train.TOP,train.TOP-DECOUPLED
0,can i have a large bbq pulled pork,(ORDER (PIZZAORDER (NUMBER 1 ) (SIZE LARGE ) (...,(ORDER can i have (PIZZAORDER (NUMBER a ) (SIZ...,(ORDER (PIZZAORDER (NUMBER a ) (SIZE large ) (...
1,party size stuffed crust pie with american che...,(ORDER (PIZZAORDER (NUMBER 1 ) (SIZE PARTY_SIZ...,(ORDER (PIZZAORDER (SIZE party size ) (STYLE s...,(ORDER (PIZZAORDER (SIZE party size ) (STYLE s...
2,can i have one personal sized artichoke,(ORDER (PIZZAORDER (NUMBER 1 ) (SIZE PERSONAL_...,(ORDER can i have (PIZZAORDER (NUMBER one ) (S...,(ORDER (PIZZAORDER (NUMBER one ) (SIZE persona...
3,pie with banana pepper and peppperonis and ext...,(ORDER (PIZZAORDER (NUMBER 1 ) (TOPPING BANANA...,(ORDER (PIZZAORDER pie with (TOPPING banana pe...,(ORDER (PIZZAORDER (TOPPING banana pepper ) (T...
4,i want one regular pizza without any fried onions,(ORDER (PIZZAORDER (NUMBER 1 ) (SIZE REGULARSI...,(ORDER i want (PIZZAORDER (NUMBER one ) (SIZE ...,(ORDER (PIZZAORDER (NUMBER one ) (SIZE regular...
...,...,...,...,...
720612,i'd like a party - size pizza with chicken pep...,(ORDER (PIZZAORDER (NUMBER 1 ) (SIZE PARTY_SIZ...,(ORDER i'd like (PIZZAORDER (NUMBER a ) (SIZE ...,(ORDER (PIZZAORDER (NUMBER a ) (SIZE party - s...
720613,i'd like a party sized pizza with green olive ...,(ORDER (PIZZAORDER (NUMBER 1 ) (SIZE PARTY_SIZ...,(ORDER i'd like (PIZZAORDER (NUMBER a ) (SIZE ...,(ORDER (PIZZAORDER (NUMBER a ) (SIZE party siz...
720614,i'd like a personal - sized pizza with green o...,(ORDER (PIZZAORDER (NUMBER 1 ) (SIZE PERSONAL_...,(ORDER i'd like (PIZZAORDER (NUMBER a ) (SIZE ...,(ORDER (PIZZAORDER (NUMBER a ) (SIZE personal ...
720615,i'd like a party - size pizza with green olive...,(ORDER (PIZZAORDER (NUMBER 1 ) (SIZE PARTY_SIZ...,(ORDER i'd like (PIZZAORDER (NUMBER a ) (SIZE ...,(ORDER (PIZZAORDER (NUMBER a ) (SIZE party - s...


In [5]:
X_train = unique_related_dataset['train.SRC']
y_train = unique_related_dataset['train.EXR']
X_test = dev['dev.SRC']
y_test = dev['dev.EXR']
print(len(unique_related_dataset))
print(X_train[476368])
print(y_train[476368])
print(dev['dev.SRC'][0])

720617
i'd like four party size pizzas no green olive
(ORDER (PIZZAORDER (NUMBER 4 ) (SIZE PARTY_SIZE ) (NOT (TOPPING GREEN_OLIVES ) ) ) )
i want to order two medium pizzas with sausage and black olives and two medium pizzas with pepperoni and extra cheese and three large pizzas with pepperoni and sausage


In [6]:
import sys
sys.path.append("..")
from utils.data_preprocessing import preprocess_text
from utils.feature_extraction import bag_of_words, tfidf_features, extract_embeddings
X_train = [" ".join(preprocess_text(text)) for text in X_train]
X_test = [" ".join(preprocess_text(text)) for text in X_test]

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Hima\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [7]:
X_train

['large bbq pulled pork',
 'party size stuffed crust pie american cheese mushroom',
 'one personal sized artichoke',
 'pie banana pepper peppperonis low fat cheese',
 'want one regular pizza any fried onion',
 'want stuffed crust pizza american cheese little bit peperonni',
 'one party sized high rise dough pizza american cheese lot peperonni',
 'one pie banana pepper peperonis yellow pepper',
 'd like one party sized pie american cheese pesto sauce',
 'one high rise dough pie american cheese lot meatball',
 'party sized pie any bean',
 'party sized big meat pizza american cheese mushroom',
 'd like party sized high rise dough pie lot banana pepper pecoricheese',
 'one party size high rise dough pie green olive',
 'd like party sized stuffed crust pie banana pepper pesto',
 'd like lunch sized pie alfredo chicken',
 'want one personal sized pizza any bean',
 'one party size mediterranean pie',
 'd like one lunch sized pizza any caramelized red onion',
 'd like one large stuffed crust p

In [8]:
max_str_1 = len(max(X_train, key=len))
max_str_2 = len(y_train[y_train.str.len().idxmax()])
max_str_1, max_str_2

(99, 265)

In [9]:
import re
from tensorflow.keras.preprocessing.sequence import pad_sequences

def tokenize_output(output):
    """
    Tokenizes the structured output into meaningful tokens.
    Example:
        Input: "(ORDER (PIZZAORDER (NUMBER a ) (SIZE large ) (TOPPING bbq pulled pork ) ) )"
        Output: ["(ORDER", "(PIZZAORDER", "(NUMBER", "a", "(SIZE", "large", "(TOPPING", "bbq", "pulled", "pork", ")", ")", ")", ")"]
    """
    tokens = re.findall(r"\(|\)|\w+|[^\s()]+", output)
    return tokens

def build_vocab(outputs):
    """
    Builds a vocabulary from tokenized outputs.
    """
    vocab = {"<PAD>": 0, "<SOS>": 1, "<EOS>": 2}  # Special tokens
    i = 2
    for output in outputs:
        tokens = tokenize_output(output)
        for token in tokens:
            if token not in vocab:
                vocab[token] = i
                i += 1
    return vocab
def encode_outputs(outputs, vocab):
    """
    Encodes tokenized outputs into sequences of integers.
    """
    encoded = []
    for output in outputs:
        tokens = tokenize_output(output)
        sequence = [vocab["<SOS>"]] + [vocab[token] for token in tokens if token in vocab] + [vocab["<EOS>"]]
        encoded.append(sequence)
    return encoded

def pad_sequences_to_fixed_length(sequences, max_len):
    """
    Pads sequences to a fixed length.
    """
    return pad_sequences(sequences, maxlen=max_len, padding="post", value=0)

def decode_sequence(sequence, vocab):
    """
    Decodes a sequence of integers back into the structured output string.
    """
    inv_vocab = {v: k for k, v in vocab.items()}  # Reverse the vocabulary
    tokens = [inv_vocab[idx] for idx in sequence if idx > 0]  # Ignore <PAD> tokens
    return " ".join(tokens)


In [10]:
# import gensim.downloader as api
# glove_vectors = api.load("glove-wiki-gigaword-100")  # 100-dimension GloVe
# glove_vectors

In [11]:


def prepare_data(
    X_train, y_train, X_test, y_test, feature_type="bow", glove_vectors=None, max_len_1=20, max_len_2 = 20
):

    vectorizer = None

    # # Feature Extraction for X_train and X_test
    # if feature_type == "bow":
    #     X_train_processed, vectorizer = bag_of_words(X_train)
    #     X_train_processed = X_train_processed.toarray()
    #     X_test_processed = vectorizer.transform(X_test).toarray()
    # elif feature_type == "tfidf":
    #     X_train_processed, vectorizer = tfidf_features(X_train)
    #     X_train_processed = X_train_processed.toarray()
    #     X_test_processed = vectorizer.transform(X_test).toarray()
    # elif feature_type == "embeddings":
    #     if not glove_vectors:
    #         raise ValueError("GloVe vectors must be provided for embeddings.")
    #     X_train_tokenized = [sentence.split() for sentence in X_train]
    #     X_test_tokenized = [sentence.split() for sentence in X_test]
    #     X_train_processed = extract_embeddings(X_train_tokenized)
    #     X_test_processed = extract_embeddings(X_test_tokenized)
    # else:
    #     raise ValueError("Invalid feature type. Choose 'bow', 'tfidf', or 'embeddings'.")
    
    X_vocab = build_vocab(X_train)  # Build vocabulary from training outputs
    X_train_encoded = encode_outputs(X_train, X_vocab)  # Encode training outputs
    X_test_encoded = encode_outputs(X_test, X_vocab)  # Encode testing outputs
    X_train_processed = pad_sequences_to_fixed_length(X_train_encoded, max_len_1)
    X_test_processed = pad_sequences_to_fixed_length(X_test_encoded, max_len_1)

    vocab = build_vocab(y_train)  # Build vocabulary from training outputs
    y_train_encoded = encode_outputs(y_train, vocab)  # Encode training outputs
    y_test_encoded = encode_outputs(y_test, vocab)  # Encode testing outputs
    y_train_processed = pad_sequences_to_fixed_length(y_train_encoded, max_len_2)
    y_test_processed = pad_sequences_to_fixed_length(y_test_encoded, max_len_2)


    return (
        X_train_processed,
        X_test_processed,
        y_train_processed,
        y_test_processed,
        X_vocab,
        vocab,  # Return vocabulary for decoding
    )


In [12]:
X_train_processed, X_test_processed, y_train_processed, y_test_processed, X_vocab, vocab = prepare_data( X_train, y_train, X_test, y_test, feature_type="embeddings", max_len_1=300, max_len_2=300)

In [13]:
X_vocab

{'<PAD>': 0,
 '<SOS>': 1,
 '<EOS>': 2,
 'large': 2,
 'bbq': 3,
 'pulled': 4,
 'pork': 5,
 'party': 6,
 'size': 7,
 'stuffed': 8,
 'crust': 9,
 'pie': 10,
 'american': 11,
 'cheese': 12,
 'mushroom': 13,
 'one': 14,
 'personal': 15,
 'sized': 16,
 'artichoke': 17,
 'banana': 18,
 'pepper': 19,
 'peppperonis': 20,
 'low': 21,
 'fat': 22,
 'want': 23,
 'regular': 24,
 'pizza': 25,
 'any': 26,
 'fried': 27,
 'onion': 28,
 'little': 29,
 'bit': 30,
 'peperonni': 31,
 'high': 32,
 'rise': 33,
 'dough': 34,
 'lot': 35,
 'peperonis': 36,
 'yellow': 37,
 'd': 38,
 'like': 39,
 'pesto': 40,
 'sauce': 41,
 'meatball': 42,
 'bean': 43,
 'big': 44,
 'meat': 45,
 'pecoricheese': 46,
 'green': 47,
 'olive': 48,
 'lunch': 49,
 'alfredo': 50,
 'chicken': 51,
 'mediterranean': 52,
 'caramelized': 53,
 'red': 54,
 'apple': 55,
 'wood': 56,
 'bacon': 57,
 'combination': 58,
 'mozzarella': 59,
 'chorrizo': 60,
 'peperroni': 61,
 'basil': 62,
 'feta': 63,
 'medium': 64,
 'need': 65,
 'cumin': 66,
 'peperron

In [14]:
X_train_processed.shape, y_train_processed.shape

((720617, 300), (720617, 300))

In [15]:
X_train_processed[1]

array([ 1,  6,  7,  8,  9, 10, 11, 12, 13,  2,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0

In [16]:
y_train_processed[1]

array([ 1,  2,  3,  2,  4,  2,  5,  6,  7,  2,  8, 12,  7,  2, 13, 14,  7,
        2, 10, 15,  7,  2, 10, 16,  7,  7,  7,  2,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0

In [17]:
len(vocab)

158

In [18]:
input_dim = X_train_processed.shape[1]
input_dim

300

In [19]:
output_dim = y_train_processed.shape[1] 
output_dim

300

In [20]:
input_dim = X_train_processed.shape[1]  # Vocabulary size
embedding_dim = 300  # Dimension of embedding vectors    vocabs  260,158       max seq lens  (99, 265)
hidden_dim = 256  # Hidden state size for LSTM
output_dim = y_train_processed.shape[1]  # Number of output classes
num_layers = 3  # Number of BiLSTM layers
dropout = 0.5  # Dropout probability

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BiLSTMModel(input_dim, embedding_dim, hidden_dim, output_dim, num_layers, dropout).to(device)


In [21]:
import torch
from torch.utils.data import Dataset, DataLoader

class SequenceDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = torch.tensor(inputs, dtype=torch.long) 
        self.targets = torch.tensor(targets, dtype=torch.long) 

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return {
            "src_input_ids": self.inputs[idx],
            "tgt_input_ids": self.targets[idx],
        }

train_dataset = SequenceDataset(X_train_processed, y_train_processed)
test_dataset = SequenceDataset(X_test_processed, y_test_processed)

batch_size = 64  # Adjust based GPU ;-;  memory
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [22]:
train_dataset.__getitem__(3)

{'src_input_ids': tensor([ 1, 10, 18, 19, 20, 21, 22, 12,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0

In [23]:
def evaluate_model_with_accuracy(model, dataloader, criterion, device):
    model.eval()
    epoch_loss = 0
    total_tokens = 0
    correct_tokens = 0

    with torch.no_grad():
        for batch in dataloader:
            src = batch["src_input_ids"].to(device)
            tgt = batch["tgt_input_ids"].to(device)

            output = model(src)
            output_dim = output.shape[-1]
            output = output[:, 1:].reshape(-1, output_dim)  # Reshape for token comparison
            tgt = tgt[:, 1:].reshape(-1)

            # Calculate loss
            loss = criterion(output, tgt)
            epoch_loss += loss.item()

            # Calculate accuracy
            predictions = output.argmax(dim=1)  # Get the index of the max log-probability
            correct_tokens += (predictions == tgt).sum().item()
            total_tokens += tgt.size(0)

    accuracy = correct_tokens / total_tokens if total_tokens > 0 else 0
    return epoch_loss / len(dataloader), accuracy

In [24]:
import torch.optim as optim
from tqdm import tqdm

criterion = nn.CrossEntropyLoss()  # Use for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(1):  # Number of epochs
    model.train()
    epoch_loss = 0
    total_batches = len(train_dataloader)
    progress_bar = tqdm(train_dataloader,desc="Training Progress", unit="batch", leave=True)
    for  batch_idx, batch in enumerate(progress_bar): # Assuming a DataLoader is used for batches
        src = batch["src_input_ids"].to(device)  # Input tokens
        tgt = batch["tgt_input_ids"].to(device)  # Target tokens

        optimizer.zero_grad()
        output = model(src)  # Forward pass


        # Flatten the model output
        output = output.view(-1, output_dim) 
        tgt = tgt.view(-1)

        loss = criterion(output, tgt)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        avg_loss = epoch_loss / (batch_idx + 1)
        progress_bar.set_description(f"Training Progress: Batch {batch_idx + 1}/{total_batches}, Avg Loss: {avg_loss:.4f}")
    val_loss, accuracy = evaluate_model_with_accuracy(model, test_dataloader, criterion, device)
    print(f"Epoch {epoch + 1}, Loss: {epoch_loss / len(train_dataloader):.4f}, Val Loss: {val_loss / len(test_dataloader):.4f}, Accuracy: {accuracy * 100:.4f}%")




Training Progress: Batch 6659/11260, Avg Loss: 0.0519:  59%|█████▉    | 6659/11260 [21:06<14:35,  5.26batch/s]


KeyboardInterrupt: 

In [25]:
def save_model(model, path):
    torch.save(model.state_dict(), path)
    print(f"Model saved to {path}")

def load_model(model, path):
    model.load_state_dict(torch.load(path))
    print(f"Model loaded from {path}")
    return model

In [27]:
save_model(model, "../weights/Bilstm.pt")

Model saved to ../weights/Bilstm.pt


In [28]:
model_2 = BiLSTMModel(input_dim, embedding_dim, hidden_dim, output_dim, num_layers, dropout).to(device)
model_2 = load_model(model_2,"../weights/Bilstm.pt")
with torch.no_grad():
    output = model(train_dataset.__getitem__(1)["src_input_ids"].to(device))
    predictions = output.argmax(dim=1)
predictions

Model loaded from ../weights/Bilstm.pt


tensor([ 1,  2,  3,  2,  4,  2,  5,  6,  7,  2,  8, 12,  7,  2, 13, 14,  7,  2,
        10, 15,  7,  2, 10,  2,  7, 15,  7,  2,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0, 

In [None]:
tgt= train_dataset.__getitem__(1)["tgt_input_ids"].to(device)
tgt

tensor([ 1,  2,  3,  2,  4,  2,  5,  6,  7,  2,  8, 12,  7,  2, 13, 14,  7,  2,
        10, 15,  7,  2, 10, 16,  7,  7,  7,  2,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0, 

In [46]:

def decode_sequence(sequence, vocab):
    """
    Decodes a sequence of integers back into the structured output string.
    """
    inv_vocab = {v: k for k, v in vocab.items()}  # Reverse the vocabulary
    sequence = sequence.cpu().tolist()  # Convert tensor to a list of integers
    tokens = [inv_vocab.get(idx, "") for idx in sequence if idx > 0]  # Ignore <PAD> tokens (0)
    return " ".join(tokens)

ou= decode_sequence(predictions,vocab)
ou

'<SOS> ( ORDER ( PIZZAORDER ( NUMBER 1 ) ( SIZE PARTY_SIZE ) ( STYLE STUFFED_CRUST ) ( TOPPING AMERICAN_CHEESE ) ( TOPPING ( ) AMERICAN_CHEESE ) ('

In [None]:

ou= decode_sequence(tgt[1],vocab)
ou

'<SOS> ( ORDER ( PIZZAORDER ( NUMBER 1 ) ( SIZE PARTY_SIZE ) ( STYLE VEGETARIAN ) ( TOPPING AMERICAN_CHEESE ) ( TOPPING PECORINO_CHEESE ) ) ) ('

In [None]:
model.eval()
with torch.no_grad():
    sample_input = torch.tensor([[1, 2, 3, 4, 5]], dtype=torch.long).to(device)  # Example input
    output = model(sample_input)
    predicted_classes = torch.argmax(output, dim=-1)  # Get class with highest probability
    print(predicted_classes)
