In [21]:
import torch
import pickle

In [22]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    loading finishes instead of being left open until garbage collection.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted artifacts.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


vocab = _load_pickle('live/vocab.pkl')

tags_ct = _load_pickle('live/tags_complex_topping.pkl')   # Complex Topping tags
tags_pz = _load_pickle('live/tags_pizza_orders.pkl')      # Pizza orders tags
tags_dr = _load_pickle('live/tags_drink_orders.pkl')      # Drink orders tags
tags_ob = _load_pickle('live/tags_orders.pkl')            # Order boundary tags

print('Vocab size:', len(vocab))

Vocab size: 461


In [23]:
# Show the four tag -> index mappings, one per line.
print(tags_ct, tags_pz, tags_dr, tags_ob, sep='\n')

{'QUANTITY': 0, 'TOPPING': 1, 'TOPPING_S': 2, 'QUANTITY_S': 3, 'NONE': 4}
{'TOPPING_S': 0, 'COMPLEX_TOPPING_S': 1, 'NOT_STYLE_S': 2, 'STYLE': 3, 'NUMBER': 4, 'TOPPING': 5, 'SIZE': 6, 'NONE': 7, 'NOT_TOPPING': 8, 'NOT_TOPPING_S': 9, 'NUMBER_S': 10, 'NOT_COMPLEX_TOPPING_S': 11, 'STYLE_S': 12, 'NOT_COMPLEX_TOPPING': 13, 'COMPLEX_TOPPING': 14, 'SIZE_S': 15, 'NOT_STYLE': 16}
{'SIZE': 0, 'CONTAINERTYPE': 1, 'DRINKTYPE': 2, 'DRINKTYPE_S': 3, 'NONE': 4, 'CONTAINERTYPE_S': 5, 'VOLUME': 6, 'NUMBER': 7, 'VOLUME_S': 8, 'SIZE_S': 9, 'NUMBER_S': 10}
{'DRINKORDER': 0, 'DRINKORDER_S': 1, 'PIZZAORDER': 2, 'PIZZAORDER_S': 3, 'NONE': 4}


In [26]:
def tags_inverse(tags):
    """Return a new dict mapping each value of ``tags`` back to its key.

    When several keys share a value, the key that appears last in ``tags``
    wins.
    """
    return {index: name for name, index in tags.items()}

# Index -> tag lookups, used to decode the argmax class ids of each model.
inv_tags_ct, inv_tags_pz, inv_tags_dr, inv_tags_ob = map(
    tags_inverse, (tags_ct, tags_pz, tags_dr, tags_ob)
)

print(inv_tags_ct, inv_tags_pz, inv_tags_dr, inv_tags_ob, sep='\n')

# When True, the spelling-correction helpers do not print what they changed.
silent_correction = True

{0: 'QUANTITY', 1: 'TOPPING', 2: 'TOPPING_S', 3: 'QUANTITY_S', 4: 'NONE'}
{0: 'TOPPING_S', 1: 'COMPLEX_TOPPING_S', 2: 'NOT_STYLE_S', 3: 'STYLE', 4: 'NUMBER', 5: 'TOPPING', 6: 'SIZE', 7: 'NONE', 8: 'NOT_TOPPING', 9: 'NOT_TOPPING_S', 10: 'NUMBER_S', 11: 'NOT_COMPLEX_TOPPING_S', 12: 'STYLE_S', 13: 'NOT_COMPLEX_TOPPING', 14: 'COMPLEX_TOPPING', 15: 'SIZE_S', 16: 'NOT_STYLE'}
{0: 'SIZE', 1: 'CONTAINERTYPE', 2: 'DRINKTYPE', 3: 'DRINKTYPE_S', 4: 'NONE', 5: 'CONTAINERTYPE_S', 6: 'VOLUME', 7: 'NUMBER', 8: 'VOLUME_S', 9: 'SIZE_S', 10: 'NUMBER_S'}
{0: 'DRINKORDER', 1: 'DRINKORDER_S', 2: 'PIZZAORDER', 3: 'PIZZAORDER_S', 4: 'NONE'}


In [25]:
from utils_2 import tokenize, preprocess_tokens, project_tokens, check_in_vocab
from extra_sets import isPersonalPronoun, isNumber
import language_tool_python
from spellchecker import SpellChecker
import enchant

# Spelling back-ends used by the correction helpers defined in this cell:
#   spell_tool - LanguageTool (en-US); correct_token runs it on the whole sentence
#   spell      - pyspellchecker; correct_token uses it as a single-word fallback
#   dictionary - enchant en_US dictionary; suggest_by_insertion uses it for validity checks
spell_tool = language_tool_python.LanguageTool('en-US')
spell = SpellChecker()
dictionary = enchant.Dict("en_US")

def suggest_by_insertion(word):
    """Suggest dictionary words reachable from ``word`` by inserting one letter.

    If ``word`` is already accepted by the enchant dictionary and is either
    longer than one character, a personal pronoun, or a number, it is returned
    unchanged as a single-element list. Otherwise every lowercase ASCII letter
    is tried at every position (including both ends) and the candidates the
    dictionary accepts are returned as a list with no duplicates.
    """
    already_valid = dictionary.check(word)
    if already_valid and (len(word) > 1 or isPersonalPronoun(word) or isNumber(word)):
        return [word]

    letters = 'abcdefghijklmnopqrstuvwxyz'

    # Candidates are generated position by position, letter by letter; only
    # single insertions are attempted (no double insertion for badly
    # misspelled words).
    accepted = {
        candidate
        for position in range(len(word) + 1)
        for letter in letters
        for candidate in (word[:position] + letter + word[position:],)
        if dictionary.check(candidate)
    }
    return list(accepted)
    

def correct_token(token_index, tokens):
    """Return a spelling-corrected replacement for ``tokens[token_index]``.

    Strategies are tried in order, and the first one that yields a result wins:

    1. Single-letter insertion (``suggest_by_insertion``), accepted only if the
       suggestion is in the model vocabulary.
    2. LanguageTool run on the whole sentence, keeping spelling matches whose
       offset falls inside this token. If its first replacement is shorter
       than the token, a longer pyspellchecker correction is preferred.
    3. pyspellchecker on the token alone.
    4. The original token, unchanged.

    Args:
        token_index: position of the token to correct inside ``tokens``.
        tokens: all tokens of the sentence; joined with single spaces to form
            the text handed to LanguageTool.

    Returns:
        The corrected token, or the original token when nothing was found.
        Unless ``silent_correction`` is set, the chosen strategy is printed;
        "(lang)" means LanguageTool and "(spell)" means pyspellchecker.
    """
    token = tokens[token_index]

    # 1) Try fixing the word by inserting one letter first.
    for candidate in suggest_by_insertion(token):
        if check_in_vocab(candidate, vocab):
            if not silent_correction:
                print(f"fixing (insertion): {token} -> {candidate}")
            return candidate  # first in-vocab suggestion wins

    # 2) LanguageTool on the full sentence; keep spelling-rule matches only.
    matches = spell_tool.check(' '.join(tokens))
    spelling_matches = [m for m in matches if m.ruleId.startswith('MORFOLOGIK_')]

    # Character span of this token inside the space-joined sentence.
    current_word_start = len(' '.join(tokens[:token_index])) + (1 if token_index > 0 else 0)
    current_word_end = current_word_start + len(token)
    relevant_matches = [
        m for m in spelling_matches
        if current_word_start <= m.offset < current_word_end
    ]

    if relevant_matches and relevant_matches[0].replacements:
        corrected_token = relevant_matches[0].replacements[0]

        # Mistakes are mainly missing letters, so a replacement shorter than
        # the token is suspicious: prefer a longer pyspellchecker correction.
        if len(corrected_token) < len(token):
            fallback_correction = spell.correction(token)
            if fallback_correction and len(fallback_correction) > len(corrected_token):
                # This value comes from pyspellchecker, so label it "(spell)".
                if not silent_correction:
                    print(f"fixing (spell): {token} -> {fallback_correction}")
                return fallback_correction
        # This value comes from LanguageTool, so label it "(lang)".
        if not silent_correction:
            print(f"fixing (lang): {token} -> {corrected_token}")
        return corrected_token

    # 3) pyspellchecker when LanguageTool found nothing for this token.
    fallback_correction = spell.correction(token)
    if fallback_correction and fallback_correction != token:
        if not silent_correction:
            print(f"fixing (spell): {token} -> {fallback_correction}")
        return fallback_correction

    # 4) Nothing worked: return the original token.
    if not silent_correction:
        print(f"fixing (fail): {token} -> {token}")
    return token

def feed_model(model, query, inv_tags):
    """Tag every token of ``query`` with ``model`` and return the tag names.

    The query is tokenized, out-of-vocabulary tokens are spell-corrected, and
    the tokens are preprocessed and projected onto vocabulary ids before being
    passed through the model.

    Args:
        model: a ``torch.nn.Module`` tagger.
        query: raw text to tag.
        inv_tags: mapping from class index to tag name for ``model``.

    Returns:
        A list of tag names, one per model output position.
    """
    tokens = tokenize(query)
    # Only tokens missing from the vocabulary are sent through correction; the
    # comprehension reads the uncorrected list while building the new one.
    tokens = [
        f'{tok if check_in_vocab(tok, vocab) else correct_token(i, tokens)}'
        for i, tok in enumerate(tokens)
    ]
    if not silent_correction:
        print(tokens)

    token_ids = project_tokens(preprocess_tokens(tokens, 0), vocab)
    x_tensor = torch.tensor(token_ids)

    # Run on whatever device the model lives on instead of assuming "cuda:0",
    # so the same helper works on CPU-only hosts and other GPUs.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # The models in this notebook are built with dropout=0.5; dropout must be
    # off for deterministic predictions. No-op if already in eval mode.
    model.eval()

    with torch.no_grad():
        # Call the module itself rather than .forward so registered hooks run.
        output = model(x_tensor.to(device))
        output = torch.argmax(output, dim=-1).to("cpu")
        return [inv_tags[class_id.item()] for class_id in output]

In [27]:
# Spelling-correction smoke test: only out-of-vocabulary tokens are rewritten.
test = tokenize("f let me prefer a extra large sauteed spinach and tomatoes pizza without any sausage")
test = [
    f'{word if check_in_vocab(word, vocab) else correct_token(position, test)}'
    for position, word in enumerate(test)
]
print(test)

['of', 'let', 'me', 'prefer', 'a', 'extra', 'large', 'sauted', 'spinach', 'and', 'tomatoes', 'pizza', 'without', 'any', 'sausage']


In [28]:
from ner import NER
from model_io import load_model_state

# All four taggers share one architecture; only the number of output classes
# (the size of the matching tag set) differs.
_ner_kwargs = dict(embedding_dim=95, hidden_size=600, vocab_size=len(vocab), num_layers=2, dropout=0.5)

model_boundary = NER(n_classes=len(tags_ob), **_ner_kwargs)
model_pizza_order = NER(n_classes=len(tags_pz), **_ner_kwargs)
model_drink_order = NER(n_classes=len(tags_dr), **_ner_kwargs)
model_complex = NER(n_classes=len(tags_ct), **_ner_kwargs)

# Load the trained weights for each tagger onto the first CUDA device.
_cuda0 = torch.device("cuda:0")
load_model_state(model_boundary, "live/order_boundary_e95_h600_l2_d0.5_cv0_x99.4.pth", _cuda0)
load_model_state(model_pizza_order, "live/pizza_order_e95_h600_l2_d0.5_cv0_x98.6.pth", _cuda0)
load_model_state(model_drink_order, "live/drink_order_e95_h600_l2_d0.5_cv0_x1.pth", _cuda0)
load_model_state(model_complex, "live/complex_e95_h600_l2_d0.5_cv0_x1.pth", _cuda0)

  state_dict = torch.load(path, device)


NER(
  (embedding): Embedding(461, 95)
  (dropout): Dropout(p=0.5, inplace=False)
  (lstm): LSTM(95, 600, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
  (linear): Linear(in_features=1200, out_features=5, bias=True)
)

In [29]:
feed_model(model=model_complex, query="a lot of osama", inv_tags=inv_tags_ct)   # just a test :)

Word: "Osama" -> "osama" not in vocab
isPersonalPronoun: False
isNumber: False
isTopping: False
isQuantity: False


['QUANTITY_S', 'QUANTITY', 'QUANTITY', 'TOPPING_S']

In [30]:
def run_complex(order):
    """Render a complex-topping phrase as a bracketed string.

    Each word of ``order`` is tagged by the complex-topping model. A run that
    starts with ``TOPPING_S`` / ``QUANTITY_S`` and continues with the matching
    ``TOPPING`` / ``QUANTITY`` tags becomes ``"(TAG words ) "``. Every other
    word is copied through followed by a space.
    """
    words = [piece for piece in order.split(' ') if piece != '']
    tags = feed_model(model_complex, order, inv_tags_ct)

    # starter tag -> tag that continues the same entity
    continuation_of = {"TOPPING_S": "TOPPING", "QUANTITY_S": "QUANTITY"}

    pieces = []
    pos = 0
    count = len(tags)  # expected to equal len(words)
    while pos < count:
        label = continuation_of.get(tags[pos])
        if label is None:
            # Not the start of an entity: pass the word through.
            pieces.append(words[pos] + " ")
            pos += 1
            continue

        span = [words[pos]]
        pos += 1
        while pos < count and tags[pos] == label:
            span.append(words[pos])
            pos += 1
        pieces.append(f"({label} {' '.join(span)} ) ")

    return "".join(pieces)

def run_pizza_order(order):
    """Render one pizza-order span as a bracketed string.

    Each word of ``order`` is tagged by the pizza-order model, then:

    * ``TOPPING`` / ``STYLE`` / ``SIZE`` / ``NUMBER`` entities become
      ``"(TAG words ) "``; topping entities are emitted one word at a time.
    * ``NOT_*`` entities become ``"(NOT (TAG words ) ) "``.
    * complex toppings are passed to ``run_complex`` and wrapped in
      ``(COMPLEX_TOPPING ...)``, inside ``(NOT ...)`` when the start tag is
      negated.
    * every other word is copied through followed by a space.

    A complex-topping span ends at the next ``*_S`` start tag, so two adjacent
    complex toppings are emitted as two entities rather than merged into one.
    """
    words = [token for token in order.split(' ') if token != '']
    order_result = feed_model(model_pizza_order, order, inv_tags_pz)

    # starter tag -> label of the entity (also its continuation tag)
    NORMAL_TAGS = {"TOPPING_S": "TOPPING", "STYLE_S": "STYLE", "SIZE_S": "SIZE", "NUMBER_S": "NUMBER"}
    # Negated starters -> bare label; the continuation tag is "NOT_" + label.
    # The SIZE/NUMBER entries are not in the tag set and are kept defensively.
    NOT_TAGS = {"NOT_TOPPING_S": "TOPPING", "NOT_STYLE_S": "STYLE", "NOT_SIZE_S": "SIZE", "NOT_NUMBER_S": "NUMBER"}
    COMPLEX_STARTERS = ("COMPLEX_TOPPING_S", "NOT_COMPLEX_TOPPING_S")
    # Either polarity may continue a complex span, but never a new "_S" start.
    COMPLEX_CONT = ("COMPLEX_TOPPING", "NOT_COMPLEX_TOPPING")

    def read_span(start, continuation_tags):
        """Return (words of the entity starting at ``start``, index after it)."""
        end = start + 1
        while end < len(order_result) and order_result[end] in continuation_tags:
            end += 1
        return words[start:end], end

    result = ""
    index = 0
    while index < len(order_result):  # len(order_result) == len(words)
        current = order_result[index]

        if current in NORMAL_TAGS:
            tag = NORMAL_TAGS[current]
            content, index = read_span(index, (tag,))
            if tag == "TOPPING":
                # NOTE(review): multi-word toppings are split into one TOPPING
                # node per word; the evaluation output in this notebook shows
                # mismatches such as "feta cheese" caused by this choice.
                for c in content:
                    result += f"({tag} {c} ) "
            else:
                result += f"({tag} {' '.join(content)} ) "

        elif current in NOT_TAGS:
            tag = NOT_TAGS[current]
            content, index = read_span(index, ("NOT_" + tag,))
            result += f"(NOT ({tag} {' '.join(content)} ) ) "

        elif current in COMPLEX_STARTERS:
            negated = current.startswith("NOT_")
            content, index = read_span(index, COMPLEX_CONT)
            val = run_complex(' '.join(content))
            if negated:
                result += f"(NOT (COMPLEX_TOPPING {val}) ) "
            else:
                result += f"(COMPLEX_TOPPING {val}) "

        else:
            result += words[index] + " "
            index = index + 1

    return result

def run_drink_order(order):
    """Render one drink-order span as a bracketed string.

    Each word of ``order`` is tagged by the drink-order model. A run that
    starts with a ``*_S`` tag and continues with the matching tag becomes
    ``"(TAG words ) "``. Every other word is copied through followed by a
    space.
    """
    words = [piece for piece in order.split(' ') if piece != '']
    tags = feed_model(model_drink_order, order, inv_tags_dr)

    # starter tag -> tag that continues the same entity
    continuation_of = {
        "SIZE_S": "SIZE",
        "VOLUME_S": "VOLUME",
        "NUMBER_S": "NUMBER",
        "DRINKTYPE_S": "DRINKTYPE",
        "CONTAINERTYPE_S": "CONTAINERTYPE",
    }

    pieces = []
    pos = 0
    count = len(tags)  # expected to equal len(words)
    while pos < count:
        label = continuation_of.get(tags[pos])
        if label is None:
            pieces.append(words[pos] + " ")
            pos += 1
            continue

        span = [words[pos]]
        pos += 1
        while pos < count and tags[pos] == label:
            span.append(words[pos])
            pos += 1
        pieces.append(f"({label} {' '.join(span)} ) ")

    return "".join(pieces)

def run_order(order):
    """Split a query into pizza/drink order spans and render each one.

    The boundary model tags every word. A span that opens a pizza or drink
    order and is at least two words long is rendered by ``run_pizza_order`` /
    ``run_drink_order`` and wrapped in ``(PIZZAORDER ...)`` /
    ``(DRINKORDER ...)``. One-word spans and all other words are copied
    through followed by a space.
    """
    words = [piece for piece in order.split(' ') if piece != '']
    tags = feed_model(model_boundary, order, inv_tags_ob)

    rendered = ""
    pos = 0
    count = len(tags)  # expected to equal len(words)
    while pos < count:
        # NOTE: these are substring tests against the starter name, so a bare
        # 'PIZZAORDER' / 'DRINKORDER' tag also opens a span.
        if tags[pos] in 'PIZZAORDER_S':
            label, render_span = 'PIZZAORDER', run_pizza_order
        elif tags[pos] in 'DRINKORDER_S':
            label, render_span = 'DRINKORDER', run_drink_order
        else:
            rendered += words[pos] + " "
            pos += 1
            continue

        span = [words[pos]]
        pos += 1
        while pos < count and tags[pos] == label:
            span.append(words[pos])
            pos += 1

        span_text = ' '.join(span)
        if len(span) < 2:
            # An order must be at least two words; otherwise keep it as text.
            rendered += span_text + " "
        else:
            rendered += f"({label} {render_span(span_text)}) "
    return rendered
    
def run_query(query):
    """Lower-case ``query``, parse it with ``run_order`` and wrap it in ``(ORDER ...)``."""
    parsed = run_order(query.lower())
    return "(ORDER " + f"{parsed}" + ")"

In [31]:
# Smoke test of the full text pipeline on one short query.
result = run_query("we want some chicken pizza")
# NOTE: the `expected` string below is for a different, longer query (two pizzas
# plus a coke), so the comparison stays disabled.
# expected = "(ORDER id like to order (PIZZAORDER (NUMBER two ) (SIZE large ) (STYLE gluten-free crust ) (TOPPING pepperoni ) pizzas ) with (DRINKORDER (NUMBER a ) (SIZE large ) (DRINKTYPE coke ) ) )"
print(f"Model   : {result}")
# print(f"Expected: {expected}")

Model   : (ORDER we want some (PIZZAORDER (TOPPING chicken ) pizza ) )


In [13]:
import json

def process_file(filename, prediction_function, src_key='test.SRC', target_key='test.TOP'):
    """Score ``prediction_function`` by exact match on a JSON-lines file.

    Each non-blank line must be a JSON object holding the input text under
    ``src_key`` and the expected parse under ``target_key``. Mismatches are
    printed. Lines that are not valid JSON, or that lack one of the two
    fields, are reported and left out of the totals.

    Only parsing and field extraction are guarded. An exception raised by
    ``prediction_function`` itself (including ``KeyError``) propagates, so it
    is not misreported as a missing field or silently dropped from the score.

    Args:
        filename: path of the JSON-lines file (read as UTF-8).
        prediction_function: callable mapping the source text to a prediction.
        src_key: field holding the source text.
        target_key: field holding the expected output.

    Returns:
        ``(accuracy, correct, total)`` where ``accuracy`` is a percentage, or
        0 when no example was scored.
    """
    total = 0
    correct = 0

    with open(filename, 'r', encoding='utf-8') as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) are not data.
                continue

            try:
                data = json.loads(line)
                src = data[src_key]
                target = data[target_key]
            except json.JSONDecodeError:
                print(f"Warning: Skipping invalid JSON line: {line}")
                continue
            except KeyError as e:
                print(f"Warning: Missing required field: {e}")
                continue

            prediction = prediction_function(src)

            if prediction == target:
                correct += 1
            else:
                print(f"SRC     :  {src}")
                print(f"EXPECTED:  {target}")
                print(f"GOT     :  {prediction}")
            total += 1

    accuracy = (correct / total) * 100 if total > 0 else 0
    return accuracy, correct, total


In [11]:
# Exact-match evaluation of the text pipeline on the PIZZA test split.
accuracy, correct, total = process_file("PIZZA_test.json", run_query)

print(
    "Results:",
    f"Total examples: {total}",
    f"Correct predictions: {correct}",
    f"Accuracy: {accuracy:.2f}%",
    sep="\n",
)

SRC     :  one medium pizza with feta cheese and bacon
EXPECTED:  (ORDER (PIZZAORDER (NUMBER one ) (SIZE medium ) pizza with (TOPPING feta cheese ) and (TOPPING bacon ) ) )
GOT     :  (ORDER (PIZZAORDER (NUMBER one ) (SIZE medium ) pizza with (TOPPING feta ) (TOPPING cheese ) and (TOPPING bacon ) ) )
SRC     :  i need two medium pepperoni pizzas one small pizza with onions black olives and peppers and three large sprites
EXPECTED:  (ORDER i need (PIZZAORDER (NUMBER two ) (SIZE medium ) (TOPPING pepperoni ) pizzas ) (PIZZAORDER (NUMBER one ) (SIZE small ) pizza with (TOPPING onions ) (TOPPING black olives ) and (TOPPING peppers ) ) and (DRINKORDER (NUMBER three ) (SIZE large ) (DRINKTYPE sprites ) ) )
GOT     :  (ORDER i need (PIZZAORDER (NUMBER two ) (SIZE medium ) (TOPPING pepperoni ) pizzas ) (PIZZAORDER (NUMBER one ) (SIZE small ) pizza with (TOPPING onions ) (TOPPING black ) (TOPPING olives ) and (TOPPING peppers ) ) and (DRINKORDER (NUMBER three ) (SIZE large ) (DRINKTYPE sprites 

In [70]:
def run_complex_json(order):
    """Parse a complex-topping phrase into a dict.

    Each word of ``order`` is tagged by the complex-topping model and the
    tagging is printed. The returned dict holds the joined words of each
    entity under ``"QUANTITY"`` / ``"TOPPING"``; a later entity with the same
    tag overwrites an earlier one. Untagged words are dropped.
    """
    words = [piece for piece in order.split(' ') if piece != '']
    tags = feed_model(model_complex, order, inv_tags_ct)
    print(f"COMPLEX:\n\t IN -> {order}\n\t OUT -> {tags}")

    # starter tag -> tag that continues the same entity (also the output key)
    continuation_of = {"TOPPING_S": "TOPPING", "QUANTITY_S": "QUANTITY"}

    parsed = {}
    pos = 0
    count = len(tags)  # expected to equal len(words)
    while pos < count:
        label = continuation_of.get(tags[pos])
        if label is None:
            pos += 1
            continue

        span = [words[pos]]
        pos += 1
        while pos < count and tags[pos] == label:
            span.append(words[pos])
            pos += 1
        parsed[label] = ' '.join(span)

    return parsed

def run_pizza_order_json(order):
    """Parse one pizza-order span into a dict.

    Each word of ``order`` is tagged by the pizza-order model and the tagging
    is printed. The result has the shape::

        {"NUMBER": str | None, "SIZE": str | None,
         "STYLE": [{"NOT": bool, "TYPE": str}, ...],
         "AllTopping": [{"NOT": bool, "QUANTITY": str | None, "TOPPING": str}, ...]}

    An entity is its ``*_S`` start tag plus the words carrying exactly the
    matching continuation tag (``NOT_*`` for negated entities). The
    continuation test is an exact match, as in ``run_pizza_order``, so
    adjacent entities (two toppings in a row, or a topping followed by a
    complex topping) are no longer merged. Complex toppings are delegated to
    ``run_complex_json`` and kept only when both a quantity and a topping were
    found. Untagged words are dropped.
    """
    words = [token for token in order.split(' ') if token != '']
    order_result = feed_model(model_pizza_order, order, inv_tags_pz)
    result = {
        "NUMBER": None,
        "SIZE": None,
        "STYLE": [],
        "AllTopping": []
    }
    print(f"PIZZA:\n\t IN -> {order}\n\t OUT -> {order_result}")

    # starter tag -> output field (also the continuation tag)
    NORMAL_TAGS = {"TOPPING_S": "TOPPING", "STYLE_S": "STYLE", "SIZE_S": "SIZE", "NUMBER_S": "NUMBER"}
    # Negated starters -> output field; the continuation tag is "NOT_" + field.
    NOT_TAGS = {"NOT_TOPPING_S": "TOPPING", "NOT_STYLE_S": "STYLE"}
    COMPLEX_STARTERS = ("COMPLEX_TOPPING_S", "NOT_COMPLEX_TOPPING_S")
    # Either polarity may continue a complex span, but never a new "_S" start.
    COMPLEX_CONT = ("COMPLEX_TOPPING", "NOT_COMPLEX_TOPPING")

    def read_span(start, continuation_tags):
        """Return (words of the entity starting at ``start``, index after it)."""
        end = start + 1
        while end < len(order_result) and order_result[end] in continuation_tags:
            end += 1
        return words[start:end], end

    index = 0
    while index < len(order_result):  # len(order_result) == len(words)
        current = order_result[index]

        if current in NORMAL_TAGS or current in NOT_TAGS:
            negated = current in NOT_TAGS
            field = NOT_TAGS[current] if negated else NORMAL_TAGS[current]
            continuation = "NOT_" + field if negated else field
            content, index = read_span(index, (continuation,))
            text = ' '.join(content)

            if field == "TOPPING":
                result["AllTopping"].append({
                    "NOT": negated,
                    "QUANTITY": None,
                    "TOPPING": text
                })
            elif field == "STYLE":
                result["STYLE"].append({
                    "NOT": negated,
                    "TYPE": text,
                })
            else:
                # SIZE or NUMBER; these have no negated starter.
                result[field] = text

        elif current in COMPLEX_STARTERS:
            negated = current.startswith("NOT_")
            content, index = read_span(index, COMPLEX_CONT)
            val = run_complex_json(' '.join(content))
            # Keep the complex topping only if both parts were recognised.
            if "QUANTITY" in val and "TOPPING" in val:
                result["AllTopping"].append({
                    "NOT": negated,
                    "QUANTITY": val["QUANTITY"],
                    "TOPPING": val["TOPPING"]
                })

        else:
            index = index + 1

    return result

def run_drink_order_json(order):
    """Parse one drink-order span into a dict.

    Each word of ``order`` is tagged by the drink-order model and the tagging
    is printed. The returned dict holds the joined words of each entity under
    its tag name (``SIZE``, ``VOLUME``, ``NUMBER``, ``DRINKTYPE``,
    ``CONTAINERTYPE``); a later entity with the same tag overwrites an earlier
    one. Untagged words are dropped.
    """
    words = [piece for piece in order.split(' ') if piece != '']
    tags = feed_model(model_drink_order, order, inv_tags_dr)
    print(f"DRINK:\n\t IN -> {order}\n\t OUT -> {tags}")

    # starter tag -> tag that continues the same entity (also the output key)
    continuation_of = {
        "SIZE_S": "SIZE",
        "VOLUME_S": "VOLUME",
        "NUMBER_S": "NUMBER",
        "DRINKTYPE_S": "DRINKTYPE",
        "CONTAINERTYPE_S": "CONTAINERTYPE",
    }

    parsed = {}
    pos = 0
    count = len(tags)  # expected to equal len(words)
    while pos < count:
        label = continuation_of.get(tags[pos])
        if label is None:
            pos += 1
            continue

        span = [words[pos]]
        pos += 1
        while pos < count and tags[pos] == label:
            span.append(words[pos])
            pos += 1
        parsed[label] = ' '.join(span)

    return parsed

def run_order_json(order):
    """Split a query into pizza/drink order spans and parse each into a dict.

    The boundary model tags every word and the tagging is printed. Each pizza
    span of at least two words is parsed by ``run_pizza_order_json`` and each
    drink span of at least two words by ``run_drink_order_json``. Words
    outside any order are dropped.

    A span ends at the next start tag, matching ``run_order``, so two orders
    that directly follow each other are parsed separately.

    Returns:
        ``{"PIZZAORDER": [pizza dicts], "DRINKORDER": [drink dicts]}``
    """
    words = [token for token in order.split(' ') if token != '']
    order_result = feed_model(model_boundary, order, inv_tags_ob)
    print(f"ORDER:\n\t IN -> {order}\n\t OUT -> {order_result}")

    pizza_order = []
    drink_order = []

    def read_span(start, continuation_tag):
        """Return (words of the order starting at ``start``, index after it)."""
        end = start + 1
        while end < len(order_result) and order_result[end] == continuation_tag:
            end += 1
        return words[start:end], end

    index = 0
    while index < len(order_result):  # len(order_result) == len(words)
        current = order_result[index]

        # A bare continuation tag with no preceding "_S" also opens a span,
        # which is what the original substring test accepted.
        if current in ('PIZZAORDER_S', 'PIZZAORDER'):  # read a pizza order
            span, index = read_span(index, 'PIZZAORDER')
            if len(span) >= 2:  # an order must be at least two words
                pizza_order.append(run_pizza_order_json(' '.join(span)))

        elif current in ('DRINKORDER_S', 'DRINKORDER'):  # read a drink order
            span, index = read_span(index, 'DRINKORDER')
            if len(span) >= 2:
                # Drink spans go through the drink parser into the drink list.
                drink_order.append(run_drink_order_json(' '.join(span)))

        else:
            index = index + 1

    return {
        "PIZZAORDER": pizza_order,
        "DRINKORDER": drink_order,
    }
    
def run_query_json(query):
    """Lower-case ``query``, parse it with ``run_order_json`` and nest it under ``"ORDER"``."""
    lowered = query.lower()
    return dict(ORDER=run_order_json(lowered))

In [71]:
import json
from rich.console import Console
from rich.panel import Panel
from rich.columns import Columns

def BeautifulPrintJson(obj):
    """Print ``obj`` as 4-space-indented JSON inside a rich panel titled "JSON"."""
    framed = Panel(json.dumps(obj, indent=4), title="JSON", expand=True)
    Console().print(Columns([framed]))

def BeautifulCompareJson(obj1, obj2):
    """Print two objects as indented JSON in rich panels laid out as columns."""
    dumped = [json.dumps(candidate, indent=4) for candidate in (obj1, obj2)]
    panels = [
        Panel(text, title=title, expand=True)
        for text, title in zip(dumped, ("JSON 1", "JSON 2"))
    ]
    Console().print(Columns(panels))


In [34]:
import pandas as pd

# Cursor into `orders`, moved by the demo cells that follow.
index = 0

test_csv = pd.read_csv('tests/test_set_1.csv')
orders = test_csv['order']

In [74]:
# Parse the order under the cursor, show the JSON result, then advance.
order = orders[index]
print(order)
BeautifulPrintJson(run_query_json(order))
index += 1

i'llhave a capers and bacon pizza smal please and i want peppers on it too
Word: "ll have" -> "ll have" not in vocab
isPersonalPronoun: False
isNumber: False
isTopping: False
isQuantity: False
ORDER:
	 IN -> i'llhave a capers and bacon pizza smal please and i want peppers on it too
	 OUT -> ['NONE', 'PIZZAORDER_S', 'PIZZAORDER', 'PIZZAORDER', 'PIZZAORDER', 'PIZZAORDER', 'PIZZAORDER', 'PIZZAORDER', 'PIZZAORDER', 'PIZZAORDER', 'PIZZAORDER', 'PIZZAORDER', 'PIZZAORDER', 'PIZZAORDER', 'PIZZAORDER']
PIZZA:
	 IN -> a capers and bacon pizza smal please and i want peppers on it too
	 OUT -> ['NUMBER_S', 'TOPPING_S', 'NONE', 'TOPPING_S', 'NONE', 'SIZE_S', 'NONE', 'NONE', 'NONE', 'NONE', 'TOPPING_S', 'NONE', 'NONE', 'NONE']


In [72]:
# Move the cursor back one row, undoing the advance made by the demo cell.
index -= 1