## 05 - Name and food type extractor
In this notebook, we will find and extract the names (hotel names, restaurant names) and food type (asian/chinese, etc) from the text

In [1]:
import pandas as pd

In [2]:
# read train data
train_file = "./data/train.csv"
train_df = pd.read_csv(train_file)

train_df.head()

Unnamed: 0,text,answer,intent,slots
0,"Guten Tag, I am staying overnight in Cambridge...","['find_hotel', 'hotel-area=centre', 'hotel-int...",find_hotel,"{'hotel-area': 'centre', 'hotel-internet': 'ye..."
1,Hi there! Can you give me some info on Cityroomz?,"['find_hotel', 'hotel-name=cityroomz']",find_hotel,{'hotel-name': 'cityroomz'}
2,I am looking for a hotel named alyesbray lodge...,"['find_hotel', 'hotel-name=alyesbray lodge gue...",find_hotel,{'hotel-name': 'alyesbray lodge guest house'}
3,I am looking for a restaurant. I would like so...,"['find_restaurant', 'restaurant-food=chinese',...",find_restaurant,"{'restaurant-food': 'chinese', 'restaurant-pri..."
4,I'm looking for an expensive restaurant in the...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-pric..."


In [3]:
# pre-processing and covert slots to json objects
train_df["text_lower"] = train_df["text"].str.lower()
# convert slots to json objects
train_df["slots"] = train_df["slots"].apply(lambda s: eval(s))
train_df.head()

Unnamed: 0,text,answer,intent,slots,text_lower
0,"Guten Tag, I am staying overnight in Cambridge...","['find_hotel', 'hotel-area=centre', 'hotel-int...",find_hotel,"{'hotel-area': 'centre', 'hotel-internet': 'ye...","guten tag, i am staying overnight in cambridge..."
1,Hi there! Can you give me some info on Cityroomz?,"['find_hotel', 'hotel-name=cityroomz']",find_hotel,{'hotel-name': 'cityroomz'},hi there! can you give me some info on cityroomz?
2,I am looking for a hotel named alyesbray lodge...,"['find_hotel', 'hotel-name=alyesbray lodge gue...",find_hotel,{'hotel-name': 'alyesbray lodge guest house'},i am looking for a hotel named alyesbray lodge...
3,I am looking for a restaurant. I would like so...,"['find_restaurant', 'restaurant-food=chinese',...",find_restaurant,"{'restaurant-food': 'chinese', 'restaurant-pri...",i am looking for a restaurant. i would like so...
4,I'm looking for an expensive restaurant in the...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-pric...",i'm looking for an expensive restaurant in the...


In [4]:
# filter by names
kw = 'name='
names_df = train_df.query("answer.str.contains(@kw)")
print(len(names_df))
names_df.reset_index(inplace=True)
names_df.drop(columns=["index"], inplace=True)
names_df.head()

556


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  names_df.drop(columns=["index"], inplace=True)


Unnamed: 0,text,answer,intent,slots,text_lower
0,Hi there! Can you give me some info on Cityroomz?,"['find_hotel', 'hotel-name=cityroomz']",find_hotel,{'hotel-name': 'cityroomz'},hi there! can you give me some info on cityroomz?
1,I am looking for a hotel named alyesbray lodge...,"['find_hotel', 'hotel-name=alyesbray lodge gue...",find_hotel,{'hotel-name': 'alyesbray lodge guest house'},i am looking for a hotel named alyesbray lodge...
2,I am looking for a particular restaurant. Its ...,"['find_restaurant', 'restaurant-name=traveller...",find_restaurant,{'restaurant-name': 'travellers rest'},i am looking for a particular restaurant. its ...
3,"Hi, what can you tell me about the bangkok cit...","['find_restaurant', 'restaurant-name=bangkok c...",find_restaurant,{'restaurant-name': 'bangkok city'},"hi, what can you tell me about the bangkok cit..."
4,I'm looking for a particular restaurant called...,"['find_restaurant', 'restaurant-name=ask']",find_restaurant,{'restaurant-name': 'ask'},i'm looking for a particular restaurant called...


In [5]:
# go through each of the item to analyze
for i in range(len(names_df)):
    item = names_df.iloc[i]
    # print(item)
    # print(type(item))
    print(item["text"])
    print(item["slots"])
    print("--------")

Hi there! Can you give me some info on Cityroomz?
{'hotel-name': 'cityroomz'}
--------
I am looking for a hotel named alyesbray lodge guest house.
{'hotel-name': 'alyesbray lodge guest house'}
--------
I am looking for a particular restaurant. Its name is called travellers rest
{'restaurant-name': 'travellers rest'}
--------
Hi, what can you tell me about the bangkok city restaurant?
{'restaurant-name': 'bangkok city'}
--------
I'm looking for a particular restaurant called ask. Please give me information on that restaurant.
{'restaurant-name': 'ask'}
--------
I would like to find out information about a hotel called Warkworth House.
{'hotel-name': 'warkworth house'}
--------
I don't know if this is possible but can you please get me some information on the alexander bed and breakfast?
{'hotel-name': 'alexander bed and breakfast'}
--------
I'm looking for a hotel called Kirkwood House please.
{'hotel-name': 'kirkwood house'}
--------
I'm looking for a hotel called city centre north b a

In [6]:
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

model_name = "deepset/roberta-base-squad2"
# model_name = "consciousAI/question-answering-roberta-base-s-v2"


# a) Get predictions
model = pipeline('question-answering', model=model_name, tokenizer=model_name)
QA_input = {
    'question': 'What is the name?',
    'context': """I'm looking to go to dinner tonight and am in the mood for some good Bistro in the centre of town, can you find me some options?"""
}
res = model(QA_input)



  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


In [7]:
print(res)

{'score': 0.05401258170604706, 'start': 69, 'end': 75, 'answer': 'Bistro'}


In [8]:
def get_name(text, nlp_model=model):
    """
        extract the name (hote/restaurant) from the text
    """
    query = {
    'question': 'What is the name?',
    'context': text
    }
    res = nlp_model(query)
    return res["answer"]

s = """I'm looking for a hotel called the a and b guest house. Can you help me out?"""
get_name(s)

'a and b guest house'

In [9]:
# go through each of the item to analyze and predict name
gold_names = []
sys_names = []
correct_count = 0
wrong_preds = []

n = len(names_df)
max_row = n # set to n for full set
for i in range(max_row):
    item = names_df.iloc[i]

    txt = item["text"]

    print(txt)
    print(item["slots"])
    # extract gold name
    gold_name = ""
    if item["slots"].get("hotel-name", "") != "":
        gold_name = item["slots"].get("hotel-name", "")
    else:
        gold_name = item["slots"].get("restaurant-name", "")
    print("Gold name =", gold_name)
        
    pred_name = get_name(txt).lower()
    
    # apply some post-processing (remove 'the', 'restaurant', etc)
    print("Extracted =", pred_name)
    pred_name = (pred_name
                 # .replace("the", "")
                 # .replace("restaurant", "")
                 # .replace("hotel", "")
                 .replace("'s", "")
                 .replace(".", "")
                 .replace(",", "")
                 .strip()
                )
    print("Post-processed extracted =", pred_name)

    if gold_name == pred_name:
        print("Correct")
        correct_count += 1
    else:
        wrong_preds.append([txt, pred_name, gold_name])
    
    print("--------")

print("Completed")


Hi there! Can you give me some info on Cityroomz?
{'hotel-name': 'cityroomz'}
Gold name = cityroomz
Extracted = cityroomz
Post-processed extracted = cityroomz
Correct
--------
I am looking for a hotel named alyesbray lodge guest house.
{'hotel-name': 'alyesbray lodge guest house'}
Gold name = alyesbray lodge guest house
Extracted = alyesbray lodge guest house
Post-processed extracted = alyesbray lodge guest house
Correct
--------
I am looking for a particular restaurant. Its name is called travellers rest
{'restaurant-name': 'travellers rest'}
Gold name = travellers rest
Extracted = travellers rest
Post-processed extracted = travellers rest
Correct
--------
Hi, what can you tell me about the bangkok city restaurant?
{'restaurant-name': 'bangkok city'}
Gold name = bangkok city
Extracted = bangkok city restaurant
Post-processed extracted = bangkok city restaurant
--------
I'm looking for a particular restaurant called ask. Please give me information on that restaurant.
{'restaurant-name'

In [10]:
score = correct_count * 100 / max_row
print(f"Accuracy = {score:.1f} %")

Accuracy = 77.3 %


In [20]:
wrong_preds

[['Heya, can you find me an expensive restaurant with north african food?',
  'north african',
  'african'],
 ["I want something that's moderately priced. Any type of food.",
  'any type of',
  'dontcare'],
 ["I'm looking for a moderate priced place to dine serving modern european food.",
  'european',
  'modern european'],
 ["I'm looking for a restaurant in the centre serving modern european food.",
  'european',
  'modern european'],
 ["I'm so hungry! Can you find me a really great modern european restaurant? Money is no object!",
  'european',
  'modern european'],
 ['Is there a modern European restaurant in the north?',
  'european',
  'modern european'],
 ['AM looking for a place to dine. The restaurant should be in the south and should serve gastropod food.',
  'gastropod',
  'gastropub'],
 ["Hi! I'd like to find a seafood restaurant in the centre of town, please.",
  'sea',
  'seafood'],
 ['im looking for a place that serves morden european food and with a cheap price range',
  

In [12]:
# extract food type
def get_food_type(text, nlp_model=model):
    """
        extract the food type from the text
    """
    query = {
    'question': 'What is the food origin?',
    'context': text
    }
    res = nlp_model(query)
    return res["answer"]

s = """I need to find a good vegetarian restaurant"""
get_food_type(s)

'vegetarian'

In [13]:
# filter by names
kw2 = 'restaurant-food='
foodtype_df = train_df.query("answer.str.contains(@kw2)")
print(len(names_df))
foodtype_df.reset_index(inplace=True)
foodtype_df.drop(columns=["index"], inplace=True)
foodtype_df.head()

556


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  foodtype_df.drop(columns=["index"], inplace=True)


Unnamed: 0,text,answer,intent,slots,text_lower
0,I am looking for a restaurant. I would like so...,"['find_restaurant', 'restaurant-food=chinese',...",find_restaurant,"{'restaurant-food': 'chinese', 'restaurant-pri...",i am looking for a restaurant. i would like so...
1,"Yeah, could you recommend a good gastropub?","['find_restaurant', 'restaurant-food=gastropub']",find_restaurant,{'restaurant-food': 'gastropub'},"yeah, could you recommend a good gastropub?"
2,I want to find an expensive restaurant and ser...,"['find_restaurant', 'restaurant-food=european'...",find_restaurant,"{'restaurant-food': 'european', 'restaurant-pr...",i want to find an expensive restaurant and ser...
3,Where's a good place to eat crossover food in ...,"['find_restaurant', 'restaurant-food=crossover']",find_restaurant,{'restaurant-food': 'crossover'},where's a good place to eat crossover food in ...
4,Can you help me find an expensive Chinese food...,"['find_restaurant', 'restaurant-food=chinese',...",find_restaurant,"{'restaurant-food': 'chinese', 'restaurant-pri...",can you help me find an expensive chinese food...


In [14]:
# go through each of the item to analyze and predict name
gold_names = []
sys_names = []
correct_count = 0
wrong_preds = []

n = len(foodtype_df)
max_row = n # set to n for full set
for i in range(max_row):
    item = foodtype_df.iloc[i]

    txt = item["text"]

    print(txt)
    print(item["slots"])
    # extract gold name
    # gold_name = ""
    gold_name = item["slots"].get("restaurant-food", "")
    print("Gold name =", gold_name)
        
    pred_name = get_food_type(txt).lower()
    
    # apply some post-processing (remove 'the', 'restaurant', etc)
    print("Extracted =", pred_name)
    pred_name = (pred_name
                 # .replace("the", "")
                 .replace("food", "")
                 .replace("'s", "")
                 .replace(".", "")
                 .replace(",", "")
                 .strip()
                )
    print("Post-processed extracted =", pred_name)

    if gold_name == pred_name:
        print("Correct")
        correct_count += 1
    else:
        wrong_preds.append([txt, pred_name, gold_name])
    
    print("--------")

print("Completed")


I am looking for a restaurant. I would like something cheap that has Chinese food.
{'restaurant-food': 'chinese', 'restaurant-pricerange': 'cheap'}
Gold name = chinese
Extracted = chinese
Post-processed extracted = chinese
Correct
--------
Yeah, could you recommend a good gastropub?
{'restaurant-food': 'gastropub'}
Gold name = gastropub
Extracted = gastropub
Post-processed extracted = gastropub
Correct
--------
I want to find an expensive restaurant and serves european food. Can i also have the address, phone number and its area. ?
{'restaurant-food': 'european', 'restaurant-pricerange': 'expensive'}
Gold name = european
Extracted = european
Post-processed extracted = european
Correct
--------
Where's a good place to eat crossover food in Cambridge?
{'restaurant-food': 'crossover'}
Gold name = crossover
Extracted = crossover food
Post-processed extracted = crossover
Correct
--------
Can you help me find an expensive Chinese food restaurant?
{'restaurant-food': 'chinese', 'restaurant-pr

In [15]:
score = correct_count * 100 / max_row
print(f"Accuracy = {score:.1f} %")

Accuracy = 94.3 %


In [19]:
print("Wrong counts =", len(wrong_preds))
for text, sys, gold in wrong_preds:
    print("Text =", text)
    print("Sys =", sys)
    print("Gold =", gold)
    print("-----")

Wrong counts = 63
Text = Heya, can you find me an expensive restaurant with north african food?
Sys = north african
Gold = african
-----
Text = I want something that's moderately priced. Any type of food.
Sys = any type of
Gold = dontcare
-----
Text = I'm looking for a moderate priced place to dine serving modern european food.
Sys = european
Gold = modern european
-----
Text = I'm looking for a restaurant in the centre serving modern european food.
Sys = european
Gold = modern european
-----
Text = I'm so hungry! Can you find me a really great modern european restaurant? Money is no object!
Sys = european
Gold = modern european
-----
Text = Is there a modern European restaurant in the north?
Sys = european
Gold = modern european
-----
Text = AM looking for a place to dine. The restaurant should be in the south and should serve gastropod food.
Sys = gastropod
Gold = gastropub
-----
Text = Hi! I'd like to find a seafood restaurant in the centre of town, please.
Sys = sea
Gold = seafood
