In [None]:
# Install the third-party packages imported by the cells below.
# NOTE(review): versions are unpinned, so a fresh runtime may resolve different releases.
!pip install spacy
!pip install transformers
!pip install requests
!pip install rapidfuzz
!pip install word2number
!pip install torch

Collecting rapidfuzz
  Downloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Downloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m22.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rapidfuzz
Successfully installed rapidfuzz-3.12.1
Collecting word2number
  Downloading word2number-1.1.zip (9.7 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: word2number
  Building wheel for word2number (setup.py) ... [?25l[?25hdone
  Created wheel for word2number: filename=word2number-1.1-py3-none-any.whl size=5568 sha256=14ad4d8280d766d67b83f1684cfbb27abf097256d049d742f79d7a516d268da7
  Stored in directory: /root/.cache/pip/wheels/cd/ef/ae/073b491b14d25e2efafcffca9e16b2ee6d114ec5c643ba4f06
Successfully built word2number
Installing collected packages: word2numb

Import relevant dependencies

In [None]:
import json
import os
import random
import re
import string
from datetime import datetime

import rapidfuzz
import requests
import spacy
import torch
from rapidfuzz import process, fuzz
from spacy.matcher import PhraseMatcher
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
from word2number import w2n

Load the spaCy model (the transformer models are loaded in later cells)

In [None]:
# Load spaCy model for NLP processing.
# `nlp` is the shared module-level pipeline that the extraction helpers below call for
# tokenisation, named-entity recognition and dependency parsing.
nlp = spacy.load("en_core_web_sm")

Menu

In [None]:
# General menu: one entry per food category, each offering a "vegan" dish and a
# "gluten_free" dish. The recommendation and menu-display helpers read this mapping.
# NOTE(review): "Teriyaki Chicken Rice" is listed as gluten_free here, but its entry in
# `menu_info` lists "Gluten" as an allergen — confirm which one is correct.
menu = {
    "sushi": {"vegan": "Avocado Rolls", "gluten_free": "Salmon Rolls"},
    "rice": {"vegan": "Spicy Garlic Fried Rice", "gluten_free": "Teriyaki Chicken Rice"},
    "noodles": {"vegan": "Sesame Glass Noodles", "gluten_free": "Classic Beef Pho"}
}

# Flat list of every dish name in lowercase.
# NOTE(review): not referenced by any other cell visible in this notebook.
all_dishes = [dish.lower() for category_dishes in menu.values() for dish in category_dishes.values()]

Functions that use spaCy to analyse a sentence through tokenization and lemmatization. The processed sentence is then passed to a function that extracts the relevant data.

In [None]:
"Extracts customer's name from a message."

def extract_name(sentence):
    """Extract the customer's name from a free-text message.

    The sentence is title-cased first so that names typed in lowercase are
    more likely to be tagged by spaCy's named-entity recogniser.

    Args:
        sentence: Raw text typed by the customer.

    Returns:
        All entities labelled ``PERSON`` joined by single spaces, or ``None``
        when no such entity is found.
    """
    person_entities = []
    for entity in nlp(sentence.title()).ents:
        if entity.label_ == "PERSON":
            person_entities.append(entity.text)

    if not person_entities:
        return None
    return " ".join(person_entities)


In [None]:
"Extracts Singapore phone numbers from a message."

def extract_sg_phone_number(sentence):
    """Extract a Singapore mobile number from a free-text message.

    A number is 8 digits starting with 8 or 9, optionally written with one
    space in the middle ("9123 4567"). An optional Singapore country code
    ("+65" or "65", optionally followed by a space or hyphen) is accepted and
    stripped, so "+6591234567" is recognised instead of being rejected.

    Args:
        sentence: Raw text typed by the customer.

    Returns:
        The local 8-digit part exactly as typed (an inner space is kept), or
        ``None`` when no number is found.
    """
    # Optional country code, then the local number captured as group 1.
    number_pattern = r"(?:\+?65[ -]?)?((?:8|9)\d{3} ?\d{4})"

    # First pass: spaCy usually labels bare numbers as "CARDINAL" entities.
    doc = nlp(sentence)
    for ent in doc.ents:
        if ent.label_ == "CARDINAL":
            match = re.fullmatch(number_pattern, ent.text)
            if match:
                return match.group(1)

    # Fallback: scan the raw text. `(?<!\w)` replaces the leading `\b` so that a
    # leading "+" is allowed; the number must still not be glued to a word/digit.
    match = re.search(r"(?<!\w)" + number_pattern + r"\b", sentence)
    if match:
        return match.group(1)

    return None


In [None]:
# Read the Google Maps key from the environment so a real key is never saved in the
# notebook; the placeholder is kept as the fallback when the variable is not set.
API_KEY = os.environ.get("GOOGLE_MAPS_API_KEY", "Your API Key here")  # GOOGLE_MAPS_API_KEY
address = "1 Raffles Place, Singapore"
GEOCODING_URL = "https://maps.googleapis.com/maps/api/geocode/json"

# Smoke test: make one Geocoding API request for a known address.
# The timeout stops the cell from hanging forever if the service is unreachable.
params = {"address": address, "key": API_KEY}
response = requests.get(GEOCODING_URL, params=params, timeout=10)

print(response.json())  # Check API response

{'results': [{'address_components': [{'long_name': '1', 'short_name': '1', 'types': ['street_number']}, {'long_name': 'Raffles Place', 'short_name': 'Raffles Pl', 'types': ['route']}, {'long_name': 'Downtown Core', 'short_name': 'Downtown Core', 'types': ['neighborhood', 'political']}, {'long_name': 'Singapore', 'short_name': 'Singapore', 'types': ['locality', 'political']}, {'long_name': 'Singapore', 'short_name': 'SG', 'types': ['country', 'political']}, {'long_name': '048616', 'short_name': '048616', 'types': ['postal_code']}], 'formatted_address': '1 Raffles Pl, Singapore 048616', 'geometry': {'location': {'lat': 1.2835792, 'lng': 103.850207}, 'location_type': 'ROOFTOP', 'viewport': {'northeast': {'lat': 1.285055730291502, 'lng': 103.8516257802915}, 'southwest': {'lat': 1.282357769708498, 'lng': 103.8489278197085}}}, 'navigation_points': [{'location': {'latitude': 1.2838359, 'longitude': 103.8503093}, 'restricted_travel_modes': ['WALK']}, {'location': {'latitude': 1.2835774, 'longi

In [None]:
def validate_singapore_address(address):
    """Check with the Google Geocoding API whether an address is in Singapore.

    Args:
        address: Free-text address (or place name) to geocode.

    Returns:
        The ``formatted_address`` of the first geocoding result whose country
        component has short name ``"SG"``, or ``None`` when the request fails,
        the API status is not ``"OK"``, or no result is in Singapore.
    """
    params = {"address": address, "key": API_KEY}
    try:
        # Timeout so an unreachable service cannot block the chat forever.
        response = requests.get(GEOCODING_URL, params=params, timeout=10)
        data = response.json()
    except (requests.RequestException, ValueError) as error:
        # Network failure or a non-JSON body: report it and treat the address as
        # unvalidated instead of crashing the conversation.
        print(f"Address validation request failed: {error}")
        return None

    if data.get("status") != "OK":
        return None

    for result in data.get("results", []):
        for component in result.get("address_components", []):
            # `.get` avoids a KeyError on components without a short name.
            if "country" in component.get("types", []) and component.get("short_name") == "SG":
                return result.get("formatted_address")  # Return only the formatted address
    return None

def extract_address(sentence):
    """Extract a delivery address from a message and validate it is in Singapore.

    Candidate addresses are the spaCy entities labelled ``GPE``, ``LOC`` or
    ``FAC``; when none are found the whole sentence is the only candidate.

    Args:
        sentence: Raw text typed by the customer.

    Returns:
        The formatted address of the first candidate that validates, or
        ``None`` when no candidate is a Singapore address.
    """
    doc = nlp(sentence)
    possible_addresses = [ent.text for ent in doc.ents if ent.label_ in ("GPE", "LOC", "FAC")]

    # If no named entities are detected, assume the entire sentence is an address
    if not possible_addresses:
        possible_addresses.append(sentence)

    # Validate lazily: each validation is a network call, so stop at the first
    # hit instead of geocoding every candidate and discarding all but one.
    for candidate in possible_addresses:
        validated = validate_singapore_address(candidate)
        if validated:
            return validated

    return None


In [None]:
def convert_number_words(sentence):
    """Replace each whitespace-separated number word with its digit form.

    Every word is passed to ``w2n.word_to_num`` on its own; words that raise
    ``ValueError`` are kept unchanged.

    Args:
        sentence: Text to convert.

    Returns:
        The words re-joined with single spaces.
    """
    converted = []
    for word in sentence.split():
        try:
            converted.append(str(w2n.word_to_num(word)))
        except ValueError:
            # Not a number word: keep it as typed.
            converted.append(word)
    return " ".join(converted)

def extract_pax(sentence):
    """Extract the total number of people (pax) from a free-text message.

    Punctuation is stripped and number words are converted to digits, then
    three passes look for head counts:

    1. regex, for digits followed by a pax keyword (e.g. "20pax");
    2. dependency parse, for a numeric modifier of a pax keyword ("7 adults");
    3. keyword-first phrasing ("group of 7").

    Each number is identified by its character offset in the cleaned sentence,
    so one number found by several passes is counted once ("a group of 7
    people" gives 7, not 14) while distinct numbers are summed ("3 adults and
    2 kids" gives 5). If no keyword-based count is found, the largest
    standalone number is used.

    Args:
        sentence: Raw text typed by the customer.

    Returns:
        The total head count as an ``int``, or ``None`` when nothing positive
        could be extracted.
    """
    sentence = re.sub(r'[^\w\s]', '', sentence)  # Remove punctuation
    sentence = convert_number_words(sentence)  # Convert number words to digits
    doc = nlp(sentence)

    # "grownups" is listed because punctuation stripping turns "grown-ups" into it.
    pax_keywords = {
        "pax", "people", "guests", "persons", "group", "table", "adults",
        "grown-ups", "grownups", "children", "kids", "toddlers", "infants",
    }

    # Maps the character offset of a number in `sentence` to its value.
    counted = {}

    # Regex-based extraction (for cases like "20pax"). Matching the unmodified
    # sentence case-insensitively keeps offsets aligned with spaCy's `token.idx`.
    regex_pattern = r"(\d+)\s*(pax|people|guests|persons|adults|children|kids|group)"
    for match in re.finditer(regex_pattern, sentence, flags=re.IGNORECASE):
        counted.setdefault(match.start(1), int(match.group(1)))

    # Dependency parsing-based extraction (for cases like "seven adults").
    # The isdecimal() guard prevents int() from raising on non-digit modifiers
    # such as "dozen".
    for token in doc:
        if (
            token.dep_ == "nummod"
            and token.head.text.lower() in pax_keywords
            and token.text.isdecimal()
        ):
            counted.setdefault(token.idx, int(token.text))

    # Handle cases where keyword comes first ("group of seven")
    for i in range(len(doc) - 2):
        if doc[i].text.lower() not in pax_keywords or doc[i + 1].text.lower() != "of":
            continue
        number_token = doc[i + 2]
        if number_token.text.isdecimal():
            num = int(number_token.text)
        elif number_token.text.lower() in w2n.american_number_system:
            try:
                num = w2n.word_to_num(number_token.text)
            except ValueError:
                continue
        else:
            continue
        counted.setdefault(number_token.idx, num)

    total_pax = sum(counted.values())

    # Handle standalone numbers (assume they indicate pax if no explicit pax count was found)
    if total_pax == 0:
        standalone_numbers = [int(token.text) for token in doc if token.text.isdecimal()]
        if standalone_numbers:
            total_pax = max(standalone_numbers)  # Take the largest number as a reasonable assumption

    return total_pax if total_pax > 0 else None


In [None]:
def load_allergy_detector():
    """Build the phrase matcher used to spot common food allergens.

    Returns:
        A ``(nlp, matcher, food_allergens)`` tuple: the shared spaCy pipeline,
        a case-insensitive ``PhraseMatcher`` holding one pattern per allergen
        under the key ``"ALLERGY"``, and the list of allergen terms.
    """
    # List of common food allergens
    food_allergens = [
        "peanuts", "tree nuts", "milk", "eggs", "wheat", "soy", "fish",
        "shellfish", "gluten", "sesame", "dairy",
    ]

    # attr="LOWER" makes the matcher compare lowercase token text.
    matcher = PhraseMatcher(nlp.vocab, attr="LOWER")
    matcher.add("ALLERGY", [nlp.make_doc(term) for term in food_allergens])

    return nlp, matcher, food_allergens

def extract_allergies(text, nlp, matcher, food_allergens):
    """Detect food allergens mentioned in a message.

    Args:
        text: Raw text typed by the customer.
        nlp: spaCy pipeline used to tokenise ``text``.
        matcher: ``PhraseMatcher`` loaded with the allergen patterns.
        food_allergens: Canonical allergen terms used for fuzzy matching.

    Returns:
        A sorted list of detected allergens, reported as they are spelled in
        ``food_allergens`` where possible. The list is empty when the message
        says there are no allergies or none are detected.
    """
    doc = nlp(text)
    lowered = doc.text.lower()

    # "No allergies" detection. The comparison is case-insensitive, and the
    # multi-word phrases are matched as substrings: comparing them against
    # single whitespace-split words could never succeed.
    no_allergy_words = {"none", "nil"}
    no_allergy_phrases = ("no allergies", "no allergy", "no food allergies")
    if no_allergy_words.intersection(re.findall(r"[a-z]+", lowered)) or any(
        phrase in lowered for phrase in no_allergy_phrases
    ):
        return []

    canonical = {allergen.lower(): allergen for allergen in food_allergens}
    allergies = set()

    # Exact (case-insensitive) phrase matches, reported in canonical spelling
    # so that "Peanuts" and "peanuts" do not both appear.
    for _match_id, start, end in matcher(doc):
        found = doc[start:end].text.lower()
        allergies.add(canonical.get(found, found))

    # Fuzzy matching for misspellings. `fuzz.ratio` compares whole words; the
    # default scorer also rewards substrings, which made short everyday words
    # match allergens ("a" -> "peanuts", "am" -> "sesame", "eat" -> "wheat").
    for token in doc:
        word = token.text.lower()
        if not token.is_alpha or token.is_stop or len(word) < 3:
            continue
        extract_result = process.extractOne(
            word, food_allergens, scorer=fuzz.ratio, score_cutoff=80
        )
        if extract_result:
            allergies.add(extract_result[0])

    return sorted(allergies)

In [None]:
def extract_dietary_restrictions(user_input):
    """Detect dietary restrictions (halal, vegetarian, vegan) in a message.

    Keywords and phrases are first looked up verbatim in the lowercased text;
    single-word keywords are then fuzzy-matched against each token to tolerate
    misspellings.

    Args:
        user_input: Raw text typed by the customer.

    Returns:
        The detected restrictions as a comma-separated string in alphabetical
        order (e.g. ``"halal, vegan"``), or ``"NIL"`` when none are detected.
    """
    # Process input text
    text = user_input.lower()
    doc = nlp(text)

    # Define keywords for dietary restrictions
    dietary_keywords = {
        "halal": ["halal"],
        "vegetarian": ["vegetarian", "ovo-vegetarian", "lacto-vegetarian", "lacto-ovo vegetarian"],
        "vegan": ["vegan", "plant-based", "strictly no animal products"]
    }

    detected_restrictions = set()

    # Exact keyword/phrase lookup. This is the only way the hyphenated and
    # multi-word keywords can match, because spaCy splits them into several tokens.
    for restriction, keywords in dietary_keywords.items():
        if any(keyword in text for keyword in keywords):
            detected_restrictions.add(restriction)

    # Fuzzy matching for misspelled single-word keywords. `fuzz.ratio` compares
    # whole words; the default scorer also rewards substrings, so a token as
    # short as "a" used to match both "halal" and "vegan".
    single_word_keywords = {
        keyword: restriction
        for restriction, keywords in dietary_keywords.items()
        for keyword in keywords
        if keyword.isalpha()
    }
    for token in doc:
        if not token.is_alpha or len(token.text) < 4:
            continue
        extract_result = process.extractOne(
            token.text, list(single_word_keywords), scorer=fuzz.ratio, score_cutoff=80
        )
        if extract_result:
            detected_restrictions.add(single_word_keywords[extract_result[0]])

    # Return detected dietary restrictions (sorted so the output is deterministic)
    if detected_restrictions:
        return ", ".join(sorted(detected_restrictions))
    return "NIL"

In [None]:
def get_customer_name():
    """Prompt for the customer's name until one is extracted and confirmed.

    Returns:
        The confirmed name as returned by ``extract_name``.
    """
    while True:
        get_name = input("Bot: How can we address you?\nYour name: ")
        name = extract_name(get_name)

        if name is None:
            # Only report invalid input when no name was extracted. Previously this
            # message was also printed right after the user answered "no".
            print("Bot: Invalid input. Please try again")
            continue

        while True:
            check = input(f"Bot: Hi {name}. Did I get your name right?\nyes/no: ").strip().lower()
            if check in ("yes", "y", "yesh", "yep", "ye"):
                return name
            if check in ("no", "n", "nope"):
                print("Bot: Oops, let's try that again.")
                break  # Ask for the name again
            print("Bot: Invalid input. Please enter 'yes' or 'no'.")


In [None]:
def get_customer_number():
    """Prompt for the customer's phone number until one is extracted and confirmed.

    Returns:
        The confirmed phone number as returned by ``extract_sg_phone_number``.
    """
    while True:
        get_number = input("Bot: How can we contact you?\nYour phone number: ")
        number = extract_sg_phone_number(get_number)

        if number is None:
            # Only report invalid input when no number was extracted. Previously this
            # message was also printed right after the user answered "no".
            print("Bot: Invalid input. Please try again")
            continue

        while True:
            check = input(
                f"Bot: Got it, your phone number is {number}. Is that correct?\nyes/no: "
            ).strip().lower()
            if check in ("yes", "y", "yesh", "yep", "ye"):
                return number
            if check in ("no", "n", "nope"):
                print("Bot: Oops, let's try that again.")
                break  # Ask for the number again
            print("Bot: Invalid input. Please enter 'yes' or 'no'.")


In [None]:
def get_customer_address():
    """Prompt for a delivery address until one validates and is confirmed.

    Returns:
        The confirmed address as returned by ``extract_address``.
    """
    affirmative = ("yes", "y", "yesh", "yep", "ye")
    negative = ("no", "n", "nope")

    while True:
        address = extract_address(
            input("Bot: Where would you like your food to be delivered to?\nAddress: ")
        )
        if address is None:
            print("Bot: Invalid input. Please try again.")
            continue

        # Keep asking for confirmation until the answer is a recognised yes or no.
        while True:
            answer = input(f"Bot: Okay, we will deliver to {address}. Is that correct?\nyes/no: ").lower()
            if answer in affirmative:
                return address
            if answer in negative:
                print("Bot: Oops, let's try that again.")
                break
            print("Bot: Invalid input. Please enter 'yes' or 'no'.")


In [None]:
def get_customer_pax():
    """Prompt for the number of people until one is extracted and confirmed.

    Returns:
        The confirmed head count as returned by ``extract_pax``.
    """
    while True:
        get_pax = input("Bot: How many people will the catering be for?\nYou: ")
        pax = extract_pax(get_pax)

        if pax is None:
            # Only report invalid input when no head count was extracted. Previously
            # this message was also printed right after the user answered "no".
            print("Bot: Invalid input. Please try again.")
            continue

        while True:
            check = input(f"Bot: Okay, we will cater for {pax}. Is that correct?\nyes/no: ").strip().lower()
            if check in ("yes", "y", "yesh", "yep", "ye"):
                return pax
            if check in ("no", "n", "nope"):
                print("Bot: Oops, let's try that again.")
                break  # Ask for the head count again
            print("Bot: Invalid input. Please enter 'yes' or 'no'.")


In [None]:
def get_customer_allergy():
    """Prompt for food allergies until the detected list is confirmed.

    Returns:
        The confirmed allergies: a list of allergen names, or the string
        ``"NIL"`` when none were detected or the customer stated they have none.
    """
    # Named `allergy_nlp` so the module-level `nlp` is not shadowed.
    allergy_nlp, matcher, food_allergens = load_allergy_detector()
    no_allergy_terms = ("nil", "none", "no allergies", "no allergy", "no food allergies")

    while True:
        get_allergy = input("Bot: Please enter any food allergies you or your guests have: ")
        allergies = extract_allergies(get_allergy, allergy_nlp, matcher, food_allergens)

        if allergies is None:
            # Only report invalid input when extraction failed. Previously this
            # message was also printed right after the user answered "no".
            print("Bot: Invalid input. Please try again.")
            continue

        # An empty list, or one containing a "no allergies" term, is stored as "NIL".
        if isinstance(allergies, list) and (
            not allergies or any(a.lower() in no_allergy_terms for a in allergies)
        ):
            allergies = "NIL"

        shown = ", ".join(allergies) if isinstance(allergies, list) else allergies
        while True:
            check = input(f"Bot: Allergies: {shown}. Is that correct?\nyes/no: ").strip().lower()
            if check in ("yes", "y", "yesh", "yep", "ye"):
                return allergies
            if check in ("no", "n", "nope"):
                print("Bot: Oops, let's try that again.")
                break  # Ask for the allergies again
            print("Bot: Invalid input. Please try again.")

In [None]:
def get_customer_dietary_restriction():
    """Prompt for dietary restrictions until the detected value is confirmed.

    Returns:
        The confirmed restriction string from ``extract_dietary_restrictions``
        (for example ``"halal, vegan"``), or ``"NIL"`` when there are none.
    """
    no_restriction_terms = (
        "nil", "none", "no restrictions", "no dietary restriction", "no dietary restrictions"
    )

    while True:
        get_dietary_restriction = input("Bot: Please enter any dietary restrictions you or your guests have: ")
        dietary_restriction = extract_dietary_restrictions(get_dietary_restriction)

        if dietary_restriction is None:
            # Only report invalid input when extraction failed. Previously this
            # message was also printed right after the user answered "no".
            print("Bot: Invalid input. Please try again.")
            continue

        if dietary_restriction.lower() in no_restriction_terms:
            dietary_restriction = "NIL"

        while True:
            check = input(
                f"Bot: Dietary Restriction: {dietary_restriction}. Is that correct?\nyes/no: "
            ).strip().lower()
            if check in ("yes", "y", "yesh", "yep", "ye"):
                return dietary_restriction
            if check in ("no", "n", "nope"):
                print("Bot: Oops, let's try that again.")
                break  # Ask for the restrictions again
            print("Bot: Invalid input. Please try again.")

In [None]:
def _split_terms(value):
    """Flatten a string or iterable of strings into a set of lowercase terms.

    Each item is split on commas, so ``"halal, vegan"`` yields
    ``{"halal", "vegan"}``. ``None`` yields an empty set.
    """
    if value is None:
        return set()
    if isinstance(value, str):
        value = [value]
    return {
        part.strip().lower()
        for item in value
        for part in str(item).split(",")
        if part.strip()
    }


def get_recommend_course(dietary_preferences, allergies):
    """Recommend one dish per menu category for the given preferences.

    Args:
        dietary_preferences: A comma-separated string (e.g. ``"halal, vegan"``)
            or a list of restriction names.
        allergies: A list of allergen names, or a string such as ``"NIL"`` or
            ``"gluten, soy"``.

    Returns:
        The vegan dishes when "vegan" or "vegetarian" is requested, otherwise
        the gluten-free dishes. An empty list when both vegan food and gluten
        avoidance are required, since no dish satisfies both.
    """
    # Split into individual lowercase terms. Wrapping a joined string such as
    # "halal, vegan" in a one-element list meant "vegan" was never recognised.
    preferences = _split_terms(dietary_preferences)
    allergy_terms = _split_terms(allergies)

    if any("halal" in preference for preference in preferences):
        print("Bot: Rest assured, our entire menu is halal-compliant, so you have nothing to worry about on that front.")

    wants_vegan = "vegan" in preferences
    avoids_gluten = any("gluten" in term for term in allergy_terms)

    # Check if both "vegan" and "gluten" conditions are met
    if wants_vegan and avoids_gluten:
        print("Bot: We apologize, but we currently don't have any dishes that are both vegan and gluten-free.")  # Apology message
        return []  # Return an empty list to indicate no recommendations

    if wants_vegan or "vegetarian" in preferences:
        return [menu[category]["vegan"] for category in menu]

    # The gluten-free course is also the default recommendation.
    return [menu[category]["gluten_free"] for category in menu]

In [None]:
# Mount Google Drive so reservations can be saved under /content/drive.
# NOTE(review): `google.colab` is only importable on Colab, so this cell presumably fails
# on other Jupyter runtimes — confirm whether that matters.
from google.colab import drive
drive.mount('/content/drive')

def save_reservation(data, filename="/content/drive/My Drive/reservations.json"):
    """Append one reservation to the JSON file holding the list of reservations.

    Args:
        data: JSON-serialisable reservation record.
        filename: Path of the JSON file. It is created when missing.

    A file that is missing, is not valid JSON, or does not contain a JSON list
    is treated as having no reservations, and a fresh list is started.
    """
    try:
        with open(filename, "r", encoding="utf-8") as file:
            reservations = json.load(file)
    except (FileNotFoundError, json.JSONDecodeError):
        reservations = []

    # Valid JSON that is not a list (e.g. `{}` or `null`) would make `.append`
    # raise AttributeError, so it is handled like a corrupt file.
    if not isinstance(reservations, list):
        reservations = []

    reservations.append(data)

    with open(filename, "w", encoding="utf-8") as file:
        json.dump(reservations, file, indent=4)

Mounted at /content/drive


In [None]:
def handle_reservation():
    """Guide the customer through a reservation and save it once confirmed.

    Collects name, address, phone number, pax, allergies and dietary
    restrictions, recommends dishes, prints a summary and asks for
    confirmation. A rejected summary restarts the whole flow.

    Returns:
        A success message once the reservation is saved, or an apology when no
        dishes can be recommended.
    """
    affirmative = ("yes", "y", "yesh", "yep", "ye")

    # Each pass of this loop is one complete reservation attempt.
    while True:
        print("Bot: Thank you for choosing S.P.A.C.Y Catering! Let's start your reservation.")

        # Dict entries are evaluated in order, so the prompts appear in this order.
        reservation_data = {
            "Name": get_customer_name(),
            "Address": get_customer_address(),
            "Phone Number": get_customer_number(),
            "Number of Pax": get_customer_pax(),
            "Allergies": get_customer_allergy(),
            "Dietary Restriction": get_customer_dietary_restriction(),
        }
        reservation_data["Dishes"] = get_recommend_course(
            reservation_data["Dietary Restriction"], reservation_data["Allergies"]
        )

        if not reservation_data["Dishes"]:
            # Nothing to recommend: end the flow with an apology.
            return "We apologize, but we couldn't find any suitable dishes based on your preferences."

        print("Bot: Based on your preferences, we recommend the following dishes:", ", ".join(reservation_data["Dishes"]) + ".")

        # Store allergies as a single string for the summary and the saved record.
        if isinstance(reservation_data["Allergies"], list):
            reservation_data["Allergies"] = ", ".join(reservation_data["Allergies"])

        print("\nHere is your reservation summary:")
        for key, value in reservation_data.items():
            if key != "Dishes":
                print(f"{key}: {value}")
                continue
            # Dishes are listed one per line under their heading.
            print(f"{key}:")
            for dish in value:
                print(f"  - {dish}")

        confirmation = input("Does everything look correct? (yes/no): ").strip().lower()
        if confirmation in affirmative:
            save_reservation(reservation_data)
            return "Thank you! Your reservation has been recorded.\nIs there anything else I can help you with?"

        print("Let's correct your details. Restarting reservation...")



In [None]:
def display_menu():
    """Describe the gluten-free and vegan courses in two sentences.

    Returns:
        Two lines of text: the gluten-free dish of every menu category, then
        the vegan dish of every category.
    """
    courses = list(menu.values())
    gluten_free_names = ", ".join(course['gluten_free'] for course in courses)
    vegan_names = ", ".join(course['vegan'] for course in courses)

    return (
        f"Our recommended course, which is gluten-free, include {gluten_free_names}.\n"
        f"For vegan course, we have {vegan_names}."
    )

In [None]:

# Menu with more information on all six dishes.
# Keys are the dish names exactly as they appear in `menu`. Each entry holds:
#   "ingredient": list of ingredient names
#   "prepare":    one-sentence description of how the dish is prepared
#   "allergen":   list of allergen names (empty when none are listed)
# The inner keys match the `inquiry_labels` used when answering dish questions.
# NOTE(review): "Teriyaki Chicken Rice" lists "Gluten" as an allergen here, but `menu`
# offers it as the gluten_free rice dish — confirm which one is correct.
menu_info = {
    "Avocado Rolls": {
        "ingredient": ["Avocado", "Seaweed", "Sushi Rice", "Sesame Seeds"],
        "prepare": "Avocado Rolls are made by rolling avocado slices and sushi rice in seaweed, then cutting into pieces.",
        "allergen": ["Sesame"]
    },
    "Salmon Rolls": {
        "ingredient": ["Salmon", "Seaweed", "Sushi Rice", "Soy Sauce"],
        "prepare": "Salmon Rolls are made by rolling fresh salmon slices with sushi rice and seaweed, served with soy sauce.",
        "allergen": ["Fish", "Soy"]
    },
    "Spicy Garlic Fried Rice": {
        "ingredient": ["Rice", "Garlic", "Chili", "Soy Sauce", "Vegetables"],
        "prepare": "Spicy Garlic Fried Rice is stir-fried with aromatic garlic, chili, and soy sauce, served with fresh vegetables.",
        "allergen": ["Soy"]
    },
    "Teriyaki Chicken Rice": {
        "ingredient": ["Chicken", "Teriyaki Sauce", "Rice", "Spring Onions"],
        "prepare": "Grilled chicken glazed with teriyaki sauce, served over steamed rice.",
        "allergen": ["Soy", "Gluten"]
    },
    "Classic Beef Pho": {
        "ingredient": ["Beef", "Rice Noodles", "Beef Broth", "Herbs"],
        "prepare": "Classic Beef Pho is made by simmering beef bones to create a flavorful broth, served with rice noodles and fresh herbs.",
        "allergen": []
    },
    "Sesame Glass Noodles": {
        "ingredient": ["Glass Noodles", "Sesame Oil", "Soy Sauce", "Vegetables"],
        "prepare": "Sesame Glass Noodles are tossed with sesame oil, soy sauce, and fresh vegetables, making a light and flavorful dish.",
        "allergen": ["Sesame", "Soy"]
    }
}



In [None]:
# Load BERT-based models for sentiment analysis and masked token prediction.
# NOTE(review): `classifier` and `bert_nlp` are not referenced by any other cell visible
# in this notebook — confirm they are needed before paying the download/memory cost.
classifier = pipeline("text-classification", model="nlptown/bert-base-multilingual-uncased-sentiment")
bert_nlp = pipeline("fill-mask", model="bert-base-uncased")

# Load a transformer-based named entity recognition model.
# NOTE(review): `ner_pipeline` is likewise not referenced elsewhere in the visible notebook.
ner_pipeline = pipeline("ner", model="dslim/bert-base-NER")

# Load a zero-shot classification model to detect inquiry type
inquiry_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Candidate labels for the inquiry classifier; they match the inner keys of `menu_info`.
inquiry_labels = ["prepare", "ingredient", "allergen"]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/953 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/669M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/872k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cpu


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

BertForMaskedLM has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.
  - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).
  - If you are not the owner of the model architecture class, please contact the model code owner to update it.
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another archite

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Device set to use cpu


config.json:   0%|          | 0.00/829 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/433M [00:00<?, ?B/s]

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/59.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cpu


config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu


In [None]:
def extract_dish_name(user_input):
    """Identify the menu item a question refers to, using spaCy and rapidfuzz.

    Nouns and proper nouns are lemmatised and lowercased, inquiry words such
    as "ingredient" or "recipe" are dropped, and the remainder is
    fuzzy-matched against the dish names in ``menu_info``.

    Args:
        user_input: Raw question typed by the customer.

    Returns:
        The best-matching dish name as spelled in ``menu_info``, or ``None``
        when there are no keywords or no dish scores at least 50.
    """
    doc = nlp(user_input)
    stopwords = {"prepare", "ingredient", "allergen", "allergy", "recipe", "contain", "make", "with", "use"}

    # Filter out stopwords and extract meaningful words (nouns, proper nouns)
    keywords = [
        token.lemma_.lower()
        for token in doc
        if token.pos_ in ["NOUN", "PROPN"] and token.lemma_.lower() not in stopwords
    ]
    if not keywords:
        return None  # Nothing to match against the menu
    user_keywords = " ".join(keywords)  # Convert to a single string for fuzzy matching

    # Use rapidfuzz to find the best match. The keywords are lowercase while the
    # dish names are Title Case, so both sides are lowercased for scoring;
    # extractOne still returns the dish name in its original spelling.
    match_result = process.extractOne(
        user_keywords, list(menu_info.keys()), processor=str.lower, score_cutoff=50
    )

    return match_result[0] if match_result else None


In [None]:
def extract_inquiry_type(user_input):
    """Classify what kind of dish information the customer is asking for.

    Runs the zero-shot classifier over ``inquiry_labels`` and accepts the top
    label only when its score is above 0.6.

    Returns:
        The top-scoring label, or ``None`` when the score is 0.6 or lower.
    """
    prediction = inquiry_classifier(user_input, inquiry_labels)
    top_label = prediction["labels"][0]
    top_score = prediction["scores"][0]

    if top_score > 0.6:
        return top_label
    return None


def get_dish_info(user_input):
    """Answer a question about a dish's preparation, ingredients or allergens.

    Args:
        user_input: Raw question typed by the customer.

    Returns:
        A sentence containing the requested information, or an apology when the
        dish or the kind of information cannot be determined.
    """
    not_found = "I'm sorry, I couldn't find that dish in our menu. Could you check the spelling or ask about another dish?"

    dish_name = extract_dish_name(user_input)
    # Check for None before normalising: calling `.title()` first raised
    # AttributeError whenever no dish was recognised.
    if dish_name is None:
        return not_found

    dish_name = dish_name.title()  # Normalize capitalization
    if dish_name not in menu_info:
        return not_found

    # Classify the inquiry only once a dish is known; the zero-shot model is slow.
    info_type = extract_inquiry_type(user_input)
    if info_type is None:
        # Low-confidence classification: ask for clarification rather than
        # replying "I don't have information on None".
        return (
            f"I'm sorry, I'm not sure what you would like to know about {dish_name}. "
            "You can ask about its preparation, ingredients or allergens."
        )

    dish_details = menu_info[dish_name]
    if info_type not in dish_details:
        return f"I'm sorry, but I don't have information on {info_type} for {dish_name}."

    response_data = dish_details[info_type]
    if isinstance(response_data, list):
        response_data = ", ".join(response_data)

    # Wording used in the reply for each classifier label.
    display_names = {"prepare": "preparation", "ingredient": "ingredients", "allergen": "allergens"}
    return f"Here is the {display_names.get(info_type, info_type)} for {dish_name}: \n{response_data}"


In [None]:
# Reuse the BART zero-shot pipeline already loaded as `inquiry_classifier`: it is the same
# checkpoint ("facebook/bart-large-mnli"), and a second load would keep another ~1.6 GB
# copy in memory. Candidate labels are passed per call, so sharing the pipeline is safe.
# Fall back to loading it here if that earlier cell has not been run.
try:
    intent_classifier = inquiry_classifier
except NameError:
    intent_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Expanded list of intents
intents = ["greeting", "menu", "price", "reservation", "feedback", "farewell", "preparation", "ingredients", "allergens"]

def classify_intent(user_input):
    """Return the most likely intent for a message.

    The zero-shot classifier ranks the labels in ``intents``; the top-ranked
    label is returned regardless of its score.
    """
    ranked = intent_classifier(user_input, intents)
    return ranked["labels"][0]

Device set to use cpu


In [None]:
# Tokenizer and sequence-classification model used by `sentiment`.
# NOTE(review): this is the same checkpoint as the `classifier` pipeline created in an
# earlier cell — confirm both copies are needed.
tokenizer = AutoTokenizer.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')
model = AutoModelForSequenceClassification.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')

# Blank English pipeline whose only component is an entity ruler; it tags food terms.
nlp2 = spacy.blank("en")
ruler = nlp2.add_pipe("entity_ruler")

# Food terms recognised in feedback; each becomes an exact-text "FOOD" pattern.
food_list = ["sushi", "rice", "noodle", "noodles"]
patterns = [{"label": "FOOD", "pattern": food} for food in food_list]
ruler.add_patterns(patterns)

# Full English pipeline used for lemmatisation.
# NOTE(review): loads the same "en_core_web_sm" model as the module-level `nlp`.
nlp_full = spacy.load("en_core_web_sm")

# Sentiment Analysis function
def sentiment(text):
    """Rate the sentiment of ``text`` on a 1-5 scale.

    Args:
        text: Feedback text to score.

    Returns:
        An ``int`` from 1 to 5: the index of the highest logit plus one.
    """
    # Truncate to BERT's 512-token limit so very long feedback does not raise.
    tokens = tokenizer.encode(text, return_tensors='pt', truncation=True, max_length=512)
    # Inference only: skip gradient tracking to save memory and time.
    with torch.no_grad():
        result = model(tokens)
    return int(torch.argmax(result.logits)) + 1

# Improved Lemmatization Function
def lemmatize_text(text):
    """Return ``text`` with every token replaced by its lowercase lemma.

    The lemmas are joined with single spaces.
    """
    lemmas = []
    for token in nlp_full(text):
        lemmas.append(token.lemma_.lower())
    return " ".join(lemmas)

# Food Detection Function
def collect_feedback(text):
    """Score feedback that mentions a food we serve.

    The text is lemmatised so that plural or inflected forms still match the
    terms in ``food_list``.

    Args:
        text: Raw feedback typed by the customer.

    Returns:
        The 1-5 sentiment rating as an ``int`` when a known food is mentioned,
        otherwise the string ``"We do not serve that kind of food"``.
    """
    # Lemmatise for food detection only.
    doc = nlp2(lemmatize_text(text))
    food_found = any(ent.text in food_list for ent in doc.ents)

    if not food_found:
        return "We do not serve that kind of food"

    # Score the customer's original wording. Lemmatising before scoring flattens
    # intensity ("best" -> "good", "worst" -> "bad") and distorts the rating.
    return sentiment(text)


In [None]:
def handle_feedback(feedback):
    """Respond to customer feedback according to its 1-5 sentiment rating.

    Positive (4-5), neutral (3) and negative (1-2) ratings each get a random
    canned reply; negative feedback also prompts for more details. The rating
    and feedback are appended to ``feedback_log.txt``.

    Args:
        feedback: Raw feedback typed by the customer.

    Returns:
        A follow-up question for the customer, or an error message when no
        rating could be produced.
    """
    rating = collect_feedback(feedback)  # Function to collect customer feedback

    # collect_feedback returns a message string instead of a rating when no known
    # food is mentioned, so anything outside 1-5 is treated as a failure.
    if rating not in (1, 2, 3, 4, 5):
        return "An error occurred while processing your feedback. Please try again."

    additional_feedback = None

    if rating in (4, 5):  # Positive Feedback
        print(random.choice([
            "Bot: Thank you for your kind words! We're glad you enjoyed it!",
            "Bot: Awesome! We appreciate your feedback!",
            "Bot: Thanks for your support!"
        ]))

    elif rating == 3:  # Neutral Feedback
        print(random.choice([
            "Bot: Thanks for your feedback! How can we improve?",
            "Bot: We appreciate your response! Anything we can do better?",
            "Bot: Noted! Let us know if there's anything we can improve."
        ]))

    else:  # Negative Feedback (1 or 2)
        print(random.choice([
            "Bot: We're sorry to hear that. Can you share what went wrong?",
            "Bot: Oh no! We'd love to make things right. Could you tell us more?",
            "Bot: We apologize for any inconvenience. Would you like to speak to our support team? (Call 98765432)"
        ]))
        additional_feedback = input("You: ")
        print("Bot: Thank you for your feedback. We’ll work on improving this!")

    # Store feedback in a log file. The follow-up details are logged too, since
    # they were previously collected and then discarded. UTF-8 keeps
    # non-ASCII feedback writable on every platform.
    log_entry = f"Rating: {rating} | Feedback: {feedback}"
    if additional_feedback:
        log_entry += f" | Additional feedback: {additional_feedback}"
    with open("feedback_log.txt", "a", encoding="utf-8") as file:
        file.write(log_entry + "\n")

    print("Your feedback has been recorded.")
    return "Is there anything else I can help you with?"




In [None]:
def chatbot_main_loop():
    """Run the interactive catering chatbot until the customer says goodbye.

    Each message is classified into one of the intents known to
    ``classify_intent`` and dispatched to the matching handler. The loop ends
    on the "farewell" intent.
    """
    print("Welcome to S.P.A.C.Y Catering, how can I assist you today? \nFeel free to ask about our menu, pricing, or place a reservation!")

    while True:
        user_input = input("You: ").strip().lower()

        # An empty message carries no intent. Classifying it would pick an
        # arbitrary label (possibly starting a reservation), so ask again.
        if not user_input:
            print("Bot: I'm sorry, I didn't understand that. Can you please rephrase?")
            continue

        # Intent classification via the zero-shot BART classifier
        intent = classify_intent(user_input)

        if intent == "greeting":  # Function to handle greetings
            bot_response = random.choice(["Hello! How can I assist you today?", "Hi there! Need help with anything?", "Welcome! How may I help?"])

        elif intent == "menu":
            bot_response = display_menu()  # Function to display menu items

        elif intent == "price":
            price = "30"
            bot_response = f"Our pricing is SGD${price} per pax for any course on the menu."

        elif intent == "reservation":
            bot_response = handle_reservation()  # Function to handle reservation process

        elif intent in ("preparation", "ingredients", "allergens"):
            bot_response = get_dish_info(user_input)

        elif intent == "feedback":  # Function to handle feedbacks
            print("Bot: Would you like to share more details about your experience?")
            feedback = input("You: ")
            bot_response = handle_feedback(feedback)

        elif intent == "farewell":  # Function to exit chatbot
            bot_response = random.choice(["Goodbye! Have a great day!", "See you next time!", "Thanks for visiting us!"])
            print(f"Bot: {bot_response}")
            break

        else:
            # Safety net in case classify_intent returns a label with no handler.
            bot_response = "I'm sorry, I didn't understand that. Can you please rephrase?"

        print(f"Bot: {bot_response}")

    print("Chatbot session ended.")




In [None]:
# Start the interactive chatbot session; this cell blocks on input() until the
# conversation ends with a farewell.
chatbot_main_loop()

Welcome to S.P.A.C.Y Catering, how can I assist you today? 
Feel free to ask about our menu, pricing, or place a reservation!
You: may i see the menu
Bot: Our recommended course, which is gluten-free, include Salmon Rolls, Teriyaki Chicken Rice, Classic Beef Pho.
For vegan course, we have Avocado Rolls, Spicy Garlic Fried Rice, Sesame Glass Noodles.
You: quit
Bot: See you next time!
Chatbot session ended.
