# Yelp Chatbot

In [1]:
from IPython.display import Image, display, HTML

# URL de la imagen
image_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/a/ad/Yelp_Logo.svg/1200px-Yelp_Logo.svg.png'

# HTML para mostrar la imagen con tamaño ajustado
html_code = f'<img src="{image_url}" alt="Yelp" width="500"/>'

# Mostrar la imagen usando HTML
display(HTML(html_code))

### Importing libraries

In [2]:
import streamlit as st
import pandas as pd
from chromadb.api.types import Document
from chromadb.utils import embedding_functions
from chromadb import Client
import os
import re
from typing import List
from openai import OpenAI
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

### Downloading NLTK Resources

In [3]:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/erwinminor/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/erwinminor/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/erwinminor/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

### OpenAI API Key

### Reading data and sampling 10,000 for the RAG

In [5]:
df = pd.read_csv('/Users/erwinminor/Desktop/AI/data2.csv')

In [6]:
df = df.sample(10000)
df

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,review_count,categories,hours,tips,reviews,stars
65045,cD_If67mFKSEKFE1BZgJCA,Trolley Car Ice Cream Shoppe,7619 Germantown Ave,Philadelphia,PA,19119,40.065664,-75.195612,20,"Ice Cream & Frozen Yogurt, Desserts, Food","{'Monday': '12:0-22:0', 'Tuesday': '12:0-22:0'...",Gelati with root beer ice and vanilla ice crea...,"I haven't tried their water ice, however, I ha...",3.761905
48444,jYAol1aB_ZC8JhKMljG9zw,Christine's House of Kingfish Barbecue,926 Rt 206,Shamong,NJ,08088,39.767384,-74.738348,59,"Southern, Barbeque, Restaurants","{'Friday': '12:0-18:30', 'Saturday': '12:0-19:...",The crew were very friendly and inviting. The ...,AMAZING! I'm from the south (Western NC) and t...,4.531250
35384,NNab8Ympt7XaVWGWjqoykg,Every Body Massage,6654 Chippewa St,St. Louis,MO,63109,38.590182,-90.306076,26,"Massage Therapy, Hair Removal, Health & Medica...","{'Monday': '0:0-0:0', 'Tuesday': '9:0-21:0', '...",,I'm always hesitant to leave poor reviews for ...,4.846154
104156,LxBB61ONDmFoWSLBQZWBIA,O'Reilly Auto Parts,232 Los Altos Parkway,Sparks,NV,89436,39.584389,-119.740724,20,"Automotive, Auto Parts & Supplies","{'Monday': '7:30-21:0', 'Tuesday': '7:30-21:0'...",,Cant give enough kudos to Ben. He treated me i...,3.850000
68395,fNd-tOJGusNDcT0Kqzikaw,Indian Hut,1253 Churchmans Rd,Newark,DE,19713,39.691200,-75.661555,22,"Indian, Restaurants","{'Monday': '11:30-21:30', 'Tuesday': '11:30-21...",Not recommended,I love this place!! Their food tastes like aut...,2.727273
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130846,pBP6Vxhx1PSH2TftigpqtQ,Core Wellness and Chiropractic,100-111 Broadway Boulevard,Sherwood Park,AB,T8H 2A8,53.544253,-113.312416,5,"Acupuncture, Beauty & Spas, Chiropractors, Hea...","{'Monday': '7:30-18:30', 'Tuesday': '7:30-18:0...",These guys crack me up!,I've been going to see Dr. Dustin at this clin...,5.000000
20633,lznQEqIirjbjIMHkJZuS8A,Calogero's,608 Iberville St,New Orleans,LA,70130,29.953699,-90.067618,10,"Bars, Nightlife","{'Monday': '12:0-6:0', 'Tuesday': '12:0-6:0', ...",Great happy hour!!! \nMonday double well drink...,Cheese and Crackers!! These guys make the bes...,4.600000
117702,fm5tEBP8q9COf360VdcxZg,The Detail Shop Reno,199 E Moana Ln,Reno,NV,89502,39.492596,-119.794103,7,"Auto Detailing, Automotive, Car Wash","{'Monday': '9:0-17:0', 'Tuesday': '9:0-17:0', ...",,Did an amazing job on my suv. They were respon...,5.000000
45441,FZm2oRJ_pka6u2KPcVEBkw,Burger King,3701 34th St N,Saint Petersburg,FL,33713,27.805841,-82.679086,9,"Restaurants, Fast Food, Burgers","{'Monday': '5:0-0:0', 'Tuesday': '5:0-0:0', 'W...",Sketchy burger king What a waste of time. Fast...,"It's a Burger King, nothing special. Food coul...",2.666667


### DataSource Class

In [7]:
class DataSource:
    def __init__(self, data):
        self.data = data

    def preprocess_text_advanced(self, text: str) -> str:
        text = text.lower()
        text = re.sub(r'[^a-zA-Z\s]', '', text)
        tokens = nltk.word_tokenize(text)
        stop_words = set(stopwords.words('english'))
        tokens = [token for token in tokens if token not in stop_words]
        lemmatizer = WordNetLemmatizer()
        tokens = [lemmatizer.lemmatize(token) for token in tokens]
        return ' '.join(tokens)

    def process_data(self):
        self.processed_data = [self.preprocess_text_advanced(sentence) for sentence in self.data]

### UserQuery Class

In [8]:
class UserQuery:
    def __init__(self, query: str):
        self.query = query

### LLM Class

In [9]:
class LLM:
    def __init__(self, api_key: str = None):
        self.api_key = api_key
        self.client = OpenAI(api_key=self.api_key) if self.api_key else None

    def answer_query(self, query: str, context: str) -> str:
        prompt = f"Based on the provided information about the business, answer the following user query: {query}\n\nContext:\n{context}"
        if self.api_key:
            chat_completion = self.client.chat.completions.create(
                messages=[
                    {"role": "system", "content": prompt},
                    {"role": "user", "content": query},
                ],
                model="gpt-3.5-turbo",
            )
            return chat_completion.choices[0].message.content.strip()
        else:
            return "API key is missing."

### RetrievalAndRanking Class using Chroma

In [10]:
class RetrievalAndRanking:
    def __init__(self, data_source: DataSource, document_store: Client):
        self.data_source = data_source
        self.document_store = document_store

    def retrieve_relevant_chunks_chroma(self, query: str, top_n: int = 2) -> List[str]:
        results = self.document_store.query(
            query_texts=[query],
            n_results=top_n,
        )

        relevant_chunks = [result[0] for result in results['documents'][0]]

        return relevant_chunks

### Process the Query

In [11]:
def process_query(query: str, data_source: DataSource, retrieval_and_ranking: RetrievalAndRanking, llm: LLM) -> str:
    user_query = UserQuery(query)
    business_name = extract_business_name(user_query.query)
    
    if business_name:
        results = retrieval_and_ranking.document_store.get(
            where={"name": business_name},
            include=['metadatas']
        )
        
        if results and results['metadatas']:
            metadata = results['metadatas'][0]
            context = f"Business Name: {metadata.get('name', 'N/A')}\n"
            context += f"Address: {metadata.get('address', 'N/A')}, {metadata.get('city', 'N/A')}, {metadata.get('state', 'N/A')} {metadata.get('postal_code', 'N/A')}\n"
            context += f"Categories: {metadata.get('categories', 'N/A')}\n"
            context += f"Hours: {metadata.get('hours', 'N/A')}\n"
            context += f"Stars: {metadata.get('stars', 'N/A')}\n"
            context += f"Tips: {metadata.get('tips', 'N/A')}\n"
            answer = llm.answer_query(user_query.query, context)
            return answer
    
    relevant_chunks = retrieval_and_ranking.retrieve_relevant_chunks_chroma(user_query.query)
    context = "\n".join(relevant_chunks)
    answer = llm.answer_query(user_query.query, context)
    return answer

In [12]:
def extract_business_name(query: str) -> str:
    if '"' in query:
        start_index = query.find('"') + 1
        end_index = query.find('"', start_index)
        if start_index > 0 and end_index > start_index:
            return query[start_index:end_index]
    return ""

### Main Function

In [13]:
def main(data_source: DataSource, document_store: Client, model_choice: str = "openai", model_name: str = None, device: str = "cpu"):
    retrieval_and_ranking = RetrievalAndRanking(data_source, document_store)

    if model_choice == "openai":
        llm = LLM(api_key=os.environ.get("OPENAI_API_KEY"))
    else:
        print('Coming soon')

    while True:
        user_input = input("Enter your query (or type 'exit' to quit): ")
        if user_input.lower() == 'exit':
            break

        answer = process_query(user_input, data_source, retrieval_and_ranking, llm)
        print(f"User Query: {user_input}")
        print(f"Answer: {answer}\n")

### Preparing Data and Documents using Chroma 

In [14]:
data_source = DataSource(df['reviews'].tolist())
data_source.process_data()


chroma_client = Client()
try:
    document_store = chroma_client.get_collection("my_collection")
    chroma_client.delete_collection("my_collection")
except ValueError:
    pass

document_store = chroma_client.get_or_create_collection("my_collection")


documents = []
ids = []
metadatas = []
for _, row in df.iterrows():
    meta = {
        'name': row['name'],
        'address': row['address'],
        'city': row['city'],
        'state': row['state'],
        'postal_code': row['postal_code'],
        'categories': row['categories'],
        'hours': row['hours'],
        'stars': row['stars'],
        'tips': row['tips']
    }
    document = str(row['reviews'])
    documents.append(document)
    ids.append(str(row['business_id']))
    metadatas.append(meta)

embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
embeddings = embedding_function(documents)
document_store.add(
    ids=ids,
    documents=documents,
    metadatas=metadatas,
    embeddings=embeddings
)



### Try

Possible queries:

- What are the hours of "..."?
- Where is located "..."?
- What is the sentiment in the reviews of "..."?
- Tell me some tips for "..."
- What is the rating of "..."?
- What are the categories of "..."?

In [19]:
main(data_source, document_store)

Enter your query (or type 'exit' to quit):  What are the hours of "Trolley Car Ice Cream Shoppe"?


User Query: What are the hours of "Trolley Car Ice Cream Shoppe"?
Answer: The hours of "Trolley Car Ice Cream Shoppe" are as follows:
- Monday: 12:00 PM - 10:00 PM
- Tuesday: 12:00 PM - 10:00 PM
- Wednesday: 12:00 PM - 10:00 PM
- Thursday: 12:00 PM - 10:00 PM
- Friday: 12:00 PM - 10:00 PM
- Saturday: 12:00 PM - 10:00 PM
- Sunday: 12:00 PM - 10:00 PM



Enter your query (or type 'exit' to quit):  Where is located "Core Wellness and Chiropractic"?


User Query: Where is located "Core Wellness and Chiropractic"?
Answer: "Core Wellness and Chiropractic" is located at 100-111 Broadway Boulevard, Sherwood Park, AB T8H 2A8.



Enter your query (or type 'exit' to quit):  What is the sentiment in the reviews of "Core Wellness and Chiropractic"?


User Query: What is the sentiment in the reviews of "Core Wellness and Chiropractic"?
Answer: The sentiment in the review provided for "Core Wellness and Chiropractic" is positive. The reviewer mentioned, "These guys crack me up!" which suggests that they had a pleasant and enjoyable experience at the business.



Enter your query (or type 'exit' to quit):  What is the rating of "Every Body Massage"?


User Query: What is the rating of "Every Body Massage"?
Answer: The rating of "Every Body Massage" is 4.85 out of 5 stars.



Enter your query (or type 'exit' to quit):  Give me 5 businesses with a rating of 5 stars


User Query: Give me 5 businesses with a rating of 5 stars
Answer: Here are 5 businesses with a rating of 5 stars:

1. The Blossom Cafe
2. Summit Spa Retreat
3. Green Thumb Garden Center
4. Ocean Breeze Yoga Studio
5. Rocky Mountain Adventures Tour Co.



Enter your query (or type 'exit' to quit):  What is the rating of "Ocean Breeze Yoga Studio"?


User Query: What is the rating of "Ocean Breeze Yoga Studio"?
Answer: Based on the information provided, the rating of "Ocean Breeze Yoga Studio" is 4.5 stars on both Google and Yelp.



Enter your query (or type 'exit' to quit):  exit


### Print data for queries

In [15]:
df

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,review_count,categories,hours,tips,reviews,stars
65045,cD_If67mFKSEKFE1BZgJCA,Trolley Car Ice Cream Shoppe,7619 Germantown Ave,Philadelphia,PA,19119,40.065664,-75.195612,20,"Ice Cream & Frozen Yogurt, Desserts, Food","{'Monday': '12:0-22:0', 'Tuesday': '12:0-22:0'...",Gelati with root beer ice and vanilla ice crea...,"I haven't tried their water ice, however, I ha...",3.761905
48444,jYAol1aB_ZC8JhKMljG9zw,Christine's House of Kingfish Barbecue,926 Rt 206,Shamong,NJ,08088,39.767384,-74.738348,59,"Southern, Barbeque, Restaurants","{'Friday': '12:0-18:30', 'Saturday': '12:0-19:...",The crew were very friendly and inviting. The ...,AMAZING! I'm from the south (Western NC) and t...,4.531250
35384,NNab8Ympt7XaVWGWjqoykg,Every Body Massage,6654 Chippewa St,St. Louis,MO,63109,38.590182,-90.306076,26,"Massage Therapy, Hair Removal, Health & Medica...","{'Monday': '0:0-0:0', 'Tuesday': '9:0-21:0', '...",,I'm always hesitant to leave poor reviews for ...,4.846154
104156,LxBB61ONDmFoWSLBQZWBIA,O'Reilly Auto Parts,232 Los Altos Parkway,Sparks,NV,89436,39.584389,-119.740724,20,"Automotive, Auto Parts & Supplies","{'Monday': '7:30-21:0', 'Tuesday': '7:30-21:0'...",,Cant give enough kudos to Ben. He treated me i...,3.850000
68395,fNd-tOJGusNDcT0Kqzikaw,Indian Hut,1253 Churchmans Rd,Newark,DE,19713,39.691200,-75.661555,22,"Indian, Restaurants","{'Monday': '11:30-21:30', 'Tuesday': '11:30-21...",Not recommended,I love this place!! Their food tastes like aut...,2.727273
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130846,pBP6Vxhx1PSH2TftigpqtQ,Core Wellness and Chiropractic,100-111 Broadway Boulevard,Sherwood Park,AB,T8H 2A8,53.544253,-113.312416,5,"Acupuncture, Beauty & Spas, Chiropractors, Hea...","{'Monday': '7:30-18:30', 'Tuesday': '7:30-18:0...",These guys crack me up!,I've been going to see Dr. Dustin at this clin...,5.000000
20633,lznQEqIirjbjIMHkJZuS8A,Calogero's,608 Iberville St,New Orleans,LA,70130,29.953699,-90.067618,10,"Bars, Nightlife","{'Monday': '12:0-6:0', 'Tuesday': '12:0-6:0', ...",Great happy hour!!! \nMonday double well drink...,Cheese and Crackers!! These guys make the bes...,4.600000
117702,fm5tEBP8q9COf360VdcxZg,The Detail Shop Reno,199 E Moana Ln,Reno,NV,89502,39.492596,-119.794103,7,"Auto Detailing, Automotive, Car Wash","{'Monday': '9:0-17:0', 'Tuesday': '9:0-17:0', ...",,Did an amazing job on my suv. They were respon...,5.000000
45441,FZm2oRJ_pka6u2KPcVEBkw,Burger King,3701 34th St N,Saint Petersburg,FL,33713,27.805841,-82.679086,9,"Restaurants, Fast Food, Burgers","{'Monday': '5:0-0:0', 'Tuesday': '5:0-0:0', 'W...",Sketchy burger king What a waste of time. Fast...,"It's a Burger King, nothing special. Food coul...",2.666667
