In [100]:
import re
import json
import requests
import numpy as np
import pandas as pd
from openai import OpenAI
from pinecone import Pinecone
from langchain_openai import OpenAIEmbeddings
from langchain_core.documents import Document
from langchain_pinecone import PineconeVectorStore


In [4]:
types = '''acai_shop *
afghani_restaurant *
african_restaurant *
american_restaurant
asian_restaurant *
bagel_shop *
bakery
bar
bar_and_grill *
barbecue_restaurant
brazilian_restaurant
breakfast_restaurant
brunch_restaurant
buffet_restaurant *
cafe
cafeteria *
candy_store *
cat_cafe *
chinese_restaurant
chocolate_factory *
chocolate_shop *
coffee_shop
confectionery *
deli *
dessert_restaurant *
dessert_shop *
diner *
dog_cafe *
donut_shop *
fast_food_restaurant
fine_dining_restaurant *
food_court *
french_restaurant
greek_restaurant
hamburger_restaurant
ice_cream_shop
indian_restaurant
indonesian_restaurant
italian_restaurant
japanese_restaurant
juice_shop *
korean_restaurant *
lebanese_restaurant
meal_delivery
meal_takeaway
mediterranean_restaurant
mexican_restaurant
middle_eastern_restaurant
pizza_restaurant
pub *
ramen_restaurant
restaurant
sandwich_shop
seafood_restaurant
spanish_restaurant
steak_house
sushi_restaurant
tea_house *
thai_restaurant
turkish_restaurant
vegan_restaurant
vegetarian_restaurant
vietnamese_restaurant
wine_bar *'''

# construct list from provided Google Restaurant location subtypes
primary_types = []
types = types.replace(' *', '')
primary_types = types.split()

In [None]:
# set variables for storage and error flagging
master_data = {}
error = False

# location coordinates // current Dallas, TX
lat = 32.7949
long = -96.79929

# customizable Google Field Mask list
field_masks = ['displayName', 'formattedAddress', 'id', 'regularOpeningHours', 'priceLevel', 'websiteUri']
master_field_mask = ','.join(f'places.{i}' for i in field_masks)

# Google Places Nearby Search (NEW) API URL
post_test_url = 'https://places.googleapis.com/v1/places:searchNearby'

# Google Places Nearby Search (NEW) API Headers
post_headers = {
    'Content-Type': 'application/json',
    'X-Goog-Api-Key': API_KEY,
    'X-Goog-FieldMask': master_field_mask,

}
i = 0
for primary_type in primary_types:
    # fill in primary type with new type from master list

    # Google Places Nearby Search (NEW) API Body
    post_body = {
        "includedPrimaryTypes": [primary_type],
        'maxResultCount': 20,
        "rankPreference": "POPULARITY",
        "locationRestriction": {
            "circle": {
                "center": {
                    "latitude": lat,
                    "longitude": long
                    },
                "radius": 16903.0 # about 10 miles
            }
        }
    }

    # insert the new data to the master data dictionary
    post_response = requests.post(post_test_url, headers=post_headers, json=post_body)
    print(f'{i}:', post_response)
    # break from loop if unsuccessful API call
    if post_response.status_code != 200:
        error = True
        break
    try:
        # set data into master data dictionary
        new_data = post_response.json()['places']
        master_data[primary_type] = new_data
    # exception for empty API return for primaryPlaceType
    except KeyError:
        master_data[primary_type] = 'N/A'
    i += 1

# if all data has been successfully received then output the data to a JSON for storage
if not error:
    with open('data.json', 'w') as output:
        json.dump(master_data, output, indent=4)

In [5]:
# read in data from storage JSON file
with open('data.json', 'r') as fp:
    data = json.load(fp)


In [97]:
# determine different levels of pricing set by Google
plevels = set()
for ptype in data:
    for place in data[ptype]:
        if 'priceLevel' in place:
            plevels.add(place['priceLevel'])


print(plevels)

{'PRICE_LEVEL_VERY_EXPENSIVE', 'PRICE_LEVEL_INEXPENSIVE', 'PRICE_LEVEL_EXPENSIVE', 'PRICE_LEVEL_MODERATE'}


In [6]:
# create empty dataframe for storage
df = pd.DataFrame(columns=['id', 'displayName', 'primaryType', 'websiteUri', 'formattedAddress', 'priceLevel', 'reviews', 'editorialSummary', 'rating'])

for prim_type in data:
    if type(data[prim_type]) == list:
        # set default value for optional returned parameters
        websiteUri, formattedAddress, priceLevel = np.nan, np.nan, np.nan
        # extract data with some formatting
        for place in data[prim_type]:
            if 'websiteUri' in place:
                websiteUri = place['websiteUri']
            if 'formattedAddress'  in place:
                formattedAddress = place['formattedAddress']
            if 'priceLevel' in place:
                priceLevel = ' '.join(place['priceLevel'][12:].split('_')).lower()
            # add new row to end of dataframe
            df.loc[len(df)] = [place['id'], place['displayName']['text'].title(), ' '.join(prim_type.split('_')), websiteUri, formattedAddress, priceLevel, np.nan, np.nan, np.nan]
            

In [24]:
# Google Places Details (new) API URI
details_url = 'https://places.googleapis.com/v1/places/'

# Google Places Details (new) API headers
details_headers = {
    'Content-Type': 'application/json',
    'X-Goog-Api-Key': API_KEY,
    'X-Goog-FieldMask': 'rating,reviews,editorialSummary'
}

# use Google Places  Details (NEW) API to get customer reviews, ratings, and the editorial summary for each location
def get_details(row):
    res = requests.get(details_url+row['id'], headers=details_headers)
    temp = res.json()
    if temp:
        if 'reviews' in temp:
            row['reviews'] = temp['reviews']
        if 'rating' in temp:
            row['rating'] = temp['rating']
        if 'editorialSummary' in temp:
            row['editorialSummary'] = temp['editorialSummary']['text']
    return row

In [None]:
# retrieve and add data from Google Places Details API
df = df.apply(get_details, axis=1)

In [72]:
# parse the reviews from dictionary to string
def format_reviews(row):
    if type(row['reviews']) == list:
        hold = []
        for rev in row['reviews']:
            if 'text' in rev:
                if 'text' in rev['text']:
                    review = rev['text']['text']
                    review = review.replace('\n', ' ')
                    review = review.replace('\t', ' ')
                    hold.append(review)
        row['reviews'] = '; '.join(hold)

    return row


# format the reviews from list/dictionary to contiguous string
df = df.apply(format_reviews, axis=1)
# remove newline and tab characters from reviews to place them all on one row
df['reviews'] = df['reviews'].map(lambda r: r.replace('\n', ' ').replace('\t', ' ') if type(r) == str else r)



In [75]:
# write dataframe to csv file for storage
df.to_csv('data.csv', index=False, sep='|')

In [78]:
# prompts for easier refernce
prompts = [' is a ', ' It is located at ', ' Prices are typically ', ' Here are what people have to say about ', 'It is rated ', ' out of 5 stars overall.']

with open('data.csv', 'r', encoding='UTF-8') as infile, open('data.txt', 'w', encoding='UTF-8') as outfile:
    for place in infile:
        # begin new string and parse incoming string
        place_details = ''
        place = place.strip()
        place = place.split('|')

        # construct string
        # name and type
        place_details += place[1] + prompts[0] + place[2] + '.'
        # optional editorial summary
        if place[7]:
            place_details += ' Described as ' + place[7]
        # address
        place_details += prompts[1] + place[4] + '.'
        # optional price level
        if place[5]:
            place_details += prompts[2] + place[5] + '.'
        # optional reviews
        if place[6]:
            place_details += prompts[3] + place[1] + ': ' + place[6]
        # optional rating from Google
        if place[8]:
            place_details += prompts[4] + place[8] + prompts[5]

        # write the line the data file
        outfile.write(place_details + '\n')


In [81]:
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(PINECONE_INDEX)

embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=OPENAI_API_KEY)

vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [86]:
docs = []
with open('data.txt', 'r', encoding='UTF-8') as fp:
    for line in fp:
        line = line.strip()
        doc = Document(
            page_content=line,
            metadata={"source": "Google Maps Places and Reviews"}
        )
        docs.append(doc)



In [None]:
vector_store.add_documents(documents=docs)

In [94]:
# res = vector_store.similarity_search_by_vector_with_score(
#     "I want to eat at the best wine bar in Dallas. I particularly love red wine.",
#     k=3
# )

retriever = vector_store.as_retriever(
        search_type='similarity',
        search_kwargs={'k': 3}
    )


In [101]:
user_input = "What is the best wine bar in Dallas? In particular, I want a wine bar known for its red wine"
res = retriever.invoke(user_input)

In [102]:
rag_context = ', '.join([f"PLACE {i+1}: {place.page_content}" for i, place in enumerate(res)])
rag_context

"PLACE 1: Bodega Wine Bar is a wine bar. Described as Cozy hangout with a fireplace serving wine by the glass, bottle & flight, plus cheese & charcuterie. It is located at 6434 E Mockingbird Ln # 109, Dallas, TX 75214, USA. Prices are typically moderate. Here are what people have to say about Bodega Wine Bar: Best Wine bar in Dallas! The wine selection is amazing, everyone is very educated and friendly!; If DDD were looking for wine bars this would be the winner. This place is all about the glass. The staff will help with a selection from their extensive inventory if you need it. Don't forget to find a few bottles for the house.; Very friendly, helpful, and knowledgeable staff. Great selection of wine from all over the world. Relaxing atmosphere. They have your typical snacks that pair with wine but allow outside food.; Favorite wine bar in DFW! Amier (owner) is insanely knowledgeable and is always willing to share that knowledge with his patrons. Whether you're buying a bottle to go o

In [103]:
llm = OpenAI(api_key=OPENAI_API_KEY)

dev_prompt = '''
    You are a helpful restaurant recommender. You use the data provided in this query to provide recommendations for the user. If the user does not specify how many recommendations they want: provide one or two recommendations.
    Keep the recommendations friendly but concise. Use the data to address the parts of the users' questions. Use the following data to generate a concise and accurate repsonse to the users questions or comments: 
'''

completion = llm.chat.completions.create(
    model='gpt-4o-mini',
    messages=[
        {'role': 'developer', 'content': dev_prompt + rag_context},
        {'role': 'user', 'content': user_input}
    ]
)

print(completion.choices[0].message)

ChatCompletionMessage(content="For a top-notch wine bar in Dallas, I recommend **Bodega Wine Bar**. It's often hailed as the best wine bar in the area, with an amazing selection of wines, including reds from all over the world. The staff is praised for their knowledge and friendliness, making it a great place to explore different varietals. Plus, the cozy atmosphere adds to the overall experience.\n\nAnother excellent option is **Barons Creek Vineyards Tasting Room Dallas**, known for its fantastic wines and a welcoming atmosphere. They offer a variety of reds and have received high praise for both the wine and the service.\n\nBoth places would be a great choice for red wine enthusiasts!", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None)
