<h2>Wrapper combining OpenAI, Melissa, Yelp</h2>

In [1]:
pip install xmltodict openai

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 23.3.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
pip install python-dotenv

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 23.3.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import urllib.request, urllib.parse, os, json, xmltodict, asyncio, requests
from openai import AsyncOpenAI
from dotenv import load_dotenv
load_dotenv()


True

In [4]:
async def openai_wrapper(location, preferences = ""):
    """
    Asks the OpenAI chat completions API for 10 attractions around a location.

    Parameters:
    - location (str): The place the user is traveling to; interpolated into the prompt.
    - preferences (str): Optional free-text criteria the attractions should fit.

    Returns:
    - list: The value stored under the "attractions" key of the model's JSON reply.
      The prompt asks for objects with a name and an address, but the shape is
      produced by the model and is not validated here.

    Raises:
    - KeyError: If the model's JSON reply has no "attractions" key.
    - json.JSONDecodeError: If the reply is not valid JSON.
    """
    # Get a list of 10 locations from the OpenAI API
    openai_client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

    # Only mention criteria when some were given, so the prompt never ends in "criteria: ."
    criteria = f" that fit these criteria: {preferences}" if preferences else ""

    chat_completion = await openai_client.chat.completions.create(
        model="gpt-4-turbo-preview",
        response_format={ "type": "json_object" },
        messages=[
            # The top-level key is spelled out because the return statement below indexes
            # the reply with "attractions"; without it the model is free to pick any key.
            {"role": "system", "content": 'You are a helpful travel assistant designed to output JSON. Respond with a single JSON object whose top-level key "attractions" contains a list of objects that have the attributes name and address.'},
            {"role": "user", "content": f"I am traveling to {location}, and I want to visit attractions around this area{criteria}. Generate a list of 10 attraction objects and return each attraction name and entire address"}
        ],
    )
    return json.loads(chat_completion.choices[0].message.content)["attractions"]

In [5]:
# async def yelp_wrapper(location, term = "attractions", radius = 20000, sort_by = "best_match", limit = 20):
#     # Get a list of 20 locations form Yelp api
#     params = {
#         "location": location,
#         "term": term,
#         "radius": radius,
#         "sort_by": sort_by,
#         "limit": limit
#     }
#     yelp_url = f"https://api.yelp.com/v3/businesses/search?{urllib.parse.urlencode(params)}"
#     yelp_header = {
#         "accept": "application/json",
#         "Authorization": f"Bearer {os.getenv('YELP_API_KEY')}"
#     }
#     response = await asyncio.to_thread(requests.get, yelp_url, headers = yelp_header)
#     if response.status_code == 200:
#         yelp_locations = response.json()
#     else:
#         response.raise_for_status()
#     return yelp_locations
async def yelp_wrapper(location, term="attractions", radius=20000, sort_by="best_match", limit=20):
    """
    Searches the Yelp businesses endpoint and returns a simplified result list.

    Parameters:
    - location (str): Location text sent as the "location" query parameter.
    - term (str): Search term sent as the "term" query parameter.
    - radius (int): Value sent as the "radius" query parameter.
    - sort_by (str): Value sent as the "sort_by" query parameter.
    - limit (int): Value sent as the "limit" query parameter.

    Returns:
    - str: A JSON-encoded list of dictionaries with the keys 'name', 'address',
      'photo', 'rating', 'description', 'lat' and 'lon'. Callers must json.loads it.

    Raises:
    - requests.exceptions.HTTPError: If Yelp answers with a 4xx/5xx status.
    """
    # Get a list of locations from Yelp API
    params = {
        "location": location,
        "term": term,
        "radius": radius,
        "sort_by": sort_by,
        "limit": limit
    }
    yelp_url = f"https://api.yelp.com/v3/businesses/search?{urllib.parse.urlencode(params)}"
    yelp_header = {
        "accept": "application/json",
        "Authorization": f"Bearer {os.getenv('YELP_API_KEY')}"
    }
    # requests is blocking, so it runs in a worker thread; the timeout keeps a stalled
    # connection from hanging the coroutine forever.
    response = await asyncio.to_thread(requests.get, yelp_url, headers=yelp_header, timeout=10)

    # Raise on any error status up front, so every path below returns a JSON string
    # instead of silently falling through and returning None.
    response.raise_for_status()
    yelp_data = response.json()

    # Parse and format the response
    formatted_locations = []
    for business in yelp_data.get("businesses") or []:
        # These nested objects may be missing or null; treat both as empty.
        business_location = business.get("location") or {}
        coordinates = business.get("coordinates") or {}
        categories = business.get("categories") or []
        formatted_locations.append({
            "name": business.get("name"),
            "address": " ".join(business_location.get("display_address") or []),
            "photo": business.get("image_url"),
            "rating": business.get("rating"),
            "description": ", ".join(category.get("title", "") for category in categories),
            "lat": coordinates.get("latitude"),
            "lon": coordinates.get("longitude")
        })

    return json.dumps(formatted_locations)

In [6]:
async def attractions_wrapper(location, preferences=""):
    """
    Combines OpenAI suggestions with Yelp search results into one de-duplicated list.

    Parameters:
    - location (str): The place to search around.
    - preferences (str): Optional free-text criteria; passed to OpenAI and used as the
      Yelp search term for the general search.

    Returns:
    - list: Dictionaries in this order: OpenAI suggestions (source "openai"), Yelp
      matches for each suggestion, then the general Yelp search (both source "yelp").
      Entries sharing the same case-insensitive (name, address) are kept only once.
    """
    def _normalized(value):
        # None-safe, case-insensitive comparison key (Yelp fields may be None).
        return str(value or "").strip().lower()

    # Get results from OpenAI
    openai_result = await openai_wrapper(location, preferences)

    # Add a source field to OpenAI results
    openai_result_with_source = [{**attraction, "source": "openai"} for attraction in openai_result]

    # Feed addresses from openai_result back into yelp_wrapper. Suggestions missing a
    # name or an address cannot be looked up, so they are skipped instead of raising.
    yelp_tasks = [
        yelp_wrapper(attraction['address'], term=attraction['name'])
        for attraction in openai_result
        if attraction.get('address') and attraction.get('name')
    ]

    # General Yelp search for the location. With no preferences, fall back to
    # yelp_wrapper's default term rather than sending an empty search term.
    initial_yelp_task = yelp_wrapper(location, preferences) if preferences else yelp_wrapper(location)

    # All Yelp requests are independent of each other, so run them concurrently.
    *yelp_results_from_openai, initial_yelp_result = await asyncio.gather(*yelp_tasks, initial_yelp_task)

    # Parse the per-suggestion Yelp results and mark them as from Yelp
    yelp_results_from_openai_with_source = [
        {**attraction, "source": "yelp"}
        for yelp_result in yelp_results_from_openai
        for attraction in json.loads(yelp_result)
    ]

    # Parse the general Yelp results and mark them as from Yelp
    initial_yelp_result_with_source = [{**attraction, "source": "yelp"} for attraction in json.loads(initial_yelp_result)]

    # Combine all results
    combined_results = openai_result_with_source + yelp_results_from_openai_with_source + initial_yelp_result_with_source

    # Deduplicate combined results based on name and address (first occurrence wins)
    seen = set()
    unique_results = []
    for result in combined_results:
        identifier = (_normalized(result.get('name')), _normalized(result.get('address')))
        if identifier not in seen:
            seen.add(identifier)
            unique_results.append(result)

    return unique_results
# async def attractions_wrapper(location, preferences=""):
#     # Run both openai_wrapper and yelp_wrapper concurrently
#     openai_result, yelp_result = await asyncio.gather(
#         openai_wrapper(location, preferences),
#         yelp_wrapper(location, preferences)
#     )
    
#     print(openai_result)
#     # print(yelp_result)
#     return yelp_result
    
#     # return {"OpenAI": openai_result, "Yelp": yelp_result}

In [7]:
def generate_yelp_graphql_query(search_locations):
    """
    Generates a GraphQL query for the Yelp API based on a list of search locations.

    Each location becomes one aliased `search(location: ..., limit: 1)` field that
    requests name, photos, rating, coordinates and formatted_address.

    Parameters:
    - search_locations (list of dict): A list of dictionaries, each containing 'name' and 'address' keys.

    Returns:
    - str: A JSON-encoded request body of the form {"query": "<GraphQL query>"}.
    """
    # Start of the GraphQL query
    query_parts = ["query MyQuery{"]

    # Loop over each search location and add it to the query
    for index, location in enumerate(search_locations):
        # A GraphQL alias may only contain ASCII letters, digits and underscores, so
        # spaces, apostrophes and other punctuation are dropped from the name. The
        # "search" prefix and the index suffix keep the alias valid and unique.
        safe_name = "".join(
            ch for ch in str(location['name'])
            if ch == "_" or (ch.isascii() and ch.isalnum())
        )
        unique_name = f"search{safe_name}{index}"

        # json.dumps produces a double-quoted literal with quotes, backslashes and
        # control characters escaped, so an address cannot terminate the string early.
        address_literal = json.dumps(str(location['address']), ensure_ascii=False)

        # Add the search query to the query parts
        query_parts.append(
            f"{unique_name}: search(location: {address_literal}, limit: 1) {{"
            "  business {"
            "    name"
            "    photos"
            "    rating"
            "    coordinates {"
            "      latitude"
            "      longitude"
            "    }"
            "    location {"
            "      formatted_address"
            "    }"
            "  }"
            "}"
        )

    # End of the GraphQL query
    query_parts.append("}")

    # Combine all parts into a single query string without newlines or carriage returns
    query = " ".join(query_parts)

    # Return the query wrapped in the JSON body expected by the endpoint
    return json.dumps({"query": query})

# Example usage:
search_locations = [
    {"name": "SanJose", "address": "San Jose"},
    {"name": "SanFran", "address": "San Francisco"}
]

# Generate the query
graphql_query = generate_yelp_graphql_query(search_locations)
print(graphql_query)

{"query": "query MyQuery{ searchSanJose0: search(location: \"San Jose\", limit: 1) {  business {    name    photos    rating    coordinates {      latitude      longitude    }    location {      formatted_address    }  }} searchSanFran1: search(location: \"San Francisco\", limit: 1) {  business {    name    photos    rating    coordinates {      latitude      longitude    }    location {      formatted_address    }  }} }"}


In [8]:
import requests
import json

url = "https://api.yelp.com/v3/graphql"
search_locations = [
    {"name": "SanJose", "address": "San Jose"},
    {"name": "SanFran", "address": "San Francisco"}
]
payload = generate_yelp_graphql_query(search_locations)

# The API key is read from the environment (populated from .env by load_dotenv())
# instead of being written into the notebook. The token that used to be hard-coded
# here has been exposed in the notebook's history and should be revoked and rotated.
headers = {
  'Content-Type': 'application/json',
  'Authorization': f"Bearer {os.getenv('YELP_API_KEY')}"
}

# The timeout keeps a stalled connection from blocking the cell indefinitely.
response = requests.request("POST", url, headers=headers, data=payload, timeout=10)

def parse_yelp_response(response_text):
    """
    Parses the Yelp GraphQL API response and formats it into a list of dictionaries.

    Missing or null parts of the response (the 'data' object, a whole search entry,
    its 'business' list, or a business's 'location', 'photos' or 'coordinates') are
    tolerated: they contribute no results or None/'' values instead of raising.

    Parameters:
    - response_text (str): The JSON response text from the Yelp API.

    Returns:
    - list: A list of dictionaries, each containing 'name', 'address', 'photo', 'rating', 'lat', and 'lon'.
    """
    # "data" may be absent or null (e.g. when the response only carries errors).
    data = json.loads(response_text).get('data') or {}
    formatted_results = []

    for search_key, search_result in data.items():
        # A single aliased search can come back as null, or with a null business list.
        businesses = (search_result or {}).get('business') or []
        for business in businesses:
            if not business:
                continue
            location = business.get('location') or {}
            coordinates = business.get('coordinates') or {}
            photos = business.get('photos') or []
            address = location.get('formatted_address') or ''
            formatted_results.append({
                'name': business.get('name'),
                # formatted_address is multi-line; flatten it to a single line
                'address': address.replace('\n', ' '),
                'photo': photos[0] if photos else None,
                'rating': business.get('rating'),
                'lat': coordinates.get('latitude'),
                'lon': coordinates.get('longitude')
            })

    return formatted_results

# Parse the response into a list of dictionaries and print it

print(parse_yelp_response(response.text))

[{'name': 'The Table', 'address': '1110 Willow St San Jose, CA 95125', 'photo': 'https://s3-media4.fl.yelpcdn.com/bphoto/Y70NLnzrDZ-vkW9xiTAM1Q/o.jpg', 'rating': 4.0, 'lat': 37.308203, 'lon': -121.901284}, {'name': 'Fog Harbor Fish House', 'address': '39 Pier Ste 202A San Francisco, CA 94133', 'photo': 'https://s3-media2.fl.yelpcdn.com/bphoto/by8Hh63BLPv_HUqRUdsp_w/o.jpg', 'rating': 4.5, 'lat': 37.80889, 'lon': -122.41025}]


In [9]:
# print(await attractions_wrapper("American High School, Fremont", "technology museum"))
# 

In [10]:
import asyncio
import requests
import json

def generate_yelp_graphql_query(search_locations):
    """
    Generates a GraphQL query for the Yelp API based on a list of search locations.

    Each location becomes one aliased `search(location: ..., limit: 1)` field that
    requests name, photos, rating, coordinates and formatted_address.

    Parameters:
    - search_locations (list): A list of dictionaries, each containing 'name' and 'address' keys.

    Returns:
    - str: A JSON-encoded request body of the form {"query": "<GraphQL query>"}.
    """
    query_parts = ["query MyQuery{"]
    for index, location in enumerate(search_locations):
        # A GraphQL alias may only contain ASCII letters, digits and underscores, so
        # every other character of the name is dropped. The "search" prefix and the
        # index suffix keep the alias valid and unique.
        alias_body = "".join(
            ch for ch in str(location['name'])
            if ch == "_" or (ch.isascii() and ch.isalnum())
        )
        unique_name = f"search{alias_body}{index}"

        # json.dumps yields a double-quoted literal with quotes, backslashes and
        # control characters escaped, so the address cannot break out of the string.
        address_literal = json.dumps(str(location['address']), ensure_ascii=False)

        query_parts.append(
            f"{unique_name}: search(location: {address_literal}, limit: 1) {{"
            "  business {"
            "    name"
            "    photos"
            "    rating"
            "    coordinates {"
            "      latitude"
            "      longitude"
            "    }"
            "    location {"
            "      formatted_address"
            "    }"
            "  }"
            "}"
        )
    query_parts.append("}")
    # Single-line query: parts are joined with spaces, no newlines
    query = " ".join(query_parts)
    return json.dumps({"query": query})


def parse_yelp_response(response_text):
    """
    Parses the Yelp GraphQL API response and formats it into a list of dictionaries.

    Null or missing pieces of the response ('data', an individual search entry, its
    'business' list, or a business's 'location', 'photos' or 'coordinates') do not
    raise; they yield no results or None/'' field values.

    Parameters:
    - response_text (str): The JSON response text from the Yelp API.

    Returns:
    - list: A list of dictionaries, each containing 'name', 'address', 'photo', 'rating', 'lat', and 'lon'.
    """
    # "data" can be absent or null, e.g. when the response only reports errors.
    data = json.loads(response_text).get('data') or {}
    formatted_results = []

    for search_key, search_result in data.items():
        # An aliased search may itself be null or carry a null business list.
        for business in (search_result or {}).get('business') or []:
            if not business:
                continue
            location = business.get('location') or {}
            coordinates = business.get('coordinates') or {}
            photos = business.get('photos') or []
            formatted_address = location.get('formatted_address') or ''
            formatted_results.append({
                'name': business.get('name'),
                # Collapse the multi-line address onto one line
                'address': formatted_address.replace('\n', ' '),
                'photo': photos[0] if photos else None,
                'rating': business.get('rating'),
                'lat': coordinates.get('latitude'),
                'lon': coordinates.get('longitude')
            })

    return formatted_results

async def fetch_yelp_data_async(search_locations, headers=None):
    """
    Fetches data from Yelp API asynchronously using GraphQL.

    Parameters:
    - search_locations (list): A list of dictionaries with 'name' and 'address' for search locations.
    - headers (dict, optional): Extra headers to include in the request. They are applied
      on top of the default Content-Type and Authorization headers (the latter built
      from the YELP_API_KEY environment variable), so they can override either.

    Returns:
    - list: The list of dictionaries produced by parse_yelp_response, or None when the
      request or the parsing fails (the error is printed rather than raised).
    """
    url = "https://api.yelp.com/v3/graphql"
    payload = generate_yelp_graphql_query(search_locations)

    # Start from the defaults, then honor whatever the caller passed in instead of
    # discarding it.
    request_headers = {
        'Content-Type': 'application/json',
        'Authorization': f"Bearer {os.getenv('YELP_API_KEY')}"
    }
    if headers:
        request_headers.update(headers)

    try:
        # requests is blocking, so it runs in a worker thread; the timeout keeps a
        # stalled connection from hanging the coroutine forever.
        response = await asyncio.to_thread(
            requests.post, url, headers=request_headers, data=payload, timeout=10
        )
        response.raise_for_status()  # Will raise an HTTPError if the HTTP request returned an unsuccessful status code
        return parse_yelp_response(response.text)
    except requests.exceptions.HTTPError as err:
        print(f"HTTP error occurred: {err}")
    except Exception as err:
        print(f"An error occurred: {err}")
    # Best-effort: failures are reported above and signalled to the caller with None.
    return None

In [11]:
search_locations = [
    {"name": "SanJose", "address": "San Jose"},
    {"name": "SanFran", "address": "San Francisco"}    
]

# generate_yelp_graphql_query(search_locations)
headers = {
  'Content-Type': 'application/json',
  'Authorization': f"Bearer {os.getenv('YELP_API_KEY')}"
}

response = await fetch_yelp_data_async(search_locations)


[{'name': 'The Table', 'address': '1110 Willow St San Jose, CA 95125', 'photo': 'https://s3-media4.fl.yelpcdn.com/bphoto/Y70NLnzrDZ-vkW9xiTAM1Q/o.jpg', 'rating': 4.0, 'lat': 37.308203, 'lon': -121.901284}, {'name': 'Fog Harbor Fish House', 'address': '39 Pier Ste 202A San Francisco, CA 94133', 'photo': 'https://s3-media2.fl.yelpcdn.com/bphoto/by8Hh63BLPv_HUqRUdsp_w/o.jpg', 'rating': 4.5, 'lat': 37.80889, 'lon': -122.41025}]
