# Puerto Rico Travel Planner AI Chatbot

In [76]:
import os

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

OPENAI_API_KEY  = os.getenv('OPENAI_API_KEY')
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
GOOGLE_PLACES_API_KEY = os.getenv('OUR_GOOGLE_PLACES_API_KEY')


In [107]:
from pinecone import Pinecone, ServerlessSpec
import openai
import json
import time
from urllib.parse import urlparse, parse_qs
import requests


INDEX_NAME = "puerto-rico-travel"

# Initialize Pinecone
pc = Pinecone(api_key=PINECONE_API_KEY)

# Check if the index exists
if INDEX_NAME not in [index_info["name"] for index_info in pc.list_indexes()]:
    pc.create_index(
        name=INDEX_NAME,
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )

# Connect to the index
index = pc.Index(INDEX_NAME)

# Load data
with open("data/hotels.json", "r", encoding="utf-8") as f:
    hotels_data = json.load(f)

# Function to generate embeddings
def get_embedding(text):
    response = openai.embeddings.create(
        input=text,
        model="text-embedding-ada-002"
    )
    return response.data[0].embedding  # Updated to match the new API structure


def extract_place_id(url):
    """Extracts Google Maps Place ID from the given URL"""
    parsed_url = urlparse(url)
    query_params = parse_qs(parsed_url.query)
    return query_params.get("query_place_id", [None])[0]  # Return first item or None

def fetch_google_reviews(place_id):
    """Fetch rating and review count from Google Places API"""
    url = f"https://maps.googleapis.com/maps/api/place/details/json?place_id={place_id}&fields=name,rating,user_ratings_total&key={GOOGLE_PLACES_API_KEY}"
    response = requests.get(url)
    data = response.json()
    
    if "result" in data:
        return {
            "name": data["result"].get("name", "N/A"),
            "rating": data["result"].get("rating", 0),  # Default 0 if missing
            "review_count": data["result"].get("user_ratings_total", 0)  # Default 0 if missing
        }
    return {"name": "N/A", "rating": 0, "review_count": 0}

# Store data in Pinecone
for item in hotels_data:
    combined_text = f"{item['name']} {item['type']} {item['region']} {item['location']['city']} {item['description']}"
    
    # Get the embedding for the combined text
    vector = get_embedding(combined_text)

    
    place_id = extract_place_id(item['contact']['google_maps_url'])
    # Get rating & review count
    place_data = fetch_google_reviews(place_id)
    
    # Prepare metadata for the upsert
    metadata = {
        "name": item["name"],
        "text": item["description"],
        "description": item["description"],
        "type": item["type"],
        "region": item["region"],
        "location": json.dumps(item["location"]),  # Keeping the full location metadata
        "contact": json.dumps(item["contact"]),
        "url": item["url"],
        "rating": place_data["rating"],
        "review_count": place_data["review_count"]

    }
    index.upsert(vectors=[(str(item["id"]), vector, metadata)])
    time.sleep(1)  # Prevent hitting API rate limits

print("Data uploaded to Pinecone successfully!")


Data uploaded to Pinecone successfully!


In [108]:
from pinecone import Pinecone, ServerlessSpec
import openai
import json
import time


INDEX_NAME = "puerto-rico-travel"

# Initialize Pinecone
pc = Pinecone(api_key=PINECONE_API_KEY)

# Check if the index exists
if INDEX_NAME not in [index_info["name"] for index_info in pc.list_indexes()]:
    pc.create_index(
        name=INDEX_NAME,
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )

# Connect to the index
index = pc.Index(INDEX_NAME)

# Load data
with open("data/food_drinks.json", "r", encoding="utf-8") as f:
    food_drinks = json.load(f)

# Function to generate embeddings
def get_embedding(text):
    response = openai.embeddings.create(
        input=text,
        model="text-embedding-ada-002"
    )
    return response.data[0].embedding  # Updated to match the new API structure

def extract_place_id(url):
    """Extracts Google Maps Place ID from the given URL"""
    parsed_url = urlparse(url)
    query_params = parse_qs(parsed_url.query)
    return query_params.get("query_place_id", [None])[0]  # Return first item or None

def fetch_google_reviews(place_id):
    """Fetch rating and review count from Google Places API"""
    url = f"https://maps.googleapis.com/maps/api/place/details/json?place_id={place_id}&fields=name,rating,user_ratings_total&key={GOOGLE_PLACES_API_KEY}"
    response = requests.get(url)
    data = response.json()
    
    if "result" in data:
        return {
            "name": data["result"].get("name", "N/A"),
            "rating": data["result"].get("rating", 0),  # Default 0 if missing
            "review_count": data["result"].get("user_ratings_total", 0)  # Default 0 if missing
        }
    return {"name": "N/A", "rating": 0, "review_count": 0}

# Store data in Pinecone
for item in food_drinks:
    combined_text = f"{item['name']} {item['type']} {item['region']} {item['location']['city']} {item['description']}"
    
    # Get the embedding for the combined text
    vector = get_embedding(combined_text)

    
    place_id = extract_place_id(item['contact']['google_maps_url'])
    # Get rating & review count
    place_data = fetch_google_reviews(place_id)
    
    # Prepare metadata for the upsert
    metadata = {
        "name": item["name"],
        "text": item["description"],
        "description": item["description"],
        "type": item["type"],
        "region": item["region"],
        "location": json.dumps(item["location"]),  # Keeping the full location metadata
        "contact": json.dumps(item["contact"]),
        "url": item["url"],
        "rating": place_data["rating"],
        "review_count": place_data["review_count"]

    }
    index.upsert(vectors=[(str(item["id"]), vector, metadata)])
    time.sleep(1)  # Prevent hitting API rate limits

print("Data uploaded to Pinecone successfully!")


Data uploaded to Pinecone successfully!


In [109]:
from pinecone import Pinecone, ServerlessSpec
import openai
import json
import time


INDEX_NAME = "puerto-rico-travel"

# Initialize Pinecone
pc = Pinecone(api_key=PINECONE_API_KEY)

# Check if the index exists
if INDEX_NAME not in [index_info["name"] for index_info in pc.list_indexes()]:
    pc.create_index(
        name=INDEX_NAME,
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )

# Connect to the index
index = pc.Index(INDEX_NAME)

# Load data
with open("data/things_to_do.json", "r", encoding="utf-8") as f:
    to_do_data = json.load(f)

# Function to generate embeddings
def get_embedding(text):
    response = openai.embeddings.create(
        input=text,
        model="text-embedding-ada-002"
    )
    return response.data[0].embedding  # Updated to match the new API structure

def extract_place_id(url):
    """Extracts Google Maps Place ID from the given URL"""
    parsed_url = urlparse(url)
    query_params = parse_qs(parsed_url.query)
    return query_params.get("query_place_id", [None])[0]  # Return first item or None

def fetch_google_reviews(place_id):
    """Fetch rating and review count from Google Places API"""
    url = f"https://maps.googleapis.com/maps/api/place/details/json?place_id={place_id}&fields=name,rating,user_ratings_total&key={GOOGLE_PLACES_API_KEY}"
    response = requests.get(url)
    data = response.json()
    
    if "result" in data:
        return {
            "name": data["result"].get("name", "N/A"),
            "rating": data["result"].get("rating", 0),  # Default 0 if missing
            "review_count": data["result"].get("user_ratings_total", 0)  # Default 0 if missing
        }
    return {"name": "N/A", "rating": 0, "review_count": 0}

# Store data in Pinecone
for item in to_do_data:
    combined_text = f"{item['name']} {item['type']} {item['region']} {item['location']['city']} {item['description']}"
    
    # Get the embedding for the combined text
    vector = get_embedding(combined_text)

    
    place_id = extract_place_id(item['contact'].get('google_maps_url'))
    # Get rating & review count
    place_data = fetch_google_reviews(place_id)
    
    # Prepare metadata for the upsert
    metadata = {
        "name": item["name"],
        "text": item["description"],
        "description": item["description"],
        "type": item["type"],
        "region": item["region"],
        "location": json.dumps(item["location"]),  # Keeping the full location metadata
        "contact": json.dumps(item["contact"]),
        "url": item["url"],
        "rating": place_data["rating"],
        "review_count": place_data["review_count"],
        "accommodations": json.dumps(item["accommodations"]),
        "photo": item["photo"]

    }
    index.upsert(vectors=[(str(item["id"]), vector, metadata)])
    time.sleep(1)  # Prevent hitting API rate limits

print("Data uploaded to Pinecone successfully!")


Data uploaded to Pinecone successfully!


## Add Google reviews

In [None]:
import requests
import pinecone

# Initialize Pinecone
pinecone.init(api_key="YOUR_PINECONE_API_KEY", environment="YOUR_PINECONE_ENV")
index = pinecone.Index("your-index-name")

# Google API Key & Place ID
API_KEY = "YOUR_GOOGLE_PLACES_API_KEY"
PLACE_ID = "ChIJNWroByNvA4wRUBsD3LD2-ro"

def fetch_google_reviews(place_id):
    """Fetch rating and review count from Google Places API"""
    url = f"https://maps.googleapis.com/maps/api/place/details/json?place_id={place_id}&fields=name,rating,user_ratings_total&key={API_KEY}"
    response = requests.get(url)
    data = response.json()
    
    if "result" in data:
        return {
            "name": data["result"].get("name", "N/A"),
            "rating": data["result"].get("rating", 0),  # Default 0 if missing
            "review_count": data["result"].get("user_ratings_total", 0)  # Default 0 if missing
        }
    return {"name": "N/A", "rating": 0, "review_count": 0}

# Get rating & review count
place_data = fetch_google_reviews(PLACE_ID)

# Example vector ID (replace with actual embedding process)
vector_id = "example-hotel-123"

# Update Pinecone metadata
index.upsert([
    (vector_id, [], {  
        "name": place_data["name"],
        "rating": place_data["rating"],
        "review_count": place_data["review_count"]
    })
])

print(f"Updated Pinecone metadata for {place_data['name']}:")
print(f"⭐ Rating: {place_data['rating']} | 📝 Reviews: {place_data['review_count']}")


# chatGPT code

In [110]:
import openai
from pinecone import Pinecone

INDEX_NAME = "puerto-rico-travel"

# Initialize Pinecone
pc = Pinecone(api_key=PINECONE_API_KEY)

# Connect to the index
index = pc.Index(INDEX_NAME)

# Function to get the embedding of the user's query
def get_embedding(text):
    response = openai.embeddings.create(
        input=text,
        model="text-embedding-ada-002"
    )
    return response.data[0].embedding  

# Function to query Pinecone and format the results
def search_pinecone(user_query, top_k=5):
    # Get the vector embedding for the user's query
    query_vector = get_embedding(user_query)
    
    # Query Pinecone for the top_k matches
    response = index.query(
        vector=query_vector, 
        top_k=top_k, 
        include_metadata=True
    )
    
    # Format the search results
    results = [
        {
            "description": match["metadata"].get("description", "Unknown"),  # Use 'values' as the description
            "name": match["metadata"].get("name", "Unknown"),
            "type": match["metadata"].get("type", "Unknown"),
            "region": match["metadata"].get("region", "Unknown"),
            "url": match["metadata"].get("url", ""),
            "rating": match["metadata"].get("rating", 0),
            "review_count": match["metadata"].get("review_count", 0),
        }
        for match in response["matches"]
    ]
    
    return results


In [111]:
def rank_results(results):
    """Sort results based on rating (primary) and review count (secondary)."""
    if not results:
        return []
    
    return sorted(
        results, 
        key=lambda x: (
            x.get("rating", 0),  # Primary: Higher rating first
            x.get("review_count", 0)  # Secondary: More reviews as tiebreaker
        ), 
        reverse=True  # Sort in descending order
    )

def format_results(results):
    """Format ranked search results into a readable string."""
    if not results:
        return "Sorry, I couldn't find anything matching your request."

    response = "Here are some recommendations:\n\n"
    for i, item in enumerate(results, 1):
        response += (
            f"{i}. **{item.get('name', 'Unknown')}** ({item.get('type', 'Unknown Type')} in {item.get('region', 'Unknown Location')})\n"
            f"   ⭐ {item.get('rating', 'N/A')} ({item.get('review_count', 'N/A')} reviews)\n"
            f"   {item.get('description', 'No description available.')}\n"
            f"   [More Info]({item.get('url', '#')})\n\n"
        )
    
    return response

# Example user input
user_input = "What are the best activities in San Juan?"
search_results = search_pinecone(user_input)

# Rank results before formatting
ranked_results = rank_results(search_results)

# Print formatted ranked results
print(format_results(ranked_results))


Here are some recommendations:

1. **VIP Adventures Puerto Rico** (['Kayaking', 'Biking', 'Group-Friendly Activities & Tours', 'SUP / Paddleboarding', 'Training & Teambuilding Companies', 'Snorkeling', 'Other Watersports'] in Metro)
   ⭐ 4.8 (1142.0 reviews)
   VIP Adventure has more than 20 years of experience in the field of touristic activities around the island and event planning for top companies in Puerto Rico. Our company concept is based on three important aspects: outstanding experience: unique activities, human and nature relations, and the best-personalized service to create your best tour experience in Puerto Rico.
   [More Info](https://www.discoverpuertorico.com/profile/vip-adventures-puerto-rico/9402)

2. **Jaime Benítez National Park** (['Track & Field Venues', 'Other Watersports'] in Metro)
   ⭐ 4.7 (1829.0 reviews)
   Laguna del Condado National Park is a very popular spot for all the family. Great place for a walk or a jog, you can also practice sportslike paddleboar

In [112]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings

# Initialize LLM
llm = ChatOpenAI(model_name="gpt-4", temperature=0)

# Initialize the embedding model
embedding = OpenAIEmbeddings(model="text-embedding-ada-002")

# Connect Pinecone to LangChain
vector_store = Pinecone(index=index, embedding=embedding, text_key="text")

def rank_results(results):
    """Sort retrieved results by rating and review count."""
    if not results:
        return []

    return sorted(
        results,
        key=lambda x: (
            x[0].metadata.get("rating", 0),  # Primary: Higher rating first
            x[0].metadata.get("review_count", 0)  # Secondary: More reviews as tiebreaker
        ),
        reverse=True  # Sort in descending order
    )

def search_and_rank(query):
    """Run the retrieval chain, rank results, and return sorted responses."""
    results = vector_store.similarity_search_with_score(query, k=10)
    ranked_results = rank_results(results)  # Rank the retrieved results

    if not ranked_results:
        return "Sorry, no relevant results found."

    response = "Here are some top recommendations:\n\n"
    for i, (doc, score) in enumerate(ranked_results, 1):
        metadata = doc.metadata

        response += (
            f"{i}. **{metadata.get('name', 'Unknown')}** ({metadata.get('type', 'Unknown Type')} in {metadata.get('region', 'Unknown Location')})\n"
            f"   ⭐ {metadata.get('rating', 'N/A')} ({metadata.get('review_count', 'N/A')} reviews)\n"
            f"   {metadata.get('description', 'No description available.')}\n"
            f"   [More Info]({metadata.get('url', '#' )})\n\n"
        )
    
    return response

# Example queries with ranked results
queries = [
    "luxury beachfront hotel in San Juan",
    "What are the best activities in San Juan?",
    "Find me a hotel in Puerto Rico with a pool.",
    "What are some popular restaurants in San Juan?", 
    "What are some historical sites in San Juan?"
]

for query in queries:
    print(f"\nQuery: {query}\n")
    print(search_and_rank(query))



Query: luxury beachfront hotel in San Juan

Here are some top recommendations:

1. **La Rosa de los Vientos** (Hotel in Metro)
   ⭐ 4.9 (12.0 reviews)
   This beach-side villa was actually an abandoned house before the owner decided to turn it into a tropical paradise guesthouse. A pink and white color scheme gives the property a vibrant and calm vibe.
   [More Info](https://www.discoverpuertorico.com/profile/la-rosa-de-los-vientos/2411)

2. **The Ritz-Carlton San Juan Hotel** (Hotel in Metro)
   ⭐ 4.5 (863.0 reviews)
   Let one of the most elegant San Juan luxury resorts, The Ritz-Carlton, San Juan awaken your senses as you fall under the spell of its enchanting rhythms and flavors. Our casual yet elegant luxury hotel envelops guests in a timeless Caribbean setting – perched on a two-mile stretch of a golden sandy beach. Puerto Rican fritters, mamposteao rice and mofongo introduce adventurous palates to the local cuisine. Rum flights find their way on to afternoon agendas. And the pe

In [102]:
# Retrieve some sample documents from Pinecone for debugging
sample_results = vector_store.similarity_search_with_score("test query", k=1)

# Print metadata to check what fields exist
for doc, score in sample_results:
    print(f"DEBUG: Metadata - {doc.metadata}")


DEBUG: Metadata - {'contact': '{"telephone": "(787) 379-4238", "website": null, "google_maps_url": "https://www.google.com/maps/search/?api=1&query=18.4316332000,-66.4064762000"}', 'location': '{"street_address": "Calle Fabi\\u00e1n Monta\\u00f1ez Parcelas Amadeo", "city": "Vega Baja", "postal_code": "693", "country": "USA"}', 'name': 'Luis Montanez Vega', 'rating': 0.0, 'region': 'North', 'review_count': 0.0, 'type': 'Hotel', 'url': 'https://www.discoverpuertorico.com/profile/luis-montanez-vega/17384'}


In [104]:
print(index.describe_index_stats())  # Check if the metadata exists


{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 2712}},
 'total_vector_count': 2712}


In [106]:
sample_result = index.query(
    vector=[0.1] * 1536,  # Replace with an actual query vector
    top_k=1,
    include_metadata=True
)

print("DEBUG: Sample Pinecone Document Metadata:")
print(sample_result)


DEBUG: Sample Pinecone Document Metadata:
{'matches': [{'id': '103591',
              'metadata': {'accommodations': '[]',
                           'contact': '{"telephone": "(787) 290-6617", '
                                      '"website": "", "google_maps_url": '
                                      '"https://www.google.com/maps/search/?api=1&query_place_id=ChIJTwnttYjVHIwR02pFkeHygr4&query=18.0502599000,-66.5715589000"}',
                           'description': 'The Museo de la Música '
                                          'Puertorriqueña documents Puerto '
                                          "Rico's rich musical traditions and "
                                          "recognizes the island's talented "
                                          'musicians. The museum is located in '
                                          "Ponce's historical town center in "
                                          'what used to be the family home of '
                      