In [1]:
import pandas as pd
import numpy as np
import faiss
import os
import re
import random
import pickle
import requests
import networkx as nx
from math import radians, sin, cos, sqrt, atan2
from sentence_transformers import SentenceTransformer
from openai import OpenAI
from dotenv import load_dotenv, find_dotenv
from datetime import datetime, timedelta

# Load environment variables
# load_dotenv(find_dotenv()) loads settings from a discovered .env file into the
# process environment; its return value is deliberately discarded.
_ = load_dotenv(find_dotenv())  
OPENWEATHER_API_KEY = os.getenv("OPENWEATHER_API_KEY")  
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")  
# NOTE: os.getenv returns None for an unset variable, so either key may be None
# here; nothing in this cell checks for that before the keys are used.

# OpenAI client
# Shared by interpret_user_response() and answer_questions() below.
openai_client = OpenAI(api_key=OPENAI_API_KEY)

# Load the processed landmarks data
# The path is relative to the notebook's working directory. The code in this
# cell reads the columns "Title", "Cleaned Summary", "Latitude" and "Longitude".
landmark_file_path = "processed_landmarks_with_metadata.csv"
df_landmarks = pd.read_csv(landmark_file_path)

# Load a pre-trained sentence transformer model for embeddings
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Generate embeddings for each landmark summary
# Row i of `embeddings` corresponds to row i of df_landmarks; the search helper
# relies on this positional alignment when it maps FAISS hits back with .iloc.
# NOTE(review): a missing "Cleaned Summary" cell would be read as NaN and passed
# to encode() as a float — confirm the CSV has no empty summaries.
landmark_texts = df_landmarks["Cleaned Summary"].tolist()
embeddings = embedding_model.encode(landmark_texts, convert_to_numpy=True)

# Save embeddings in FAISS for efficient retrieval
# The index lives in memory only; nothing is written to disk in this cell.
# NOTE(review): IndexFlatIP ranks by inner product, which matches cosine
# similarity only if the vectors are unit-normalized — confirm for this model.
embedding_dim = embeddings.shape[1]
faiss_index = faiss.IndexFlatIP(embedding_dim)
faiss_index.add(embeddings)

print(f"Embeddings complete! Stored {len(embeddings)} landmark vectors in FAISS.")

# AI-based function to interpret responses naturally
def interpret_user_response(user_input):
    """Classify a free-text reply as 'yes', 'no', 'done', or 'unclear' using OpenAI.

    Parameters:
    - user_input: The raw text typed by the user (None is treated as empty text).

    Returns:
    - Exactly one of the strings 'yes', 'no', 'done', or 'unclear'. Any API
      failure, empty reply, or reply that does not start with one of those
      labels is reported as 'unclear'.
    """
    allowed_labels = {"yes", "no", "done", "unclear"}

    # Normalize input: trim, lowercase, and collapse a vowel repeated three or
    # more times down to two (e.g. "nooooo" -> "noo").
    user_input = re.sub(r"([aeiou])\1{2,}", r"\1\1", (user_input or "").strip().lower())

    prompt = f"""Classify the user's intent as 'yes', 'no', 'done', or 'unclear':
    
    User: "{user_input}"
    
    Rules:
    - If the user expresses uncertainty (maybe, not sure), classify as 'no'.
    - If the response is in another language, translate and classify.
    - If the response includes 'yes' or strong agreement, classify as 'yes'.
    - If the response includes 'no' or strong disagreement, classify as 'no'.
    - If the response expresses completion, classify as 'done'.
    - If unclear, respond as 'unclear'.
    
    Answer with one word: Yes, No, Done, or Unclear."""

    try:
        response = openai_client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "system", "content": "You are a helpful assistant analyzing responses."},
                      {"role": "user", "content": prompt}]
        )
        raw_label = response.choices[0].message.content or ""
    except Exception:
        # Best effort: a failed classification must not abort the conversation.
        return "unclear"

    # The model may decorate its answer ("No.", "'Done'", "Yes, definitely").
    # Callers compare against bare labels, so keep only the first word and
    # accept it only when it is one of the known labels.
    first_word = re.search(r"[a-z]+", raw_label.lower())
    if first_word and first_word.group(0) in allowed_labels:
        return first_word.group(0)
    return "unclear"


# Get the current weather for a landmark (uses the current-conditions endpoint, not a multi-day forecast)
def find_weather_forecast(location):
    """Return a one-line description of the current weather at a landmark.

    The landmark's coordinates are looked up by exact 'Title' match in
    df_landmarks and sent to OpenWeather's current-weather endpoint
    (metric units).

    Parameters:
    - location: Landmark title exactly as it appears in df_landmarks['Title'].

    Returns:
    - A human-readable string: either the weather summary or an explanation of
      why it could not be retrieved. This function does not raise for network
      or response-format problems.
    """
    landmark_info = df_landmarks[df_landmarks['Title'] == location]
    if landmark_info.empty:
        return f"Could not find coordinates for {location}."

    latitude, longitude = landmark_info['Latitude'].values[0], landmark_info['Longitude'].values[0]
    if pd.isna(latitude) or pd.isna(longitude):
        return f"No valid coordinates found for {location}."

    try:
        # HTTPS keeps the API key out of clear text; `params` handles URL
        # encoding; the timeout stops a stalled connection from hanging the
        # interactive session.
        response = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            params={
                "lat": latitude,
                "lon": longitude,
                "appid": OPENWEATHER_API_KEY,
                "units": "metric",
            },
            timeout=10,
        )
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Report only the exception type: the exception text can include the
        # request URL, which carries the API key.
        return f"Could not retrieve weather for {location}. Error: {type(exc).__name__}"

    if response.status_code == 200:
        try:
            description = data['weather'][0]['description'].capitalize()
            temperature = data['main']['temp']
        except (KeyError, IndexError, TypeError, AttributeError):
            return f"Could not retrieve weather for {location}. Error: Unexpected response format"
        return f"Weather at {location}: {description}, Temp: {temperature}°C"

    message = data.get('message', 'Unknown error') if isinstance(data, dict) else 'Unknown error'
    return f"Could not retrieve weather for {location}. Error: {message}"

# AI-powered question answering
def answer_questions(recommendations):
    """Run a question-and-answer loop restricted to the recommended locations.

    Each round reads a question from the user. If interpret_user_response()
    classifies it as 'done' or 'no', the loop ends. Otherwise the question is
    sent to OpenAI together with each recommendation's title and the first 500
    characters of its cleaned summary, and the reply is printed.

    Parameters:
    - recommendations: DataFrame providing 'Title' and 'Cleaned Summary' columns.

    Returns:
    - None. Answers, or a fallback message when the API call fails, are printed.
    """
    stop_intents = {"done", "no"}
    system_message = {"role": "system", "content": "You are a knowledgeable travel assistant."}

    while True:
        question = input("Do you have any questions about these locations? If not, just say you're good: ")
        if interpret_user_response(question) in stop_intents:
            return

        # One bullet per recommended location, summary truncated to 500 chars.
        location_lines = [
            f"- {entry['Title']}: {entry['Cleaned Summary'][:500]}...\n"
            for _, entry in recommendations.iterrows()
        ]
        prompt = (
            "Answer the user's question based ONLY on these locations:\n\n"
            + "".join(location_lines)
            + f"\nUser's question: {question}"
        )

        try:
            reply = openai_client.chat.completions.create(
                model="gpt-4",
                messages=[system_message, {"role": "user", "content": prompt}],
            )
            print("\n", reply.choices[0].message.content)
        except Exception:
            # Any API failure is reported to the user and the loop continues.
            print("I couldn’t find an answer to that. Let me know if you'd like to ask something else.")

# Lock locations without duplication
def lock_locations(recommendations, locked_locations):
    """Let the user add recommended locations to the itinerary without duplicates.

    Each reply is first treated as a selection: a 1-based number into
    `recommendations`, or a case-insensitive landmark title. Only when the
    reply is not a selection is interpret_user_response() consulted to see
    whether the user is finished. This avoids a classifier round trip on every
    pick and prevents a valid pick from being discarded because the classifier
    labelled it 'no' or 'done'.

    Parameters:
    - recommendations: DataFrame of candidate landmarks with a 'Title' column
      (a frame without that column is treated as having no matches).
    - locked_locations: List of already chosen titles; it is extended in place.

    Returns:
    - The same `locked_locations` list, including any newly added titles.
    """
    has_titles = "Title" in recommendations.columns

    while True:
        lock_choice = input("\nWould you like to add any of these locations to your itinerary? (Type the number or the name, or say you're done): ")
        choice = lock_choice.strip()

        location = None
        try:
            selected_index = int(choice) - 1
        except ValueError:
            # Not a number: try an exact, case-insensitive title match.
            if has_titles:
                location_matches = recommendations[recommendations["Title"].str.lower() == choice.lower()]
                if not location_matches.empty:
                    location = location_matches.iloc[0]["Title"]

            if location is None:
                # Not a selection, so check whether the user wants to stop.
                if interpret_user_response(lock_choice) in {"done", "no"}:
                    break
                print("I couldn't find that location in the list. Try again.")
                continue
        else:
            if 0 <= selected_index < len(recommendations):
                location = recommendations.iloc[selected_index]['Title']
            else:
                print("That number doesn't match any option. Try again.")
                continue

        # Add to locked locations if not already there
        if location not in locked_locations:
            locked_locations.append(location)
            print(f"{location} is now added to your itinerary.")

            # Get and display weather immediately
            weather_info = find_weather_forecast(location)
            print(weather_info)
        else:
            print(f"{location} is already in your itinerary.")

    return locked_locations

# Rank locations by user interest
def rank_appropriate_locations(user_prompt, top_k=3):
    """Find the landmarks whose summaries best match the user's interests.

    Parameters:
    - user_prompt: Free-text description of what the user wants to see.
    - top_k: Maximum number of landmarks to return (default 3).

    Returns:
    - A DataFrame with at most `top_k` rows of df_landmarks, best match first,
      with a fresh 0-based index. It is empty when `top_k` is not positive or
      the index holds no vectors.
    """
    # Never ask for more neighbours than the index holds.
    k = min(int(top_k), int(faiss_index.ntotal))
    if k <= 0:
        return df_landmarks.iloc[0:0].reset_index(drop=True)

    query_embedding = embedding_model.encode([user_prompt], convert_to_numpy=True)
    _, indices = faiss_index.search(query_embedding, k)

    # FAISS pads missing results with -1; .iloc[-1] would silently return the
    # last landmark, so drop negative labels before indexing.
    valid_positions = [int(position) for position in indices[0] if position >= 0]
    return df_landmarks.iloc[valid_positions].reset_index(drop=True)

# Function to generate sentence variations
def generate_variation(prompt):
    """Pick one pre-written phrasing of the "what would you like to visit?" question.

    Parameters:
    - prompt: Accepted for interface compatibility; its value is not used.

    Returns:
    - One of four fixed question strings, chosen with random.choice.
    """
    question_pool = (
        "What kinds of places are you interested in visiting? For example, beaches, museums, or something else.",
        "Are there any specific types of places you'd like to check out? Beaches, museums, nature spots, or anything else?",
        "Tell me what you're in the mood to see in Puerto Rico. Whether it’s a beach, museum, or something else, I can help!",
        "What types of locations do you enjoy visiting? For instance, you can mention beaches, museums, or other spots.",
    )
    return random.choice(question_pool)

# Function to calculate travel days for each location based on the start date
def assign_travel_dates(start_date, travel_plan):
    """
    Pair each stop of the travel plan with a calendar date, one stop per day.

    Parameters:
    - start_date: First day of the trip as a 'YYYY-MM-DD' string.
    - travel_plan: Locations in visiting order.

    Returns:
    - A list of (location, 'YYYY-MM-DD') tuples; the first stop gets the start
      date and each following stop the next consecutive day.

    Raises:
    - ValueError if start_date does not parse with the '%Y-%m-%d' format.
    """
    first_day = datetime.strptime(start_date, '%Y-%m-%d')
    return [
        (stop, (first_day + timedelta(days=day_offset)).strftime('%Y-%m-%d'))
        for day_offset, stop in enumerate(travel_plan)
    ]

# Haversine formula for distance calculation
def compute_distance(lat1, lon1, lat2, lon2):
    """
    Great-circle distance between two points using the Haversine formula.

    Parameters:
    - lat1, lon1: Latitude and longitude of the first point, in degrees.
    - lat2, lon2: Latitude and longitude of the second point, in degrees.

    Returns:
    - The distance in kilometers, using an Earth radius of 6371 km.
    """
    earth_radius_km = 6371
    half_dlat = radians(lat2 - lat1) / 2
    half_dlon = radians(lon2 - lon1) / 2
    haversine = (
        sin(half_dlat)**2
        + cos(radians(lat1)) * cos(radians(lat2)) * sin(half_dlon)**2
    )
    central_angle = 2 * atan2(sqrt(haversine), sqrt(1 - haversine))
    return earth_radius_km * central_angle

def get_location_coordinates(location):
    """
    Retrieves the latitude and longitude for a given location from df_landmarks.

    Parameters:
    - location: Landmark title, matched exactly against df_landmarks['Title'].

    Returns:
    - (latitude, longitude) taken from the first matching row, or (None, None)
      when no row matches or either coordinate is missing (NaN).
    """
    location_info = df_landmarks[df_landmarks['Title'] == location]
    if location_info.empty:
        return None, None
    latitude = location_info['Latitude'].values[0]
    longitude = location_info['Longitude'].values[0]
    # Callers only test for None, so report missing (NaN) coordinates the same
    # way as an unknown title instead of letting NaN flow into distance maths.
    if pd.isna(latitude) or pd.isna(longitude):
        return None, None
    return latitude, longitude

def calculate_distances(travel_plan, df_landmarks):
    """
    Measure each leg of the travel plan, from one stop to the next.

    NOTE: an identical function with this name is defined again further down in
    this cell; that later definition is the one bound when the cell finishes.

    Parameters:
    - travel_plan: List of locations in visiting order.
    - df_landmarks: Accepted but not read here; coordinates come from
      get_location_coordinates().

    Returns:
    - A list of (from_location, to_location, distance_km) tuples for
      consecutive stops. Legs where a coordinate is None are left out.
    """
    legs = []

    for origin, destination in zip(travel_plan, travel_plan[1:]):
        origin_lat, origin_lon = get_location_coordinates(origin)
        dest_lat, dest_lon = get_location_coordinates(destination)

        coordinates = (origin_lat, origin_lon, dest_lat, dest_lon)
        if any(value is None for value in coordinates):
            continue

        legs.append((origin, destination, compute_distance(*coordinates)))

    return legs


def compute_travel_plan(locked_locations):
    """
    Order the chosen locations with a greedy nearest-neighbour walk.

    The first location stays first. Each following stop is the not-yet-visited
    location closest to the current one. Locations whose coordinates cannot be
    resolved cannot be placed by distance, so they are appended at the end in
    their original order rather than dropped from the itinerary.

    Parameters:
    - locked_locations: List of locations to visit. The list is not modified.

    Returns:
    - A new list containing every input location exactly once per occurrence.
      When the starting location has no coordinates, a warning is printed and
      the original order is returned.
    """
    if not locked_locations:
        return []  # If there are no locations, return an empty list.

    start_location = locked_locations[0]
    remaining_locations = list(locked_locations[1:])

    # Get the coordinates of the starting location
    current_lat, current_lon = get_location_coordinates(start_location)
    if current_lat is None or current_lon is None:
        print(f"Warning: Coordinates for {start_location} not found. Skipping optimization.")
        return list(locked_locations)  # Original order, as a copy.

    travel_plan = [start_location]

    # Resolve each remaining stop once instead of on every pass of the search.
    coordinates = [get_location_coordinates(name) for name in remaining_locations]
    unvisited = list(range(len(remaining_locations)))

    while unvisited:
        closest_index = None
        min_distance = float('inf')

        for index in unvisited:
            lat, lon = coordinates[index]
            if lat is None or lon is None:
                continue

            distance = compute_distance(current_lat, current_lon, lat, lon)
            # A NaN distance never compares as smaller, so such stops are left
            # for the leftover handling below.
            if distance < min_distance:
                min_distance = distance
                closest_index = index

        if closest_index is None:
            break  # Nothing left that can be placed by distance.

        unvisited.remove(closest_index)
        travel_plan.append(remaining_locations[closest_index])
        current_lat, current_lon = coordinates[closest_index]

    # Keep the stops that could not be ordered instead of losing them.
    travel_plan.extend(remaining_locations[index] for index in unvisited)
    return travel_plan

# Function to calculate the distance between consecutive stops in the travel plan
# (this re-defines calculate_distances, which already appears earlier in this cell)
def calculate_distances(travel_plan, df_landmarks):
    """
    Compute the distance of every hop between consecutive stops in the plan.

    NOTE: this is the second definition of calculate_distances in this cell; it
    replaces the earlier, identical one when the cell runs.

    Parameters:
    - travel_plan: List of locations in the optimized travel plan.
    - df_landmarks: Accepted but not read here; coordinates are looked up
      through get_location_coordinates().

    Returns:
    - A list of (from_location, to_location, distance_km) tuples. A hop is
      skipped when any of its four coordinates is None.
    """
    segments = []

    for position, start in enumerate(travel_plan[:-1]):
        end = travel_plan[position + 1]

        start_coords = get_location_coordinates(start)
        end_coords = get_location_coordinates(end)

        has_all_coordinates = all(
            value is not None for value in (*start_coords, *end_coords)
        )
        if has_all_coordinates:
            hop_km = compute_distance(*start_coords, *end_coords)
            segments.append((start, end, hop_km))

    return segments

# Main function: Travel Assistant
def interactive_travel_assistant():
    """Main user interaction loop.

    Flow:
    1. Ask for the trip start date and re-prompt until it parses as
       YYYY-MM-DD, so a typo cannot crash the session at the final step.
    2. Repeatedly ask for interests and collect matching landmarks until
       interpret_user_response() classifies a reply as 'done' or 'no'.
    3. If anything was recommended: answer questions, let the user lock
       locations, then print the ordered plan with dates and leg distances.

    Returns:
    - None. All interaction happens through input() and print().
    """
    print("Welcome to the Puerto Rico Travel Assistant!")

    # Validate the date now; it is only consumed after the whole conversation.
    while True:
        travel_start_date = input("When are you planning to visit? Please enter your start date (YYYY-MM-DD): ").strip()
        try:
            datetime.strptime(travel_start_date, '%Y-%m-%d')
            break
        except ValueError:
            print("That doesn't look like a valid date. Please use the YYYY-MM-DD format, for example 2025-05-05.")

    locked_locations = []  # This will keep track of all selected locations
    all_recommendations = pd.DataFrame()  # To keep all the recommendations combined

    location_types_entered = []  # To track what types of locations the user has already added

    while True:
        user_interests = input(generate_variation("Tell me about the kinds of places you enjoy visiting."))

        if interpret_user_response(user_interests) in {"done", "no"}:
            break

        if user_interests not in location_types_entered:
            location_types_entered.append(user_interests)

            recommendations = rank_appropriate_locations(user_interests)

            # Skip the concat for the first batch: concatenating onto an empty
            # frame adds nothing.
            if all_recommendations.empty:
                combined = recommendations
            else:
                combined = pd.concat([all_recommendations, recommendations])
            all_recommendations = combined.drop_duplicates().reset_index(drop=True)

            print(f"\nHere are some great recommendations based on your interest in {user_interests}:")
            for idx, row in all_recommendations.iterrows():
                print(f"{idx+1}. {row['Title']}")

    # Nothing was recommended, so there is nothing to ask about or to lock.
    if all_recommendations.empty:
        print("\nYou haven't selected any locations yet. Please add some locations to your itinerary!")
        return

    answer_questions(all_recommendations)

    print("\nLet's create your travel plan from these awesome locations:")
    for idx, row in all_recommendations.iterrows():
        print(f"{idx+1}. {row['Title']}")

    locked_locations = lock_locations(all_recommendations, locked_locations)

    if locked_locations:
        # Compute the optimal travel plan based on proximity
        optimized_plan = compute_travel_plan(locked_locations)

        # Assign dates to each location
        travel_dates = assign_travel_dates(travel_start_date, optimized_plan)

        # Calculate the distances between each consecutive location
        distances = calculate_distances(optimized_plan, df_landmarks)

        print("\nHere's your final travel plan with assigned dates:")
        for location, date in travel_dates:
            print(f"- {location} on {date}")

        print("\nDistance to next location:")
        for loc1, loc2, distance in distances:
            print(f"From {loc1} to {loc2}: {distance:.2f} km")

        print("\nHave an amazing trip to Puerto Rico!")
    else:
        print("\nYou haven't selected any locations yet. Please add some locations to your itinerary!")

# Start the interactive session. This call blocks on input() prompts until the
# user has finished the conversation.
interactive_travel_assistant()


Embeddings complete! Stored 520 landmark vectors in FAISS.
Welcome to the Puerto Rico Travel Assistant!


When are you planning to visit? Please enter your start date (YYYY-MM-DD):  2025-5-5
What kinds of places are you interested in visiting? For example, beaches, museums, or something else. I want to visit the beaches



Here are some great recommendations based on your interest in I want to visit the beaches:
1. Blue Beach (Vieques)
2. Cayo Luis Peña
3. La Pocita de las Golondrinas Beach


Are there any specific types of places you'd like to check out? Beaches, museums, nature spots, or anything else? I want to also see museums



Here are some great recommendations based on your interest in I want to also see museums:
1. Blue Beach (Vieques)
2. Cayo Luis Peña
3. La Pocita de las Golondrinas Beach
4. Caguas Museum of Folk Arts
5. Caguas Museum of Art
6. Museo de Vida Silvestre


Are there any specific types of places you'd like to check out? Beaches, museums, nature spots, or anything else? no
Do you have any questions about these locations? If not, just say you're good:  no



Let's create your travel plan from these awesome locations:
1. Blue Beach (Vieques)
2. Cayo Luis Peña
3. La Pocita de las Golondrinas Beach
4. Caguas Museum of Folk Arts
5. Caguas Museum of Art
6. Museo de Vida Silvestre



Would you like to add any of these locations to your itinerary? (Type the number or the name, or say you're done):  1


Blue Beach (Vieques) is now added to your itinerary.
Weather at Blue Beach (Vieques): Few clouds, Temp: 25.75°C



Would you like to add any of these locations to your itinerary? (Type the number or the name, or say you're done):  5


Caguas Museum of Art is now added to your itinerary.
Weather at Caguas Museum of Art: Light rain, Temp: 27.08°C



Would you like to add any of these locations to your itinerary? (Type the number or the name, or say you're done):  3


La Pocita de las Golondrinas Beach is now added to your itinerary.
Weather at La Pocita de las Golondrinas Beach: Scattered clouds, Temp: 28.56°C



Would you like to add any of these locations to your itinerary? (Type the number or the name, or say you're done):  6


Museo de Vida Silvestre is now added to your itinerary.
Weather at Museo de Vida Silvestre: Few clouds, Temp: 26.59°C



Would you like to add any of these locations to your itinerary? (Type the number or the name, or say you're done):  done



Here's your final travel plan with assigned dates:
- Blue Beach (Vieques) on 2025-05-05
- Caguas Museum of Art on 2025-05-06
- Museo de Vida Silvestre on 2025-05-07
- La Pocita de las Golondrinas Beach on 2025-05-08

Distance to next location:
From Blue Beach (Vieques) to Caguas Museum of Art: 69.88 km
From Caguas Museum of Art to Museo de Vida Silvestre: 50.21 km
From Museo de Vida Silvestre to La Pocita de las Golondrinas Beach: 64.85 km

Have an amazing trip to Puerto Rico!


In [9]:
!pip install ipywidgets
!jupyter nbextension enable --py widgetsnbextension




[notice] A new release of pip is available: 25.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip
usage: jupyter [-h] [--version] [--config-dir] [--data-dir] [--runtime-dir]
               [--paths] [--json] [--debug]
               [subcommand]

Jupyter: Interactive Computing

positional arguments:
  subcommand     the subcommand to launch

options:
  -h, --help     show this help message and exit
  --version      show the versions of core jupyter packages and exit
  --config-dir   show Jupyter config dir
  --data-dir     show Jupyter data dir
  --runtime-dir  show Jupyter runtime dir
  --paths        show all Jupyter paths. Add --json for machine-readable
                 format.
  --json         output paths as machine-readable json
  --debug        output debug information about paths

Available subcommands: console dejavu events execute kernel kernelspec lab
labextension labhub migrate nbconvert notebook qtconsole run script server
troubleshoot trust

Ju

In [1]:
import ipywidgets as widgets
from IPython.display import display

# Test displaying a simple button
# Smoke test for the ipywidgets installation: create a button with no click
# handler attached and hand it to display().
button = widgets.Button(description="Click me!")
display(button)

Button(description='Click me!', style=ButtonStyle())

In [9]:
!conda install -c conda-forge nodejs

^C
