In [3]:
import pymongo
from langchain.schema import SystemMessage, HumanMessage
from dotenv import load_dotenv
import random
import json
import re
import os
from openai import AzureOpenAI

load_dotenv(dotenv_path="../config.env")

True

In [4]:
def create_llm_client():
    """Build the Azure OpenAI client used by the prediction helpers.

    The API key and endpoint are read from the ``AZURE_OPENAI_API_KEY`` and
    ``AZURE_OPENAI_ENDPOINT`` environment variables; the API version is
    pinned to ``2025-03-01-preview``.
    """
    client_settings = {
        "api_key": os.getenv("AZURE_OPENAI_API_KEY"),
        "api_version": "2025-03-01-preview",
        "azure_endpoint": os.getenv("AZURE_OPENAI_ENDPOINT"),
    }
    return AzureOpenAI(**client_settings)

def connect_to_db():
    """Return a ``pymongo.MongoClient`` for the URI in ``MONGODB_URI_REMOTE``."""
    mongo_uri = os.getenv("MONGODB_URI_REMOTE")
    return pymongo.MongoClient(mongo_uri)


def extract_locations_data(db):
    """Map each document of the ``Location`` collection to its coordinates.

    Returns:
        dict: keys are the document names wrapped in square brackets
        (e.g. ``"[Murree, Punjab]"``), values are
        ``doc["coordinates"]["coordinates"]``. When a name occurs more than
        once, the last document's coordinates are kept.
    """
    coords_by_name = {}
    for location_doc in db["Location"].find():
        name = location_doc["name"]
        coords_by_name[name] = location_doc["coordinates"]["coordinates"]

    # Second pass: wrap every name in brackets for the prompt-facing mapping.
    bracketed = {}
    for name, coords in coords_by_name.items():
        bracketed[f"[{name}]"] = coords
    return bracketed

def extract_business_category_metadata(db):
    """List the distinct ``category`` values of the ``Business`` collection.

    Categories are returned in order of first appearance.
    """
    distinct_categories = []
    for business_doc in db["Business"].find():
        category = business_doc["category"]
        if category in distinct_categories:
            continue
        distinct_categories.append(category)
    return distinct_categories

def extract_business_data(db):
    """Summarise every document of the ``Business`` collection.

    Returns:
        dict: keys are the business names wrapped in square brackets, values
        are ``[address, category, coordinates, averageRating]`` lists, where
        ``coordinates`` is ``doc["coordinates"]["coordinates"]``. When a name
        occurs more than once, the last document's details are kept.
    """
    details_by_name = {}
    for business_doc in db["Business"].find():
        name = business_doc["name"]
        details_by_name[name] = [
            business_doc["address"],
            business_doc["category"],
            business_doc["coordinates"]["coordinates"],
            business_doc["averageRating"],
        ]

    # Second pass: wrap every name in brackets for the prompt-facing mapping.
    bracketed = {}
    for name, details in details_by_name.items():
        bracketed[f"[{name}]"] = details
    return bracketed

def clean_json_response(response):
    """Strip Markdown code fences from an LLM reply so it can go to ``json.loads``.

    Handles fences with or without a ``json`` language tag (any letter case),
    with ``\\n`` or ``\\r\\n`` line endings, and with or without a newline
    between the fence markers and the payload. Text outside a fence is kept.

    Args:
        response (str | None): raw text returned by the model.

    Returns:
        str: the text with fence markers removed and outer whitespace
        stripped; ``""`` when ``response`` is ``None``.
    """
    if response is None:
        # A completion can carry no text at all; let the caller's json.loads
        # report the empty payload instead of failing here with a TypeError.
        return ""
    fence_pattern = r"```(?:json\b)?\s*(.*?)\s*```"
    unfenced = re.sub(fence_pattern, r"\1", response, flags=re.DOTALL | re.IGNORECASE)
    return unfenced.strip()


In [5]:
# Connecting to DB
# The client comes from connect_to_db(); all collections are read from the
# "OdysseumDatabase" database.
db_client = connect_to_db()
db = db_client["OdysseumDatabase"]

# LLM model
llm_client = create_llm_client()

# Extracting data and useful metadata
# Each helper reads a whole collection, so these three calls hit the database.
locations_data = extract_locations_data(db)
business_data = extract_business_data(db)
business_category_data = extract_business_category_metadata(db)

# NOTE(review): trim_business_data is not defined in the visible notebook;
# the line below stays disabled until that helper exists.
# business_data = trim_business_data(business_data)   # Comment out when we have bigger model

In [6]:
# Sample natural-language trip request fed to the prediction helpers below.
query = "Give me 2 destinations with only 1 stop. the stop should be a restaurant. i want to go from murree to chitral"

In [57]:
import json

def predict_user_locations_and_optimization(locations_data, query, llm_client, optimization_type="Rating"):
    """Ask the LLM which known locations the query refers to and how to rank results.

    Args:
        locations_data (dict): bracketed location names mapped to coordinates;
            dumped verbatim into the system prompt.
        query (str): the user's free-text trip request.
        llm_client: client exposing ``chat.completions.create``.
        optimization_type (str): criterion named in the prompt as the user's
            stated priority.

    Returns:
        str: the model's reply after ``clean_json_response`` — JSON *text*,
        not a dict. Callers pass it to ``json.loads`` and read the
        ``"optimization"`` and ``"locations"`` keys.

    Raises:
        ValueError: if the completion carries no message content.
    """
    valid_optimization_types = ["rating", "cost", "distance"]

    # The output-format example must itself be valid JSON (note the comma after
    # the "optimization" entry), otherwise the model may mirror the mistake.
    system_prompt = f"""
    You are an expert in data extraction and natural language processing. Your task is to figure out the most likely destinations for the user based on their query and the provided list of locations.
    The user wants to prioritize destinations based on {optimization_type}.

    Input Data:
    - List of locations: {json.dumps(locations_data, indent=2, ensure_ascii=False)}
    - User query: "{query}"
    - Optimization Types: {valid_optimization_types}

    Instructions:
    - Analyze the query to determine which locations from the list are most relevant to the user's request.
    - Return a list of the most likely destinations based on the query
    - Return the optimization type. If you cannot figure it out, default to "rating".
    - The response should be a dictionary with the optimization type as key, and a dictionary of location names with coordinates as value.

    Output Format (Strict):
    {{  
        "optimization": "optimization_type",
        "locations": {{
            "Location Name 1": [Latitude1, Longitude1],
            "Location Name 2": [Latitude2, Longitude2],
            ...
        }}
    }}
    """

    response = llm_client.chat.completions.create(
        model="o3-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query}
        ]
    )

    response_content = response.choices[0].message.content
    if response_content is None:
        raise ValueError("LLM returned no content for the location prediction request")

    # Still a JSON string at this point; decoding is left to the caller.
    return clean_json_response(response_content)


In [58]:
# The helper returns JSON text; decode it before reading the two result keys.
predictions = predict_user_locations_and_optimization(locations_data, query, llm_client)
predictions = json.loads(predictions)
optimization = predictions["optimization"]
location_predictions = predictions["locations"]  

# Show what the model extracted from the query.
print("Optimization Type:", optimization)
print("Location Predictions:", location_predictions)


Optimization Type: rating
Location Predictions: {'[Murree, Punjab]': [73.3943, 33.907], '[Chitral, KPK]': [71.7864, 35.8516]}


In [63]:
# NOTE(review): not referenced in the visible notebook; the prompts below spell
# the same four categories out inline.
STOP_CATEGORIES = ["hotel", "restaurant", "entertainment", "services"]

def predict_stops_and_categories(location_predictions, query, llm_client):
    """Ask the LLM how many stops the user wants and what category each one is.

    Args:
        location_predictions (dict): predicted route locations mapped to
            coordinates; dumped verbatim into the system prompt.
        query (str): the user's free-text trip request.
        llm_client: client exposing ``chat.completions.create``.

    Returns:
        str: the model's reply as JSON text with Markdown code fences removed
        (same post-processing as ``predict_user_locations_and_optimization``).
        The prompt requests the keys ``"number_of_stops"`` and ``"stops"``.

    Raises:
        ValueError: if the completion carries no message content.
    """
    # System prompt to guide the LLM
    system_prompt = f"""
    You are a travel assistant. Your task is to extract the number of stops, specific stops (locations), and the types of stops based on the provided query and the list of location predictions.

    Input Data:
    - List of predicted locations: {json.dumps(location_predictions, indent=2, ensure_ascii=False)}
    - User query: "{query}"

    Instructions:
    - Based on the query, identify how many stops the user intends to make.
    - Identify the specific stops (locations) from the predicted locations list that match the query.
    - Determine the types of stops (e.g., "hotel", "restaurant", "entertainment", "services") based on the query.

    Output Format (Strict dictionary):
    - A dictionary with the following structure:
    {{
        "number_of_stops": <int>,
        "stops": [
            {{
                "category": "Type of Stop (e.g., hotel, restaurant, entertainment, services)"
            }},
            ...
        ]
    }}
    """

    user_prompt = f"""
    Query: {query}
    Based on the list of predicted locations, please identify:
    1. The number of stops the user intends to make.
    2. The specific stops (locations) from the list that match the user's query.
    3. The category or type of each stop (e.g., hotel, restaurant, entertainment, services).
    """

    response = llm_client.chat.completions.create(
        model="o3-mini", 
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
    )
    response_content = response.choices[0].message.content
    if response_content is None:
        raise ValueError("LLM returned no content for the stop prediction request")

    # Strip code fences so the caller's json.loads does not choke on a
    # ```json ... ``` wrapper around the payload.
    return clean_json_response(response_content)


In [78]:
# The helper returns JSON text; decode it into a dict before use.
stop_predictions = predict_stops_and_categories(location_predictions, query, llm_client)
stop_predictions = json.loads(stop_predictions)
print(stop_predictions)

{'number_of_stops': 1, 'stops': [{'category': 'restaurant'}]}


In [None]:
def get_top_businesses_by_category(business_data, optimization_criteria='rating', top_n=5):
    """Group businesses by category and keep the best ``top_n`` of each group.

    Args:
        business_data (dict): bracketed business name mapped to
            ``[address, category, coordinates, rating]``.
        optimization_criteria (str | None): ``'distance'`` keeps the input
            order (no distance is computed here); any other value, including
            ``None``, ranks by rating, highest first.
        top_n (int): maximum number of businesses kept per category.

    Returns:
        dict: category mapped to a list of dicts with the keys ``name``,
        ``address``, ``category``, ``rating`` and ``coordinates``.
    """
    def unwrap(name):
        # Remove exactly one enclosing [ ] pair. str.strip('[]') would also eat
        # brackets that belong to the name itself, e.g. "Cafe [Old Town]".
        if len(name) >= 2 and name.startswith('[') and name.endswith(']'):
            return name[1:-1]
        return name

    def rating_key(business):
        # Unrated businesses (rating None) rank below every rated one instead
        # of raising TypeError during the sort.
        rating = business['rating']
        return rating if rating is not None else float('-inf')

    categorized_businesses = {}
    for business_name, details in business_data.items():
        address, category, coordinates, rating = details
        categorized_businesses.setdefault(category, []).append({
            'name': unwrap(business_name),
            'address': address,
            'category': category,
            'rating': rating,
            'coordinates': coordinates
        })

    criteria = str(optimization_criteria or 'rating').lower()

    top_businesses = {}
    for category, businesses in categorized_businesses.items():
        if criteria == 'distance':
            ranked = businesses
        else:
            # sorted() is stable, so equally rated businesses keep input order.
            ranked = sorted(businesses, key=rating_key, reverse=True)
        top_businesses[category] = ranked[:top_n]

    return top_businesses

# Rank the businesses once using the optimization criterion predicted above.
top_businesses = get_top_businesses_by_category(business_data, optimization)

In [85]:

# Inspect every intermediate result that feeds the itinerary generator.
print("Optimization Type:", optimization)
print("Location Predictions:", location_predictions)
print(stop_predictions)
print(top_businesses)

Optimization Type: rating
Location Predictions: {'[Murree, Punjab]': [73.3943, 33.907], '[Chitral, KPK]': [71.7864, 35.8516]}
{'number_of_stops': 1, 'stops': [{'category': 'restaurant'}]}
{'Restaurant': [{'name': 'Rosemary Restaurant', 'address': 'VQ3Q+V8V, Shahi Masjid Rd, Chitrāl, Pakistan', 'category': 'Restaurant', 'rating': 5.0, 'coordinates': [71.7882861, 35.8547296]}, {'name': 'Akhtar Restaurant', 'address': 'Main Market Rd, Chitrāl, Pakistan', 'category': 'Restaurant', 'rating': 5.0, 'coordinates': [71.7828127, 35.84373]}, {'name': 'Conflux Fast Food', 'address': 'Village Gankorini Singur (Shotkhora), Tehsil &, Chitrāl, 17200, Pakistan', 'category': 'Restaurant', 'rating': 5.0, 'coordinates': [71.8107467, 35.904848]}, {'name': 'Café De Siesta', 'address': 'Mall of swat, GT Rd, Qambar, 19200, Pakistan', 'category': 'Restaurant', 'rating': 5.0, 'coordinates': [72.3174571, 34.7604291]}, {'name': 'Nest by Qubed', 'address': 'Upper Nathia Gali Rd, near St. Matthews Church, Upper, Na

In [97]:
from geopy.distance import geodesic

def _unwrap_brackets(label):
    """Remove one enclosing ``[...]`` pair from ``label`` if present."""
    label = str(label)
    if len(label) >= 2 and label[0] == '[' and label[-1] == ']':
        return label[1:-1]
    return label


def _to_lat_lon(coords):
    """Swap a stored coordinate pair into the order handed to ``geodesic``.

    Stored pairs appear to be ``[longitude, latitude]`` (e.g.
    ``[73.3943, 33.907]`` for Murree), while ``geodesic`` takes latitude first.
    """
    return [coords[1], coords[0]]


def _distance_km(point_a, point_b):
    """Return the geodesic distance in km, or ``None`` if it cannot be computed."""
    try:
        return geodesic(point_a, point_b).km
    except Exception:
        # Best effort: one bad coordinate pair must not abort the itinerary.
        return None


def _parse_stop_predictions(stop_predictions):
    """Turn the stop prediction (dict or JSON text) into a dict.

    JSON text may be wrapped in a Markdown code fence. Text that cannot be
    decoded into a dict falls back to a single restaurant stop.
    """
    if not isinstance(stop_predictions, str):
        return stop_predictions

    fallback = {
        "number_of_stops": 1,
        "stops": [{"category": "restaurant"}]
    }
    unfenced = re.sub(
        r"```(?:json\b)?\s*(.*?)\s*```", r"\1", stop_predictions,
        flags=re.DOTALL | re.IGNORECASE,
    ).strip()
    try:
        parsed = json.loads(unfenced)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return fallback
    return parsed if isinstance(parsed, dict) else fallback


def generate_itinerary(optimization, location_predictions, stop_predictions, top_businesses):
    """
    Generate an itinerary based on the locations, stops, and business data.
    Prevents repeating locations in the itinerary.

    For each requested stop, the candidate with the highest score is chosen,
    where ``score = rating - distance_to_route_midpoint_km / 100``. Candidates
    whose distance cannot be computed are treated as 1000 km away, and
    unrated candidates are scored as rating 0.

    Args:
        optimization (str): Optimization criteria (e.g., 'rating'). Accepted
            for interface compatibility; the scoring does not read it.
        location_predictions (dict): Dictionary of location names to
            coordinates. The first entry is the start and the second (if any)
            is the destination; further entries are ignored.
        stop_predictions (dict or str): Dictionary or JSON string with a
            "stops" list of {"category": ...} entries
        top_businesses (dict): Dictionary of businesses by category

    Returns:
        str: Formatted itinerary

    Raises:
        ValueError: if ``location_predictions`` is empty.
    """
    unknown_distance_km = 1000   # penalty when a distance cannot be computed
    km_per_rating_point = 100    # 100 km from the midpoint costs one rating point

    stop_predictions = _parse_stop_predictions(stop_predictions)

    # Extracting route information
    route_locations = list(location_predictions.keys())
    if not route_locations:
        raise ValueError("location_predictions must contain at least a starting location")

    start_location = _unwrap_brackets(route_locations[0]).split(',')[0]
    start_coords = location_predictions[route_locations[0]]
    if len(route_locations) > 1:
        end_location = _unwrap_brackets(route_locations[1]).split(',')[0]
        end_coords = location_predictions[route_locations[1]]
    else:
        end_location = None
        end_coords = None

    start_point = _to_lat_lon(start_coords)
    end_point = _to_lat_lon(end_coords) if end_coords else None

    # Calculating midpoint for better stop selection
    if end_point:
        midpoint = [(start_point[0] + end_point[0]) / 2, (start_point[1] + end_point[1]) / 2]
    else:
        midpoint = start_point

    # Case-insensitive fallback index for category names such as "Fast Food",
    # which str.capitalize() would turn into "Fast food".
    businesses_by_lower = {}
    for key, businesses in top_businesses.items():
        businesses_by_lower.setdefault(str(key).lower(), businesses)

    selected_stops = []
    used_locations = set()  # coordinates of businesses already selected

    # Process each requested stop category
    for stop in stop_predictions.get("stops") or []:
        requested = stop.get("category", "") if isinstance(stop, dict) else stop
        requested = str(requested)
        candidates = top_businesses.get(requested.capitalize())
        if candidates is None:
            candidates = businesses_by_lower.get(requested.strip().lower(), [])

        best_business = None
        best_score = None
        for business in candidates:
            # Skip a business already used, or one at the same location
            location_key = (business['coordinates'][0], business['coordinates'][1])
            if location_key in used_locations:
                continue

            distance = _distance_km(midpoint, _to_lat_lon(business['coordinates']))
            if distance is None:
                distance = unknown_distance_km

            score = (business['rating'] or 0) - distance / km_per_rating_point
            # Strict '>' keeps the earliest candidate on ties, matching a
            # stable descending sort.
            if best_score is None or score > best_score:
                best_business = business
                best_score = score

        if best_business is not None:
            selected_stops.append(best_business)
            used_locations.add(
                (best_business['coordinates'][0], best_business['coordinates'][1])
            )

    # Calculate total route distance
    total_distance = _distance_km(start_point, end_point) if end_point else None

    # Generate the itinerary text
    parts = []
    if end_location is not None:
        parts.append(f"✨ Journey from {start_location} to {end_location} ✨\n\n")
        parts.append(f"🚩 Starting Point: {start_location}\n")
        parts.append(f"🏁 Destination: {end_location}\n")
    else:
        # Only one location was predicted: do not print "to None".
        parts.append(f"✨ Journey from {start_location} ✨\n\n")
        parts.append(f"🚩 Starting Point: {start_location}\n")

    # 'is not None' so that a genuine 0.0 km route is still reported.
    if total_distance is not None:
        parts.append(f"📏 Total Distance: {round(total_distance, 1)} km\n")

    parts.append("\n")

    if selected_stops:
        parts.append("📍 Recommended Stops:\n")

        for i, stop in enumerate(selected_stops, 1):
            # Calculate distance from starting point
            distance_from_start = _distance_km(start_point, _to_lat_lon(stop['coordinates']))
            if distance_from_start is not None:
                distance_text = f"(~{round(distance_from_start, 1)} km from start)"
            else:
                distance_text = ""

            # Formatting locations
            parts.append(f"  {i}. {stop['name']} ({stop['category']}) {distance_text}\n")
            rating = stop['rating']
            if rating is None:
                parts.append("     Rating: not rated yet\n")
            else:
                parts.append(f"     Rating: {'⭐' * round(rating)} ({rating}/5)\n")
            parts.append(f"     Address: {stop['address']}\n\n")
    else:
        parts.append("No stops were selected for this itinerary.\n")

    parts.append("Happy travels! 🚗💨")

    return "".join(parts)

In [98]:
# Build the itinerary text from the predictions and ranked businesses, then show it.
generated_itinerary = generate_itinerary(optimization, location_predictions, stop_predictions, top_businesses)
print(generated_itinerary)

✨ Journey from Fairy Meadows to Swat ✨

🚩 Starting Point: Fairy Meadows
🏁 Destination: Swat
📏 Total Distance: 197.5 km

📍 Recommended Stops:
  1. Imran Car Rental & Tours Chitral (Services) (~255.7 km from start)
     Rating: ⭐⭐⭐⭐⭐ (5.0/5)
     Address: Ayun, Kalash Valley Road, Chitrāl, 17210, Pakistan

  2. Café De Siesta (Restaurant) (~219.7 km from start)
     Rating: ⭐⭐⭐⭐⭐ (5.0/5)
     Address: Mall of swat, GT Rd, Qambar, 19200, Pakistan

  3. Nest by Qubed (Restaurant) (~186.8 km from start)
     Rating: ⭐⭐⭐⭐⭐ (4.9/5)
     Address: Upper Nathia Gali Rd, near St. Matthews Church, Upper, Nathia Gali, 22290, Pakistan

  4. Rosemary Restaurant (Restaurant) (~257.2 km from start)
     Rating: ⭐⭐⭐⭐⭐ (5.0/5)
     Address: VQ3Q+V8V, Shahi Masjid Rd, Chitrāl, Pakistan

  5. Akhtar Restaurant (Restaurant) (~257.5 km from start)
     Rating: ⭐⭐⭐⭐⭐ (5.0/5)
     Address: Main Market Rd, Chitrāl, Pakistan

  6. Conflux Fast Food (Restaurant) (~256.2 km from start)
     Rating: ⭐⭐⭐⭐⭐ (5.0/5)
 

In [99]:
# End-to-end run: from a free-text query to a printed itinerary.
# query = "Give me 2 destinations with only 1 stop. the stop should be a restaurant. i want to go from murree to chitral"
query = "I want to go from Ferry medows to Muree. I want to visit 5 restaurants in the middle"

# Connecting to DB
db_client = connect_to_db()
db = db_client["OdysseumDatabase"]

# LLM model
llm_client = create_llm_client()

# Extracting data and useful metadata
locations_data = extract_locations_data(db)
business_data = extract_business_data(db)
business_category_data = extract_business_category_metadata(db)

# Both helpers return JSON text, so each reply is decoded before use.
predictions = predict_user_locations_and_optimization(locations_data, query, llm_client)
predictions = json.loads(predictions)
optimization = predictions["optimization"]
location_predictions = predictions["locations"]  
stop_predictions = predict_stops_and_categories(location_predictions, query, llm_client)
stop_predictions = json.loads(stop_predictions)

# Re-rank the businesses for THIS query's optimization criterion. Without this
# the itinerary silently reuses whatever `top_businesses` an earlier cell left
# in the kernel (or fails with NameError on a fresh kernel).
top_businesses = get_top_businesses_by_category(business_data, optimization)

generated_itinerary = generate_itinerary(optimization, location_predictions, stop_predictions, top_businesses)
print(generated_itinerary)

✨ Journey from Fairy Meadows to Murree ✨

🚩 Starting Point: Fairy Meadows
🏁 Destination: Murree
📏 Total Distance: 201.8 km

📍 Recommended Stops:
  1. Nest by Qubed (Restaurant) (~186.8 km from start)
     Rating: ⭐⭐⭐⭐⭐ (4.9/5)
     Address: Upper Nathia Gali Rd, near St. Matthews Church, Upper, Nathia Gali, 22290, Pakistan

  2. Café De Siesta (Restaurant) (~219.7 km from start)
     Rating: ⭐⭐⭐⭐⭐ (5.0/5)
     Address: Mall of swat, GT Rd, Qambar, 19200, Pakistan

  3. Akhtar Restaurant (Restaurant) (~257.5 km from start)
     Rating: ⭐⭐⭐⭐⭐ (5.0/5)
     Address: Main Market Rd, Chitrāl, Pakistan

  4. Rosemary Restaurant (Restaurant) (~257.2 km from start)
     Rating: ⭐⭐⭐⭐⭐ (5.0/5)
     Address: VQ3Q+V8V, Shahi Masjid Rd, Chitrāl, Pakistan

  5. Conflux Fast Food (Restaurant) (~256.2 km from start)
     Rating: ⭐⭐⭐⭐⭐ (5.0/5)
     Address: Village Gankorini Singur (Shotkhora), Tehsil &, Chitrāl, 17200, Pakistan

Happy travels! 🚗💨
