In [10]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
from scipy.spatial.distance import pdist, squareform
from math import radians, sin, cos, sqrt, atan2

In [11]:
# -------------------------------
# 1. Load and Preprocess Dataset
# -------------------------------
locations = pd.read_csv("../../data/recommendation/location_data.csv")

# Convert numeric columns and fill missing values with 0
locations["Avg_Rating"] = pd.to_numeric(locations["Avg_Rating"], errors='coerce').fillna(0)
locations["Review_Count"] = pd.to_numeric(locations["Review_Count"], errors='coerce').fillna(0)
# Fill missing activities with an empty string
locations['Activities'] = locations['Activities'].fillna('')

# Normalize numerical features using MinMaxScaler
scaler = MinMaxScaler()
locations[["Avg_Rating", "Review_Count"]] = scaler.fit_transform(locations[["Avg_Rating", "Review_Count"]])

In [12]:
# Location type emojis
LOCATION_TYPE_EMOJIS = {
    "Beaches": "🏖️",
    "Beach": "🏖️",
    "Waterfalls": "💦",
    "Waterfall": "💦",
    "National Parks": "🌲",
    "National Park": "🌲",
    "Museums": "🏛️",
    "Museum": "🏛️",
    "Historical Sites": "🏰",
    "Historical Site": "🏰",
    "Temple": "🛕",
    "Temples": "🛕",
    "Religious Site": "🛐",
    "Religious Sites": "🛐",
    "Wildlife": "🦁",
    "Mountains": "⛰️",
    "Mountain": "⛰️",
    "Lakes": "🌊",
    "Lake": "🌊",
    "Gardens": "🌷",
    "Garden": "🌷",
    "Botanical Garden": "🌻",
    "Theme Parks": "🎢",
    "Adventure": "🧗",
    "Rock": "🧗‍♂️",
    "Fortress": "🏯",
    "Fort": "🏯",
    "City": "🏙️",
    "Urban Areas": "🏙️",
    "Market": "🛍️",
    "Markets": "🛍️",
    "Elephant Sanctuary": "🐘",
    "Elephant": "🐘",
    "Forest": "🌳",
    "Reserve": "🌳",
    "Plaza": "🏢",
    "Cave": "🕳️",
    "Point": "📍",
    "Bridge": "🌉",
    "Hill": "🏔️",
    "Landmark": "🗿",
    "Monument": "🗿",
    "Farm": "🚜",
    "Farms": "🚜",
    "Park": "🌳",
    "Nature & Wildlife Areas": "🦓",
}

# Province emojis
PROVINCE_EMOJIS = {
    "Northern Province": "🌅",
    "North Central Province": "🏛️",
    "Eastern Province": "🌊",
    "North Western Province": "🌲",
    "Central Province": "⛰️",
    "Uva Province": "🏞️",
    "Western Province": "🏙️",
    "Sabaragamuwa Province": "🌄",
    "Southern Province": "🏖️",
}

# Default emoji if type not found
DEFAULT_LOCATION_EMOJI = "📍"
DEFAULT_PROVINCE_EMOJI = "🗺️"

# Activity emojis
ACTIVITY_EMOJIS = {
    "swimming": "🏊",
    "hiking": "🥾",
    "trekking": "🏔️",
    "surfing": "🏄",
    "photography": "📸",
    "camping": "⛺",
    "bird watching": "🦅",
    "wildlife spotting": "🦓",
    "exploring": "👀",
    "sightseeing": "👀",
    "shopping": "🛒",
    "dining": "🍴",
    "boating": "🚣",
    "fishing": "🎣",
    "cycling": "🚴",
    "relaxing": "🧘",
    "walking": "🚶",
    "tours": "🧭",
    "scenic views": "🌄",
    "nature walks": "🌿",
    "water sports": "🏄",
    "picnic": "🧺",
    "culture": "🏮",
    "history": "📜",
    "adventure": "🧗",
    "spiritual": "✨",
    "educational": "📚",
    "scenic": "🌅",
    "climbing": "🧗‍♀️",
    "sunset viewing": "🌇",
    "sunrise": "🌅",
    "snorkeling": "🤿",
    "diving": "🪂",
    "kayaking": "🛶"
}

DEFAULT_ACTIVITY_EMOJI = "🎯"

In [13]:
# Helper functions for visual elements
def get_location_emoji(location_type):
    """Get emoji for a location type"""
    if not location_type:
        return DEFAULT_LOCATION_EMOJI

    for key, emoji in LOCATION_TYPE_EMOJIS.items():
        if key.lower() in location_type.lower():
            return emoji

    return DEFAULT_LOCATION_EMOJI

def get_province_emoji(province):
    """Get emoji for a province"""
    return PROVINCE_EMOJIS.get(province, DEFAULT_PROVINCE_EMOJI)

def get_activity_emoji(activity):
    """Get emoji for an activity"""
    activity = activity.strip().lower()
    for key, emoji in ACTIVITY_EMOJIS.items():
        if key in activity:
            return emoji
    return DEFAULT_ACTIVITY_EMOJI

def get_rating_stars(rating):
    """Convert a numerical rating to star emojis"""
    if not isinstance(rating, (int, float)) or np.isnan(rating):
        return "No Rating"

    full_stars = int(rating)
    half_star = "✨" if rating % 1 >= 0.5 else ""
    return "⭐" * full_stars + half_star + f" ({rating:.1f}/5)"

def get_similarity_indicator(similarity):
    """Return color-coded emoji for similarity score"""
    if similarity >= 0.8:
        return "🟢"  # High similarity
    elif similarity >= 0.5:
        return "🟡"  # Medium similarity
    elif similarity >= 0.3:
        return "🟠"  # Low similarity
    else:
        return "🔴"  # Very low similarity

def format_activities(activities_str):
    """Format activities with emojis"""
    if not activities_str or not isinstance(activities_str, str):
        return "No activities listed"

    activities = [act.strip() for act in activities_str.split(",") if act.strip()]
    formatted = []

    for activity in activities:
        emoji = get_activity_emoji(activity)
        formatted.append(f"{emoji} {activity}")

    return ", ".join(formatted)

In [14]:
# -------------------------------
# 2. Compute Similarity Matrices
# -------------------------------

# 2.1. Numerical Similarity using Cosine
cosine_sim_matrix = cosine_similarity(locations[["Avg_Rating", "Review_Count"]].values)
cosine_sim_df = pd.DataFrame(cosine_sim_matrix,
                             index=locations["Location_Name"],
                             columns=locations["Location_Name"])

# 2.2. Categorical Similarity (Jaccard) for Location_Type and Located_Province
def jaccard_similarity(df, columns):

    categorical_data = df[columns].astype(str).apply(lambda x: ','.join(x), axis=1)
    binary_matrix = categorical_data.str.get_dummies(sep=',')
    jaccard_distances = pdist(binary_matrix, metric='jaccard')
    jaccard_sim_matrix = 1 - squareform(jaccard_distances)
    return pd.DataFrame(jaccard_sim_matrix, index=df["Location_Name"], columns=df["Location_Name"])

jaccard_sim_df = jaccard_similarity(locations, ["Location_Type", "Located_Province"])

# 2.3. Geographic Similarity using Haversine Distance
def haversine_distance(lat1, lon1, lat2, lon2):

    R = 6371  # Earth radius in kilometers
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat/2)**2 + cos(lat1)*cos(lat2)*sin(dlon/2)**2
    c = 2 * atan2(sqrt(a), sqrt(1-a))
    return R * c

distance_matrix = np.array([[haversine_distance(lat1, lon1, lat2, lon2)
                             for lat2, lon2 in zip(locations.Latitude, locations.Longitude)]
                            for lat1, lon1 in zip(locations.Latitude, locations.Longitude)])
# Convert distance into similarity: closer locations have higher similarity
distance_sim_matrix = 1 / (1 + distance_matrix)
distance_sim_df = pd.DataFrame(distance_sim_matrix,
                               index=locations["Location_Name"],
                               columns=locations["Location_Name"])

# 2.4. Activities Similarity using Jaccard (binary matching)
def clean_activities(activity_str):

    return set([act.strip().lower() for act in activity_str.split(",") if act.strip()])

def jaccard_similarity_activities(activities_series):

    loc_names = activities_series.index
    n = len(loc_names)
    sim_matrix = np.zeros((n, n))

    # Convert each activities string to a set using the cleaning function
    activity_sets = activities_series.apply(clean_activities)

    for i, loc1 in enumerate(loc_names):
        for j, loc2 in enumerate(loc_names):
            set1 = activity_sets.loc[loc1]
            set2 = activity_sets.loc[loc2]
            # If both sets are empty, treat them as identical
            if not set1 and not set2:
                sim = 1.0
            else:
                sim = len(set1.intersection(set2)) / len(set1.union(set2))
            sim_matrix[i, j] = sim

    return pd.DataFrame(sim_matrix, index=loc_names, columns=loc_names)

activity_sim_df = jaccard_similarity_activities(locations.set_index("Location_Name")["Activities"])

In [15]:
# -------------------------------
# 3. Normalize Similarity Matrices (Optional)
# -------------------------------
cosine_sim_norm = scaler.fit_transform(cosine_sim_df)
jaccard_sim_norm = scaler.fit_transform(jaccard_sim_df)
distance_sim_norm = scaler.fit_transform(distance_sim_df)
activity_sim_norm = scaler.fit_transform(activity_sim_df)

cosine_sim_norm_df = pd.DataFrame(cosine_sim_norm, index=locations["Location_Name"], columns=locations["Location_Name"])
jaccard_sim_norm_df = pd.DataFrame(jaccard_sim_norm, index=locations["Location_Name"], columns=locations["Location_Name"])
distance_sim_norm_df = pd.DataFrame(distance_sim_norm, index=locations["Location_Name"], columns=locations["Location_Name"])
activity_sim_norm_df = pd.DataFrame(activity_sim_norm, index=locations["Location_Name"], columns=locations["Location_Name"])

In [16]:
# -------------------------------
# 4. Hybrid Similarity Calculation
# -------------------------------
# Set weights for each similarity component (adjust as needed)
alpha, beta, gamma, delta = 0.2, 0.4, 0.2, 0.2
hybrid_sim_matrix = (alpha * jaccard_sim_norm_df) + \
                    (beta * cosine_sim_norm_df) + \
                    (gamma * distance_sim_norm_df) + \
                    (delta * activity_sim_norm_df)

def recommend_hybrid(location_name, top_n=5):

    if location_name not in hybrid_sim_matrix.index:
        return f"Location '{location_name}' not found in dataset."

    similar_locs = hybrid_sim_matrix[location_name].sort_values(ascending=False).iloc[1:top_n + 1]
    details = []
    for loc in similar_locs.index:
        details.append({
            "Location": loc,
            "Hybrid_Similarity": round(hybrid_sim_matrix.at[loc, location_name], 4),
            "Categorical_Jaccard": round(jaccard_sim_norm_df.at[loc, location_name], 4),
            "Numerical_Cosine": round(cosine_sim_norm_df.at[loc, location_name], 4),
            "Geographic_Similarity": round(distance_sim_norm_df.at[loc, location_name], 4),
            "Activities_Jaccard": round(activity_sim_norm_df.at[loc, location_name], 4)
        })
    return pd.DataFrame(details)

In [17]:
# -------------------------------
# 5. Additional Recommendation Functions
# -------------------------------

def recommend_by_activities(user_activities, top_n=5):

    # Clean user activities
    user_set = set([act.strip().lower() for act in user_activities])

    scores = {}
    for idx, row in locations.iterrows():
        loc_set = clean_activities(row["Activities"])
        if not user_set and not loc_set:
            sim = 1.0
        else:
            union = user_set.union(loc_set)
            sim = len(user_set.intersection(loc_set)) / len(union) if union else 0.0
        scores[row["Location_Name"]] = sim

    scores_df = pd.DataFrame(scores.items(), columns=["Location", "Activities_Jaccard_Similarity"])
    scores_df = scores_df.sort_values("Activities_Jaccard_Similarity", ascending=False)
    return scores_df.head(top_n)

def filter_locations_by_type(location_type):

    filtered = locations[locations["Location_Type"].astype(str).str.contains(location_type, case=False, na=False)]
    return filtered[["Location_Name", "Location_Type", "Located_Province", "Avg_Rating"]]

def recommend_custom(user_activities=None, location_type=None, min_rating=None, top_n=5):

    df = locations.copy()

    # Apply location type filter if provided
    if location_type:
        df = df[df["Location_Type"].astype(str).str.contains(location_type, case=False, na=False)]

    # Apply minimum rating filter if provided
    if min_rating is not None:
        df = df[df["Avg_Rating"] >= min_rating]

    # If no user activities provided, return filtered locations sorted by rating
    if not user_activities:
        return df.sort_values("Avg_Rating", ascending=False)[["Location_Name", "Location_Type", "Avg_Rating"]].head(top_n)

    # Clean user activities
    user_set = set([act.strip().lower() for act in user_activities])
    scores = {}
    for idx, row in df.iterrows():
        loc_set = clean_activities(row["Activities"])
        if not user_set and not loc_set:
            sim = 1.0
        else:
            union = user_set.union(loc_set)
            sim = len(user_set.intersection(loc_set)) / len(union) if union else 0.0
        scores[row["Location_Name"]] = sim

    scores_df = pd.DataFrame(scores.items(), columns=["Location", "Activities_Jaccard_Similarity"])
    scores_df = scores_df.sort_values("Activities_Jaccard_Similarity", ascending=False)
    return scores_df.head(top_n)

def recommend_nearby(location_name, radius_km=50, num_locations=5):

    target = locations[locations["Location_Name"] == location_name]
    if target.empty:
        raise ValueError(f"Location '{location_name}' not found in the dataset.")

    target_lat = target.iloc[0]["Latitude"]
    target_long = target.iloc[0]["Longitude"]

    nearby = []
    for idx, row in locations.iterrows():
        if row["Location_Name"] == location_name:
            continue
        dist = haversine_distance(target_lat, target_long, row["Latitude"], row["Longitude"])
        if dist <= radius_km:
            nearby.append({
                "Location": row["Location_Name"],
                "Distance_km": round(dist, 2),
                "Avg_Rating": row["Avg_Rating"],
                "Location_Type": row["Location_Type"]
            })

    nearby_df = pd.DataFrame(nearby).sort_values("Distance_km")
    return nearby_df.head(num_locations)

def get_itinerary_plan(location_name, included_provinces=None, top_n=5, radius_km=50):

    # We need to access the original rating data before normalization
    original_data = pd.read_csv("../../data/recommendation/location_data.csv")

    # Check if location exists
    target = original_data[original_data["Location_Name"] == location_name]
    if target.empty:
        return f"❌ Location '{location_name}' not found in the dataset."

    # Get location details from original data
    target_info = target.iloc[0]
    current_province = target_info["Located_Province"]

    # Get emojis for location
    loc_type_emoji = get_location_emoji(target_info["Location_Type"])
    province_emoji = get_province_emoji(current_province)

    # If no provinces specified, use current province plus neighboring provinces
    # (In a real system, you'd have a mapping of neighboring provinces)
    if included_provinces is None:
        included_provinces = [current_province]

    # Create a beautiful header
    output = "\n" + "-" * 80 + "\n"
    output += f"= PERSONALIZED TRAVEL RECOMMENDATIONS =".center(80) + "\n"
    output += "-" * 80 + "\n\n"

    # Location details section with emojis
    output += f"{loc_type_emoji} LOCATION: {location_name}\n"
    output += "=" * 80 + "\n\n"

    # Location details - using original, non-normalized data
    output += f"📌 LOCATION DETAILS\n"
    output += f"{'-' * 30}\n"

    # Type with emoji
    output += f"📍 Type: {loc_type_emoji} {target_info['Location_Type']}\n"

    # Province with emoji
    output += f"🏞️ Province: {province_emoji} {target_info['Located_Province']}\n"

    # Rating with stars
    rating = target_info['Avg_Rating']
    output += f"⭐ Rating: {get_rating_stars(rating)}\n"

    # Activities with emojis
    activities = target_info['Activities']
    output += f"🎯 Activities: {format_activities(activities)}\n\n"

    # Add included provinces info with emojis
    output += f"🗺️ PROVINCES INCLUDED IN THIS PLAN\n"
    output += f"{'-' * 30}\n"
    for province in included_provinces:
        province_emoji = get_province_emoji(province)
        output += f"• {province_emoji} {province}\n"
    output += "\n"

    # 1. Hybrid recommendations - best overall alternatives
    if location_name in hybrid_sim_matrix.index:
        similar_locs = recommend_hybrid(location_name, top_n=top_n)
        # Filter for included provinces
        filtered_similar_locs = []
        for _, row in similar_locs.iterrows():
            loc_data = original_data[original_data["Location_Name"] == row["Location"]]
            if not loc_data.empty and loc_data.iloc[0]["Located_Province"] in included_provinces:
                filtered_similar_locs.append({
                    "Location": row["Location"],
                    "Hybrid_Similarity": row["Hybrid_Similarity"],
                    "Avg_Rating": loc_data.iloc[0]["Avg_Rating"],
                    "Located_Province": loc_data.iloc[0]["Located_Province"],
                    "Location_Type": loc_data.iloc[0]["Location_Type"]
                })

        output += f"🌟 TOP RECOMMENDED PLACES (HYBRID SYSTEM)\n"
        output += f"{'-' * 80}\n"

        if filtered_similar_locs:
            for i, row in enumerate(filtered_similar_locs[:top_n]):
                # Get emojis for type and province
                loc_emoji = get_location_emoji(row['Location_Type'])
                prov_emoji = get_province_emoji(row['Located_Province'])

                # Add similarity indicator
                sim_indicator = get_similarity_indicator(row['Hybrid_Similarity'])

                # Format stars for rating
                stars = get_rating_stars(row['Avg_Rating'])

                # Create a clean, well-formatted recommendation line
                output += f"{i+1}. {loc_emoji} {row['Location']}\n"
                output += f"   📊 Match: {sim_indicator} {row['Hybrid_Similarity']:.2f}\n"
                output += f"   ⭐ Rating: {stars}\n"
                output += f"   🏞️ Province: {prov_emoji} {row['Located_Province']}\n"

                if i < len(filtered_similar_locs[:top_n]) - 1:
                    output += f"   {'-' * 40}\n"
        else:
            output += "No matching locations found in the included provinces.\n"

        output += "\n"

    # 2. Similar location types - from original data, filtered by included provinces
    loc_type = target_info["Location_Type"]
    type_locs = original_data[
        (original_data["Location_Type"] == loc_type) &
        (original_data["Located_Province"].isin(included_provinces)) &
        (original_data["Location_Name"] != location_name)  # Skip reference location
        ].sort_values("Avg_Rating", ascending=False).head(top_n)

    output += f"{loc_type_emoji} SIMILAR {loc_type.upper()} LOCATIONS\n"
    output += f"{'-' * 80}\n"

    if not type_locs.empty:
        for i, row in enumerate(type_locs.iterrows()):
            row = row[1]  # Get the Series from the tuple
            # Get location province emoji
            prov_emoji = get_province_emoji(row['Located_Province'])

            # Format stars for rating
            stars = get_rating_stars(row['Avg_Rating'])

            # Format with cleaner layout
            output += f"{i+1}. {loc_type_emoji} {row['Location_Name']}\n"
            output += f"   ⭐ Rating: {stars}\n"
            output += f"   🏞️ Province: {prov_emoji} {row['Located_Province']}\n"

            if i < len(type_locs) - 1:
                output += f"   {'-' * 40}\n"
    else:
        output += f"No other {loc_type} locations found in the included provinces.\n"

    output += "\n"

    # 3. Places nearby - must use the normalized data for distance calculation but display original ratings
    nearby_locs = recommend_nearby(location_name, radius_km=radius_km, num_locations=top_n*2)  # Get more than needed
    output += f"📏 NEARBY LOCATIONS (within {radius_km}km)\n"
    output += f"{'-' * 80}\n"

    count = 0
    displayed_nearby = []

    for _, row in nearby_locs.iterrows():
        # Get original rating and province
        loc_data = original_data[original_data["Location_Name"] == row["Location"]]
        if not loc_data.empty:
            orig_rating = loc_data.iloc[0]["Avg_Rating"]
            province = loc_data.iloc[0]["Located_Province"]
            loc_type = loc_data.iloc[0]["Location_Type"]

            # Only include if province is in the included list
            if province in included_provinces:
                # Get emojis
                loc_emoji = get_location_emoji(loc_type)
                prov_emoji = get_province_emoji(province)

                # Get distance indicator
                if row['Distance_km'] < 10:
                    dist_indicator = "🟢"  # Very close
                elif row['Distance_km'] < 25:
                    dist_indicator = "🟡"  # Moderately close
                else:
                    dist_indicator = "🟠"  # Farther away

                # Format rating
                stars = get_rating_stars(orig_rating)

                displayed_nearby.append({
                    "name": row["Location"],
                    "emoji": loc_emoji,
                    "type": loc_type,
                    "distance": row['Distance_km'],
                    "distance_indicator": dist_indicator,
                    "rating": stars,
                    "province": province,
                    "province_emoji": prov_emoji
                })

                count += 1
                if count >= top_n:
                    break

    if displayed_nearby:
        for i, place in enumerate(displayed_nearby):
            output += f"{i+1}. {place['emoji']} {place['name']}\n"
            output += f"   📏 Distance: {place['distance_indicator']} {place['distance']:.1f}km away\n"
            output += f"   📍 Type: {place['type']}\n"
            output += f"   ⭐ Rating: {place['rating']}\n"
            output += f"   🏞️ Province: {place['province_emoji']} {place['province']}\n"

            if i < len(displayed_nearby) - 1:
                output += f"   {'-' * 40}\n"
    else:
        output += "No nearby locations found within the included provinces.\n"

    output += "\n"

    # 4. Highly rated places - only from included provinces
    high_rated = original_data[
        (original_data["Located_Province"].isin(included_provinces)) &
        (original_data["Location_Name"] != location_name)
        ].sort_values("Avg_Rating", ascending=False).head(top_n)

    output += f"⭐ TOP-RATED ATTRACTIONS IN INCLUDED PROVINCES\n"
    output += f"{'-' * 80}\n"

    if not high_rated.empty:
        for i, row in enumerate(high_rated.iterrows()):
            row = row[1]  # Get the Series from the tuple
            # Get emojis
            loc_emoji = get_location_emoji(row['Location_Type'])
            prov_emoji = get_province_emoji(row['Located_Province'])

            # Format rating
            stars = get_rating_stars(row['Avg_Rating'])

            output += f"{i+1}. {loc_emoji} {row['Location_Name']}\n"
            output += f"   ⭐ Rating: {stars}\n"
            output += f"   📍 Type: {row['Location_Type']}\n"
            output += f"   🏞️ Province: {prov_emoji} {row['Located_Province']}\n"

            if i < len(high_rated) - 1:
                output += f"   {'-' * 40}\n"
    else:
        output += "No highly rated locations found in the included provinces.\n"

    output += "\n"

    # 5. Locations with similar activities - filtered by included provinces
    target_activities = target_info["Activities"].split(",") if isinstance(target_info["Activities"], str) and target_info["Activities"] else []
    if target_activities:
        activity_locs = recommend_by_activities(target_activities, top_n=top_n*3)  # Get more than needed
        output += f"🎯 PLACES WITH SIMILAR ACTIVITIES\n"
        output += f"{'-' * 80}\n"

        count = 0
        displayed_activities = []

        for _, row in activity_locs.iterrows():
            if row["Location"] != location_name:  # Skip the reference location
                # Get original data
                loc_data = original_data[original_data["Location_Name"] == row["Location"]]
                if not loc_data.empty:
                    orig_rating = loc_data.iloc[0]["Avg_Rating"]
                    province = loc_data.iloc[0]["Located_Province"]
                    loc_type = loc_data.iloc[0]["Location_Type"]
                    activities = loc_data.iloc[0]["Activities"]

                    # Only include if province is in the included list
                    if province in included_provinces:
                        # Get emojis
                        loc_emoji = get_location_emoji(loc_type)
                        prov_emoji = get_province_emoji(province)

                        # Match score indicator
                        match_indicator = get_similarity_indicator(row['Activities_Jaccard_Similarity'])

                        # Format the rating
                        stars = get_rating_stars(orig_rating)

                        # Get common activities
                        target_act_set = clean_activities(target_info["Activities"])
                        loc_act_set = clean_activities(activities)
                        common_activities = target_act_set.intersection(loc_act_set)

                        displayed_activities.append({
                            "name": row["Location"],
                            "emoji": loc_emoji,
                            "type": loc_type,
                            "match": row['Activities_Jaccard_Similarity'],
                            "match_indicator": match_indicator,
                            "rating": stars,
                            "province": province,
                            "province_emoji": prov_emoji,
                            "common_activities": common_activities
                        })

                        count += 1
                        if count >= top_n:
                            break

        if displayed_activities:
            for i, place in enumerate(displayed_activities):
                output += f"{i+1}. {place['emoji']} {place['name']}\n"
                output += f"   🎯 Activity Match: {place['match_indicator']} {place['match']:.2f}\n"
                output += f"   ⭐ Rating: {place['rating']}\n"
                output += f"   📍 Type: {place['type']}\n"
                output += f"   🏞️ Province: {place['province_emoji']} {place['province']}\n"

                # Format common activities with emojis (up to 3)
                if place['common_activities']:
                    common_acts = []
                    for i, act in enumerate(place['common_activities']):
                        if i >= 3:  # Limit to 3 activities
                            common_acts.append("...")
                            break
                        emoji = get_activity_emoji(act)
                        common_acts.append(f"{emoji} {act}")

                    common_str = ", ".join(common_acts)
                    output += f"   🔄 Common Activities: {common_str}\n"

                if i < len(displayed_activities) - 1:
                    output += f"   {'-' * 40}\n"
        else:
            output += "No locations with similar activities found within the included provinces.\n"

    return output

In [18]:
if __name__ == "__main__":

    # Get itinerary plan for a location with specific provinces included
    plan = get_itinerary_plan(
        location_name="Galle Fort",
        included_provinces=["Central Province", "Southern Province", "Western Province"],
        top_n=5
    )

    print(plan)


--------------------------------------------------------------------------------
                    = PERSONALIZED TRAVEL RECOMMENDATIONS =                     
--------------------------------------------------------------------------------

📍 LOCATION: Galle Fort

📌 LOCATION DETAILS
------------------------------
📍 Type: 📍 Historic Sites
🏞️ Province: 🏖️ Southern Province
⭐ Rating: ⭐⭐⭐⭐ (4.5/5)
🎯 Activities: 👀 ['Sightseeing', 📸 'Photography', 📜 'Learning History', 🧭 'Cultural Tours']

🗺️ PROVINCES INCLUDED IN THIS PLAN
------------------------------
• ⛰️ Central Province
• 🏖️ Southern Province
• 🏙️ Western Province

🌟 TOP RECOMMENDED PLACES (HYBRID SYSTEM)
--------------------------------------------------------------------------------
1. 📍 Sigiriya The Ancient Rock Fortress
   📊 Match: 🟡 0.66
   ⭐ Rating: ⭐⭐⭐⭐✨ (4.7/5)
   🏞️ Province: ⛰️ Central Province

📍 SIMILAR HISTORIC SITES LOCATIONS
--------------------------------------------------------------------------------
1. 📍 Sigiriy