In [9]:
# Import necessary libraries
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore", message=".*does not have valid feature names.*")

# Fixed seed so the KMeans fit is repeatable between runs
RANDOM_SEED = 42

# Read the destinations table from disk
data = pd.read_csv("destinations_with_coordinates.csv")

# The five preference columns used for both scaling and clustering
feature_columns = ['culture', 'adventure', 'wildlife', 'sightseeing', 'history']

# Rescale every preference column to the [0, 1] range, overwriting the raw
# values in place; the fitted scaler is kept for transforming user input later
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(data[feature_columns])
data[feature_columns] = scaled_values

# Snapshot of the scaled columns that the clustering model is trained on
features = data[feature_columns]

# Fit KMeans and store each destination's cluster label on the table
optimal_k = 5  # Adjust based on your elbow method results
kmeans = KMeans(n_clusters=optimal_k, random_state=RANDOM_SEED)
kmeans.fit(features)
data['cluster'] = kmeans.labels_

# Function to recommend destinations
# Based on cluster and cosine similarity
def recommend_destinations(user_preferences, data, top_n=5):
    """Return the destinations in the user's cluster most similar to them.

    The preferences are scaled with the module-level ``scaler``, assigned to
    a cluster by the module-level ``kmeans`` model, and then ranked by cosine
    similarity against the rows of ``data`` carrying that cluster label.

    Args:
        user_preferences: Mapping of feature name to score, one entry per
            column of the module-level ``features`` frame.
        data: DataFrame containing those feature columns plus the
            ``'cluster'`` and ``'pName'`` columns.
        top_n: Maximum number of rows to return.

    Returns:
        A DataFrame with the columns ``'pName'`` and ``'similarity'``, sorted
        by similarity in descending order and limited to ``top_n`` rows. It
        is empty when no row of ``data`` belongs to the user's cluster.
    """
    # Single source of truth for the feature order: the columns the scaler
    # and the clustering model were fitted on.
    feature_cols = list(features.columns)

    # One-row frame holding the user's scores in the training column order
    user_df = pd.DataFrame([user_preferences], columns=feature_cols)

    # scaler.transform returns a bare array; wrap it back into a DataFrame so
    # kmeans.predict receives the same feature names it was fitted with.
    # NOTE(review): the scaler is applied to the user's scores as-is, so they
    # are presumably expected on the same scale as the values the scaler was
    # fitted on - confirm against the caller.
    user_scaled = pd.DataFrame(scaler.transform(user_df), columns=feature_cols)

    # Assign the user to the closest cluster
    user_cluster = kmeans.predict(user_scaled)[0]

    # Work on a copy so the caller's frame never gains a 'similarity' column
    cluster_data = data[data['cluster'] == user_cluster].copy()

    # cosine_similarity rejects an empty matrix, so return an empty result
    # directly when the supplied data has no destination in this cluster.
    if cluster_data.empty:
        cluster_data['similarity'] = pd.Series(dtype=float)
        return cluster_data[['pName', 'similarity']]

    # Similarity of the single user row against every destination in the cluster
    cluster_data['similarity'] = cosine_similarity(
        user_scaled, cluster_data[feature_cols]
    )[0]

    # Sort by similarity and return the top N recommendations
    recommendations = cluster_data.sort_values(by='similarity', ascending=False).head(top_n)
    return recommendations[['pName', 'similarity']]


# Example usage: a sample preference profile with one score per feature
user_preferences = dict(
    culture=0.8,
    adventure=0.6,
    wildlife=0.4,
    sightseeing=0.9,
    history=0.7,
)

# Fetch the five best matches and print them under a heading
recommendations = recommend_destinations(user_preferences, data, top_n=5)
print("Recommended Destinations:", recommendations, sep="\n")


Recommended Destinations:
                  pName  similarity
473       Guerilla Trek    0.994274
483  jauljibi,dharchula    0.991837
331       Devchuli Hill    0.989096
308               Jagat    0.985207
28              Bhojpur    0.979160
