In [5]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import euclidean_distances

# Load the dataset from a hard-coded, machine-specific Excel path into a DataFrame.
# The code further down in this file reads the columns 'City', 'Category', 'Price',
# 'Place_Id' and 'Processed_Description' from it.
# NOTE(review): the absolute Windows path only resolves on the original author's
# machine — adjust it (or make it configurable) before running elsewhere.
data_file = "E:/DATA/PythonProgram/SR/destinasi-wisata-indonesia-preprocessed.xlsx"
data = pd.read_excel(data_file)

# Preprocessing Function
def preprocess_text(text):
    """Normalize a string so it can be compared against other processed text.

    The text is lower-cased, every character that is neither a word
    character nor whitespace is dropped, each run of whitespace is collapsed
    to a single space, and leading/trailing whitespace is trimmed.

    Parameters:
    - text: The string to normalize.

    Returns:
    - The normalized string.
    """
    import re

    lowered = text.lower()
    # Strip punctuation: anything outside \w (letters, digits, underscore) and \s.
    without_punctuation = re.sub(r'[^\w\s]', '', lowered)
    # Collapse whitespace runs, then trim both ends.
    single_spaced = re.sub(r'\s+', ' ', without_punctuation)
    return single_spaced.strip()

# Evaluation Function
def evaluate_precision_recall(recommendations, ground_truth, N):
    """
    Calculate Precision@N and Recall@N.

    Parameters:
    - recommendations: Ranked list of recommended Place_Id values (best first).
    - ground_truth: List of relevant Place_Id values.
    - N: Number of top recommendations to evaluate.

    Returns:
    - Precision@N: distinct relevant items in the top N, divided by N.
      The denominator is always N, even when fewer than N recommendations
      are supplied.
    - Recall@N: distinct relevant items in the top N, divided by the number
      of distinct relevant items.

    Both values are 0.0 when N is not positive; recall is 0.0 when
    ground_truth is empty.
    """
    # A non-positive cutoff selects nothing. Guard explicitly: a negative N
    # would otherwise make the slice below count from the end of the list.
    if N <= 0:
        return 0.0, 0.0

    relevant_items = set(ground_truth)

    # Set intersection counts each relevant item once even if it appears
    # several times in the ranking, which keeps recall within [0, 1].
    retrieved_relevant = relevant_items.intersection(recommendations[:N])

    precision_at_n = len(retrieved_relevant) / N
    recall_at_n = len(retrieved_relevant) / len(relevant_items) if relevant_items else 0.0

    return precision_at_n, recall_at_n

# Example usage
city = "Bandung"
category = "Taman Hiburan"
max_price = 500000
user_description = "tempat wisata keluarga dengan banyak permainan seru"

# Keep only the places matching the requested city, category and price ceiling.
filtered_data = data[
    (data['City'] == city) &
    (data['Category'] == category) &
    (data['Price'] <= max_price)
]

if not filtered_data.empty and 'Processed_Description' in filtered_data.columns:
    # Initialize the vectorizer
    vectorizer = TfidfVectorizer()

    # Text similarity: the query and the candidate descriptions are vectorized
    # together so they share one vocabulary; row 0 of the matrix is the query.
    processed_description = preprocess_text(user_description)
    # Missing descriptions become empty strings, because TfidfVectorizer
    # raises on NaN documents.
    candidate_descriptions = (
        filtered_data['Processed_Description'].fillna('').astype(str).tolist()
    )
    tfidf_matrix = vectorizer.fit_transform(
        [processed_description] + candidate_descriptions
    )
    # Turn each Euclidean distance d into a similarity score 1 / (1 + d),
    # so a smaller distance yields a larger score.
    text_similarities = 1 / (1 + euclidean_distances(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten())

    # Attach the similarity to a copy so the slice of `data` is not mutated.
    filtered_data = filtered_data.copy()
    filtered_data['Description_Similarity'] = text_similarities

    # Rank candidates by similarity, most similar first.
    filtered_data = filtered_data.sort_values(by=['Description_Similarity'], ascending=False)

    # Ranked Place_Id recommendations
    recommendations = filtered_data['Place_Id'].tolist()

    # Ground truth: every place in the requested city and category.
    # NOTE(review): the candidates above were filtered by the same city and
    # category (plus price), so every recommendation is in the ground truth by
    # construction. Precision is therefore 1.0 whenever at least N candidates
    # exist; consider an independent relevance signal for a meaningful score.
    ground_truth = data[(data['City'] == city) & (data['Category'] == category)]['Place_Id'].tolist()

    # Evaluate Precision@N and Recall@N
    N = 5
    precision_at_n, recall_at_n = evaluate_precision_recall(recommendations, ground_truth, N)

    # Evaluate Precision@M and Recall@M
    M = 3
    precision_at_m, recall_at_m = evaluate_precision_recall(recommendations, ground_truth, M)

    # The headers interpolate the cutoffs so they stay correct if N or M change.
    print(f"Top {N} Recommendations for {category} in {city} with max price {max_price}:")
    print(f"Precision@{N}: {precision_at_n}")
    print(f"Recall@{N}: {recall_at_n}")
    print()

    print(f"Top {M} Recommendations for {category} in {city} with max price {max_price}:")
    print(f"Precision@{M}: {precision_at_m}")
    print(f"Recall@{M}: {recall_at_m}")
else:
    # Reached when no row passes the filters or the description column is absent.
    print("No recommendations available based on the filters.")

Top 5 Recommendations for Taman Hiburan in Bandung with max price 500000:
Precision@5: 1.0
Recall@5: 0.1282051282051282

Top 3 Recommendations for Taman Hiburan in Bandung with max price 500000:
Precision@3: 1.0
Recall@3: 0.07692307692307693
