# Download dataset and libraries

In [3]:
# Specify the dataset path
dataset_path = "aprabowo/indonesia-tourism-destination"

# Download the dataset
!kaggle datasets download -d {dataset_path}

# Unzip the downloaded file
!unzip indonesia-tourism-destination.zip


Dataset URL: https://www.kaggle.com/datasets/aprabowo/indonesia-tourism-destination
License(s): copyright-authors
Downloading indonesia-tourism-destination.zip to /content
  0% 0.00/158k [00:00<?, ?B/s]
100% 158k/158k [00:00<00:00, 52.6MB/s]
Archive:  indonesia-tourism-destination.zip
  inflating: package_tourism.csv     
  inflating: tourism_rating.csv      
  inflating: tourism_with_id.csv     
  inflating: user.csv                


In [4]:
!pip install googlemaps

Collecting googlemaps
  Downloading googlemaps-4.10.0.tar.gz (33 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: googlemaps
  Building wheel for googlemaps (setup.py) ... [?25l[?25hdone
  Created wheel for googlemaps: filename=googlemaps-4.10.0-py3-none-any.whl size=40715 sha256=8871152db76ec8eabe36fe53800fee6286a0d0750b11e00e9170d192154ddaea
  Stored in directory: /root/.cache/pip/wheels/17/f8/79/999d5d37118fd35d7219ef57933eb9d09886c4c4503a800f84
Successfully built googlemaps
Installing collected packages: googlemaps
Successfully installed googlemaps-4.10.0


# Import Library

In [5]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Dense, Add
from tensorflow.keras.models import Model
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
import googlemaps

# Preprocessing dataset

In [6]:
# Load dataset
place = pd.read_csv('tourism_with_id.csv')
rating = pd.read_csv('tourism_rating.csv')
user = pd.read_csv('user.csv')

In [7]:
# Check for missing values
print(place.isnull().sum())
print(rating.isnull().sum())
print(user.isnull().sum())

Place_Id          0
Place_Name        0
Description       0
Category          0
City              0
Price             0
Rating            0
Time_Minutes    232
Coordinate        0
Lat               0
Long              0
Unnamed: 11     437
Unnamed: 12       0
dtype: int64
User_Id          0
Place_Id         0
Place_Ratings    0
dtype: int64
User_Id     0
Location    0
Age         0
dtype: int64


In [8]:
# Drop column that are not used
place = place.drop(['Unnamed: 11', 'Unnamed: 12'], axis=1)

# Drop duplicate values
rating.drop_duplicates(inplace=True)

In [9]:
# Calculate the median 'Time_Minutes' for each location
city_median = place.groupby('City')['Time_Minutes'].median()

# Fill in the blank value based on the median location
place['Time_Minutes'] = place.apply(
    lambda row: city_median[row['City']] if pd.isnull(row['Time_Minutes']) else row['Time_Minutes'],
    axis=1
)

In [10]:
# Combining the dataset
merge_data = pd.merge(rating, place[['Place_Id', 'Rating', 'Place_Name', 'Description', 'Category', 'City', 'Price', 'Time_Minutes', 'Coordinate', 'Lat', 'Long']], on='Place_Id', how='left')

# Calculating the mean rating for each place
merge_data = merge_data.groupby('Place_Id').agg(
    Mean_Rating=('Place_Ratings', 'mean'),
    Rating=('Rating', 'first'),
    Place_Name=('Place_Name', 'first'),
    Description=('Description', 'first'),
    Category=('Category', 'first'),
    City=('City', 'first'),
    Price=('Price', 'first'),
    Time_Minutes=('Time_Minutes', 'first'),
    Coordinate=('Coordinate', 'first'),
    Lat=('Lat', 'first'),
    Long=('Long', 'first')
).reset_index()

merge_data.head()

Unnamed: 0,Place_Id,Mean_Rating,Rating,Place_Name,Description,Category,City,Price,Time_Minutes,Coordinate,Lat,Long
0,1,3.722222,4.6,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153
1,2,2.84,4.6,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125
2,3,2.526316,4.6,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538
3,4,2.857143,4.5,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,90.0,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156
4,5,3.52,4.5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134


# Filter

In [11]:
def filter_places(city=None, categories=None):
    """
    Filter places based on city and categories.

    Args:
        city: City name to filter places (optional).
        categories: List of categories to filter places (optional).

    Returns:
        Filtered DataFrame.
    """
    print(f"Filtering places - City: {city}, Categories: {categories}")

    filtered = merge_data.copy()
    if city:
        filtered = filtered[filtered['City'].str.contains(city, case=False, na=False)]

    if categories:
        print(f"Categories before filtering: {filtered['Category'].unique()}")
        filtered = filtered[filtered['Category'].isin(categories)]
        print(f"Categories after filtering: {filtered['Category'].unique()}")
        print(f"Number of places after category filtering: {len(filtered)}")

    return filtered

# CF

In [12]:
# Prepare the data
user_ids = rating['User_Id'].unique().tolist()
place_ids = merge_data['Place_Id'].unique().tolist()

user_id_to_index = {user_id: index for index, user_id in enumerate(user_ids)}
place_id_to_index = {place_id: index for index, place_id in enumerate(place_ids)}

rating['User_Index'] = rating['User_Id'].map(user_id_to_index)
rating['Place_Index'] = rating['Place_Id'].map(place_id_to_index)

num_users = len(user_ids)
num_places = len(place_ids)
embedding_size = 50

In [13]:
# Define the CF Model
user_input = Input(shape=(1,))
user_embedding = Embedding(num_users, embedding_size, embeddings_regularizer=tf.keras.regularizers.l2(1e-6))(user_input)
user_vec = Flatten()(user_embedding)

place_input = Input(shape=(1,))
place_embedding = Embedding(num_places, embedding_size, embeddings_regularizer=tf.keras.regularizers.l2(1e-6))(place_input)
place_vec = Flatten()(place_embedding)

dot_product = Dot(axes=1)([user_vec, place_vec])

# Add bias terms for users and places
user_bias = Embedding(num_users, 1)(user_input)
user_bias = Flatten()(user_bias)

place_bias = Embedding(num_places, 1)(place_input)
place_bias = Flatten()(place_bias)

prediction = Add()([dot_product, user_bias, place_bias])

In [14]:
# Define the Model
cf_model = Model([user_input, place_input], prediction)
cf_model.compile(optimizer='adam', loss='mean_squared_error')

In [15]:
# Train the model
cf_model.fit(
    [rating['User_Index'], rating['Place_Index']],
    rating['Place_Ratings'],
    epochs=20,
    verbose=1
)

Epoch 1/20
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 10.9242
Epoch 2/20
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 9.8397
Epoch 3/20
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 6.8258
Epoch 4/20
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 2.8735
Epoch 5/20
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.8544
Epoch 6/20
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.7238
Epoch 7/20
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 1.6546
Epoch 8/20
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.5904
Epoch 9/20
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.4802
Epoch 10/20
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - l

<keras.src.callbacks.history.History at 0x7e4b60c08be0>

In [16]:
def predict_ratings(user_id, place_ids):
    user_index = user_id_to_index[user_id]
    place_indices = [place_id_to_index[place_id] for place_id in place_ids if place_id in place_id_to_index]
    predictions = cf_model.predict([np.array([user_index] * len(place_indices)), np.array(place_indices)])
    return predictions.flatten()

# CBF

In [17]:
def calculate_cbf_scores(filtered_places):
    """
    Calculate content-based filtering (CBF) scores for the places based on categories or descriptions.

    Args:
        filtered_places: DataFrame of places to filter based on categories.

    Returns:
        A DataFrame of places with their CBF scores and other relevant details.
    """
    # Check if filtered_places is empty
    if filtered_places.empty:
        print("Warning: No places found after filtering!")
        return pd.DataFrame(columns=['Place_Id', 'name', 'category', 'similarity_score'])

    # Ensure 'Features' column creation works
    filtered_places['Features'] = filtered_places['Place_Name'] + ' ' + filtered_places['Category']

    # Vectorize the features using TF-IDF Vectorizer
    vectorizer = TfidfVectorizer(stop_words='english')  # Exclude common English stop words

    try:
        tfidf_matrix = vectorizer.fit_transform(filtered_places['Features'])
    except ValueError as e:
        print(f"Error in vectorization: {e}")
        print(f"Unique features: {filtered_places['Features'].unique()}")
        return pd.DataFrame(columns=['Place_Id', 'name', 'category', 'similarity_score'])

    # Compute cosine similarity between places based on the TF-IDF vectors
    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

    # For each place, recommend places with highest similarity scores (top 10)
    recommendations = []
    for idx in range(len(filtered_places)):
        # Ensure we're not going out of bounds when selecting the top 10 similar places
        num_similar = min(10, len(filtered_places))  # Avoid out-of-bounds indices
        similar_indices = cosine_sim[idx].argsort()[-(num_similar -1):][::-1]  # Get top N similar places

        for similar_idx in similar_indices:
            if similar_idx != idx:  # Avoid recommending the same place
                recommendations.append({
                    'Place_Id': filtered_places.iloc[similar_idx]['Place_Id'],
                    'name': filtered_places.iloc[similar_idx]['Place_Name'],
                    'category': filtered_places.iloc[similar_idx]['Category'],
                    'similarity_score': cosine_sim[idx][similar_idx]
                })

    # If no recommendations found
    if not recommendations:
        print("No recommendations could be generated!")
        return pd.DataFrame(columns=['Place_Id', 'name', 'category', 'similarity_score'])

    return pd.DataFrame(recommendations)

# Distance Calculation (GMaps API)

In [None]:
# Function to calculate distance using Google Maps API
def calculate_distance(start_lat, start_lng, end_lat, end_lng):
    gmaps_api_key = ''
    gmaps = googlemaps.Client(key=gmaps_api_key)
    origin = (start_lat, start_lng)
    destination = (end_lat, end_lng)

    # Get distance matrix from Google Maps API
    result = gmaps.distance_matrix(origin, destination)

    try:
        # Get distance matrix from Google Maps API
        result = gmaps.distance_matrix(origin, destination)

        # Check if the result contains the expected data
        if (result and
            'rows' in result and
            result['rows'] and
            'elements' in result['rows'][0] and
            result['rows'][0]['elements'] and
            'distance' in result['rows'][0]['elements'][0] and
            'duration' in result['rows'][0]['elements'][0]):

            distance_km = result['rows'][0]['elements'][0]['distance']['value'] / 1000  # Convert meters to km
            travel_time = result['rows'][0]['elements'][0]['duration']['value'] / 60
            return distance_km, travel_time
        else:
            print(f"Incomplete distance matrix result for origin: {origin}, destination: {destination}")
            return None

    except Exception as e:
        print(f"Error calculating distance for origin: {origin}, destination: {destination}")
        print(f"Error details: {str(e)}")
        return None

# Recommendation Model

In [33]:
def recommend_tourist_destinations(
    user_id, user_lat, user_lng, user_city, user_categories,
    days=None, time=8, budget=None, is_new_user=False
    ):
    """
    Recommend tourist destinations with sequential distance calculation,
    resetting to original starting point each day.

    Args:
        user_id: The ID of the user for whom the recommendations are being made.
        user_lat (float): Latitude of the user's starting location.
        user_lng (float): Longitude of the user's starting location.
        user_city (str): City preference for the user.
        user_categories: Categories filter (if any).
        days: Number of days for splitting recommendations (if applicable).
        time (float): Fixed daily time limit set to 8 hours.
        budget (float): Budget preference for the user (if applicable).

    Returns:
        A list of recommended destinations for each day, total time used, and total budget spent.
    """

    # Check if existing user
    user_exists = user_id in rating['User_Id'].unique()

    # Determine categories
    if user_categories is None:
      if user_exists:
        # Get user's past ratings
        user_ratings = rating[rating['User_Id'] == user_id]

        # Merge ratings with place data to get categories
        user_rated_places = pd.merge(rating, merge_data[['Place_Id', 'Category']], on='Place_Id', how='left')

        # Count category frequencies and sort
        category_counts = user_rated_places['Category'].value_counts().reset_index()
        category_counts.columns = ['Category', 'Frequency']

        # Get the most frequent category
        user_categories = category_counts.head(2)['Category'].tolist()
        print(f"User's most frequent category: {user_categories}")

      else:
        city_places = merge_data[merge_data['City'].str.contains(user_city, case=False, na=False)]
        # Group by category and calculate mean rating
        category_ratings = city_places.groupby('Category')['Rating'].mean().sort_values(ascending=False)
        # Select top 3 categories with highest average ratings in the city
        user_categories = category_ratings.head(3).index.tolist()

        print(f"New user - selecting top-rated categories in {user_city}:")
        print(category_ratings)
        print(f"Selected categories: {user_categories}")

    # Ensure category is a list
    if not isinstance(user_categories, list):
      user_categories = [user_categories]

    print(f"Final categories: {user_categories}")

    # Step 1: Filter places based on city and categories (if provided)
    filtered_places = filter_places(
        city=user_city,
        categories=user_categories
        )

    # Step 2: Get Content-Based Filtering recommendations
    cbf_recommendations = calculate_cbf_scores(filtered_places)

    # Step 3: Get Collaborative Filtering recommendations
    place_ids = cbf_recommendations['Place_Id'].unique()

    if user_exists:
        cf_recommendations = predict_ratings(user_id, place_ids)

        # Convert to DataFrame
        cf_recommendations = pd.DataFrame({'Place_Id': place_ids, 'cf_rating': cf_recommendations})

    else:
        # Calculate weighted global average ratings
        global_avg_ratings = merge_data.groupby('Place_Id')['Rating'].mean()

        cf_recommendations = pd.DataFrame({
            'Place_Id': place_ids,
            'cf_rating': [
                global_avg_ratings.get(pid, merge_data['Rating'].mean()) + np.random.uniform(-0.5, 0.5)
                for pid in place_ids
                ]
        })

    # Convert to DataFrame
    cbf_recommendations = pd.DataFrame(cbf_recommendations)

    # Step 4: Combine recommendations
    combined_recommendations = pd.merge(
        cbf_recommendations,
        merge_data[['Place_Id', 'Rating', 'Time_Minutes', 'Price', 'Lat', 'Long']],
        on='Place_Id',
        how='left'
        )
    combined_recommendations = pd.merge(
        combined_recommendations,
        cf_recommendations,
        on='Place_Id',
        how='left'
        )

    # Calculate MSE between CBF and CF recommendations
    combined_recommendations['mse'] = (
        combined_recommendations['Rating'] - combined_recommendations['cf_rating'])**2

    # Sort recommendations by MSE to prioritize consistent recommendations
    combined_recommendations = combined_recommendations.sort_values('mse')

    # Remove duplicates, if any
    combined_recommendations = combined_recommendations.drop_duplicates(subset='Place_Id')

    # Track recommendations per day
    recommendations_per_day = []
    total_time_per_day = []
    total_budget_per_day = []

    # Track visited places across days
    visited_places = set()

    if days:
        for day in range(days):
            day_recommendations = []
            day_total_time = 0
            day_total_budget = 0

            # IMPORTANT: Reset to original starting point for each day
            current_lat = user_lat
            current_lng = user_lng

            # Iterate through sorted recommendations
            for _, place in combined_recommendations.iterrows():
                # Skip if place has been visited in previous days
                if place['Place_Id'] in visited_places:
                    continue

                # Calculate distance from current location to this destination
                distance_km, travel_time = calculate_distance(current_lat, current_lng, place['Lat'], place['Long'])

                # Calculate total time for this place (travel time + visit time in hours)
                place_total_time = (place['Time_Minutes'] / 60) + (travel_time / 60)

                # Check if adding this place would exceed 8-hour limit
                if day_total_time + place_total_time > time:
                    continue

                # Check budget constraint if provided
                if budget and day_total_budget + place['Price'] > budget:
                    continue

                # Add place to daily recommendations
                place_with_distance = place.copy()
                place_with_distance['distance_km'] = distance_km
                place_with_distance['travel_time'] = travel_time
                day_recommendations.append(place_with_distance)

                # Update tracking variables
                day_total_time += place_total_time
                day_total_budget += place['Price']

                # Update current location for next distance calculation
                current_lat = place['Lat']
                current_lng = place['Long']

                # Mark place as visited
                visited_places.add(place['Place_Id'])

            # Convert to DataFrame
            day_recommendations_df = pd.DataFrame(day_recommendations)
            recommendations_per_day.append(day_recommendations_df)
            total_time_per_day.append(day_total_time)
            total_budget_per_day.append(day_total_budget)

            # Calculate MSE
            mse = combined_recommendations['mse'].mean()

    return recommendations_per_day, total_time_per_day, total_budget_per_day, mse

# Testing Existing User

Test 1 - Existing User + 1 Category

In [20]:
user_id = 1
user_lat = -6.200000
user_lng = 106.816666
user_city = 'Jakarta'
user_categories = 'Budaya'
days = 2
budget = 150000
time = 8

# Get recommendations
recommendations_per_day, total_time_per_day, total_budget_per_day, mse = recommend_tourist_destinations(
    user_id,
    user_lat,
    user_lng,
    user_city,
    user_categories,
    days=days,
    time=time,
    budget=budget
    )

print(recommendations_per_day)
print("\nTotal Price per Day: ")
if total_budget_per_day:
    for day, price in enumerate(total_budget_per_day, 1):
        print(f"Day {day}: {price}")
else:
    print("No daily prices to display.")

print("\nTotal Hours Spent per Day: ")
if total_time_per_day:
    for day, hours in enumerate(total_time_per_day, 1):
      print(f"Day {day}: {hours:.2f} hours")
else:
    print("No daily hours to display.")

print("MSE: ", mse)

Final categories: ['Budaya']
Filtering places - City: Jakarta, Categories: ['Budaya']
Categories before filtering: ['Budaya' 'Taman Hiburan' 'Cagar Alam' 'Bahari' 'Pusat Perbelanjaan'
 'Tempat Ibadah']
Categories after filtering: ['Budaya']
Number of places after category filtering: 32
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 115ms/step
[     Place_Id                         name category  similarity_score  Rating  \
119        63  Museum Kebangkitan Nasional   Budaya          0.030937     4.7   
1          44       Monumen Selamat Datang   Budaya          0.437107     4.7   
69          1             Monumen Nasional   Budaya          0.041014     4.6   
215        20        Museum Taman Prasasti   Budaya          0.080291     4.5   
79         60               Museum Tekstil   Budaya          0.032572     4.5   
246        18        Museum Bank Indonesia   Budaya          0.117089     4.7   

     Time_Minutes  Price       Lat        Long  cf_rating       mse  \


Test 2 - Existing User + No Category

In [21]:
user_id = 1
user_lat = -6.200000
user_lng = 106.816666
user_city = 'Jakarta'
user_categories = None
days = 2
budget = 150000
time = 8

# Get recommendations
recommendations_per_day, total_time_per_day, total_budget_per_day, mse = recommend_tourist_destinations(
    user_id,
    user_lat,
    user_lng,
    user_city,
    user_categories,
    days=days,
    time=time,
    budget=budget
    )

print(recommendations_per_day)
print("\nTotal Price per Day: ")
if total_budget_per_day:
    for day, price in enumerate(total_budget_per_day, 1):
        print(f"Day {day}: {price}")
else:
    print("No daily prices to display.")

print("\nTotal Hours Spent per Day: ")
if total_time_per_day:
    for day, hours in enumerate(total_time_per_day, 1):
      print(f"Day {day}: {hours:.2f} hours")
else:
    print("No daily hours to display.")

print("MSE: ", mse)

User's most frequent category: ['Taman Hiburan', 'Budaya']
Final categories: ['Taman Hiburan', 'Budaya']
Filtering places - City: Jakarta, Categories: ['Taman Hiburan', 'Budaya']
Categories before filtering: ['Budaya' 'Taman Hiburan' 'Cagar Alam' 'Bahari' 'Pusat Perbelanjaan'
 'Tempat Ibadah']
Categories after filtering: ['Budaya' 'Taman Hiburan']
Number of places after category filtering: 59
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step 
[     Place_Id                                       name       category  \
103        41                      Museum Bahari Jakarta         Budaya   
192        51                        Jakarta Planetarium  Taman Hiburan   
171        28  Wisata Agro Edukatif Istana Susu Cibugary  Taman Hiburan   

     similarity_score  Rating  Time_Minutes  Price       Lat        Long  \
103          0.214181     4.4         120.0   2000 -6.126955  106.808590   
192          0.347374     4.1         120.0  12000 -6.190082  106.838853   
1

Test 3 - Existing User + >1 Category

In [22]:
user_id = 1
user_lat = -6.200000
user_lng = 106.816666
user_city = 'Jakarta'
user_categories = ['Budaya', 'Taman Hiburan', 'Tempat Ibadah']
days = 2
budget = 150000
time = 10

# Get recommendations
recommendations_per_day, total_time_per_day, total_budget_per_day, mse = recommend_tourist_destinations(
    user_id,
    user_lat,
    user_lng,
    user_city,
    user_categories,
    days=days,
    time=time,
    budget=budget
    )

print(recommendations_per_day)
print("\nTotal Price per Day: ")
if total_budget_per_day:
    for day, price in enumerate(total_budget_per_day, 1):
        print(f"Day {day}: {price}")
else:
    print("No daily prices to display.")

print("\nTotal Hours Spent per Day: ")
if total_time_per_day:
    for day, hours in enumerate(total_time_per_day, 1):
      print(f"Day {day}: {hours:.2f} hours")
else:
    print("No daily hours to display.")

print("MSE: ", mse)

Final categories: ['Budaya', 'Taman Hiburan', 'Tempat Ibadah']
Filtering places - City: Jakarta, Categories: ['Budaya', 'Taman Hiburan', 'Tempat Ibadah']
Categories before filtering: ['Budaya' 'Taman Hiburan' 'Cagar Alam' 'Bahari' 'Pusat Perbelanjaan'
 'Tempat Ibadah']
Categories after filtering: ['Budaya' 'Taman Hiburan' 'Tempat Ibadah']
Number of places after category filtering: 62
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[     Place_Id                                       name       category  \
281        41                      Museum Bahari Jakarta         Budaya   
208        51                        Jakarta Planetarium  Taman Hiburan   
187        28  Wisata Agro Edukatif Istana Susu Cibugary  Taman Hiburan   
9          78                       Hutan Kota Srengseng  Taman Hiburan   

     similarity_score  Rating  Time_Minutes  Price       Lat        Long  \
281          0.346510     4.4         120.0   2000 -6.126955  106.808590   
208         

# Testing New User

Test 4 - New User + 1 Category

In [34]:
user_id = 123456
user_lat = -6.200000
user_lng = 106.816666
user_city = 'Jakarta'
user_categories = 'Budaya'
days = 2
budget = 150000
time = 8

# Get recommendations
recommendations_per_day, total_time_per_day, total_budget_per_day, mse = recommend_tourist_destinations(
    user_id,
    user_lat,
    user_lng,
    user_city,
    user_categories,
    days=days,
    time=time,
    budget=budget
    )

print(recommendations_per_day)
print("\nTotal Price per Day: ")
if total_budget_per_day:
    for day, price in enumerate(total_budget_per_day, 1):
        print(f"Day {day}: {price}")
else:
    print("No daily prices to display.")

print("\nTotal Hours Spent per Day: ")
if total_time_per_day:
    for day, hours in enumerate(total_time_per_day, 1):
      print(f"Day {day}: {hours:.2f} hours")
else:
    print("No daily hours to display.")

print("MSE: ", mse)

Final categories: ['Budaya']
Filtering places - City: Jakarta, Categories: ['Budaya']
Categories before filtering: ['Budaya' 'Taman Hiburan' 'Cagar Alam' 'Bahari' 'Pusat Perbelanjaan'
 'Tempat Ibadah']
Categories after filtering: ['Budaya']
Number of places after category filtering: 32
[     Place_Id                       name category  similarity_score  Rating  \
255        20      Museum Taman Prasasti   Budaya          0.110516     4.5   
128        40  Galeri Nasional Indonesia   Budaya          0.661421     4.6   
77         21              Museum Wayang   Budaya          0.032572     4.5   
242        17          Museum Fatahillah   Budaya          0.141785     4.4   

     Time_Minutes  Price       Lat        Long  cf_rating       mse  \
255          90.0   2000 -6.172224  106.818969   4.458840  0.001694   
128          90.0      0 -6.178548  106.832772   4.644194  0.001953   
77          150.0   5000 -6.134907  106.812445   4.383128  0.013659   
242          90.0   5000 -6.1364

Test 5 - New User + No Category

In [35]:
user_id = 123456
user_lat = -6.200000
user_lng = 106.816666
user_city = 'Jakarta'
user_categories = None
days = 2
budget = 150000
time = 8

# Get recommendations
recommendations_per_day, total_time_per_day, total_budget_per_day, mse = recommend_tourist_destinations(
    user_id,
    user_lat,
    user_lng,
    user_city,
    user_categories,
    days=days,
    time=time,
    budget=budget
    )

print(recommendations_per_day)
print("\nTotal Price per Day: ")
if total_budget_per_day:
    for day, price in enumerate(total_budget_per_day, 1):
        print(f"Day {day}: {price}")
else:
    print("No daily prices to display.")

print("\nTotal Hours Spent per Day: ")
if total_time_per_day:
    for day, hours in enumerate(total_time_per_day, 1):
      print(f"Day {day}: {hours:.2f} hours")
else:
    print("No daily hours to display.")

print("MSE: ", mse)

New user - selecting top-rated categories in Jakarta:
Category
Tempat Ibadah         4.666667
Budaya                4.553125
Pusat Perbelanjaan    4.490000
Taman Hiburan         4.444444
Cagar Alam            4.375000
Bahari                4.362500
Name: Rating, dtype: float64
Selected categories: ['Tempat Ibadah', 'Budaya', 'Pusat Perbelanjaan']
Final categories: ['Tempat Ibadah', 'Budaya', 'Pusat Perbelanjaan']
Filtering places - City: Jakarta, Categories: ['Tempat Ibadah', 'Budaya', 'Pusat Perbelanjaan']
Categories before filtering: ['Budaya' 'Taman Hiburan' 'Cagar Alam' 'Bahari' 'Pusat Perbelanjaan'
 'Tempat Ibadah']
Categories after filtering: ['Budaya' 'Pusat Perbelanjaan' 'Tempat Ibadah']
Number of places after category filtering: 45
[     Place_Id                                               name  \
69         39  Museum Macan (Modern and Contemporary Art in N...   
138        49                              Galeri Indonesia Kaya   
128        81                               

Test 6 - New User + >1 Category

In [37]:
user_id = 123456
user_lat = -6.200000
user_lng = 106.816666
user_city = 'Jakarta'
user_categories = ['Budaya', 'Taman Hiburan']
days = 2
budget = 150000
time = 8

# Get recommendations
recommendations_per_day, total_time_per_day, total_budget_per_day, mse = recommend_tourist_destinations(
    user_id,
    user_lat,
    user_lng,
    user_city,
    user_categories,
    days=days,
    time=time,
    budget=budget
    )

print(recommendations_per_day)
print("\nTotal Price per Day: ")
if total_budget_per_day:
    for day, price in enumerate(total_budget_per_day, 1):
        print(f"Day {day}: {price}")
else:
    print("No daily prices to display.")

print("\nTotal Hours Spent per Day: ")
if total_time_per_day:
    for day, hours in enumerate(total_time_per_day, 1):
      print(f"Day {day}: {hours:.2f} hours")
else:
    print("No daily hours to display.")

print("MSE: ", mse)

Final categories: ['Budaya', 'Taman Hiburan']
Filtering places - City: Jakarta, Categories: ['Budaya', 'Taman Hiburan']
Categories before filtering: ['Budaya' 'Taman Hiburan' 'Cagar Alam' 'Bahari' 'Pusat Perbelanjaan'
 'Tempat Ibadah']
Categories after filtering: ['Budaya' 'Taman Hiburan']
Number of places after category filtering: 59
[     Place_Id                     name       category  similarity_score  \
57         78     Hutan Kota Srengseng  Taman Hiburan          0.249762   
151        57   Taman Lapangan Banteng  Taman Hiburan          0.342557   
32         58             Taman Ayodya  Taman Hiburan          0.189513   
176        24          Museum Nasional         Budaya          0.161557   
464         6  Taman Impian Jaya Ancol  Taman Hiburan          0.440770   

     Rating  Time_Minutes  Price       Lat        Long  cf_rating       mse  \
57      4.3          90.0   1000 -6.210694  106.764395   4.318524  0.000343   
151     4.7          90.0      0 -6.170555  106.83503