## **Install & Import Libraries**

In [1]:
# gdown: downloader for public Google Drive files/folders.
# Uncomment the next line if gdown is not installed in the kernel's environment:
# %pip install gdown -q -U

In [2]:
# Import Library
import keras
import gdown
import joblib
import sklearn
import numpy as np
import pandas as pd
import tensorflow as tf
from TF_Object import L2NormalizeLayer
from TF_Object import CollaborativeFilteringLayer
from TF_Object import CollaborativeFilteringModel

## **Prepare Dataset**

In [3]:
# Google Drive file IDs, keyed by the local file name (without ".csv") each one is saved as.
# Commented-out entries are datasets this example does not need.
base_url = "https://drive.google.com/uc?id={id}"
file_id = {
    # "old_users_data"              : "16ngL0bfyQqucpYIffxkKiCa3qSGhR26S",
    "new_tourism_with_id_links"   : "1jMd_Sj7t3EvONIJDzg8XuqfcT079LpCa",
    # "new_tourism_rating_comments" : "1o-phm0eE64NulxzTLM_FWCyiMbfPHsYt",
    # "package_tourism"             : "1LtPNXAnkJde03TFHJXqhIbKvtI_FXJxC"
    "touris_spots_distance"       : "1HyZ52iIGI1J85rAc8xbYTScKYQBB0WVt"
}

# Download every dataset file using gdown.
# The loop variable is `drive_id` (not `id`) so the built-in `id()` is not
# shadowed in the notebook's global namespace after this cell runs.
for filename, drive_id in file_id.items():
    gdown.download(id=drive_id, output=f"{filename}.csv")          # Using Python
    # !gdown {base_url.format(id=drive_id)} -O {filename}.csv      # Using terminal command

Downloading...
From: https://drive.google.com/uc?id=1jMd_Sj7t3EvONIJDzg8XuqfcT079LpCa
To: d:\Kuliah Semester 7\Bangkit 2024 Batch 2\Capstone\exploria-machine-learning\Example\new_tourism_with_id_links.csv
100%|██████████| 430k/430k [00:00<00:00, 1.69MB/s]
Downloading...
From: https://drive.google.com/uc?id=1HyZ52iIGI1J85rAc8xbYTScKYQBB0WVt
To: d:\Kuliah Semester 7\Bangkit 2024 Batch 2\Capstone\exploria-machine-learning\Example\touris_spots_distance.csv
100%|██████████| 2.68M/2.68M [00:01<00:00, 1.49MB/s]


In [4]:
# Read the downloaded CSV files into pandas DataFrames.
# Not loaded in this example: old_users_data.csv (user_df),
# new_tourism_rating_comments.csv (rating_df), package_tourism.csv (package_df).
tourism_df, distance_df = (
    pd.read_csv("new_tourism_with_id_links.csv"),
    pd.read_csv("touris_spots_distance.csv"),
)

In [5]:
# Peek at the first three tourism records
tourism_df.head(n=3)

Unnamed: 0,Place_Id,Place_Name,Category,Description,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Img_Path_0,Img_Path_1,Img_Path_2
0,1,Monumen Nasional,"Budaya, Situs Sejarah",Monumen Nasional atau yang populer disingkat d...,Jakarta,20000,46,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,images_output\Monumen Nasional\Monumen Nasiona...,images_output\Monumen Nasional\Monumen Nasiona...,images_output\Monumen Nasional\Monumen Nasiona...
1,2,Kota Tua,Budaya,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Jakarta,0,46,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,images_output\Kota Tua\Kota Tua_0.jpg,images_output\Kota Tua\Kota Tua_1.jpg,
2,3,Dunia Fantasi,"Taman Hiburan, Bahari",Dunia Fantasi atau disebut juga Dufan adalah t...,Jakarta,270000,46,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,images_output\Dunia Fantasi\Dunia Fantasi_0.jpg,images_output\Dunia Fantasi\Dunia Fantasi_1.png,images_output\Dunia Fantasi\Dunia Fantasi_2.jpg


In [6]:
# Show three random distance rows; the fixed seed keeps the preview
# reproducible across "Restart Kernel -> Run All".
distance_df.sample(3, random_state=42)

Unnamed: 0,Place_Id_Source,Place_Id_Target,Distance
83897,193,186,50.73
31946,74,120,441.12
8756,21,38,3.9


# **Initialize Important Instances**

In [None]:
# --- Feature vectors -------------------------------------------------------
# user_vector.npy    : transformation result from the user DB
# tourism_vector.npy : transformation result from the tourism DB
user_data_vecs = np.load('../Vector/user_vector.npy')
tourism_data_vecs = np.load('../Vector/tourism_vector.npy')

# Mean that is added back to the collaborative-filtering predictions
Y_mean = np.load('../Vector/Y_mean.npy')

# --- Fitted scalers --------------------------------------------------------
user_scaler = joblib.load('../Scaler/user_scaler.gz')
tourism_scaler = joblib.load('../Scaler/tourism_scaler.gz')
target_scaler = joblib.load('../Scaler/target_scaler.gz')

# --- Models ----------------------------------------------------------------
# Both models contain custom classes, so they must be supplied when loading.
# Collaborative filtering model
cofi_model = tf.keras.models.load_model(
    '../Model/collaborative_filtering.h5',
    custom_objects={
        'CollaborativeFilteringModel': CollaborativeFilteringModel,
        'CollaborativeFilteringLayer': CollaborativeFilteringLayer,
    },
)

# Content-based model
cb_model = tf.keras.models.load_model(
    '../Model/content_based.h5',
    custom_objects={'L2NormalizeLayer': L2NormalizeLayer},
)

# **Class HybridRecommendation**

In [8]:
class HybridRecommendation:
    def __init__(
            self,
            cofi_model: tf.keras.Model,
            cb_model: tf.keras.Model,
            user_scaler: sklearn.base.BaseEstimator,
            tourism_scaler: sklearn.base.BaseEstimator,
            target_scaler: sklearn.base.BaseEstimator,
            Y_mean: np.ndarray
    ):
        self.cofi_model = cofi_model
        self.cb_model = cb_model
        self.user_scaler = user_scaler
        self.tourism_scaler = tourism_scaler
        self.target_scaler = target_scaler
        self.Y_mean = Y_mean
    
    def __hybrid(
            self,
            user_id: int,
            current_user_data: np.ndarray,
            tourism_data_vecs: np.ndarray,
    ) -> dict:
        # COLLABORATIVE FILTERING
        # Prepare user data
        user_id = tf.constant(user_id, dtype=tf.int32)
        # Make prediction
        cofi_y_pred = self.cofi_model(user_id)
        # Convert to Numpy and restore the mean
        cofi_y_pred = cofi_y_pred.numpy() + self.Y_mean
        # Get the index of the predicted result sequence
        sorted_cofi_index = np.argsort(cofi_y_pred, axis=0).reshape(-1)
        sorted_cofi_y_pred = cofi_y_pred[sorted_cofi_index].reshape(-1)
        # Create cofi_point, where the highest recommended tourist destination ID will get the highest points. 
        cofi_points = dict()
        for point, idx in enumerate(sorted_cofi_index):
            cofi_points[int(idx)] = {
                'point': point,
                'rating': cofi_y_pred[int(idx)].item()
            }

        # CONTENT-BASED
        # Prepare user data
        current_user_vecs = np.tile(current_user_data, (tourism_data_vecs.shape[0], 1))
        scaled_current_user_vecs = self.user_scaler.transform(current_user_vecs)
        # Prepare Tourism Data
        scaled_tourism_vecs = self.tourism_scaler.transform(tourism_data_vecs)
        # Make prediction
        cb_y_pred_norm = self.cb_model.predict([scaled_current_user_vecs, scaled_tourism_vecs])
        cb_y_pred = self.target_scaler.inverse_transform(cb_y_pred_norm)
        # Get the index of the predicted result sequence
        sorted_cb_index = np.argsort(cb_y_pred_norm, axis=0).reshape(-1)
        # Create cb_points, where the highest recommended tourist destination ID will get the highest points. 
        cb_points = dict()        
        for point, idx in enumerate(sorted_cb_index):
            cb_points[int(idx)] = {
                'point': point,
                'rating': cb_y_pred[int(idx)].item()
            }

        # HYBRID WEIGHT
        # Calculate the total point value based on collaborative and content-based weighting
        final_points = dict()
        for tourism_idx in range(tourism_data_vecs.shape[0]):
            final_points[tourism_idx] = {
                'point': cofi_points[tourism_idx]['point'] * 0.5 + cb_points[tourism_idx]['point'] * 0.5,
                'cofi_rating': cofi_points[tourism_idx]['rating'],
                'cb_rating': cb_points[tourism_idx]['rating'],
            }

        # Return tourism ID from most recommended to least recommended
        return {key + 1: value for key, value in final_points.items()}
    
    def get_recommendation(
            self,
            user_id: int,
            current_user_data: np.ndarray,
            tourism_data_vecs: np.ndarray,
            choosen_spot_id: int | None = None,
            distance_from_chosen_spot_df: pd.DataFrame | None = None
    ) -> dict | list:
        
        # Get hybrid recommendation
        recommended_spots = self.__hybrid(user_id, current_user_data, tourism_data_vecs)

        # Get recommendation based on spot distance
        if choosen_spot_id is not None and distance_from_chosen_spot_df is not None:
            sorted_distance_from_chosen_spot_df = distance_from_chosen_spot_df.\
                sort_values(by='Distance', ascending=False)
            
            distance_points = dict()
            for point, idx in enumerate(sorted_distance_from_chosen_spot_df['Place_Id_Target']):
                distance_points[idx] = {
                    'point': point,
                    'distance': sorted_distance_from_chosen_spot_df.\
                        loc[sorted_distance_from_chosen_spot_df['Place_Id_Target'] == idx, 'Distance'].values
                }
        
            final_points_with_distance = dict()
            for tourism_idx in [i for i in range(1, tourism_data_vecs.shape[0] + 1) if i != choosen_spot_id]:
                final_points_with_distance[tourism_idx] = {
                    'point': recommended_spots[tourism_idx]['point'] * 0.55 + distance_points[tourism_idx]['point'] * 0.45,
                    'cofi_rating': recommended_spots[tourism_idx]['cofi_rating'],
                    'cb_rating': recommended_spots[tourism_idx]['cb_rating'],
                    'distance': distance_points[tourism_idx]['distance'].item()
                }
            
            # Sorts the dictionary by point value from largest to smallest
            sorted_final_points_with_distance = dict(sorted(final_points_with_distance.items(),
                                                            key=lambda x: x[1]['point'], reverse=True))
                
            # Return dictionary, with key = Place_Id and value = Distance, from most recommended
            return {key: value['distance'] for key, value in sorted_final_points_with_distance.items()}
        
        else:
            # Sorts the dictionary by point value from largest to smallest
            sorted_final_points = dict(sorted(recommended_spots.items(), key=lambda x: x[1]['point'], reverse=True))

            # Returns a list of Place_Ids from most recommended
            return list(sorted_final_points.keys())

# **Examples**

In [9]:
# Build the recommender from the models, scalers and mean loaded above
hybrid_recom = HybridRecommendation(
    cofi_model=cofi_model,
    cb_model=cb_model,
    user_scaler=user_scaler,
    tourism_scaler=tourism_scaler,
    target_scaler=target_scaler,
    Y_mean=Y_mean,
)

## **Normal Hybrid Recommendation**

In [10]:
# Choose the user and look up that user's feature vector
user_id = 120
current_user_data = user_data_vecs[user_id]

# Plain hybrid recommendation: no chosen spot / distance table is passed
recommended_id = hybrid_recom.get_recommendation(
    user_id=user_id,
    current_user_data=current_user_data,
    tourism_data_vecs=tourism_data_vecs,
)

# Fetch the recommended places from tourism_df, ordered from most to least
# recommended (slice the result to keep only as many rows as you need)
result_1 = (
    tourism_df[tourism_df['Place_Id'].isin(recommended_id)]
    .set_index('Place_Id')
    .reindex(recommended_id)
    .reset_index()
)
result_1

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step


Unnamed: 0,Place_Id,Place_Name,Category,Description,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Img_Path_0,Img_Path_1,Img_Path_2
0,97,Monumen Yogya Kembali,"Budaya, Situs Sejarah",Museum Monumen Yogya Kembali (bahasa Jawa: ꦩꦺꦴ...,Yogyakarta,15000,45,30.0,"{'lat': -7.7495904, 'lng': 110.3696068}",-7.749590,110.369607,images_output\Monumen Yogya Kembali\Monumen Yo...,images_output\Monumen Yogya Kembali\Monumen Yo...,
1,115,Monumen Sanapati,"Budaya, Situs Sejarah",Monumen Sanapati dibangun untuk memeringati 50...,Yogyakarta,15000,43,,"{'lat': -7.788172800000001, 'lng': 110.3715403}",-7.788173,110.371540,images_output\Monumen Sanapati\Monumen Sanapat...,images_output\Monumen Sanapati\Monumen Sanapat...,images_output\Monumen Sanapati\Monumen Sanapat...
2,91,Situs Warungboto,"Budaya, Situs Sejarah",Situs Warungboto atau Pesanggrahan Rejawinangu...,Yogyakarta,0,44,60.0,"{'lat': -7.8102685, 'lng': 110.3931513}",-7.810269,110.393151,images_output\Situs Warungboto\Situs Warungbot...,,
3,1,Monumen Nasional,"Budaya, Situs Sejarah",Monumen Nasional atau yang populer disingkat d...,Jakarta,20000,46,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,images_output\Monumen Nasional\Monumen Nasiona...,images_output\Monumen Nasional\Monumen Nasiona...,images_output\Monumen Nasional\Monumen Nasiona...
4,14,Pulau Pelangi,Bahari,Pulau Pelangi adalah sebuah pulau yang terleta...,Jakarta,900000,48,,"{'lat': -5.587055, 'lng': 106.5885}",-5.587055,106.588500,images_output\Pulau Pelangi\Pulau Pelangi_0.jpg,images_output\Pulau Pelangi\Pulau Pelangi_1.jpg,images_output\Pulau Pelangi\Pulau Pelangi_2.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
432,262,Wot Batu,Cagar Alam,Wot Batu adalah sebuah konfigurasi energi yang...,Bandung,50000,47,,"{'lat': -6.8597013, 'lng': 107.6360983}",-6.859701,107.636098,images_output\Wot Batu\Wot Batu_0.jpg,images_output\Wot Batu\Wot Batu_1.jpg,
433,284,Pasar Baru,Pusat Perbelanjaan,Pasar Baru adalah kawasan perdagangan yang ber...,Bandung,0,43,,"{'lat': -6.9177044, 'lng': 107.6036014}",-6.917704,107.603601,images_output\Pasar Baru\Pasar Baru_0.jpg,images_output\Pasar Baru\Pasar Baru_1.jpg,images_output\Pasar Baru\Pasar Baru_2.jpg
434,81,Mall Thamrin City,Pusat Perbelanjaan,Thamrin City atau Thamrin City Mall merupakan ...,Jakarta,0,44,60.0,"{'lat': -6.1946096, 'lng': 106.817905}",-6.194610,106.817905,images_output\Mall Thamrin City\Mall Thamrin C...,images_output\Mall Thamrin City\Mall Thamrin C...,images_output\Mall Thamrin City\Mall Thamrin C...
435,25,Pasar Tanah Abang,Pusat Perbelanjaan,Pasar Tanah Abang atau Pasar Sabtu dibangun ol...,Jakarta,0,4,45.0,"{'lat': -6.189009599999999, 'lng': 106.8119312}",-6.189010,106.811931,images_output\Pasar Tanah Abang\Pasar Tanah Ab...,images_output\Pasar Tanah Abang\Pasar Tanah Ab...,


## **Distance-Based Hybrid Recommendation**

In [11]:
# Choose the user, the spot the user selected, and the distances from that spot
user_id = 120
choosen_spot_id = 300
current_user_data = user_data_vecs[user_id]
choosen_spot_distance_df = distance_df[distance_df['Place_Id_Source'] == choosen_spot_id]

# Distance-based hybrid recommendation: returns {Place_Id: distance}, best first
recommended_id = hybrid_recom.get_recommendation(
    user_id=user_id,
    current_user_data=current_user_data,
    tourism_data_vecs=tourism_data_vecs,
    choosen_spot_id=choosen_spot_id,
    distance_from_chosen_spot_df=choosen_spot_distance_df,
)

# Fetch the recommended places from tourism_df, ordered from most to least
# recommended, and attach each place's distance as an extra column
# (slice the result to keep only as many rows as you need)
result_2 = (
    tourism_df[tourism_df['Place_Id'].isin(list(recommended_id.keys()))]
    .set_index('Place_Id')
    .reindex(list(recommended_id.keys()))
    .reset_index()
    .assign(Distance=list(recommended_id.values()))
)
result_2

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


Unnamed: 0,Place_Id,Place_Name,Category,Description,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Img_Path_0,Img_Path_1,Img_Path_2,Distance
0,334,Chingu Cafe Little Seoul,"Taman Hiburan, Bahari",Selain populer karena memiliki pemandangan yan...,Bandung,50000,45,,"{'lat': -6.901224099999999, 'lng': 107.6099853}",-6.901224,107.609985,images_output\Chingu Cafe Little Seoul\Chingu ...,images_output\Chingu Cafe Little Seoul\Chingu ...,,29.69
1,285,Taman Sejarah Bandung,"Budaya, Situs Sejarah",Taman Sejarah Bandung adalah taman yang terlet...,Bandung,0,46,45.0,"{'lat': -6.9103086, 'lng': 107.6098619}",-6.910309,107.609862,images_output\Taman Sejarah Bandung\Taman Seja...,images_output\Taman Sejarah Bandung\Taman Seja...,,29.78
2,277,Peta Park,"Taman Hiburan, Bahari",Bandung seperti tidak henti menawarkan tempat ...,Bandung,15000,43,,"{'lat': -6.9317951, 'lng': 107.5883026}",-6.931795,107.588303,images_output\Peta Park\Peta Park_0.jpg,images_output\Peta Park\Peta Park_1.jpg,images_output\Peta Park\Peta Park_2.jpg,27.85
3,290,Museum Pendidikan Nasional,"Budaya, Taman Hiburan",Museum Pendidikan Nasional Universitas Pendidi...,Bandung,5000,46,,"{'lat': -6.859747400000001, 'lng': 107.5941693}",-6.859747,107.594169,images_output\Museum Pendidikan Nasional\Museu...,images_output\Museum Pendidikan Nasional\Museu...,images_output\Museum Pendidikan Nasional\Museu...,27.88
4,229,Rabbit Town,"Taman Hiburan, Bahari",Rabbit Town Bandung merupakan salah satu wisat...,Bandung,45000,4,90.0,"{'lat': -6.8669518, 'lng': 107.6100859}",-6.866952,107.610086,images_output\Rabbit Town\Rabbit Town_0.jpg,,,29.59
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
431,408,Kebun Binatang Surabaya,Cagar Alam,Kebun Binatang Surabaya (KBS) (Dialek Arekan: ...,Surabaya,15000,43,180.0,"{'lat': -7.295954599999999, 'lng': 112.7366094}",-7.295955,112.736609,images_output\Kebun Binatang Surabaya\Kebun Bi...,,,597.07
432,417,Klenteng Sanggar Agung,"Tempat Ibadah, Budaya",Kelenteng Sanggar Agung atau Klenteng Hong San...,Surabaya,10000,45,,"{'lat': -7.246944, 'lng': 112.802222}",-7.246944,112.802222,images_output\Klenteng Sanggar Agung\Klenteng ...,images_output\Klenteng Sanggar Agung\Klenteng ...,,603.92
433,163,Watu Mabur Mangunan,Cagar Alam,Kawasan Tebing Watu Mabur ini terbilang belum ...,Yogyakarta,2500,45,60.0,"{'lat': -7.947121, 'lng': 110.441}",-7.947121,110.441000,images_output\Watu Mabur Mangunan\Watu Mabur M...,images_output\Watu Mabur Mangunan\Watu Mabur M...,,361.83
434,411,Masjid Muhammad Cheng Hoo,Tempat Ibadah,Masjid Cheng Hoo Surabaya adalah Masjid bernua...,Surabaya,0,47,,"{'lat': -7.252275399999999, 'lng': 112.7468796}",-7.252275,112.746880,images_output\Masjid Muhammad Cheng Hoo\Masjid...,images_output\Masjid Muhammad Cheng Hoo\Masjid...,images_output\Masjid Muhammad Cheng Hoo\Masjid...,597.87
