In [1]:
import pandas as pd
import numpy as np
from zipfile import ZipFile
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
rating = pd.read_csv('./data/tourism_rating.csv')
destination = pd.read_csv('./data/tourism_with_id.csv')
user = pd.read_csv('./data/user.csv')

<h2> PRE-PROCESSING

<h4> DATA DESTINATION

In [3]:
destination.head()

Unnamed: 0,IdWisata,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Unnamed: 11,Unnamed: 12
0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,,1
1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,,2
2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,,3
3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,,4
4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,,5


In [4]:
destination = destination.drop(['Unnamed: 11','Unnamed: 12', 'Time_Minutes'], axis=1)
destination.head(2)

Unnamed: 0,IdWisata,Place_Name,Description,Category,City,Price,Rating,Coordinate,Lat,Long
0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153
1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125


In [5]:
destination_jogja = destination[destination['City'] == 'Yogyakarta']
destination_jogja.head()

Unnamed: 0,IdWisata,Place_Name,Description,Category,City,Price,Rating,Coordinate,Lat,Long
84,85,Taman Pintar Yogyakarta,Taman Pintar Yogyakarta (bahasa Jawa: Hanacara...,Taman Hiburan,Yogyakarta,6000,4.5,"{'lat': -7.800671500000001, 'lng': 110.3676551}",-7.800671,110.367655
85,86,Keraton Yogyakarta,Keraton Ngayogyakarta Hadiningrat atau Keraton...,Budaya,Yogyakarta,15000,4.6,"{'lat': -7.8052845, 'lng': 110.3642031}",-7.805284,110.364203
86,87,Sindu Kusuma Edupark (SKE),Sindu Kusuma Edupark (SKE) merupakan sebuah de...,Taman Hiburan,Yogyakarta,20000,4.2,"{'lat': -7.767297300000001, 'lng': 110.3542486}",-7.767297,110.354249
87,88,Museum Benteng Vredeburg Yogyakarta,Museum Benteng Vredeburg (bahasa Jawa: ꦩꦸꦱꦶꦪꦸꦩ...,Budaya,Yogyakarta,3000,4.6,"{'lat': -7.800201599999999, 'lng': 110.3663044}",-7.800202,110.366304
88,89,De Mata Museum Jogja,Museum De Mata merupakan salah satu museum yan...,Budaya,Yogyakarta,50000,4.4,"{'lat': -7.816315599999999, 'lng': 110.3871442}",-7.816316,110.387144


In [6]:
# reset index 

destination_jogja = destination_jogja.reset_index(drop=True)
destination_jogja['IdWisata'] = destination_jogja.index + 1
destination_jogja.head()

Unnamed: 0,IdWisata,Place_Name,Description,Category,City,Price,Rating,Coordinate,Lat,Long
0,1,Taman Pintar Yogyakarta,Taman Pintar Yogyakarta (bahasa Jawa: Hanacara...,Taman Hiburan,Yogyakarta,6000,4.5,"{'lat': -7.800671500000001, 'lng': 110.3676551}",-7.800671,110.367655
1,2,Keraton Yogyakarta,Keraton Ngayogyakarta Hadiningrat atau Keraton...,Budaya,Yogyakarta,15000,4.6,"{'lat': -7.8052845, 'lng': 110.3642031}",-7.805284,110.364203
2,3,Sindu Kusuma Edupark (SKE),Sindu Kusuma Edupark (SKE) merupakan sebuah de...,Taman Hiburan,Yogyakarta,20000,4.2,"{'lat': -7.767297300000001, 'lng': 110.3542486}",-7.767297,110.354249
3,4,Museum Benteng Vredeburg Yogyakarta,Museum Benteng Vredeburg (bahasa Jawa: ꦩꦸꦱꦶꦪꦸꦩ...,Budaya,Yogyakarta,3000,4.6,"{'lat': -7.800201599999999, 'lng': 110.3663044}",-7.800202,110.366304
4,5,De Mata Museum Jogja,Museum De Mata merupakan salah satu museum yan...,Budaya,Yogyakarta,50000,4.4,"{'lat': -7.816315599999999, 'lng': 110.3871442}",-7.816316,110.387144


In [7]:
destination_jogja.tail()

Unnamed: 0,IdWisata,Place_Name,Description,Category,City,Price,Rating,Coordinate,Lat,Long
121,122,Wisata Kaliurang,"Jogja selalu menarik untuk dikulik, terlebih t...",Cagar Alam,Yogyakarta,8000,4.4,"{'lat': -7.6120675, 'lng': 110.4205209}",-7.612068,110.420521
122,123,Heha Sky View,HeHa Sky View adalah salah satu tempat wisata ...,Taman Hiburan,Yogyakarta,15000,4.4,"{'lat': -7.8496144, 'lng': 110.478324}",-7.849614,110.478324
123,124,Taman Sungai Mudal,"Taman Sungai Mudal, sebuah objek wisata alam t...",Cagar Alam,Yogyakarta,10000,4.6,"{'lat': -7.762813599999998, 'lng': 110.1161626}",-7.762814,110.116163
124,125,Pantai Sanglen,Pantai Sanglen. Lokasinya berada di Desa Kemad...,Bahari,Yogyakarta,10000,4.5,"{'lat': -8.1367456, 'lng': 110.5716362}",-8.136746,110.571636
125,126,Pantai Congot,"Selain Pantai Glagah dan Pantai Trisik, ternya...",Bahari,Yogyakarta,3000,4.3,"{'lat': -7.907542500000001, 'lng': 110.0535658}",-7.907542,110.053566


In [8]:
destination_jogja.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 126 entries, 0 to 125
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   IdWisata     126 non-null    int64  
 1   Place_Name   126 non-null    object 
 2   Description  126 non-null    object 
 3   Category     126 non-null    object 
 4   City         126 non-null    object 
 5   Price        126 non-null    int64  
 6   Rating       126 non-null    float64
 7   Coordinate   126 non-null    object 
 8   Lat          126 non-null    float64
 9   Long         126 non-null    float64
dtypes: float64(3), int64(2), object(5)
memory usage: 10.0+ KB


<h4> DATA RATING

In [9]:
rating.head(10)

Unnamed: 0,IdUser,IdWisata,rating
0,1,179,3
1,1,344,2
2,1,5,5
3,1,373,3
4,1,101,4
5,1,312,2
6,1,258,5
7,1,20,4
8,1,154,2
9,1,393,5


In [10]:
id_jogja = destination_jogja['IdWisata']
ratings_jogja = rating[rating["IdWisata"].isin(id_jogja)]
ratings_jogja = ratings_jogja.reset_index(drop=True)
ratings_jogja.head()

Unnamed: 0,IdUser,IdWisata,rating
0,1,5,5
1,1,101,4
2,1,20,4
3,1,103,3
4,1,89,3


In [11]:
ratings_jogja.tail()

Unnamed: 0,IdUser,IdWisata,rating
2872,300,69,1
2873,300,8,1
2874,300,108,5
2875,300,103,5
2876,300,64,4


In [12]:
ratings_jogja.shape

(2877, 3)

<h4> DATA USER

In [13]:
user.head()

Unnamed: 0,IdUser,Location,Age
0,1,"Semarang, Jawa Tengah",20
1,2,"Bekasi, Jawa Barat",21
2,3,"Cirebon, Jawa Barat",23
3,4,"Bekasi, Jawa Barat",21
4,5,"Lampung, Sumatera Selatan",20


In [14]:
user.shape

(301, 3)

In [15]:
id_user = ratings_jogja['IdUser']
user_jogja = user[user['IdUser'].isin(id_user)]
user_jogja = user_jogja.reset_index(drop=True)
user_jogja.head()

Unnamed: 0,IdUser,Location,Age
0,1,"Semarang, Jawa Tengah",20
1,2,"Bekasi, Jawa Barat",21
2,3,"Cirebon, Jawa Barat",23
3,4,"Bekasi, Jawa Barat",21
4,5,"Lampung, Sumatera Selatan",20


In [16]:
user_jogja.shape

(300, 3)

<h1> PREPARE DATA FOR MODELLING

In [17]:
# Merge with destination_jogja to get category information
df = ratings_jogja.merge(pd.DataFrame(destination_jogja), on='IdWisata')

# Initialize dictionary to store aggregated data
aggregated_data = {}

# Iterate through each row and aggregate ratings
for row in df.itertuples(index=False):
    IdUser = row.IdUser
    category = row.Category
    rating = row.rating
    
    if IdUser not in aggregated_data:
        aggregated_data[IdUser] = {
            'average_category_Bahari': 0.0,
            'average_category_Budaya': 0.0,
            'average_category_Cagar Alam': 0.0,
            'average_category_Pusat Perbelanjaan': 0.0,
            'average_category_Taman Hiburan': 0.0,
            'number_of_ratings_Bahari': 0,
            'number_of_ratings_Budaya': 0,
            'number_of_ratings_Cagar Alam': 0,
            'number_of_ratings_Pusat Perbelanjaan': 0,
            'number_of_ratings_Taman Hiburan': 0,
            'Average_All_Ratings': 0.0
        }
    
    aggregated_data[IdUser]['IdUser'] = IdUser
    aggregated_data[IdUser]['average_category_' + category] += rating
    aggregated_data[IdUser]['number_of_ratings_' + category] += 1
    aggregated_data[IdUser]['Average_All_Ratings'] += rating

# Calculate average ratings and overall average
for IdUser, data in aggregated_data.items():
    for category in ['Bahari', 'Budaya', 'Cagar Alam', 'Pusat Perbelanjaan', 'Taman Hiburan']:
        count = data['number_of_ratings_' + category]
        if count > 0:
            data['average_category_' + category] /= count

    total_ratings = sum(data['number_of_ratings_' + category] for category in ['Bahari', 'Budaya', 'Cagar Alam', 'Pusat Perbelanjaan', 'Taman Hiburan'])
    data['Average_All_Ratings'] /= total_ratings

# Convert aggregated data to DataFrame
pivoted_data = pd.DataFrame.from_dict(aggregated_data, orient='index')

# Reorder columns
columns_order = ['IdUser', 'average_category_Bahari', 'average_category_Budaya', 'average_category_Cagar Alam',
                 'average_category_Pusat Perbelanjaan', 'average_category_Taman Hiburan',
                 'number_of_ratings_Bahari', 'number_of_ratings_Budaya', 'number_of_ratings_Cagar Alam',
                 'number_of_ratings_Pusat Perbelanjaan', 'number_of_ratings_Taman Hiburan', 'Average_All_Ratings']

pivoted_data = pivoted_data[columns_order]

# Display the resulting DataFrame
pivoted_data

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,number_of_ratings_Taman Hiburan,Average_All_Ratings
1,1,3.333333,3.80,2.0,2.0,4.000000,3,5,1,1,1,3.363636
4,4,2.333333,4.00,3.0,0.0,3.500000,3,4,1,0,4,3.333333
11,11,3.333333,3.75,2.0,0.0,3.333333,3,4,1,0,3,3.363636
12,12,4.250000,5.00,4.0,0.0,3.666667,4,1,4,0,6,4.000000
35,35,3.000000,3.00,4.0,0.0,0.000000,2,2,2,0,0,3.333333
...,...,...,...,...,...,...,...,...,...,...,...,...
90,90,1.000000,4.00,2.0,0.0,2.250000,1,1,2,0,4,2.250000
209,209,2.800000,4.00,0.0,0.0,0.000000,5,1,0,0,0,3.000000
191,191,4.000000,5.00,2.0,0.0,2.500000,1,1,1,0,2,3.200000
161,161,4.000000,0.00,1.5,0.0,3.857143,1,0,2,0,7,3.400000


In [18]:
pivoted_data

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,number_of_ratings_Taman Hiburan,Average_All_Ratings
1,1,3.333333,3.80,2.0,2.0,4.000000,3,5,1,1,1,3.363636
4,4,2.333333,4.00,3.0,0.0,3.500000,3,4,1,0,4,3.333333
11,11,3.333333,3.75,2.0,0.0,3.333333,3,4,1,0,3,3.363636
12,12,4.250000,5.00,4.0,0.0,3.666667,4,1,4,0,6,4.000000
35,35,3.000000,3.00,4.0,0.0,0.000000,2,2,2,0,0,3.333333
...,...,...,...,...,...,...,...,...,...,...,...,...
90,90,1.000000,4.00,2.0,0.0,2.250000,1,1,2,0,4,2.250000
209,209,2.800000,4.00,0.0,0.0,0.000000,5,1,0,0,0,3.000000
191,191,4.000000,5.00,2.0,0.0,2.500000,1,1,1,0,2,3.200000
161,161,4.000000,0.00,1.5,0.0,3.857143,1,0,2,0,7,3.400000


In [19]:
destination_fix = destination_jogja[["IdWisata", "Place_Name", "Category", "Price", "Rating"]]

# Perform one-hot encoding on the "Category" column
category_encoded = pd.get_dummies(destination_fix["Category"], prefix="Category")

# Concatenate the one-hot encoded categories with the selected columns
destination_fix = pd.concat([destination_fix[["IdWisata", "Place_Name", "Price", "Rating"]], category_encoded], axis=1)
destination_fix

Unnamed: 0,IdWisata,Place_Name,Price,Rating,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan
0,1,Taman Pintar Yogyakarta,6000,4.5,0,0,0,0,1
1,2,Keraton Yogyakarta,15000,4.6,0,1,0,0,0
2,3,Sindu Kusuma Edupark (SKE),20000,4.2,0,0,0,0,1
3,4,Museum Benteng Vredeburg Yogyakarta,3000,4.6,0,1,0,0,0
4,5,De Mata Museum Jogja,50000,4.4,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...
121,122,Wisata Kaliurang,8000,4.4,0,0,1,0,0
122,123,Heha Sky View,15000,4.4,0,0,0,0,1
123,124,Taman Sungai Mudal,10000,4.6,0,0,1,0,0
124,125,Pantai Sanglen,10000,4.5,1,0,0,0,0


In [20]:
merged_df = pd.merge(pivoted_data, ratings_jogja, on='IdUser', how='left')

# Merge the destination_fix dataset with the merged pivoted_data and df2 datasets based on 'IdWisata'
final_df = pd.merge(merged_df, destination_fix, on='IdWisata', how='left')
final_df

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,...,IdWisata,rating,Place_Name,Price,Rating,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan
0,1,3.333333,3.8,2.0,2.0,4.0,3,5,1,1,...,5,5,De Mata Museum Jogja,50000,4.4,0,1,0,0,0
1,1,3.333333,3.8,2.0,2.0,4.0,3,5,1,1,...,101,4,Pantai Krakal,10000,4.5,1,0,0,0,0
2,1,3.333333,3.8,2.0,2.0,4.0,3,5,1,1,...,20,4,Tebing Breksi,20000,4.4,0,1,0,0,0
3,1,3.333333,3.8,2.0,2.0,4.0,3,5,1,1,...,103,3,Pantai Siung,10000,4.6,1,0,0,0,0
4,1,3.333333,3.8,2.0,2.0,4.0,3,5,1,1,...,89,3,Pantai Nglambor,10000,4.4,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2872,199,3.000000,3.5,4.5,0.0,1.0,2,2,2,0,...,93,3,Pantai Parangtritis,10000,4.5,1,0,0,0,0
2873,199,3.000000,3.5,4.5,0.0,1.0,2,2,2,0,...,38,3,Watu Goyang,2500,4.4,0,1,0,0,0
2874,199,3.000000,3.5,4.5,0.0,1.0,2,2,2,0,...,73,3,Pantai Baron,10000,4.4,1,0,0,0,0
2875,199,3.000000,3.5,4.5,0.0,1.0,2,2,2,0,...,87,4,Candi Ijo,5000,4.6,0,1,0,0,0


In [21]:
full_df = final_df.sample(frac=1).reset_index(drop=True)
full_df

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,...,IdWisata,rating,Place_Name,Price,Rating,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan
0,18,3.500000,0.000000,5.000000,0.0,3.800000,6,0,2,0,...,102,3,Pantai Glagah,5000,4.4,1,0,0,0,0
1,113,3.200000,2.833333,3.000000,0.0,2.666667,5,6,2,0,...,46,1,Studio Alam Gamplong,10000,4.4,0,0,0,0,1
2,247,3.200000,3.857143,1.800000,0.0,2.000000,5,7,5,0,...,86,4,Candi Borobudur,50000,4.7,0,1,0,0,0
3,106,0.000000,3.250000,3.000000,0.0,3.666667,0,4,2,0,...,54,5,Jogja Exotarium,20000,4.4,0,0,0,0,1
4,205,0.000000,2.000000,3.500000,0.0,2.500000,0,1,2,0,...,82,3,Kebun Teh Nglinggo,6000,4.5,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2872,141,2.000000,5.000000,3.000000,5.0,4.000000,1,2,1,1,...,95,5,Candi Ratu Boko,75000,4.6,0,1,0,0,0
2873,201,3.000000,2.000000,3.000000,0.0,3.333333,3,1,3,0,...,57,4,Bunker Kaliadem Merapi,3000,4.5,0,0,1,0,0
2874,158,2.500000,0.000000,2.666667,0.0,3.666667,2,0,3,0,...,22,1,Kawasan Malioboro,0,4.8,0,0,0,0,1
2875,194,2.666667,3.750000,4.000000,2.0,3.000000,3,4,1,1,...,11,1,Desa Wisata Sungai Code Jogja Kota,0,5.0,0,0,0,0,1


In [22]:
user_full = full_df[['IdUser', 'average_category_Bahari',
       'average_category_Budaya', 'average_category_Cagar Alam',
       'average_category_Pusat Perbelanjaan', 'average_category_Taman Hiburan',
       'number_of_ratings_Bahari', 'number_of_ratings_Budaya',
       'number_of_ratings_Cagar Alam', 'number_of_ratings_Pusat Perbelanjaan',
       'number_of_ratings_Taman Hiburan', 'Average_All_Ratings']]
user_full

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,number_of_ratings_Taman Hiburan,Average_All_Ratings
0,18,3.500000,0.000000,5.000000,0.0,3.800000,6,0,2,0,5,3.846154
1,113,3.200000,2.833333,3.000000,0.0,2.666667,5,6,2,0,3,2.937500
2,247,3.200000,3.857143,1.800000,0.0,2.000000,5,7,5,0,4,2.857143
3,106,0.000000,3.250000,3.000000,0.0,3.666667,0,4,2,0,3,3.333333
4,205,0.000000,2.000000,3.500000,0.0,2.500000,0,1,2,0,2,2.800000
...,...,...,...,...,...,...,...,...,...,...,...,...
2872,141,2.000000,5.000000,3.000000,5.0,4.000000,1,2,1,1,2,4.000000
2873,201,3.000000,2.000000,3.000000,0.0,3.333333,3,1,3,0,3,3.000000
2874,158,2.500000,0.000000,2.666667,0.0,3.666667,2,0,3,0,3,3.000000
2875,194,2.666667,3.750000,4.000000,2.0,3.000000,3,4,1,1,2,3.181818


In [23]:
item_full = full_df[['IdWisata', 'Price', 'Rating', 'Category_Bahari', 'Category_Budaya', 'Category_Cagar Alam', 
                      'Category_Pusat Perbelanjaan', 'Category_Taman Hiburan']]
item_full

Unnamed: 0,IdWisata,Price,Rating,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan
0,102,5000,4.4,1,0,0,0,0
1,46,10000,4.4,0,0,0,0,1
2,86,50000,4.7,0,1,0,0,0
3,54,20000,4.4,0,0,0,0,1
4,82,6000,4.5,0,0,1,0,0
...,...,...,...,...,...,...,...,...
2872,95,75000,4.6,0,1,0,0,0
2873,57,3000,4.5,0,0,1,0,0
2874,22,0,4.8,0,0,0,0,1
2875,11,0,5.0,0,0,0,0,1


In [24]:
y_full = full_df[['rating']]
y_full

Unnamed: 0,rating
0,3
1,1
2,4
3,5
4,3
...,...
2872,5
2873,4
2874,1
2875,1


In [25]:
item_full_unscaled = item_full
user_full_unscaled = user_full
y_full_unscaled    = y_full

scalerItem = StandardScaler()
scalerItem.fit(item_full)
item_full = scalerItem.transform(item_full)

scalerUser = StandardScaler()
scalerUser.fit(user_full)
user_full = scalerUser.transform(user_full)

scalerTarget = MinMaxScaler((-1, 1))
scalerTarget.fit(y_full.to_numpy().reshape(-1, 1))
y_full = scalerTarget.transform(y_full.to_numpy().reshape(-1, 1))
#ynorm_test = scalerTarget.transform(y_test.reshape(-1, 1))

print(np.allclose(item_full_unscaled, scalerItem.inverse_transform(item_full)))
print(np.allclose(user_full_unscaled, scalerUser.inverse_transform(user_full)))

True
True


In [26]:
total_rows = len(y_full)
train_rows = int(0.85 * total_rows)
test_rows = total_rows - train_rows

In [27]:
user_train = user_full[:train_rows]
user_test = user_full[train_rows:]

item_train = item_full[:train_rows]
item_test = item_full[train_rows:]

y_train = y_full[:train_rows]
y_test = y_full[train_rows:]
print(f"item training data shape: {item_train.shape}")
print(f"item test data shape: {item_test.shape}")

print(f"user training data shape: {user_train.shape}")
print(f"user test data shape: {user_test.shape}")

num_user_features = user_train.shape[1] - 1
num_item_features = item_train.shape[1] - 1

print(f"user test data shape: {num_user_features}")
print(f"item test data shape: {num_item_features}")


item training data shape: (2445, 8)
item test data shape: (432, 8)
user training data shape: (2445, 12)
user test data shape: (432, 12)
user test data shape: 11
item test data shape: 7


<h3>MODELLING

In [28]:
num_outputs = 16
tf.random.set_seed(1)
user_NN = tf.keras.models.Sequential([    
    tf.keras.layers.Dense(32, activation='relu', name = 'layer1'),
    tf.keras.layers.Dense(16, activation='relu', name = 'layer2'),
    tf.keras.layers.Dense(num_outputs, activation='linear', name = 'layer3')
])

item_NN = tf.keras.models.Sequential([   
    tf.keras.layers.Dense(32, activation='relu', name = 'layer1'),
    tf.keras.layers.Dense(16, activation='relu', name = 'layer2'),
    tf.keras.layers.Dense(num_outputs, activation='linear', name = 'layer3')
])

# create the user input and point to the base network
input_user = tf.keras.layers.Input(shape=(num_user_features))
vu = user_NN(input_user)
vu = tf.linalg.l2_normalize(vu, axis=1)

# create the item input and point to the base network
input_item = tf.keras.layers.Input(shape=(num_item_features))
vm = item_NN(input_item)
vm = tf.linalg.l2_normalize(vm, axis=1)

# compute the dot product of the two vectors vu and vm
output = tf.keras.layers.Dot(axes=1)([vu, vm])

# specify the inputs and output of the model
model = tf.keras.Model([input_user, input_item], output)

model.summary()

2023-06-12 02:09:08.456800: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-06-12 02:09:08.456866: E tensorflow/stream_executor/cuda/cuda_driver.cc:313] failed call to cuInit: UNKNOWN ERROR (303)
2023-06-12 02:09:08.456924: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (abdulhafidh): /proc/driver/nvidia/version does not exist
2023-06-12 02:09:08.457452: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2023-06-12 02:09:08.473369: I tensorflow/core/platform/profile_utils/cpu_utils.cc:102] CPU Frequency: 1599960000 Hz
2023-06-12 02:09:08.474904: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f0ca8000b70 initialized for platform Host (this does not guarantee tha

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 11)]         0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 7)]          0                                            
__________________________________________________________________________________________________
sequential (Sequential)         (None, 16)           1184        input_1[0][0]                    
__________________________________________________________________________________________________
sequential_1 (Sequential)       (None, 16)           1056        input_2[0][0]                    
______________________________________________________________________________________________

In [29]:
tf.random.set_seed(1)
cost_fn = tf.keras.losses.MeanSquaredError()
opt = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=opt,
              loss=cost_fn)
# model.compile(loss = tf.keras.losses.MeanSquaredError(),
#     optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.002),
#     metrics=[tf.keras.metrics.RootMeanSquaredError()]
#     )

In [30]:
tf.random.set_seed(1)
model.fit([user_train[:, 1:], item_train[:,1:]], y_train, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f0d4f0d4a10>

In [31]:
model.evaluate([user_test[:, 1:], item_test[:, 1:]], y_test)



0.38275960087776184

<h2> COBA_COBA PREDICT

In [32]:
data = {
    'IdUser': [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400],
    'IdWisata': [154, 209, 113, 149, 210, 85, 87, 207, 152, 153, 95, 103, 115, 118, 100, 179, 105],
    'rating': [3, 4, 2, 3, 4, 5, 4, 5, 4, 5, 4, 4, 5, 5, 5, 5, 5]
}

df_pred = pd.DataFrame(data)
print(df_pred)

    IdUser  IdWisata  rating
0      400       154       3
1      400       209       4
2      400       113       2
3      400       149       3
4      400       210       4
5      400        85       5
6      400        87       4
7      400       207       5
8      400       152       4
9      400       153       5
10     400        95       4
11     400       103       4
12     400       115       5
13     400       118       5
14     400       100       5
15     400       179       5
16     400       105       5


In [33]:
def calculate_user_ratings(ratings_jogja, destination_jogja=destination_jogja):
    # Merge with destination_jogja to get category information
    df = ratings_jogja.merge(pd.DataFrame(destination_jogja), on='IdWisata')

    # Initialize dictionary to store aggregated data
    aggregated_data = {}

    # Iterate through each row and aggregate ratings
    for row in df.itertuples(index=False):
        IdUser = row.IdUser
        category = row.Category
        rating = row.rating

        if IdUser not in aggregated_data:
            aggregated_data[IdUser] = {
                'average_category_Bahari': 0.0,
                'average_category_Budaya': 0.0,
                'average_category_Cagar Alam': 0.0,
                'average_category_Pusat Perbelanjaan': 0.0,
                'average_category_Taman Hiburan': 0.0,
                'number_of_ratings_Bahari': 0,
                'number_of_ratings_Budaya': 0,
                'number_of_ratings_Cagar Alam': 0,
                'number_of_ratings_Pusat Perbelanjaan': 0,
                'number_of_ratings_Taman Hiburan': 0,
                'Average_All_Ratings': 0.0
            }

        aggregated_data[IdUser]['IdUser'] = IdUser
        aggregated_data[IdUser]['average_category_' + category] += rating
        aggregated_data[IdUser]['number_of_ratings_' + category] += 1
        aggregated_data[IdUser]['Average_All_Ratings'] += rating

    # Calculate average ratings and overall average
    for IdUser, data in aggregated_data.items():
        for category in ['Bahari', 'Budaya', 'Cagar Alam', 'Pusat Perbelanjaan', 'Taman Hiburan']:
            count = data['number_of_ratings_' + category]
            if count > 0:
                data['average_category_' + category] /= count

        total_ratings = sum(data['number_of_ratings_' + category] for category in ['Bahari', 'Budaya', 'Cagar Alam', 'Pusat Perbelanjaan', 'Taman Hiburan'])
        data['Average_All_Ratings'] /= total_ratings

    # Convert aggregated data to DataFrame
    pivoted_data = pd.DataFrame.from_dict(aggregated_data, orient='index')

    # Reorder columns
    columns_order = ['IdUser', 'average_category_Bahari', 'average_category_Budaya', 'average_category_Cagar Alam',
                     'average_category_Pusat Perbelanjaan', 'average_category_Taman Hiburan',
                     'number_of_ratings_Bahari', 'number_of_ratings_Budaya', 'number_of_ratings_Cagar Alam',
                     'number_of_ratings_Pusat Perbelanjaan', 'number_of_ratings_Taman Hiburan', 'Average_All_Ratings']

    pivoted_data = pivoted_data[columns_order]

    # Display the resulting DataFrame
    return pivoted_data

In [34]:
user_vec = calculate_user_ratings(df_pred, destination_jogja=destination_jogja)
user_vec

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,number_of_ratings_Taman Hiburan,Average_All_Ratings
400,400,4.333333,4.0,5.0,0.0,0.0,6,2,1,0,0,4.333333


In [35]:
user_vecs = np.tile(user_vec, (len(destination_fix), 1))
user_vecs

array([[400.        ,   4.33333333,   4.        , ...,   0.        ,
          0.        ,   4.33333333],
       [400.        ,   4.33333333,   4.        , ...,   0.        ,
          0.        ,   4.33333333],
       [400.        ,   4.33333333,   4.        , ...,   0.        ,
          0.        ,   4.33333333],
       ...,
       [400.        ,   4.33333333,   4.        , ...,   0.        ,
          0.        ,   4.33333333],
       [400.        ,   4.33333333,   4.        , ...,   0.        ,
          0.        ,   4.33333333],
       [400.        ,   4.33333333,   4.        , ...,   0.        ,
          0.        ,   4.33333333]])

In [36]:
user_vec = calculate_user_ratings(df_pred, destination_jogja)
user_vec

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,number_of_ratings_Taman Hiburan,Average_All_Ratings
400,400,4.333333,4.0,5.0,0.0,0.0,6,2,1,0,0,4.333333


In [37]:
item_vecs = destination_fix.drop('Place_Name', axis=1)
item_vecs

Unnamed: 0,IdWisata,Price,Rating,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan
0,1,6000,4.5,0,0,0,0,1
1,2,15000,4.6,0,1,0,0,0
2,3,20000,4.2,0,0,0,0,1
3,4,3000,4.6,0,1,0,0,0
4,5,50000,4.4,0,1,0,0,0
...,...,...,...,...,...,...,...,...
121,122,8000,4.4,0,0,1,0,0
122,123,15000,4.4,0,0,0,0,1
123,124,10000,4.6,0,0,1,0,0
124,125,10000,4.5,1,0,0,0,0


In [38]:
# scale our user and item vectors
suser_vecs = scalerUser.transform(user_vecs)
sitem_vecs = scalerItem.transform(item_vecs)


# make a prediction
y_p = model.predict([suser_vecs[:, 1:], sitem_vecs[:, 1:]])
# # unscale y prediction 
y_pu = scalerTarget.inverse_transform(y_p)
y_pu

# sort the results, highest prediction first
sorted_index = np.argsort(-y_pu,axis=0).reshape(-1).tolist()  #negate to get largest rating first
sorted_ypu   = y_pu[sorted_index]
sorted_index = [x+0 for x in sorted_index]
sorted_items = item_vecs.loc[sorted_index]  #using unscaled vectors for display
sorted_items
# print_pred_movies(sorted_ypu, sorted_items, movie_dict, maxcount = 10)

  "X does not have valid feature names, but"


Unnamed: 0,IdWisata,Price,Rating,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan
59,60,500000,4.6,0,0,1,0,0
121,122,8000,4.4,0,0,1,0,0
107,108,15000,4.4,0,0,1,0,0
64,65,3000,4.4,0,0,1,0,0
79,80,2500,4.4,0,0,1,0,0
...,...,...,...,...,...,...,...,...
11,12,0,4.6,0,0,0,0,1
10,11,0,5.0,0,0,0,0,1
18,19,0,4.7,0,0,0,0,1
7,8,0,4.7,0,0,0,0,1


In [39]:
def print_prediction(sorted_items, sorted_ypu, num_recommend, destination_jogja=destination_jogja):
    res = pd.merge(sorted_items[['IdWisata']], destination_jogja[["IdWisata", "Place_Name", "Category", "Rating"]], on="IdWisata")[:num_recommend]
    res["Predict_Rating"] = sorted_ypu[:num_recommend]
    return res

In [40]:
print_prediction(sorted_items, sorted_ypu, 40, destination_jogja=destination_jogja)

Unnamed: 0,IdWisata,Place_Name,Category,Rating,Predict_Rating
0,60,Goa Jomblang,Cagar Alam,4.6,4.619604
1,122,Wisata Kaliurang,Cagar Alam,4.4,4.513032
2,108,Air Terjun Sri Gethuk,Cagar Alam,4.4,4.512347
3,65,Goa Cerme,Cagar Alam,4.4,4.51089
4,80,Pintoe Langit Dahromo,Cagar Alam,4.4,4.510578
5,53,Wisata Alam Kalibiru,Cagar Alam,4.4,4.510474
6,47,Watu Lumbung,Cagar Alam,4.3,4.468362
7,64,Goa Rancang Kencono,Cagar Alam,4.3,4.468361
8,94,Goa Pindul,Cagar Alam,4.5,4.402765
9,9,Gembira Loka Zoo,Cagar Alam,4.5,4.399655


# Convert Model To TFLITE

In [41]:
# convert model to tflite

# Convert the model to the TensorFlow Lite format with quantization

converter = tf.lite.TFLiteConverter.from_keras_model(model)

converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]

tflite_model = converter.convert()

# Save the model to disk

open("Recommender_Model.tflite", "wb").write(tflite_model)




2023-06-12 02:09:18.099988: I tensorflow/core/grappler/devices.cc:55] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2023-06-12 02:09:18.100307: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2023-06-12 02:09:18.105765: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:797] Optimization results for grappler item: graph_to_optimize
2023-06-12 02:09:18.105825: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   function_optimizer: function_optimizer did nothing. time = 0.005ms.
2023-06-12 02:09:18.105837: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   function_optimizer: function_optimizer did nothing. time = 0.001ms.
2023-06-12 02:09:18.238897: I tensorflow/core/grappler/devices.cc:55] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2023-06-12 02:09:18.239160: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2023-06-12 02:09:18.253577: I tenso

14784

In [42]:
# convert model to h5

model.save('Recommender_Model.h5')


In [44]:
# # print_prediction based on tflite model

# # Load the TFLite model and allocate tensors.

# interpreter = tf.lite.Interpreter(model_path="Recommender_Model.tflite")

# interpreter.allocate_tensors()

# # Get input and output tensors.

# input_details = interpreter.get_input_details()

# output_details = interpreter.get_output_details()


# # test the model with this data

# new_data = {
#     'IdUser': [1, 1, 1, 1, 1],
#     'IdWisata': [1, 2, 3, 4, 5],
#     'rating': [5, 5, 5, 3, 3]
# }

# new_data = pd.DataFrame(new_data)

# user_vec = calculate_user_ratings(new_data, destination_jogja=destination_jogja)

# user_vecs = np.tile(user_vec, (len(destination_fix), 1))

# user_vec = calculate_user_ratings(new_data, destination_jogja)

# item_vecs = destination_fix.drop('Place_Name', axis=1)

# # scale our user and item vectors

# suser_vecs = scalerUser.transform(user_vecs)

# sitem_vecs = scalerItem.transform(item_vecs)

# # make a prediction

# print(suser_vecs)

# print(sitem_vecs)

# # convert to array 

# print("array")

# suser_vecs = np.array(suser_vecs, dtype=np.float32)

# sitem_vecs = np.array(sitem_vecs, dtype=np.float32)

# print(suser_vecs)

# print(sitem_vecs)


# interpreter.invoke()

# y_p = interpreter.get_tensor(output_details[0]['index'])

# # unscale y prediction

# y_pu = scalerTarget.inverse_transform(y_p)

# # sort the results, highest prediction first

# sorted_index = np.argsort(-y_pu,axis=0).reshape(-1).tolist()  #negate to get largest rating first

# sorted_ypu   = y_pu[sorted_index]

# sorted_index = [x+0 for x in sorted_index]

# sorted_items = item_vecs.loc[sorted_index]  #using unscaled vectors for display

# print_prediction(sorted_items, sorted_ypu, 40, destination_jogja=destination_jogja)
