In [82]:
import pandas as pd
import numpy as np
from zipfile import ZipFile
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [83]:
rating = pd.read_csv('../data/tourism_rating.csv')
destination = pd.read_csv('../data/tourism_with_id.csv')
user = pd.read_csv('../data/user.csv')

<h2> PRE-PROCESSING

<h4> DATA DESTINATION

In [84]:
destination.head()

Unnamed: 0,IdWisata,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Unnamed: 11,Unnamed: 12
0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,,1
1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,,2
2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,,3
3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,,4
4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,,5


In [85]:
destination = destination.drop(['Unnamed: 11','Unnamed: 12', 'Time_Minutes'], axis=1)
destination.head(2)

Unnamed: 0,IdWisata,Place_Name,Description,Category,City,Price,Rating,Coordinate,Lat,Long
0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153
1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125


In [86]:
destination_jogja = destination[destination['City'] == 'Yogyakarta']
destination_jogja.head()

Unnamed: 0,IdWisata,Place_Name,Description,Category,City,Price,Rating,Coordinate,Lat,Long
84,85,Taman Pintar Yogyakarta,Taman Pintar Yogyakarta (bahasa Jawa: Hanacara...,Taman Hiburan,Yogyakarta,6000,4.5,"{'lat': -7.800671500000001, 'lng': 110.3676551}",-7.800671,110.367655
85,86,Keraton Yogyakarta,Keraton Ngayogyakarta Hadiningrat atau Keraton...,Budaya,Yogyakarta,15000,4.6,"{'lat': -7.8052845, 'lng': 110.3642031}",-7.805284,110.364203
86,87,Sindu Kusuma Edupark (SKE),Sindu Kusuma Edupark (SKE) merupakan sebuah de...,Taman Hiburan,Yogyakarta,20000,4.2,"{'lat': -7.767297300000001, 'lng': 110.3542486}",-7.767297,110.354249
87,88,Museum Benteng Vredeburg Yogyakarta,Museum Benteng Vredeburg (bahasa Jawa: ꦩꦸꦱꦶꦪꦸꦩ...,Budaya,Yogyakarta,3000,4.6,"{'lat': -7.800201599999999, 'lng': 110.3663044}",-7.800202,110.366304
88,89,De Mata Museum Jogja,Museum De Mata merupakan salah satu museum yan...,Budaya,Yogyakarta,50000,4.4,"{'lat': -7.816315599999999, 'lng': 110.3871442}",-7.816316,110.387144


In [87]:
# reset index 

destination_jogja = destination_jogja.reset_index(drop=True)
destination_jogja['IdWisata'] = destination_jogja.index + 1
destination_jogja.head()

Unnamed: 0,IdWisata,Place_Name,Description,Category,City,Price,Rating,Coordinate,Lat,Long
0,1,Taman Pintar Yogyakarta,Taman Pintar Yogyakarta (bahasa Jawa: Hanacara...,Taman Hiburan,Yogyakarta,6000,4.5,"{'lat': -7.800671500000001, 'lng': 110.3676551}",-7.800671,110.367655
1,2,Keraton Yogyakarta,Keraton Ngayogyakarta Hadiningrat atau Keraton...,Budaya,Yogyakarta,15000,4.6,"{'lat': -7.8052845, 'lng': 110.3642031}",-7.805284,110.364203
2,3,Sindu Kusuma Edupark (SKE),Sindu Kusuma Edupark (SKE) merupakan sebuah de...,Taman Hiburan,Yogyakarta,20000,4.2,"{'lat': -7.767297300000001, 'lng': 110.3542486}",-7.767297,110.354249
3,4,Museum Benteng Vredeburg Yogyakarta,Museum Benteng Vredeburg (bahasa Jawa: ꦩꦸꦱꦶꦪꦸꦩ...,Budaya,Yogyakarta,3000,4.6,"{'lat': -7.800201599999999, 'lng': 110.3663044}",-7.800202,110.366304
4,5,De Mata Museum Jogja,Museum De Mata merupakan salah satu museum yan...,Budaya,Yogyakarta,50000,4.4,"{'lat': -7.816315599999999, 'lng': 110.3871442}",-7.816316,110.387144


In [88]:
destination_jogja.tail()

Unnamed: 0,IdWisata,Place_Name,Description,Category,City,Price,Rating,Coordinate,Lat,Long
121,122,Wisata Kaliurang,"Jogja selalu menarik untuk dikulik, terlebih t...",Cagar Alam,Yogyakarta,8000,4.4,"{'lat': -7.6120675, 'lng': 110.4205209}",-7.612068,110.420521
122,123,Heha Sky View,HeHa Sky View adalah salah satu tempat wisata ...,Taman Hiburan,Yogyakarta,15000,4.4,"{'lat': -7.8496144, 'lng': 110.478324}",-7.849614,110.478324
123,124,Taman Sungai Mudal,"Taman Sungai Mudal, sebuah objek wisata alam t...",Cagar Alam,Yogyakarta,10000,4.6,"{'lat': -7.762813599999998, 'lng': 110.1161626}",-7.762814,110.116163
124,125,Pantai Sanglen,Pantai Sanglen. Lokasinya berada di Desa Kemad...,Bahari,Yogyakarta,10000,4.5,"{'lat': -8.1367456, 'lng': 110.5716362}",-8.136746,110.571636
125,126,Pantai Congot,"Selain Pantai Glagah dan Pantai Trisik, ternya...",Bahari,Yogyakarta,3000,4.3,"{'lat': -7.907542500000001, 'lng': 110.0535658}",-7.907542,110.053566


In [89]:
destination_jogja.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 126 entries, 0 to 125
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   IdWisata     126 non-null    int64  
 1   Place_Name   126 non-null    object 
 2   Description  126 non-null    object 
 3   Category     126 non-null    object 
 4   City         126 non-null    object 
 5   Price        126 non-null    int64  
 6   Rating       126 non-null    float64
 7   Coordinate   126 non-null    object 
 8   Lat          126 non-null    float64
 9   Long         126 non-null    float64
dtypes: float64(3), int64(2), object(5)
memory usage: 10.0+ KB


In [90]:
# save to pickle destination_jogja

import pickle

with open('../data/destination_jogja.pkl', 'wb') as file:
    pickle.dump(destination_jogja, file)

    

<h4> DATA RATING

In [91]:
rating.head(10)

Unnamed: 0,IdUser,IdWisata,rating
0,1,179,3
1,1,344,2
2,1,5,5
3,1,373,3
4,1,101,4
5,1,312,2
6,1,258,5
7,1,20,4
8,1,154,2
9,1,393,5


In [92]:
id_jogja = destination_jogja['IdWisata']
ratings_jogja = rating[rating["IdWisata"].isin(id_jogja)]
ratings_jogja = ratings_jogja.reset_index(drop=True)
ratings_jogja.head()

Unnamed: 0,IdUser,IdWisata,rating
0,1,5,5
1,1,101,4
2,1,20,4
3,1,103,3
4,1,89,3


In [93]:
ratings_jogja.tail()

Unnamed: 0,IdUser,IdWisata,rating
2872,300,69,1
2873,300,8,1
2874,300,108,5
2875,300,103,5
2876,300,64,4


In [94]:
ratings_jogja.shape

(2877, 3)

<h4> DATA USER

In [95]:
user.head()

Unnamed: 0,IdUser,Location,Age
0,1,"Semarang, Jawa Tengah",20
1,2,"Bekasi, Jawa Barat",21
2,3,"Cirebon, Jawa Barat",23
3,4,"Bekasi, Jawa Barat",21
4,5,"Lampung, Sumatera Selatan",20


In [96]:
user.shape

(301, 3)

In [97]:
id_user = ratings_jogja['IdUser']
user_jogja = user[user['IdUser'].isin(id_user)]
user_jogja = user_jogja.reset_index(drop=True)
user_jogja.head()

Unnamed: 0,IdUser,Location,Age
0,1,"Semarang, Jawa Tengah",20
1,2,"Bekasi, Jawa Barat",21
2,3,"Cirebon, Jawa Barat",23
3,4,"Bekasi, Jawa Barat",21
4,5,"Lampung, Sumatera Selatan",20


In [98]:
user_jogja.shape

(300, 3)

<h1> PREPARE DATA FOR MODELLING

In [99]:
# Merge with destination_jogja to get category information
df = ratings_jogja.merge(pd.DataFrame(destination_jogja), on='IdWisata')

# Initialize dictionary to store aggregated data
aggregated_data = {}

# Iterate through each row and aggregate ratings
for row in df.itertuples(index=False):
    IdUser = row.IdUser
    category = row.Category
    rating = row.rating
    
    if IdUser not in aggregated_data:
        aggregated_data[IdUser] = {
            'average_category_Bahari': 0.0,
            'average_category_Budaya': 0.0,
            'average_category_Cagar Alam': 0.0,
            'average_category_Pusat Perbelanjaan': 0.0,
            'average_category_Taman Hiburan': 0.0,
            'number_of_ratings_Bahari': 0,
            'number_of_ratings_Budaya': 0,
            'number_of_ratings_Cagar Alam': 0,
            'number_of_ratings_Pusat Perbelanjaan': 0,
            'number_of_ratings_Taman Hiburan': 0,
            'Average_All_Ratings': 0.0
        }
    
    aggregated_data[IdUser]['IdUser'] = IdUser
    aggregated_data[IdUser]['average_category_' + category] += rating
    aggregated_data[IdUser]['number_of_ratings_' + category] += 1
    aggregated_data[IdUser]['Average_All_Ratings'] += rating

# Calculate average ratings and overall average
for IdUser, data in aggregated_data.items():
    for category in ['Bahari', 'Budaya', 'Cagar Alam', 'Pusat Perbelanjaan', 'Taman Hiburan']:
        count = data['number_of_ratings_' + category]
        if count > 0:
            data['average_category_' + category] /= count

    total_ratings = sum(data['number_of_ratings_' + category] for category in ['Bahari', 'Budaya', 'Cagar Alam', 'Pusat Perbelanjaan', 'Taman Hiburan'])
    data['Average_All_Ratings'] /= total_ratings

# Convert aggregated data to DataFrame
pivoted_data = pd.DataFrame.from_dict(aggregated_data, orient='index')

# Reorder columns
columns_order = ['IdUser', 'average_category_Bahari', 'average_category_Budaya', 'average_category_Cagar Alam',
                 'average_category_Pusat Perbelanjaan', 'average_category_Taman Hiburan',
                 'number_of_ratings_Bahari', 'number_of_ratings_Budaya', 'number_of_ratings_Cagar Alam',
                 'number_of_ratings_Pusat Perbelanjaan', 'number_of_ratings_Taman Hiburan', 'Average_All_Ratings']

pivoted_data = pivoted_data[columns_order]

# Display the resulting DataFrame
pivoted_data

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,number_of_ratings_Taman Hiburan,Average_All_Ratings
1,1,3.333333,3.80,2.0,2.0,4.000000,3,5,1,1,1,3.363636
4,4,2.333333,4.00,3.0,0.0,3.500000,3,4,1,0,4,3.333333
11,11,3.333333,3.75,2.0,0.0,3.333333,3,4,1,0,3,3.363636
12,12,4.250000,5.00,4.0,0.0,3.666667,4,1,4,0,6,4.000000
35,35,3.000000,3.00,4.0,0.0,0.000000,2,2,2,0,0,3.333333
...,...,...,...,...,...,...,...,...,...,...,...,...
90,90,1.000000,4.00,2.0,0.0,2.250000,1,1,2,0,4,2.250000
209,209,2.800000,4.00,0.0,0.0,0.000000,5,1,0,0,0,3.000000
191,191,4.000000,5.00,2.0,0.0,2.500000,1,1,1,0,2,3.200000
161,161,4.000000,0.00,1.5,0.0,3.857143,1,0,2,0,7,3.400000


In [100]:
pivoted_data

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,number_of_ratings_Taman Hiburan,Average_All_Ratings
1,1,3.333333,3.80,2.0,2.0,4.000000,3,5,1,1,1,3.363636
4,4,2.333333,4.00,3.0,0.0,3.500000,3,4,1,0,4,3.333333
11,11,3.333333,3.75,2.0,0.0,3.333333,3,4,1,0,3,3.363636
12,12,4.250000,5.00,4.0,0.0,3.666667,4,1,4,0,6,4.000000
35,35,3.000000,3.00,4.0,0.0,0.000000,2,2,2,0,0,3.333333
...,...,...,...,...,...,...,...,...,...,...,...,...
90,90,1.000000,4.00,2.0,0.0,2.250000,1,1,2,0,4,2.250000
209,209,2.800000,4.00,0.0,0.0,0.000000,5,1,0,0,0,3.000000
191,191,4.000000,5.00,2.0,0.0,2.500000,1,1,1,0,2,3.200000
161,161,4.000000,0.00,1.5,0.0,3.857143,1,0,2,0,7,3.400000


In [101]:
destination_fix = destination_jogja[["IdWisata", "Place_Name", "Category", "Price", "Rating"]]

# Perform one-hot encoding on the "Category" column
category_encoded = pd.get_dummies(destination_fix["Category"], prefix="Category")

# Concatenate the one-hot encoded categories with the selected columns
destination_fix = pd.concat([destination_fix[["IdWisata", "Place_Name", "Price", "Rating"]], category_encoded], axis=1)
destination_fix

Unnamed: 0,IdWisata,Place_Name,Price,Rating,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan
0,1,Taman Pintar Yogyakarta,6000,4.5,0,0,0,0,1
1,2,Keraton Yogyakarta,15000,4.6,0,1,0,0,0
2,3,Sindu Kusuma Edupark (SKE),20000,4.2,0,0,0,0,1
3,4,Museum Benteng Vredeburg Yogyakarta,3000,4.6,0,1,0,0,0
4,5,De Mata Museum Jogja,50000,4.4,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...
121,122,Wisata Kaliurang,8000,4.4,0,0,1,0,0
122,123,Heha Sky View,15000,4.4,0,0,0,0,1
123,124,Taman Sungai Mudal,10000,4.6,0,0,1,0,0
124,125,Pantai Sanglen,10000,4.5,1,0,0,0,0


In [136]:
import pickle

# save destination_fix to pickle

with open('../data/destination_fix.pkl', 'wb') as file:
    pickle.dump(destination_fix, file)

In [102]:
merged_df = pd.merge(pivoted_data, ratings_jogja, on='IdUser', how='left')

# Merge the destination_fix dataset with the merged pivoted_data and df2 datasets based on 'IdWisata'
final_df = pd.merge(merged_df, destination_fix, on='IdWisata', how='left')
final_df

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,...,IdWisata,rating,Place_Name,Price,Rating,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan
0,1,3.333333,3.8,2.0,2.0,4.0,3,5,1,1,...,5,5,De Mata Museum Jogja,50000,4.4,0,1,0,0,0
1,1,3.333333,3.8,2.0,2.0,4.0,3,5,1,1,...,101,4,Pantai Krakal,10000,4.5,1,0,0,0,0
2,1,3.333333,3.8,2.0,2.0,4.0,3,5,1,1,...,20,4,Tebing Breksi,20000,4.4,0,1,0,0,0
3,1,3.333333,3.8,2.0,2.0,4.0,3,5,1,1,...,103,3,Pantai Siung,10000,4.6,1,0,0,0,0
4,1,3.333333,3.8,2.0,2.0,4.0,3,5,1,1,...,89,3,Pantai Nglambor,10000,4.4,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2872,199,3.000000,3.5,4.5,0.0,1.0,2,2,2,0,...,93,3,Pantai Parangtritis,10000,4.5,1,0,0,0,0
2873,199,3.000000,3.5,4.5,0.0,1.0,2,2,2,0,...,38,3,Watu Goyang,2500,4.4,0,1,0,0,0
2874,199,3.000000,3.5,4.5,0.0,1.0,2,2,2,0,...,73,3,Pantai Baron,10000,4.4,1,0,0,0,0
2875,199,3.000000,3.5,4.5,0.0,1.0,2,2,2,0,...,87,4,Candi Ijo,5000,4.6,0,1,0,0,0


In [103]:
full_df = final_df.sample(frac=1).reset_index(drop=True)
full_df

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,...,IdWisata,rating,Place_Name,Price,Rating,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan
0,54,2.500000,2.000,0.000000,3.0,2.333333,4,2,0,1,...,24,2,Embung Tambakboyo,0,4.4,0,0,0,0,1
1,18,3.500000,0.000,5.000000,0.0,3.800000,6,0,2,0,...,50,4,Desa Wisata Gamplong,10000,4.4,0,0,0,0,1
2,152,3.000000,4.000,2.000000,0.0,0.000000,2,1,1,0,...,122,2,Wisata Kaliurang,8000,4.4,0,0,1,0,0
3,287,1.750000,3.250,3.200000,0.0,0.000000,4,4,5,0,...,112,4,Pantai Ngrenehan,3000,4.4,1,0,0,0,0
4,184,2.250000,3.625,0.000000,3.0,4.000000,4,8,0,1,...,103,4,Pantai Siung,10000,4.6,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2872,28,4.333333,4.500,3.666667,3.0,2.000000,3,2,3,1,...,28,2,Bukit Bintang Yogyakarta,25000,4.5,0,0,0,0,1
2873,284,2.000000,5.000,1.500000,0.0,5.000000,1,1,2,0,...,94,2,Goa Pindul,40000,4.5,0,0,1,0,0
2874,170,2.000000,3.000,0.000000,4.0,2.250000,2,1,0,1,...,72,3,Pantai Pulang Sawal,10000,4.5,1,0,0,0,0
2875,146,2.500000,1.500,4.000000,0.0,2.500000,2,2,2,0,...,107,2,Pantai Drini,10000,4.5,1,0,0,0,0


In [104]:
user_full = full_df[['IdUser', 'average_category_Bahari',
       'average_category_Budaya', 'average_category_Cagar Alam',
       'average_category_Pusat Perbelanjaan', 'average_category_Taman Hiburan',
       'number_of_ratings_Bahari', 'number_of_ratings_Budaya',
       'number_of_ratings_Cagar Alam', 'number_of_ratings_Pusat Perbelanjaan',
       'number_of_ratings_Taman Hiburan', 'Average_All_Ratings']]
user_full

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,number_of_ratings_Taman Hiburan,Average_All_Ratings
0,54,2.500000,2.000,0.000000,3.0,2.333333,4,2,0,1,3,2.400000
1,18,3.500000,0.000,5.000000,0.0,3.800000,6,0,2,0,5,3.846154
2,152,3.000000,4.000,2.000000,0.0,0.000000,2,1,1,0,0,3.000000
3,287,1.750000,3.250,3.200000,0.0,0.000000,4,4,5,0,0,2.769231
4,184,2.250000,3.625,0.000000,3.0,4.000000,4,8,0,1,5,3.388889
...,...,...,...,...,...,...,...,...,...,...,...,...
2872,28,4.333333,4.500,3.666667,3.0,2.000000,3,2,3,1,1,3.800000
2873,284,2.000000,5.000,1.500000,0.0,5.000000,1,1,2,0,1,3.000000
2874,170,2.000000,3.000,0.000000,4.0,2.250000,2,1,0,1,4,2.500000
2875,146,2.500000,1.500,4.000000,0.0,2.500000,2,2,2,0,4,2.600000


In [105]:
item_full = full_df[['IdWisata', 'Price', 'Rating', 'Category_Bahari', 'Category_Budaya', 'Category_Cagar Alam', 
                      'Category_Pusat Perbelanjaan', 'Category_Taman Hiburan']]
item_full

Unnamed: 0,IdWisata,Price,Rating,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan
0,24,0,4.4,0,0,0,0,1
1,50,10000,4.4,0,0,0,0,1
2,122,8000,4.4,0,0,1,0,0
3,112,3000,4.4,1,0,0,0,0
4,103,10000,4.6,1,0,0,0,0
...,...,...,...,...,...,...,...,...
2872,28,25000,4.5,0,0,0,0,1
2873,94,40000,4.5,0,0,1,0,0
2874,72,10000,4.5,1,0,0,0,0
2875,107,10000,4.5,1,0,0,0,0


In [106]:
y_full = full_df[['rating']]
y_full

Unnamed: 0,rating
0,2
1,4
2,2
3,4
4,4
...,...
2872,2
2873,2
2874,3
2875,2


In [107]:
item_full_unscaled = item_full
user_full_unscaled = user_full
y_full_unscaled    = y_full

scalerItem = StandardScaler()
scalerItem.fit(item_full)
item_full = scalerItem.transform(item_full)

scalerUser = StandardScaler()
scalerUser.fit(user_full)
user_full = scalerUser.transform(user_full)

scalerTarget = MinMaxScaler((-1, 1))
scalerTarget.fit(y_full.to_numpy().reshape(-1, 1))
y_full = scalerTarget.transform(y_full.to_numpy().reshape(-1, 1))
#ynorm_test = scalerTarget.transform(y_test.reshape(-1, 1))

print(np.allclose(item_full_unscaled, scalerItem.inverse_transform(item_full)))
print(np.allclose(user_full_unscaled, scalerUser.inverse_transform(user_full)))

True
True


In [137]:
# save scalerItem, scalerUser, scalerTarget to pickle

with open('../data/scalerItem.pkl', 'wb') as file:
    pickle.dump(scalerItem, file)

with open('../data/scalerUser.pkl', 'wb') as file:
    pickle.dump(scalerUser, file)

with open('../data/scalerTarget.pkl', 'wb') as file:
    pickle.dump(scalerTarget, file)

    

In [108]:
# save Scaler target to pickle
import pickle

pickle.dump(scalerTarget, open('scalerTarget.pkl', 'wb'))


In [109]:
total_rows = len(y_full)
train_rows = int(0.85 * total_rows)
test_rows = total_rows - train_rows

In [110]:
user_train = user_full[:train_rows]
user_test = user_full[train_rows:]

item_train = item_full[:train_rows]
item_test = item_full[train_rows:]

y_train = y_full[:train_rows]
y_test = y_full[train_rows:]
print(f"item training data shape: {item_train.shape}")
print(f"item test data shape: {item_test.shape}")

print(f"user training data shape: {user_train.shape}")
print(f"user test data shape: {user_test.shape}")

num_user_features = user_train.shape[1] - 1
num_item_features = item_train.shape[1] - 1

print(f"user test data shape: {num_user_features}")
print(f"item test data shape: {num_item_features}")


item training data shape: (2445, 8)
item test data shape: (432, 8)
user training data shape: (2445, 12)
user test data shape: (432, 12)
user test data shape: 11
item test data shape: 7


<h3>MODELLING

In [111]:
num_outputs = 16
tf.random.set_seed(1)
user_NN = tf.keras.models.Sequential([    
    tf.keras.layers.Dense(32, activation='relu', name = 'layer1'),
    tf.keras.layers.Dense(16, activation='relu', name = 'layer2'),
    tf.keras.layers.Dense(num_outputs, activation='linear', name = 'layer3')
])

item_NN = tf.keras.models.Sequential([   
    tf.keras.layers.Dense(32, activation='relu', name = 'layer1'),
    tf.keras.layers.Dense(16, activation='relu', name = 'layer2'),
    tf.keras.layers.Dense(num_outputs, activation='linear', name = 'layer3')
])

# create the user input and point to the base network
input_user = tf.keras.layers.Input(shape=(num_user_features))
vu = user_NN(input_user)
vu = tf.linalg.l2_normalize(vu, axis=1)

# create the item input and point to the base network
input_item = tf.keras.layers.Input(shape=(num_item_features))
vm = item_NN(input_item)
vm = tf.linalg.l2_normalize(vm, axis=1)

# compute the dot product of the two vectors vu and vm
output = tf.keras.layers.Dot(axes=1)([vu, vm])

# specify the inputs and output of the model
model = tf.keras.Model([input_user, input_item], output)

model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 11)]         0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            [(None, 7)]          0                                            
__________________________________________________________________________________________________
sequential_2 (Sequential)       (None, 16)           1184        input_3[0][0]                    
__________________________________________________________________________________________________
sequential_3 (Sequential)       (None, 16)           1056        input_4[0][0]                    
____________________________________________________________________________________________

In [112]:
tf.random.set_seed(1)
cost_fn = tf.keras.losses.MeanSquaredError()
opt = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=opt,
              loss=cost_fn)
# model.compile(loss = tf.keras.losses.MeanSquaredError(),
#     optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.002),
#     metrics=[tf.keras.metrics.RootMeanSquaredError()]
#     )

In [113]:
tf.random.set_seed(1)
model.fit([user_train[:, 1:], item_train[:,1:]], y_train, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fe25cb891d0>

In [114]:
model.evaluate([user_test[:, 1:], item_test[:, 1:]], y_test)



0.3732629716396332

<h2> COBA_COBA PREDICT

In [115]:
data = {
    'IdUser': [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400],
    'IdWisata': [154, 209, 113, 149, 210, 85, 87, 207, 152, 153, 95, 103, 115, 118, 100, 179, 105],
    'rating': [3, 4, 2, 3, 4, 5, 4, 5, 4, 5, 4, 4, 5, 5, 5, 5, 5]
}

df_pred = pd.DataFrame(data)
print(df_pred)

    IdUser  IdWisata  rating
0      400       154       3
1      400       209       4
2      400       113       2
3      400       149       3
4      400       210       4
5      400        85       5
6      400        87       4
7      400       207       5
8      400       152       4
9      400       153       5
10     400        95       4
11     400       103       4
12     400       115       5
13     400       118       5
14     400       100       5
15     400       179       5
16     400       105       5


In [116]:
def calculate_user_ratings(ratings_jogja, destination_jogja=destination_jogja):
    # Merge with destination_jogja to get category information
    df = ratings_jogja.merge(pd.DataFrame(destination_jogja), on='IdWisata')

    # Initialize dictionary to store aggregated data
    aggregated_data = {}

    # Iterate through each row and aggregate ratings
    for row in df.itertuples(index=False):
        IdUser = row.IdUser
        category = row.Category
        rating = row.rating

        if IdUser not in aggregated_data:
            aggregated_data[IdUser] = {
                'average_category_Bahari': 0.0,
                'average_category_Budaya': 0.0,
                'average_category_Cagar Alam': 0.0,
                'average_category_Pusat Perbelanjaan': 0.0,
                'average_category_Taman Hiburan': 0.0,
                'number_of_ratings_Bahari': 0,
                'number_of_ratings_Budaya': 0,
                'number_of_ratings_Cagar Alam': 0,
                'number_of_ratings_Pusat Perbelanjaan': 0,
                'number_of_ratings_Taman Hiburan': 0,
                'Average_All_Ratings': 0.0
            }

        aggregated_data[IdUser]['IdUser'] = IdUser
        aggregated_data[IdUser]['average_category_' + category] += rating
        aggregated_data[IdUser]['number_of_ratings_' + category] += 1
        aggregated_data[IdUser]['Average_All_Ratings'] += rating

    # Calculate average ratings and overall average
    for IdUser, data in aggregated_data.items():
        for category in ['Bahari', 'Budaya', 'Cagar Alam', 'Pusat Perbelanjaan', 'Taman Hiburan']:
            count = data['number_of_ratings_' + category]
            if count > 0:
                data['average_category_' + category] /= count

        total_ratings = sum(data['number_of_ratings_' + category] for category in ['Bahari', 'Budaya', 'Cagar Alam', 'Pusat Perbelanjaan', 'Taman Hiburan'])
        data['Average_All_Ratings'] /= total_ratings

    # Convert aggregated data to DataFrame
    pivoted_data = pd.DataFrame.from_dict(aggregated_data, orient='index')

    # Reorder columns
    columns_order = ['IdUser', 'average_category_Bahari', 'average_category_Budaya', 'average_category_Cagar Alam',
                     'average_category_Pusat Perbelanjaan', 'average_category_Taman Hiburan',
                     'number_of_ratings_Bahari', 'number_of_ratings_Budaya', 'number_of_ratings_Cagar Alam',
                     'number_of_ratings_Pusat Perbelanjaan', 'number_of_ratings_Taman Hiburan', 'Average_All_Ratings']

    pivoted_data = pivoted_data[columns_order]

    # Display the resulting DataFrame
    return pivoted_data

In [117]:
user_vec = calculate_user_ratings(df_pred, destination_jogja=destination_jogja)
user_vec

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,number_of_ratings_Taman Hiburan,Average_All_Ratings
400,400,4.333333,4.0,5.0,0.0,0.0,6,2,1,0,0,4.333333


In [118]:
user_vecs = np.tile(user_vec, (len(destination_fix), 1))
user_vecs

array([[400.        ,   4.33333333,   4.        , ...,   0.        ,
          0.        ,   4.33333333],
       [400.        ,   4.33333333,   4.        , ...,   0.        ,
          0.        ,   4.33333333],
       [400.        ,   4.33333333,   4.        , ...,   0.        ,
          0.        ,   4.33333333],
       ...,
       [400.        ,   4.33333333,   4.        , ...,   0.        ,
          0.        ,   4.33333333],
       [400.        ,   4.33333333,   4.        , ...,   0.        ,
          0.        ,   4.33333333],
       [400.        ,   4.33333333,   4.        , ...,   0.        ,
          0.        ,   4.33333333]])

In [119]:
user_vec = calculate_user_ratings(df_pred, destination_jogja)
user_vec

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,number_of_ratings_Taman Hiburan,Average_All_Ratings
400,400,4.333333,4.0,5.0,0.0,0.0,6,2,1,0,0,4.333333


In [120]:
item_vecs = destination_fix.drop('Place_Name', axis=1)
item_vecs

Unnamed: 0,IdWisata,Price,Rating,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan
0,1,6000,4.5,0,0,0,0,1
1,2,15000,4.6,0,1,0,0,0
2,3,20000,4.2,0,0,0,0,1
3,4,3000,4.6,0,1,0,0,0
4,5,50000,4.4,0,1,0,0,0
...,...,...,...,...,...,...,...,...
121,122,8000,4.4,0,0,1,0,0
122,123,15000,4.4,0,0,0,0,1
123,124,10000,4.6,0,0,1,0,0
124,125,10000,4.5,1,0,0,0,0


In [121]:
# save item_vecs to pickle

import pickle

with open('item_vecs.pkl', 'wb') as f:
    pickle.dump(item_vecs, f)

In [122]:
# scale our user and item vectors
suser_vecs = scalerUser.transform(user_vecs)
sitem_vecs = scalerItem.transform(item_vecs)



# print_pred_movies(sorted_ypu, sorted_items, movie_dict, maxcount = 10)

  "X does not have valid feature names, but"


In [123]:
# save suser_vecs and sitem_vecs to pickle

import pickle

with open('suser_vecs.pickle', 'wb') as f:
    pickle.dump(suser_vecs, f)

with open('sitem_vecs.pickle', 'wb') as f:

    pickle.dump(sitem_vecs, f)

# load suser_vecs and sitem_vecs from pickle

with open('suser_vecs.pickle', 'rb') as f:
    suser_vecs = pickle.load(f)

with open('sitem_vecs.pickle', 'rb') as f:

    sitem_vecs = pickle.load(f)

In [124]:
# make a prediction
y_p = model.predict([suser_vecs[:, 1:], sitem_vecs[:, 1:]])
# # unscale y prediction 
y_pu = scalerTarget.inverse_transform(y_p)
y_pu

# sort the results, highest prediction first
sorted_index = np.argsort(-y_pu,axis=0).reshape(-1).tolist()  #negate to get largest rating first
sorted_ypu   = y_pu[sorted_index]
sorted_index = [x+0 for x in sorted_index]
sorted_items = item_vecs.loc[sorted_index]  #using unscaled vectors for display
sorted_items
# print_pred_movies(sorted_ypu, sorted_items, movie_dict, maxcount = 10)

Unnamed: 0,IdWisata,Price,Rating,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan
35,36,250000,4.5,0,1,0,0,0
77,78,220000,4.5,0,1,0,0,0
59,60,500000,4.6,0,0,1,0,0
8,9,60000,4.5,0,0,1,0,0
93,94,40000,4.5,0,0,1,0,0
...,...,...,...,...,...,...,...,...
80,81,2000,4.2,0,0,0,0,1
18,19,0,4.7,0,0,0,0,1
7,8,0,4.7,0,0,0,0,1
10,11,0,5.0,0,0,0,0,1


In [125]:
def print_prediction(sorted_items, sorted_ypu, num_recommend, destination_jogja=destination_jogja):
    res = pd.merge(sorted_items[['IdWisata']], destination_jogja[["IdWisata", "Place_Name", "Category", "Rating"]], on="IdWisata")[:num_recommend]
    res["Predict_Rating"] = sorted_ypu[:num_recommend]
    return res

In [126]:
print_prediction(sorted_items, sorted_ypu, 40, destination_jogja=destination_jogja)

Unnamed: 0,IdWisata,Place_Name,Category,Rating,Predict_Rating
0,36,Bukit Panguk Kediwung,Budaya,4.5,4.268539
1,78,Kampung Wisata Dipowinatan,Budaya,4.5,4.240309
2,60,Goa Jomblang,Cagar Alam,4.6,4.189852
3,9,Gembira Loka Zoo,Cagar Alam,4.5,4.044326
4,94,Goa Pindul,Cagar Alam,4.5,4.034315
5,53,Wisata Alam Kalibiru,Cagar Alam,4.4,4.026642
6,108,Air Terjun Sri Gethuk,Cagar Alam,4.4,4.01956
7,122,Wisata Kaliurang,Cagar Alam,4.4,4.012348
8,65,Goa Cerme,Cagar Alam,4.4,4.007025
9,80,Pintoe Langit Dahromo,Cagar Alam,4.4,4.006215


# Convert Model To TFLITE

In [127]:
recommender_Model = "saved_model/recommender_model"

tf.saved_model.save(model, recommender_Model)



INFO:tensorflow:Assets written to: saved_model/recommender_model/assets


INFO:tensorflow:Assets written to: saved_model/recommender_model/assets


In [128]:
%%bash -s "$recommender_Model"
saved_model_cli show --dir $1 --tag_set serve --signature_def serving_default

The given SavedModel SignatureDef contains the following input(s):
  inputs['input_3'] tensor_info:
      dtype: DT_FLOAT
      shape: (-1, 11)
      name: serving_default_input_3:0
  inputs['input_4'] tensor_info:
      dtype: DT_FLOAT
      shape: (-1, 7)
      name: serving_default_input_4:0
The given SavedModel SignatureDef contains the following output(s):
  outputs['dot_1'] tensor_info:
      dtype: DT_FLOAT
      shape: (-1, 1)
      name: StatefulPartitionedCall:0
Method name is: tensorflow/serving/predict


In [129]:
loaded = tf.saved_model.load(recommender_Model)

In [130]:
print(list(loaded.signatures.keys()))  # ["serving_default"]
infer = loaded.signatures["serving_default"]
print(infer.structured_outputs)
print(infer.structured_input_signature)

['serving_default']
{'dot_1': TensorSpec(shape=(None, 1), dtype=tf.float32, name='dot_1')}
((), {'input_3': TensorSpec(shape=(None, 11), dtype=tf.float32, name='input_3'), 'input_4': TensorSpec(shape=(None, 7), dtype=tf.float32, name='input_4')})


In [131]:
# convert using tf.lite.TFLiteConverter

converter = tf.lite.TFLiteConverter.from_saved_model(recommender_Model)



In [132]:
# post training quantization

converter.optimizations = [tf.lite.Optimize.DEFAULT]

In [133]:
# post training quantization

def representative_dataset_gen():
    for i in range(100):
        yield([suser_vecs[:, 1:], sitem_vecs[:, 1:]])
        

In [134]:
converter.representative_dataset = representative_dataset_gen

In [135]:
tflite_model = converter.convert()
tflite_model_file = 'saved_model/recommender_model.tflite'

with open(tflite_model_file, "wb") as f:
    f.write(tflite_model)

2023-06-13 14:35:25.751288: I tensorflow/core/grappler/devices.cc:55] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2023-06-13 14:35:25.751696: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2023-06-13 14:35:25.764820: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:797] Optimization results for grappler item: graph_to_optimize
2023-06-13 14:35:25.764885: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   function_optimizer: Graph size after: 82 nodes (66), 117 edges (101), time = 6.436ms.
2023-06-13 14:35:25.764907: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   function_optimizer: function_optimizer did nothing. time = 0.132ms.
2023-06-13 14:35:25.947870: I tensorflow/core/grappler/devices.cc:55] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2023-06-13 14:35:25.948155: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2023-06-13 14:35:

ValueError: Cannot set tensor: Got value of type NOTYPE but expected type FLOAT32 for input 0, name: input_3 