In [4]:
import pandas as pd
import numpy as np
from zipfile import ZipFile
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [5]:
rating = pd.read_csv('./data/tourism_rating.csv')
destination = pd.read_csv('./data/tourism_with_id.csv')
user = pd.read_csv('./data/user.csv')

<h2> PRE-PROCESSING

<h4> DATA DESTINATION

In [6]:
destination.head()

Unnamed: 0,IdWisata,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Unnamed: 11,Unnamed: 12
0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,,1
1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,,2
2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,,3
3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,,4
4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,,5


In [7]:
destination = destination.drop(['Unnamed: 11','Unnamed: 12', 'Time_Minutes'], axis=1)
destination.head(2)

Unnamed: 0,IdWisata,Place_Name,Description,Category,City,Price,Rating,Coordinate,Lat,Long
0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153
1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125


In [8]:
destination_jogja = destination[destination['City'] == 'Yogyakarta']
destination_jogja.head()

Unnamed: 0,IdWisata,Place_Name,Description,Category,City,Price,Rating,Coordinate,Lat,Long
84,85,Taman Pintar Yogyakarta,Taman Pintar Yogyakarta (bahasa Jawa: Hanacara...,Taman Hiburan,Yogyakarta,6000,4.5,"{'lat': -7.800671500000001, 'lng': 110.3676551}",-7.800671,110.367655
85,86,Keraton Yogyakarta,Keraton Ngayogyakarta Hadiningrat atau Keraton...,Budaya,Yogyakarta,15000,4.6,"{'lat': -7.8052845, 'lng': 110.3642031}",-7.805284,110.364203
86,87,Sindu Kusuma Edupark (SKE),Sindu Kusuma Edupark (SKE) merupakan sebuah de...,Taman Hiburan,Yogyakarta,20000,4.2,"{'lat': -7.767297300000001, 'lng': 110.3542486}",-7.767297,110.354249
87,88,Museum Benteng Vredeburg Yogyakarta,Museum Benteng Vredeburg (bahasa Jawa: ꦩꦸꦱꦶꦪꦸꦩ...,Budaya,Yogyakarta,3000,4.6,"{'lat': -7.800201599999999, 'lng': 110.3663044}",-7.800202,110.366304
88,89,De Mata Museum Jogja,Museum De Mata merupakan salah satu museum yan...,Budaya,Yogyakarta,50000,4.4,"{'lat': -7.816315599999999, 'lng': 110.3871442}",-7.816316,110.387144


In [6]:
destination_jogja.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 126 entries, 84 to 209
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   IdWisata     126 non-null    int64  
 1   Place_Name   126 non-null    object 
 2   Description  126 non-null    object 
 3   Category     126 non-null    object 
 4   City         126 non-null    object 
 5   Price        126 non-null    int64  
 6   Rating       126 non-null    float64
 7   Coordinate   126 non-null    object 
 8   Lat          126 non-null    float64
 9   Long         126 non-null    float64
dtypes: float64(3), int64(2), object(5)
memory usage: 10.8+ KB


<h4> DATA RATING

In [9]:
rating.head(10)

Unnamed: 0,IdUser,IdWisata,rating
0,1,179,3
1,1,344,2
2,1,5,5
3,1,373,3
4,1,101,4
5,1,312,2
6,1,258,5
7,1,20,4
8,1,154,2
9,1,393,5


In [10]:
id_jogja = destination_jogja['IdWisata']
ratings_jogja = rating[rating["IdWisata"].isin(id_jogja)]
ratings_jogja = ratings_jogja.reset_index(drop=True)
ratings_jogja.head()

Unnamed: 0,IdUser,IdWisata,rating
0,1,179,3
1,1,101,4
2,1,154,2
3,1,103,3
4,1,208,5


In [11]:
ratings_jogja.shape

(2871, 3)

<h4> DATA USER

In [12]:
user.head()

Unnamed: 0,IdUser,Location,Age
0,1,"Semarang, Jawa Tengah",20
1,2,"Bekasi, Jawa Barat",21
2,3,"Cirebon, Jawa Barat",23
3,4,"Bekasi, Jawa Barat",21
4,5,"Lampung, Sumatera Selatan",20


In [13]:
user.shape

(300, 3)

In [14]:
id_user = ratings_jogja['IdUser']
user_jogja = user[user['IdUser'].isin(id_user)]
user_jogja = user_jogja.reset_index(drop=True)
user_jogja.head()

Unnamed: 0,IdUser,Location,Age
0,1,"Semarang, Jawa Tengah",20
1,2,"Bekasi, Jawa Barat",21
2,3,"Cirebon, Jawa Barat",23
3,4,"Bekasi, Jawa Barat",21
4,5,"Lampung, Sumatera Selatan",20


In [15]:
user_jogja.shape

(300, 3)

<h1> PREPARE DATA FOR MODELLING

In [16]:
# Merge with destination_jogja to get category information
df = ratings_jogja.merge(pd.DataFrame(destination_jogja), on='IdWisata')

# Initialize dictionary to store aggregated data
aggregated_data = {}

# Iterate through each row and aggregate ratings
for row in df.itertuples(index=False):
    IdUser = row.IdUser
    category = row.Category
    rating = row.rating
    
    if IdUser not in aggregated_data:
        aggregated_data[IdUser] = {
            'average_category_Bahari': 0.0,
            'average_category_Budaya': 0.0,
            'average_category_Cagar Alam': 0.0,
            'average_category_Pusat Perbelanjaan': 0.0,
            'average_category_Taman Hiburan': 0.0,
            'number_of_ratings_Bahari': 0,
            'number_of_ratings_Budaya': 0,
            'number_of_ratings_Cagar Alam': 0,
            'number_of_ratings_Pusat Perbelanjaan': 0,
            'number_of_ratings_Taman Hiburan': 0,
            'Average_All_Ratings': 0.0
        }
    
    aggregated_data[IdUser]['IdUser'] = IdUser
    aggregated_data[IdUser]['average_category_' + category] += rating
    aggregated_data[IdUser]['number_of_ratings_' + category] += 1
    aggregated_data[IdUser]['Average_All_Ratings'] += rating

# Calculate average ratings and overall average
for IdUser, data in aggregated_data.items():
    for category in ['Bahari', 'Budaya', 'Cagar Alam', 'Pusat Perbelanjaan', 'Taman Hiburan']:
        count = data['number_of_ratings_' + category]
        if count > 0:
            data['average_category_' + category] /= count

    total_ratings = sum(data['number_of_ratings_' + category] for category in ['Bahari', 'Budaya', 'Cagar Alam', 'Pusat Perbelanjaan', 'Taman Hiburan'])
    data['Average_All_Ratings'] /= total_ratings

# Convert aggregated data to DataFrame
pivoted_data = pd.DataFrame.from_dict(aggregated_data, orient='index')

# Reorder columns
columns_order = ['IdUser', 'average_category_Bahari', 'average_category_Budaya', 'average_category_Cagar Alam',
                 'average_category_Pusat Perbelanjaan', 'average_category_Taman Hiburan',
                 'number_of_ratings_Bahari', 'number_of_ratings_Budaya', 'number_of_ratings_Cagar Alam',
                 'number_of_ratings_Pusat Perbelanjaan', 'number_of_ratings_Taman Hiburan', 'Average_All_Ratings']

pivoted_data = pivoted_data[columns_order]

# Display the resulting DataFrame
pivoted_data

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,number_of_ratings_Taman Hiburan,Average_All_Ratings
1,1,2.000000,3.333333,5.000000,0.0,3.00,1,3,1,0,1,3.333333
22,22,2.000000,3.000000,3.333333,3.0,5.00,1,4,3,1,1,3.200000
40,40,3.833333,2.666667,2.666667,4.0,3.20,6,3,3,1,5,3.277778
49,49,3.000000,3.666667,5.000000,0.0,2.40,2,3,1,0,5,3.090909
74,74,2.000000,3.750000,3.000000,0.0,4.25,1,4,2,0,4,3.636364
...,...,...,...,...,...,...,...,...,...,...,...,...
202,202,3.000000,4.000000,5.000000,4.0,0.00,1,1,1,1,0,4.000000
48,48,5.000000,0.000000,5.000000,0.0,1.00,1,0,1,0,1,3.666667
69,69,3.000000,1.000000,0.000000,0.0,3.00,1,2,0,0,1,2.000000
120,120,3.500000,0.000000,5.000000,0.0,0.00,2,0,1,0,0,4.000000


In [17]:
pivoted_data

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,number_of_ratings_Taman Hiburan,Average_All_Ratings
1,1,2.000000,3.333333,5.000000,0.0,3.00,1,3,1,0,1,3.333333
22,22,2.000000,3.000000,3.333333,3.0,5.00,1,4,3,1,1,3.200000
40,40,3.833333,2.666667,2.666667,4.0,3.20,6,3,3,1,5,3.277778
49,49,3.000000,3.666667,5.000000,0.0,2.40,2,3,1,0,5,3.090909
74,74,2.000000,3.750000,3.000000,0.0,4.25,1,4,2,0,4,3.636364
...,...,...,...,...,...,...,...,...,...,...,...,...
202,202,3.000000,4.000000,5.000000,4.0,0.00,1,1,1,1,0,4.000000
48,48,5.000000,0.000000,5.000000,0.0,1.00,1,0,1,0,1,3.666667
69,69,3.000000,1.000000,0.000000,0.0,3.00,1,2,0,0,1,2.000000
120,120,3.500000,0.000000,5.000000,0.0,0.00,2,0,1,0,0,4.000000


In [18]:
destination_fix = destination_jogja[["IdWisata", "Place_Name", "Category", "Price", "Rating"]]

# Perform one-hot encoding on the "Category" column
category_encoded = pd.get_dummies(destination_fix["Category"], prefix="Category")

# Concatenate the one-hot encoded categories with the selected columns
destination_fix = pd.concat([destination_fix[["IdWisata", "Place_Name", "Price", "Rating"]], category_encoded], axis=1)
destination_fix

Unnamed: 0,IdWisata,Place_Name,Price,Rating,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan
84,85,Taman Pintar Yogyakarta,6000,4.5,0,0,0,0,1
85,86,Keraton Yogyakarta,15000,4.6,0,1,0,0,0
86,87,Sindu Kusuma Edupark (SKE),20000,4.2,0,0,0,0,1
87,88,Museum Benteng Vredeburg Yogyakarta,3000,4.6,0,1,0,0,0
88,89,De Mata Museum Jogja,50000,4.4,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...
205,206,Wisata Kaliurang,8000,4.4,0,0,1,0,0
206,207,Heha Sky View,15000,4.4,0,0,0,0,1
207,208,Taman Sungai Mudal,10000,4.6,0,0,1,0,0
208,209,Pantai Sanglen,10000,4.5,1,0,0,0,0


In [19]:
merged_df = pd.merge(pivoted_data, ratings_jogja, on='IdUser', how='left')

# Merge the destination_fix dataset with the merged pivoted_data and df2 datasets based on 'IdWisata'
final_df = pd.merge(merged_df, destination_fix, on='IdWisata', how='left')
final_df

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,...,IdWisata,rating,Place_Name,Price,Rating,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan
0,1,2.0,3.333333,5.0,0.0,3.0,1,3,1,0,...,179,3,Candi Ratu Boko,75000,4.6,0,1,0,0,0
1,1,2.0,3.333333,5.0,0.0,3.0,1,3,1,0,...,101,4,Kampung Wisata Sosro Menduran,0,4.0,0,1,0,0,0
2,1,2.0,3.333333,5.0,0.0,3.0,1,3,1,0,...,154,2,Pantai Ngrawe (Mesra),10000,4.5,1,0,0,0,0
3,1,2.0,3.333333,5.0,0.0,3.0,1,3,1,0,...,103,3,Tugu Pal Putih Jogja,0,4.7,0,0,0,0,1
4,1,2.0,3.333333,5.0,0.0,3.0,1,3,1,0,...,208,5,Taman Sungai Mudal,10000,4.6,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2866,120,3.5,0.000000,5.0,0.0,0.0,2,0,1,0,...,155,5,Pantai Ngobaran,5000,4.6,1,0,0,0,0
2867,120,3.5,0.000000,5.0,0.0,0.0,2,0,1,0,...,177,2,Pantai Parangtritis,10000,4.5,1,0,0,0,0
2868,73,1.0,5.000000,0.0,0.0,3.0,1,1,0,0,...,133,3,Puncak Kebun Buah Mangunan,5000,4.6,0,0,0,0,1
2869,73,1.0,5.000000,0.0,0.0,3.0,1,1,0,0,...,125,5,Alun-alun Utara Keraton Yogyakarta,0,4.6,0,1,0,0,0


In [20]:
full_df = final_df.sample(frac=1).reset_index(drop=True)
full_df

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,...,IdWisata,rating,Place_Name,Price,Rating,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan
0,207,0.000000,3.250000,2.000000,0.0,3.333333,0,4,1,0,...,125,5,Alun-alun Utara Keraton Yogyakarta,0,4.6,0,1,0,0,0
1,44,4.333333,3.000000,4.000000,3.0,3.750000,3,2,2,1,...,210,3,Pantai Congot,3000,4.3,1,0,0,0,0
2,134,4.000000,2.777778,2.666667,0.0,4.500000,2,9,3,0,...,179,2,Candi Ratu Boko,75000,4.6,0,1,0,0,0
3,265,3.500000,2.500000,2.800000,0.0,5.000000,4,4,5,0,...,144,2,Goa Jomblang,500000,4.6,0,0,1,0,0
4,155,2.500000,1.000000,2.000000,0.0,2.600000,2,1,1,0,...,209,4,Pantai Sanglen,10000,4.5,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2866,272,5.000000,3.500000,3.333333,0.0,4.000000,2,2,3,0,...,164,3,Pintoe Langit Dahromo,2500,4.4,0,0,1,0,0
2867,167,3.000000,3.333333,0.000000,0.0,3.500000,2,6,0,0,...,113,4,Gedung Agung Yogyakarta,0,4.6,0,1,0,0,0
2868,153,3.000000,3.000000,5.000000,0.0,3.666667,1,2,1,0,...,156,3,Pantai Pulang Sawal,10000,4.5,1,0,0,0,0
2869,132,3.666667,1.000000,0.000000,1.0,3.000000,3,1,0,1,...,118,1,Museum Sonobudoyo Unit I,5000,4.6,0,1,0,0,0


In [21]:
user_full = full_df[['IdUser', 'average_category_Bahari',
       'average_category_Budaya', 'average_category_Cagar Alam',
       'average_category_Pusat Perbelanjaan', 'average_category_Taman Hiburan',
       'number_of_ratings_Bahari', 'number_of_ratings_Budaya',
       'number_of_ratings_Cagar Alam', 'number_of_ratings_Pusat Perbelanjaan',
       'number_of_ratings_Taman Hiburan', 'Average_All_Ratings']]
user_full

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,number_of_ratings_Taman Hiburan,Average_All_Ratings
0,207,0.000000,3.250000,2.000000,0.0,3.333333,0,4,1,0,3,3.125000
1,44,4.333333,3.000000,4.000000,3.0,3.750000,3,2,2,1,4,3.750000
2,134,4.000000,2.777778,2.666667,0.0,4.500000,2,9,3,0,2,3.125000
3,265,3.500000,2.500000,2.800000,0.0,5.000000,4,4,5,0,1,3.071429
4,155,2.500000,1.000000,2.000000,0.0,2.600000,2,1,1,0,5,2.333333
...,...,...,...,...,...,...,...,...,...,...,...,...
2866,272,5.000000,3.500000,3.333333,0.0,4.000000,2,2,3,0,4,3.909091
2867,167,3.000000,3.333333,0.000000,0.0,3.500000,2,6,0,0,2,3.300000
2868,153,3.000000,3.000000,5.000000,0.0,3.666667,1,2,1,0,3,3.571429
2869,132,3.666667,1.000000,0.000000,1.0,3.000000,3,1,0,1,1,2.666667


In [22]:
item_full = full_df[['IdWisata', 'Price', 'Rating', 'Category_Bahari', 'Category_Budaya', 'Category_Cagar Alam', 
                      'Category_Pusat Perbelanjaan', 'Category_Taman Hiburan']]
item_full

Unnamed: 0,IdWisata,Price,Rating,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan
0,125,0,4.6,0,1,0,0,0
1,210,3000,4.3,1,0,0,0,0
2,179,75000,4.6,0,1,0,0,0
3,144,500000,4.6,0,0,1,0,0
4,209,10000,4.5,1,0,0,0,0
...,...,...,...,...,...,...,...,...
2866,164,2500,4.4,0,0,1,0,0
2867,113,0,4.6,0,1,0,0,0
2868,156,10000,4.5,1,0,0,0,0
2869,118,5000,4.6,0,1,0,0,0


In [23]:
y_full = full_df[['rating']]
y_full

Unnamed: 0,rating
0,5
1,3
2,2
3,2
4,4
...,...
2866,3
2867,4
2868,3
2869,1


In [24]:
item_full_unscaled = item_full
user_full_unscaled = user_full
y_full_unscaled    = y_full

scalerItem = StandardScaler()
scalerItem.fit(item_full)
item_full = scalerItem.transform(item_full)

scalerUser = StandardScaler()
scalerUser.fit(user_full)
user_full = scalerUser.transform(user_full)

scalerTarget = MinMaxScaler((-1, 1))
scalerTarget.fit(y_full.to_numpy().reshape(-1, 1))
y_full = scalerTarget.transform(y_full.to_numpy().reshape(-1, 1))
#ynorm_test = scalerTarget.transform(y_test.reshape(-1, 1))

print(np.allclose(item_full_unscaled, scalerItem.inverse_transform(item_full)))
print(np.allclose(user_full_unscaled, scalerUser.inverse_transform(user_full)))

True
True


In [25]:
total_rows = len(y_full)
train_rows = int(0.85 * total_rows)
test_rows = total_rows - train_rows

In [26]:
user_train = user_full[:train_rows]
user_test = user_full[train_rows:]

item_train = item_full[:train_rows]
item_test = item_full[train_rows:]

y_train = y_full[:train_rows]
y_test = y_full[train_rows:]
print(f"movie/item training data shape: {item_train.shape}")
print(f"movie/item test data shape: {item_test.shape}")

print(f"user training data shape: {user_train.shape}")
print(f"user test data shape: {user_test.shape}")

num_user_features = user_train.shape[1] - 1
num_item_features = item_train.shape[1] - 1

print(f"user test data shape: {num_user_features}")
print(f"item test data shape: {num_item_features}")


movie/item training data shape: (2440, 8)
movie/item test data shape: (431, 8)
user training data shape: (2440, 12)
user test data shape: (431, 12)
user test data shape: 11
item test data shape: 7


<h3>MODELLING

In [32]:
num_outputs = 16
tf.random.set_seed(1)
user_NN = tf.keras.models.Sequential([    
    tf.keras.layers.Dense(32, activation='relu', name = 'layer1'),
    tf.keras.layers.Dense(16, activation='relu', name = 'layer2'),
    tf.keras.layers.Dense(num_outputs, activation='linear', name = 'layer3')
])

item_NN = tf.keras.models.Sequential([   
    tf.keras.layers.Dense(32, activation='relu', name = 'layer1'),
    tf.keras.layers.Dense(16, activation='relu', name = 'layer2'),
    tf.keras.layers.Dense(num_outputs, activation='linear', name = 'layer3')
])

# create the user input and point to the base network
input_user = tf.keras.layers.Input(shape=(num_user_features))
vu = user_NN(input_user)
vu = tf.linalg.l2_normalize(vu, axis=1)

# create the item input and point to the base network
input_item = tf.keras.layers.Input(shape=(num_item_features))
vm = item_NN(input_item)
vm = tf.linalg.l2_normalize(vm, axis=1)

# compute the dot product of the two vectors vu and vm
output = tf.keras.layers.Dot(axes=1)([vu, vm])

# specify the inputs and output of the model
model = tf.keras.Model([input_user, input_item], output)

model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 11)]         0           []                               
                                                                                                  
 input_4 (InputLayer)           [(None, 7)]          0           []                               
                                                                                                  
 sequential_2 (Sequential)      (None, 16)           1184        ['input_3[0][0]']                
                                                                                                  
 sequential_3 (Sequential)      (None, 16)           1056        ['input_4[0][0]']                
                                                                                            

In [33]:
tf.random.set_seed(1)
cost_fn = tf.keras.losses.MeanSquaredError()
opt = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=opt,
              loss=cost_fn)
# model.compile(loss = tf.keras.losses.MeanSquaredError(),
#     optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.002),
#     metrics=[tf.keras.metrics.RootMeanSquaredError()]
#     )

In [34]:
tf.random.set_seed(1)
model.fit([user_train[:, 1:], item_train[:,1:]], y_train, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1d134852130>

In [35]:
model.evaluate([user_test[:, 1:], item_test[:, 1:]], y_test)



0.3850158452987671

<h2> COBA_COBA PREDICT

In [36]:
data = {
    'IdUser': [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400],
    'IdWisata': [154, 209, 113, 149, 210, 85, 87, 207, 152, 153, 95, 103, 115, 118, 100, 179, 105],
    'rating': [3, 4, 2, 3, 4, 5, 4, 5, 4, 5, 4, 4, 5, 5, 5, 5, 5]
}

df_pred = pd.DataFrame(data)
print(df_pred)

    IdUser  IdWisata  rating
0       400       154              3
1       400       209              4
2       400       113              2
3       400       149              3
4       400       210              4
5       400        85              5
6       400        87              4
7       400       207              5
8       400       152              4
9       400       153              5
10      400        95              4
11      400       103              4
12      400       115              5
13      400       118              5
14      400       100              5
15      400       179              5
16      400       105              5


In [37]:
def calculate_user_ratings(ratings_jogja, destination_jogja=destination_jogja):
    # Merge with destination_jogja to get category information
    df = ratings_jogja.merge(pd.DataFrame(destination_jogja), on='IdWisata')

    # Initialize dictionary to store aggregated data
    aggregated_data = {}

    # Iterate through each row and aggregate ratings
    for row in df.itertuples(index=False):
        IdUser = row.IdUser
        category = row.Category
        rating = row.rating

        if IdUser not in aggregated_data:
            aggregated_data[IdUser] = {
                'average_category_Bahari': 0.0,
                'average_category_Budaya': 0.0,
                'average_category_Cagar Alam': 0.0,
                'average_category_Pusat Perbelanjaan': 0.0,
                'average_category_Taman Hiburan': 0.0,
                'number_of_ratings_Bahari': 0,
                'number_of_ratings_Budaya': 0,
                'number_of_ratings_Cagar Alam': 0,
                'number_of_ratings_Pusat Perbelanjaan': 0,
                'number_of_ratings_Taman Hiburan': 0,
                'Average_All_Ratings': 0.0
            }

        aggregated_data[IdUser]['IdUser'] = IdUser
        aggregated_data[IdUser]['average_category_' + category] += rating
        aggregated_data[IdUser]['number_of_ratings_' + category] += 1
        aggregated_data[IdUser]['Average_All_Ratings'] += rating

    # Calculate average ratings and overall average
    for IdUser, data in aggregated_data.items():
        for category in ['Bahari', 'Budaya', 'Cagar Alam', 'Pusat Perbelanjaan', 'Taman Hiburan']:
            count = data['number_of_ratings_' + category]
            if count > 0:
                data['average_category_' + category] /= count

        total_ratings = sum(data['number_of_ratings_' + category] for category in ['Bahari', 'Budaya', 'Cagar Alam', 'Pusat Perbelanjaan', 'Taman Hiburan'])
        data['Average_All_Ratings'] /= total_ratings

    # Convert aggregated data to DataFrame
    pivoted_data = pd.DataFrame.from_dict(aggregated_data, orient='index')

    # Reorder columns
    columns_order = ['IdUser', 'average_category_Bahari', 'average_category_Budaya', 'average_category_Cagar Alam',
                     'average_category_Pusat Perbelanjaan', 'average_category_Taman Hiburan',
                     'number_of_ratings_Bahari', 'number_of_ratings_Budaya', 'number_of_ratings_Cagar Alam',
                     'number_of_ratings_Pusat Perbelanjaan', 'number_of_ratings_Taman Hiburan', 'Average_All_Ratings']

    pivoted_data = pivoted_data[columns_order]

    # Display the resulting DataFrame
    return pivoted_data

In [38]:
user_vec = calculate_user_ratings(df_pred, destination_jogja=destination_jogja)
user_vec

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,number_of_ratings_Taman Hiburan,Average_All_Ratings
400,400,4.0,4.5,3.0,0.0,4.4,5,6,1,0,5,4.235294


In [39]:
user_vecs = np.tile(user_vec, (len(destination_fix), 1))
user_vecs

array([[400.        ,   4.        ,   4.5       , ...,   0.        ,
          5.        ,   4.23529412],
       [400.        ,   4.        ,   4.5       , ...,   0.        ,
          5.        ,   4.23529412],
       [400.        ,   4.        ,   4.5       , ...,   0.        ,
          5.        ,   4.23529412],
       ...,
       [400.        ,   4.        ,   4.5       , ...,   0.        ,
          5.        ,   4.23529412],
       [400.        ,   4.        ,   4.5       , ...,   0.        ,
          5.        ,   4.23529412],
       [400.        ,   4.        ,   4.5       , ...,   0.        ,
          5.        ,   4.23529412]])

In [40]:
user_vec = calculate_user_ratings(df_pred, destination_jogja)
user_vec

Unnamed: 0,IdUser,average_category_Bahari,average_category_Budaya,average_category_Cagar Alam,average_category_Pusat Perbelanjaan,average_category_Taman Hiburan,number_of_ratings_Bahari,number_of_ratings_Budaya,number_of_ratings_Cagar Alam,number_of_ratings_Pusat Perbelanjaan,number_of_ratings_Taman Hiburan,Average_All_Ratings
400,400,4.0,4.5,3.0,0.0,4.4,5,6,1,0,5,4.235294


In [41]:
item_vecs = destination_fix.drop('Place_Name', axis=1)
item_vecs

Unnamed: 0,IdWisata,Price,Rating,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan
84,85,6000,4.5,0,0,0,0,1
85,86,15000,4.6,0,1,0,0,0
86,87,20000,4.2,0,0,0,0,1
87,88,3000,4.6,0,1,0,0,0
88,89,50000,4.4,0,1,0,0,0
...,...,...,...,...,...,...,...,...
205,206,8000,4.4,0,0,1,0,0
206,207,15000,4.4,0,0,0,0,1
207,208,10000,4.6,0,0,1,0,0
208,209,10000,4.5,1,0,0,0,0


In [42]:
# scale our user and item vectors
suser_vecs = scalerUser.transform(user_vecs)
sitem_vecs = scalerItem.transform(item_vecs)

# make a prediction
y_p = model.predict([suser_vecs[:, 1:], sitem_vecs[:, 1:]])
# # unscale y prediction 
y_pu = scalerTarget.inverse_transform(y_p)
y_pu

# sort the results, highest prediction first
sorted_index = np.argsort(-y_pu,axis=0).reshape(-1).tolist()  #negate to get largest rating first
sorted_ypu   = y_pu[sorted_index]
sorted_index = [x+84 for x in sorted_index]
sorted_items = item_vecs.loc[sorted_index]  #using unscaled vectors for display
sorted_items
# print_pred_movies(sorted_ypu, sorted_items, movie_dict, maxcount = 10)





Unnamed: 0,IdWisata,Price,Rating,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan
127,128,0,4.5,0,0,0,0,1
203,204,0,4.4,0,0,0,0,1
150,151,0,4.4,0,0,0,0,1
90,91,0,4.4,0,0,0,0,1
107,108,0,4.4,0,0,0,0,1
...,...,...,...,...,...,...,...,...
136,137,20000,4.4,0,0,1,0,0
92,93,60000,4.5,0,0,1,0,0
168,169,5000,4.2,0,0,1,0,0
147,148,5000,4.3,0,0,1,0,0


In [43]:
def print_prediction(sorted_items, sorted_ypu, num_recommend, destination_jogja=destination_jogja):
    res = pd.merge(sorted_items[['IdWisata']], destination_jogja[["IdWisata", "Place_Name", "Category", "Rating"]], on="IdWisata")[:num_recommend]
    res["Predict_Rating"] = sorted_ypu[:num_recommend]
    return res

In [44]:
print_prediction(sorted_items, sorted_ypu, 40, destination_jogja=destination_jogja)

Unnamed: 0,IdWisata,Place_Name,Category,Rating,Predict_Rating
0,128,Gumuk Pasir Parangkusumo,Taman Hiburan,4.5,4.133144
1,204,Desa Wisata Pulesari,Taman Hiburan,4.4,4.131425
2,151,Desa Wisata Kelor,Taman Hiburan,4.4,4.131425
3,91,Situs Warungboto,Taman Hiburan,4.4,4.131425
4,108,Embung Tambakboyo,Taman Hiburan,4.4,4.131425
5,129,Bukit Lintang Sewu,Taman Hiburan,4.5,4.129424
6,116,Jurang Tembelan Kanigoro,Taman Hiburan,4.5,4.12848
7,111,Puncak Pinus Becici,Taman Hiburan,4.5,4.12753
8,109,Hutan Pinus Pengger,Taman Hiburan,4.5,4.12753
9,140,Bendung Lepen,Taman Hiburan,4.6,4.126689
