In [30]:
import numpy as np
import pandas as pd
import seaborn as sns

In [31]:
info_tourism = pd.read_csv('./data/tourism_with_id.csv')
tourism_rating = pd.read_csv('./data/tourism_rating.csv')
users = pd.read_csv('./data/user.csv')

# Preprocessing


## Info Tourism


In [32]:
info_tourism.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 437 entries, 0 to 436
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Place_Id      437 non-null    int64  
 1   Place_Name    437 non-null    object 
 2   Description   437 non-null    object 
 3   Category      437 non-null    object 
 4   City          437 non-null    object 
 5   Price         437 non-null    int64  
 6   Rating        437 non-null    float64
 7   Time_Minutes  205 non-null    float64
 8   Coordinate    437 non-null    object 
 9   Lat           437 non-null    float64
 10  Long          437 non-null    float64
 11  Unnamed: 11   0 non-null      float64
 12  Unnamed: 12   437 non-null    int64  
dtypes: float64(5), int64(3), object(5)
memory usage: 44.5+ KB


In [33]:
info_tourism.head(5)

Unnamed: 0,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Unnamed: 11,Unnamed: 12
0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,,1
1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,,2
2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,,3
3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,,4
4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,,5


In [34]:
info_tourism.columns

Index(['Place_Id', 'Place_Name', 'Description', 'Category', 'City', 'Price',
       'Rating', 'Time_Minutes', 'Coordinate', 'Lat', 'Long', 'Unnamed: 11',
       'Unnamed: 12'],
      dtype='object')

In [35]:
# Menghilangkan unnamed column
info_tourism = info_tourism.drop(['Unnamed: 11', 'Unnamed: 12'], axis=1)
info_tourism.columns

Index(['Place_Id', 'Place_Name', 'Description', 'Category', 'City', 'Price',
       'Rating', 'Time_Minutes', 'Coordinate', 'Lat', 'Long'],
      dtype='object')

In [36]:
# # Asumsi daerah wisata yang ramah lingkungan adalah bahari, cagar alam, dan budaya
info_tourism = info_tourism.loc[info_tourism['Category'].isin(['Bahari', 'Cagar Alam', 'Budaya'])]
info_tourism['Category'].unique()

array(['Budaya', 'Cagar Alam', 'Bahari'], dtype=object)

In [37]:
#info_tourism['Category'].unique()

### Missing Value

In [38]:
def check_missing_value(df):
  col_na = df.isnull().sum().sort_values(ascending=False)
  percent = col_na/len(df)

  missing_data = pd.concat([col_na, percent], axis=1, keys=['Total', 'Percent'])
  print(missing_data[missing_data['Total']>0])

In [39]:
check_missing_value(info_tourism)

              Total   Percent
Time_Minutes    147  0.544444


In [40]:
# Menghilangkan kolom time_minutes
info_tourism = info_tourism.drop(['Time_Minutes'], axis=1)
info_tourism.columns

Index(['Place_Id', 'Place_Name', 'Description', 'Category', 'City', 'Price',
       'Rating', 'Coordinate', 'Lat', 'Long'],
      dtype='object')

In [41]:
check_missing_value(info_tourism)

Empty DataFrame
Columns: [Total, Percent]
Index: []


### Duplicate Value

In [42]:
info_tourism.duplicated().sum()

0

## Tourism Rating

In [43]:
tourism_rating.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype
---  ------         --------------  -----
 0   User_Id        10000 non-null  int64
 1   Place_Id       10000 non-null  int64
 2   Place_Ratings  10000 non-null  int64
dtypes: int64(3)
memory usage: 234.5 KB


In [44]:
tourism_rating.head(5)

Unnamed: 0,User_Id,Place_Id,Place_Ratings
0,1,179,3
1,1,344,2
2,1,5,5
3,1,373,3
4,1,101,4


### Missing Value

In [45]:
check_missing_value(tourism_rating)

Empty DataFrame
Columns: [Total, Percent]
Index: []


### Duplicate Value

In [46]:
tourism_rating.duplicated(['User_Id', 'Place_Id']).sum()

403

In [47]:
tourism_rating.drop_duplicates(['User_Id', 'Place_Id'], keep='last', inplace=True, ignore_index=True)
tourism_rating.duplicated().sum()

0

In [None]:
tourism.head(10)

: 

## Users

In [48]:
users.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   User_Id   300 non-null    int64 
 1   Location  300 non-null    object
 2   Age       300 non-null    int64 
dtypes: int64(2), object(1)
memory usage: 7.2+ KB


In [49]:
users.head(5)

Unnamed: 0,User_Id,Location,Age
0,1,"Semarang, Jawa Tengah",20
1,2,"Bekasi, Jawa Barat",21
2,3,"Cirebon, Jawa Barat",23
3,4,"Bekasi, Jawa Barat",21
4,5,"Lampung, Sumatera Selatan",20


### Missing Value


In [50]:
check_missing_value(users)

Empty DataFrame
Columns: [Total, Percent]
Index: []


### Duplicate Value

In [51]:
users.duplicated().sum()

0

# Collaborative Modelling

In [52]:
import tensorflow as tf
from tensorflow import keras

## Membuat matrix R dan Y

In [63]:
jumlah_wisata = len(info_tourism)
jumlah_user = len(users)
R = np.zeros((jumlah_wisata, jumlah_user))
Y = np.zeros((jumlah_wisata, jumlah_user))

In [64]:
for i in range(1, jumlah_wisata + 1):
  for j in range(1, jumlah_user + 1):
    temp = tourism_rating.loc[(tourism_rating['Place_Id'] == i) & (tourism_rating['User_Id'] == j)]
    if not temp.empty:
      R[i-1, j-1] = 1
      Y[i-1, j-1] = temp['Place_Ratings']

  Y[i-1, j-1] = temp['Place_Ratings']


In [65]:
R

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [None]:
Y

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 5., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 2., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

## Learning Wisata

In [55]:
my_ratings = np.zeros(jumlah_wisata)

my_ratings[0] = 5 # Monumen Nasional
my_ratings[64] = 4 # Museum Basoeki Abdullah
my_ratings[67] = 1 # Pasar Petak Sembilan
my_ratings[82] = 1 # Alive Museum Ancol
my_ratings[88] = 5 # De Mata Museum Jogja
my_rated = [i for i in range(len(my_ratings)) if my_ratings[i] > 0]

print('\nNew user ratings:\n')

info_tourism = info_tourism.reset_index(drop=True)
for i in range(len(my_ratings)):
    if my_ratings[i] > 0:
        print(f'Rated {my_ratings[i]} for {info_tourism.loc[i, "Place_Name"]} dengan kategori {info_tourism.loc[i, "Category"]}')


New user ratings:

Rated 5.0 for Monumen Nasional dengan kategori Budaya
Rated 4.0 for Kauman Pakualaman Yogyakarta dengan kategori Budaya
Rated 1.0 for Watu Lumbung dengan kategori Cagar Alam
Rated 1.0 for Pantai Ngrawe (Mesra) dengan kategori Bahari
Rated 5.0 for Bukit Paralayang, Watugupit dengan kategori Cagar Alam


In [56]:
Y = np.c_[my_ratings, Y]
R = np.c_[(my_ratings != 0).astype(float), R]

In [57]:
def cofi_cost_func_v(X, W, b, Y, R, lambda_):
    j = (tf.linalg.matmul(X, tf.transpose(W)) + b - Y)*R
    J = 0.5 * tf.reduce_sum(j**2) + (lambda_/2) * (tf.reduce_sum(X**2) + tf.reduce_sum(W**2))
    return J

In [58]:
#  Useful Values
jumlah_wisata, jumlah_user = Y.shape
jumlah_fitur = 100

# Set Initial Parameters (W, X), use tf.Variable to track these variables
tf.random.set_seed(1234) # for consistent results
W = tf.Variable(tf.random.normal((jumlah_user,  jumlah_fitur),dtype=tf.float64),  name='W')
X = tf.Variable(tf.random.normal((jumlah_wisata, jumlah_fitur),dtype=tf.float64),  name='X')
b = tf.Variable(tf.random.normal((1, jumlah_user),   dtype=tf.float64),  name='b')

# Instantiate an optimizer.
optimizer = keras.optimizers.Adam(learning_rate=1e-1)

In [59]:
iterations = 1000
lambda_ = 1
for iter in range(iterations):
    # Use TensorFlow’s GradientTape
    # to record the operations used to compute the cost 
    with tf.GradientTape() as tape:

        # Compute the cost (forward pass included in cost)
        cost_value = cofi_cost_func_v(X, W, b, Y, R, lambda_)

    # Use the gradient tape to automatically retrieve
    # the gradients of the trainable variables with respect to the loss
    grads = tape.gradient( cost_value, [X,W,b] )

    # Run one step of gradient descent by updating
    # the value of the variables to minimize the loss.
    optimizer.apply_gradients( zip(grads, [X,W,b]) )

    # Log periodically.
    if iter % 20 == 0:
        print(f"Training loss at iteration {iter}: {cost_value:0.1f}")

Training loss at iteration 0: 141464.2
Training loss at iteration 20: 12669.4
Training loss at iteration 40: 5042.6
Training loss at iteration 60: 2703.0
Training loss at iteration 80: 1693.3
Training loss at iteration 100: 1189.1
Training loss at iteration 120: 913.5
Training loss at iteration 140: 753.2
Training loss at iteration 160: 653.8
Training loss at iteration 180: 587.3
Training loss at iteration 200: 539.4
Training loss at iteration 220: 503.1
Training loss at iteration 240: 475.1
Training loss at iteration 260: 453.7
Training loss at iteration 280: 437.8
Training loss at iteration 300: 426.2
Training loss at iteration 320: 417.9
Training loss at iteration 340: 412.0
Training loss at iteration 360: 407.7
Training loss at iteration 380: 404.7
Training loss at iteration 400: 402.5
Training loss at iteration 420: 400.9
Training loss at iteration 440: 399.7
Training loss at iteration 460: 398.8
Training loss at iteration 480: 398.1
Training loss at iteration 500: 397.5
Training 

In [60]:
# Make a prediction using trained weights and biases
p = np.matmul(X.numpy(), np.transpose(W.numpy())) + b.numpy()

#restore the mean
pm = p

my_predictions = pm[:,0]

# sort predictions
ix = tf.argsort(my_predictions, direction='DESCENDING')

for i in range(20):
    j = ix[i]
    if j not in my_rated:
        print(f'Predicting rating {my_predictions[j]:0.2f} for wisata {info_tourism.loc[j.numpy(), "Place_Name"]} dengan kategori {info_tourism.loc[j.numpy(), "Category"]}')

print('\n\nOriginal vs Predicted ratings:\n')
for i in range(len(my_ratings)):
    if my_ratings[i] > 0:
        print(f'Original {my_ratings[i]}, Predicted {my_predictions[i]:0.2f} for {info_tourism.loc[i, "Place_Name"]} dengan kategori {info_tourism.loc[i, "Category"]}')

Predicting rating 4.23 for wisata Gedung Agung Yogyakarta dengan kategori Budaya
Predicting rating 4.16 for wisata Museum Tekstil dengan kategori Budaya
Predicting rating 4.15 for wisata Museum Tengah Kebun dengan kategori Budaya
Predicting rating 4.13 for wisata Museum Fatahillah dengan kategori Budaya
Predicting rating 4.13 for wisata Istana Negara Republik Indonesia dengan kategori Budaya
Predicting rating 4.11 for wisata Museum Macan (Modern and Contemporary Art in Nusantara) dengan kategori Budaya
Predicting rating 4.11 for wisata Museum Bahari Jakarta dengan kategori Budaya
Predicting rating 4.05 for wisata Tebing Breksi dengan kategori Budaya
Predicting rating 4.02 for wisata Pintoe Langit Dahromo dengan kategori Cagar Alam
Predicting rating 4.01 for wisata Pulau Pramuka dengan kategori Bahari
Predicting rating 3.93 for wisata Bukit Wisata Pulepayung dengan kategori Cagar Alam
Predicting rating 3.84 for wisata Monumen Sanapati dengan kategori Budaya
Predicting rating 3.83 for wi

# Content Based Modelling

In [69]:
import sklearn
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

## Membuat dataset users

In [70]:
users_dataset = users.drop(['Location', 'Age'], axis=1)
users_dataset['avg_budaya'] = 0
users_dataset['avg_taman_hiburan'] = 0
users_dataset['avg_cagar_alam'] = 0
users_dataset['avg_bahari'] = 0
users_dataset['avg_pusat_perbelanjaan'] = 0
users_dataset['avg_tempat_ibadah'] = 0

users_dataset

Unnamed: 0,User_Id,avg_budaya,avg_taman_hiburan,avg_cagar_alam,avg_bahari,avg_pusat_perbelanjaan,avg_tempat_ibadah
0,1,0,0,0,0,0,0
1,2,0,0,0,0,0,0
2,3,0,0,0,0,0,0
3,4,0,0,0,0,0,0
4,5,0,0,0,0,0,0
...,...,...,...,...,...,...,...
295,296,0,0,0,0,0,0
296,297,0,0,0,0,0,0
297,298,0,0,0,0,0,0
298,299,0,0,0,0,0,0


In [None]:
def get_avg_rating_on_category():
  df_cat = info_tourism[['Place_Id', 'Category']].reset_index(drop=True)
  df_users_cat = tourism_rating.merge(df_cat)
  avg = df_users_cat.groupby(["User_Id", "Category"])['Place_Ratings'].mean().reset_index()
  return avg

avg_dataset = get_avg_rating_on_category()
avg_dataset

Unnamed: 0,User_Id,Category,Place_Ratings
0,1,Bahari,2.000000
1,1,Budaya,3.500000
2,1,Cagar Alam,3.625000
3,1,Pusat Perbelanjaan,3.000000
4,1,Taman Hiburan,3.800000
...,...,...,...
1625,300,Bahari,3.000000
1626,300,Budaya,3.500000
1627,300,Cagar Alam,2.800000
1628,300,Taman Hiburan,3.285714


In [None]:
for index, row in avg_dataset.iterrows():
  if row['Category'] == 'Budaya':
    users_dataset.at[row['User_Id']-1, 'avg_budaya'] = row['Place_Ratings']
  elif row['Category'] == 'Taman Hiburan':
    users_dataset.at[row['User_Id']-1, 'avg_taman_hiburan'] = row['Place_Ratings']
  elif row['Category'] == 'Cagar Alam':
    users_dataset.at[row['User_Id']-1, 'avg_cagar_alam'] = row['Place_Ratings']
  elif row['Category'] == 'Bahari':
    users_dataset.at[row['User_Id']-1, 'avg_bahari'] = row['Place_Ratings']
  elif row['Category'] == 'Pusat Perbelanjaan':
    users_dataset.at[row['User_Id']-1, 'avg_pusat_perbelanjaan'] = row['Place_Ratings']
  elif row['Category'] == 'Tempat Ibadah':
    users_dataset.at[row['User_Id']-1, 'avg_tempat_ibadah'] = row['Place_Ratings']

users_dataset

Unnamed: 0,User_Id,avg_budaya,avg_taman_hiburan,avg_cagar_alam,avg_bahari,avg_pusat_perbelanjaan,avg_tempat_ibadah
0,1,3.500000,3.800000,3.625000,2.000000,3.0,2.0
1,2,3.444444,3.500000,2.555556,2.000000,0.0,3.5
2,3,3.230769,2.900000,4.000000,4.000000,0.0,5.0
3,4,3.083333,3.500000,3.750000,5.000000,4.0,0.0
4,5,3.285714,3.500000,3.000000,3.500000,5.0,5.0
...,...,...,...,...,...,...,...
295,296,3.000000,3.000000,2.800000,3.000000,2.0,2.5
296,297,2.818182,4.000000,3.142857,3.333333,1.0,2.5
297,298,3.833333,4.222222,3.428571,2.750000,5.0,2.0
298,299,3.250000,2.714286,2.250000,2.000000,2.5,4.0


In [None]:
users_dataset = users_dataset.merge(tourism_rating)
users_dataset = users_dataset.sort_values(['User_Id', 'Place_Id'])
users_dataset

Unnamed: 0,User_Id,avg_budaya,avg_taman_hiburan,avg_cagar_alam,avg_bahari,avg_pusat_perbelanjaan,avg_tempat_ibadah,Place_Id,Place_Ratings
2,1,3.5,3.800000,3.625,2.0,3.0,2.0,5,5
21,1,3.5,3.800000,3.625,2.0,3.0,2.0,15,3
7,1,3.5,3.800000,3.625,2.0,3.0,2.0,20,4
24,1,3.5,3.800000,3.625,2.0,3.0,2.0,21,2
14,1,3.5,3.800000,3.625,2.0,3.0,2.0,41,5
...,...,...,...,...,...,...,...,...,...
9588,300,3.5,3.285714,2.800,3.0,0.0,4.0,363,1
9573,300,3.5,3.285714,2.800,3.0,0.0,4.0,397,2
9589,300,3.5,3.285714,2.800,3.0,0.0,4.0,416,4
9592,300,3.5,3.285714,2.800,3.0,0.0,4.0,425,2


## Membuat dataset target

In [None]:
y_train = users_dataset['Place_Ratings'].to_numpy()
y_train

array([5, 3, 4, ..., 4, 2, 4])

## Membuat dataset wisata

In [None]:
tourism_dataset = info_tourism.drop(['Place_Name', 'Description', 'Coordinate'], axis=1)
tourism_dataset

Unnamed: 0,Place_Id,Category,City,Price,Rating,Lat,Long
0,1,Budaya,Jakarta,20000,4.6,-6.175392,106.827153
1,2,Budaya,Jakarta,0,4.6,-6.137645,106.817125
2,3,Taman Hiburan,Jakarta,270000,4.6,-6.125312,106.833538
3,4,Taman Hiburan,Jakarta,10000,4.5,-6.302446,106.895156
4,5,Taman Hiburan,Jakarta,94000,4.5,-6.124190,106.839134
...,...,...,...,...,...,...,...
432,433,Budaya,Surabaya,2000,4.4,-7.433859,112.719906
433,434,Taman Hiburan,Surabaya,0,4.6,-7.291347,112.739822
434,435,Taman Hiburan,Surabaya,0,4.4,-7.275296,112.754938
435,436,Taman Hiburan,Surabaya,0,4.6,-7.294330,112.761753


In [None]:
tourism_dataset = pd.get_dummies(tourism_dataset)
tourism_dataset_for_pred = tourism_dataset

In [None]:
tourism_dataset = tourism_dataset.merge(tourism_rating)
tourism_dataset = tourism_dataset.sort_values(['User_Id', 'Place_Id'])
tourism_dataset

Unnamed: 0,Place_Id,Price,Rating,Lat,Long,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan,Category_Tempat Ibadah,City_Bandung,City_Jakarta,City_Semarang,City_Surabaya,City_Yogyakarta,User_Id,Place_Ratings
81,5,94000,4.5,-6.124190,106.839134,0,0,0,0,1,0,0,1,0,0,0,1,5
289,15,0,4.4,-6.176687,106.841767,0,0,0,1,0,0,0,1,0,0,0,1,3
398,20,2000,4.5,-6.172224,106.818969,0,1,0,0,0,0,0,1,0,0,0,1,4
417,21,5000,4.5,-6.134907,106.812445,0,1,0,0,0,0,0,1,0,0,0,1,2
872,41,2000,4.4,-6.126955,106.808590,0,1,0,0,0,0,0,1,0,0,0,1,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7964,363,5000,3.4,-6.954569,110.360095,1,0,0,0,0,0,0,0,1,0,0,300,1
8707,397,0,4.4,-7.318220,112.784240,0,0,0,0,1,0,0,0,0,1,0,300,2
9123,416,0,4.4,-7.256755,112.794220,0,1,0,0,0,0,0,0,0,1,0,300,4
9321,425,35000,4.1,-7.247796,112.799824,0,0,0,0,1,0,0,0,0,1,0,300,2


## Menyiapkan training data

In [None]:
tourism_train_unscaled = tourism_dataset
user_train_unscaled = users_dataset
y_train_unscaled = y_train

scalerTourism = StandardScaler()
scalerTourism.fit(tourism_dataset)
tourism_train = scalerTourism.transform(tourism_dataset)

scalerUser = StandardScaler()
scalerUser.fit(users_dataset)
user_train = scalerUser.transform(users_dataset)

scalerTarget = MinMaxScaler((-1, 1))
scalerTarget.fit(y_train.reshape(-1, 1))
y_train = scalerTarget.transform(y_train.reshape(-1, 1))

print(np.allclose(tourism_train_unscaled, scalerTourism.inverse_transform(tourism_train)))
print(np.allclose(user_train_unscaled, scalerUser.inverse_transform(user_train)))

True
True


In [None]:
tourism_train, tourism_test = train_test_split(tourism_train, train_size=0.80, shuffle=True, random_state=1)
user_train, user_test = train_test_split(user_train, train_size=0.80, shuffle=True, random_state=1)
y_train, y_test       = train_test_split(y_train,    train_size=0.80, shuffle=True, random_state=1)
print(f"tourism training data shape: {tourism_train.shape}")
print(f"tourism test data shape: {tourism_test.shape}")

tourism training data shape: (7677, 18)
tourism test data shape: (1920, 18)


## Membuat NN

In [None]:
num_user_features = 6
num_tourism_features = 15
num_outputs = 32
tf.random.set_seed(1)
user_NN = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_outputs),
])

tourism_NN = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_outputs),
])

# create the user input and point to the base network
input_user = tf.keras.layers.Input(shape=(num_user_features))
vu = user_NN(input_user)
vu = tf.linalg.l2_normalize(vu, axis=1)

# create the tourism input and point to the base network
input_tourism = tf.keras.layers.Input(shape=(num_tourism_features))
vm = tourism_NN(input_tourism)
vm = tf.linalg.l2_normalize(vm, axis=1)

# compute the dot product of the two vectors vu and vm
output = tf.keras.layers.Dot(axes=1)([vu, vm])

# specify the inputs and output of the model
model = tf.keras.Model([input_user, input_tourism], output)

model.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_5 (InputLayer)           [(None, 6)]          0           []                               
                                                                                                  
 input_6 (InputLayer)           [(None, 15)]         0           []                               
                                                                                                  
 sequential_4 (Sequential)      (None, 32)           38816       ['input_5[0][0]']                
                                                                                                  
 sequential_5 (Sequential)      (None, 32)           41120       ['input_6[0][0]']                
                                                                                            

In [None]:
tf.random.set_seed(1)
cost_fn = tf.keras.losses.MeanSquaredError()
opt = keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=opt,
              loss=cost_fn)

In [None]:
tf.random.set_seed(1)
model.fit([user_train[:, 1:-2], tourism_train[:, 1:-2]], y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f234cb86950>

In [None]:
model.evaluate([user_test[:, 1:-2], tourism_test[:, 1:-2]], y_test)



0.4560774862766266

In [None]:
new_user_id = 301
new_avg_budaya = 4.9
new_avg_taman_hiburan = 1.0
new_avg_cagar_alam = 5.0
new_avg_bahari = 4.7
new_avg_pusat_perbelanjaan = 0
new_avg_tempat_ibadah = 0

user_vec = np.array([[new_user_id, new_avg_budaya, new_avg_taman_hiburan,
                      new_avg_cagar_alam, new_avg_bahari, new_avg_pusat_perbelanjaan,
                      new_avg_tempat_ibadah, 0, 0]])

In [None]:
user_vecs = np.tile(user_vec, (len(tourism_dataset_for_pred), 1))
user_vecs

array([[301. ,   4.9,   1. , ...,   0. ,   0. ,   0. ],
       [301. ,   4.9,   1. , ...,   0. ,   0. ,   0. ],
       [301. ,   4.9,   1. , ...,   0. ,   0. ,   0. ],
       ...,
       [301. ,   4.9,   1. , ...,   0. ,   0. ,   0. ],
       [301. ,   4.9,   1. , ...,   0. ,   0. ,   0. ],
       [301. ,   4.9,   1. , ...,   0. ,   0. ,   0. ]])

In [None]:
tourism_dataset_for_pred['User_Id'] = 0
tourism_dataset_for_pred['Place_Ratings'] = 0
tourism_dataset_for_pred

Unnamed: 0,Place_Id,Price,Rating,Lat,Long,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan,Category_Tempat Ibadah,City_Bandung,City_Jakarta,City_Semarang,City_Surabaya,City_Yogyakarta,User_Id,Place_Ratings
0,1,20000,4.6,-6.175392,106.827153,0,1,0,0,0,0,0,1,0,0,0,0,0
1,2,0,4.6,-6.137645,106.817125,0,1,0,0,0,0,0,1,0,0,0,0,0
2,3,270000,4.6,-6.125312,106.833538,0,0,0,0,1,0,0,1,0,0,0,0,0
3,4,10000,4.5,-6.302446,106.895156,0,0,0,0,1,0,0,1,0,0,0,0,0
4,5,94000,4.5,-6.124190,106.839134,0,0,0,0,1,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
432,433,2000,4.4,-7.433859,112.719906,0,1,0,0,0,0,0,0,0,1,0,0,0
433,434,0,4.6,-7.291347,112.739822,0,0,0,0,1,0,0,0,0,1,0,0,0
434,435,0,4.4,-7.275296,112.754938,0,0,0,0,1,0,0,0,0,1,0,0,0
435,436,0,4.6,-7.294330,112.761753,0,0,0,0,1,0,0,0,0,1,0,0,0


In [None]:
suser_vecs = scalerUser.transform(user_vecs)
stourism_vecs = scalerTourism.transform(tourism_dataset_for_pred)

# make a prediction
y_p = model.predict([suser_vecs[:, 1:-2], stourism_vecs[:, 1:-2]])

# unscale y prediction 
y_pu = scalerTarget.inverse_transform(y_p)

# sort the results, highest prediction first
sorted_index = np.argsort(-y_pu,axis=0).reshape(-1).tolist()  #negate to get largest rating first
sorted_ypu   = y_pu[sorted_index]
sorted_items = tourism_dataset_for_pred.loc[sorted_index]  #using unscaled vectors for display

display(sorted_items[:10])





Unnamed: 0,Place_Id,Price,Rating,Lat,Long,Category_Bahari,Category_Budaya,Category_Cagar Alam,Category_Pusat Perbelanjaan,Category_Taman Hiburan,Category_Tempat Ibadah,City_Bandung,City_Jakarta,City_Semarang,City_Surabaya,City_Yogyakarta,User_Id,Place_Ratings
55,56,25000,4.4,-6.119421,106.850244,1,0,0,0,0,0,0,1,0,0,0,0,0
161,162,220000,4.5,-7.810553,110.370499,0,1,0,0,0,0,0,0,0,0,1,0,0
49,50,2000,4.0,-6.186184,106.836476,0,1,0,0,0,0,0,1,0,0,0,0,0
119,120,250000,4.5,-7.958369,110.440762,0,1,0,0,0,0,0,0,0,0,1,0,0
124,125,0,4.6,-7.803897,110.364423,0,1,0,0,0,0,0,0,0,0,1,0,0
112,113,0,4.6,-7.800151,110.363751,0,1,0,0,0,0,0,0,0,0,1,0,0
10,11,5000,4.6,-6.035833,106.746944,1,0,0,0,0,0,0,1,0,0,0,0,0
87,88,3000,4.6,-7.800202,110.366304,0,1,0,0,0,0,0,0,0,0,1,0,0
99,100,0,4.5,-7.800104,110.367658,0,1,0,0,0,0,0,0,0,0,1,0,0
146,147,0,4.5,-7.785724,110.374695,0,1,0,0,0,0,0,0,0,0,1,0,0
