In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Dense, Concatenate
from tensorflow.keras.models import Model
from scipy.spatial.distance import cosine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

In [None]:
# Attach Google Drive to the Colab runtime so the dataset file can be read
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/gdrive


In [None]:
# Load the merged dataset from the mounted Drive.
# The path is absolute, rooted at the '/content/gdrive' mount point passed to
# drive.mount, so this cell no longer depends on the kernel's current working
# directory happening to be '/content'.
data = pd.read_csv('/content/gdrive/My Drive/Capstone Project/ML Code/jgo_data_merge.csv')
data.head()

Unnamed: 0,place_id,place_name,description_english,description_indonesia,category,price,latitude,longitude,user_id,user_rate,user_location,user_age
0,1,Taman Pintar Yogyakarta,Taman Pintar Yogyakarta is a tourist attractio...,Taman Pintar Yogyakarta adalah wahana wisata y...,Taman Hiburan,6000,-7.800671,110.367655,93,5.0,"Lampung, Sumatera Selatan",18.0
1,1,Taman Pintar Yogyakarta,Taman Pintar Yogyakarta is a tourist attractio...,Taman Pintar Yogyakarta adalah wahana wisata y...,Taman Hiburan,6000,-7.800671,110.367655,282,1.0,"Yogyakarta, DIY",39.0
2,1,Taman Pintar Yogyakarta,Taman Pintar Yogyakarta is a tourist attractio...,Taman Pintar Yogyakarta adalah wahana wisata y...,Taman Hiburan,6000,-7.800671,110.367655,276,1.0,"Lampung, Sumatera Selatan",39.0
3,1,Taman Pintar Yogyakarta,Taman Pintar Yogyakarta is a tourist attractio...,Taman Pintar Yogyakarta adalah wahana wisata y...,Taman Hiburan,6000,-7.800671,110.367655,269,1.0,"Cirebon, Jawa Barat",36.0
4,1,Taman Pintar Yogyakarta,Taman Pintar Yogyakarta is a tourist attractio...,Taman Pintar Yogyakarta adalah wahana wisata y...,Taman Hiburan,6000,-7.800671,110.367655,262,1.0,"Bogor, Jawa Barat",38.0


In [None]:
# Keep only user_id, place_name and user_rate. Repeated (user, place) pairs are
# collapsed into their mean rating so the pivot below sees unique pairs.
rating_keys = ['user_id', 'place_name']
data = data.groupby(rating_keys, as_index=False)['user_rate'].mean()

# Reshape long -> wide: one row per user, one column per place
data_wide = data.pivot(index='user_id', columns='place_name', values='user_rate')
data_wide.head()

place_name,ARTJOG MMXIX,Affandi Museum,Agro Tourism Bhumi Merapi,Air Terjun Banyu Nibo,Air Terjun Kedung Manglu,Air Terjun Kedung Pedut,Air Terjun Sindet,Air Terjun Sri Gethuk,Aisha Tour Planner & Transport Service,Alun Alun Selatan Yogyakarta,...,Wisata Jaga Bendung,Wisata Kalibiru,Wisata Kaliurang,Wisata Kraton Jogja,Wisata Pangol Hill,Wisata Taman Kelinci Borobudur,Wisata Telaga Potorono,Wisata Watu Amben,XT Square,Yogyakarta Night Tours - Meeting Point Klasik
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,


In [None]:
# A missing (user, place) rating is represented as 0 from here on
data_wide = data_wide.fillna(0)
data_wide.head()

place_name,ARTJOG MMXIX,Affandi Museum,Agro Tourism Bhumi Merapi,Air Terjun Banyu Nibo,Air Terjun Kedung Manglu,Air Terjun Kedung Pedut,Air Terjun Sindet,Air Terjun Sri Gethuk,Aisha Tour Planner & Transport Service,Alun Alun Selatan Yogyakarta,...,Wisata Jaga Bendung,Wisata Kalibiru,Wisata Kaliurang,Wisata Kraton Jogja,Wisata Pangol Hill,Wisata Taman Kelinci Borobudur,Wisata Telaga Potorono,Wisata Watu Amben,XT Square,Yogyakarta Night Tours - Meeting Point Klasik
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Place-only copy of the rating matrix: user_id is discarded and the rows are
# renumbered 0..n-1, leaving one column per place.
data_tourismbased = data_wide.reset_index(drop=True)
data_tourismbased.head()

place_name,ARTJOG MMXIX,Affandi Museum,Agro Tourism Bhumi Merapi,Air Terjun Banyu Nibo,Air Terjun Kedung Manglu,Air Terjun Kedung Pedut,Air Terjun Sindet,Air Terjun Sri Gethuk,Aisha Tour Planner & Transport Service,Alun Alun Selatan Yogyakarta,...,Wisata Jaga Bendung,Wisata Kalibiru,Wisata Kaliurang,Wisata Kraton Jogja,Wisata Pangol Hill,Wisata Taman Kelinci Borobudur,Wisata Telaga Potorono,Wisata Watu Amben,XT Square,Yogyakarta Night Tours - Meeting Point Klasik
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Empty place-by-place frame (rows and columns are both the place names);
# it is the container for the pairwise similarity values.
place_labels = data_tourismbased.columns
tourismbased = pd.DataFrame(index=place_labels, columns=place_labels)
tourismbased.head()

place_name,ARTJOG MMXIX,Affandi Museum,Agro Tourism Bhumi Merapi,Air Terjun Banyu Nibo,Air Terjun Kedung Manglu,Air Terjun Kedung Pedut,Air Terjun Sindet,Air Terjun Sri Gethuk,Aisha Tour Planner & Transport Service,Alun Alun Selatan Yogyakarta,...,Wisata Jaga Bendung,Wisata Kalibiru,Wisata Kaliurang,Wisata Kraton Jogja,Wisata Pangol Hill,Wisata Taman Kelinci Borobudur,Wisata Telaga Potorono,Wisata Watu Amben,XT Square,Yogyakarta Night Tours - Meeting Point Klasik
place_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ARTJOG MMXIX,,,,,,,,,,,...,,,,,,,,,,
Affandi Museum,,,,,,,,,,,...,,,,,,,,,,
Agro Tourism Bhumi Merapi,,,,,,,,,,,...,,,,,,,,,,
Air Terjun Banyu Nibo,,,,,,,,,,,...,,,,,,,,,,
Air Terjun Kedung Manglu,,,,,,,,,,,...,,,,,,,,,,


In [None]:
# Fill the place-by-place frame with cosine similarities of the rating columns
place_total = len(tourismbased.columns)
for a_pos in range(place_total):
    # Rating column of the first place; fixed for the whole inner loop
    ratings_a = data_tourismbased.iloc[:, a_pos]
    for b_pos in range(place_total):
        ratings_b = data_tourismbased.iloc[:, b_pos]
        # scipy's `cosine` is a distance, so similarity = 1 - distance
        tourismbased.iloc[a_pos, b_pos] = 1 - cosine(ratings_a, ratings_b)

In [None]:
# Preview the first rows of the place-by-place similarity matrix
tourismbased.head()

place_name,ARTJOG MMXIX,Affandi Museum,Agro Tourism Bhumi Merapi,Air Terjun Banyu Nibo,Air Terjun Kedung Manglu,Air Terjun Kedung Pedut,Air Terjun Sindet,Air Terjun Sri Gethuk,Aisha Tour Planner & Transport Service,Alun Alun Selatan Yogyakarta,...,Wisata Jaga Bendung,Wisata Kalibiru,Wisata Kaliurang,Wisata Kraton Jogja,Wisata Pangol Hill,Wisata Taman Kelinci Borobudur,Wisata Telaga Potorono,Wisata Watu Amben,XT Square,Yogyakarta Night Tours - Meeting Point Klasik
place_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ARTJOG MMXIX,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Affandi Museum,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Agro Tourism Bhumi Merapi,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Air Terjun Banyu Nibo,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Air Terjun Kedung Manglu,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# For every place, record the names of its ten highest-similarity places
# (columns are labelled 1..10, best first).
neighbour_ranks = range(1, 11)
data_neighbours = pd.DataFrame(index=tourismbased.columns, columns=neighbour_ranks)

# Sort each similarity column in descending order and keep the first ten labels
for place_pos in range(len(tourismbased.columns)):
    ranked_places = tourismbased.iloc[:, place_pos].sort_values(ascending=False)
    data_neighbours.iloc[place_pos, :10] = ranked_places.index[:10]

data_neighbours

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10
place_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ARTJOG MMXIX,ARTJOG MMXIX,Pantai Ngrawah,Pantai Samas,Pantai Sadranan,Pantai Pulang Sawal,Pantai Pok Tunggal,Pantai Patihan,Pantai Pasir Puncu,Pantai Pasir Mendit,Pantai Parangtritis
Affandi Museum,Affandi Museum,Pantai Ngrawah,Pantai Samas,Pantai Sadranan,Pantai Pulang Sawal,Pantai Pok Tunggal,Pantai Patihan,Pantai Pasir Puncu,Pantai Pasir Mendit,Pantai Parangtritis
Agro Tourism Bhumi Merapi,Agro Tourism Bhumi Merapi,ARTJOG MMXIX,Pantai Pasir Mendit,Pantai Ngrenehan,Pantai Ngrumput,Pantai Nguluran,Pantai Parangkusumo,Pantai Parangracuk,Pantai Parangtritis,Pantai Pasir Puncu
Air Terjun Banyu Nibo,Air Terjun Banyu Nibo,ARTJOG MMXIX,Pantai Pasir Mendit,Pantai Ngrenehan,Pantai Ngrumput,Pantai Nguluran,Pantai Parangkusumo,Pantai Parangracuk,Pantai Parangtritis,Pantai Pasir Puncu
Air Terjun Kedung Manglu,Air Terjun Kedung Manglu,ARTJOG MMXIX,Pantai Pasir Mendit,Pantai Ngrenehan,Pantai Ngrumput,Pantai Nguluran,Pantai Parangkusumo,Pantai Parangracuk,Pantai Parangtritis,Pantai Pasir Puncu
...,...,...,...,...,...,...,...,...,...,...
Wisata Taman Kelinci Borobudur,Wisata Taman Kelinci Borobudur,ARTJOG MMXIX,Pantai Pasir Mendit,Pantai Ngrenehan,Pantai Ngrumput,Pantai Nguluran,Pantai Parangkusumo,Pantai Parangracuk,Pantai Parangtritis,Pantai Pasir Puncu
Wisata Telaga Potorono,Wisata Telaga Potorono,ARTJOG MMXIX,Pantai Pasir Mendit,Pantai Ngrenehan,Pantai Ngrumput,Pantai Nguluran,Pantai Parangkusumo,Pantai Parangracuk,Pantai Parangtritis,Pantai Pasir Puncu
Wisata Watu Amben,Wisata Watu Amben,ARTJOG MMXIX,Pantai Pasir Mendit,Pantai Ngrenehan,Pantai Ngrumput,Pantai Nguluran,Pantai Parangkusumo,Pantai Parangracuk,Pantai Parangtritis,Pantai Pasir Puncu
XT Square,XT Square,ARTJOG MMXIX,Pantai Pasir Mendit,Pantai Ngrenehan,Pantai Ngrumput,Pantai Nguluran,Pantai Parangkusumo,Pantai Parangracuk,Pantai Parangtritis,Pantai Pasir Puncu


In [None]:
# Helper function to get similarity scores
def getScore(history, similarities):
    """Return the similarity-weighted average of `history`.

    Computes sum(history * similarities) / sum(similarities).

    Parameters
    ----------
    history : array-like supporting element-wise `*` (e.g. pandas Series, ndarray)
        The values to average (here: a user's ratings of the neighbour places).
    similarities : array-like of the same kind
        The weights (here: similarities of those neighbour places).

    Returns
    -------
    float
        The weighted average, or NaN when the weights sum to zero (including
        empty input), because the average is undefined in that case.
    """
    weight_total = sum(similarities)
    # Guard the 0/0 case: it used to emit a RuntimeWarning for NumPy scalars
    # and raise ZeroDivisionError for empty input / plain Python numbers.
    if weight_total == 0:
        return np.nan
    return sum(history * similarities) / weight_total

# Move user_id out of the index into a regular first column; the rows get a
# fresh 0-based integer index. data_wide itself is left unchanged.
data_userbased1 = data_wide.reset_index()
data_userbased1.head()

place_name,user_id,ARTJOG MMXIX,Affandi Museum,Agro Tourism Bhumi Merapi,Air Terjun Banyu Nibo,Air Terjun Kedung Manglu,Air Terjun Kedung Pedut,Air Terjun Sindet,Air Terjun Sri Gethuk,Aisha Tour Planner & Transport Service,...,Wisata Jaga Bendung,Wisata Kalibiru,Wisata Kaliurang,Wisata Kraton Jogja,Wisata Pangol Hill,Wisata Taman Kelinci Borobudur,Wisata Telaga Potorono,Wisata Watu Amben,XT Square,Yogyakarta Night Tours - Meeting Point Klasik
0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Create a placeholder frame with the same index and columns as data_userbased1;
# every cell starts out empty (NaN).
userbased = pd.DataFrame(index=data_userbased1.index,columns=data_userbased1.columns)
# Copy the first column (user_id) across so each row stays tied to its user
userbased.iloc[:,:1] = data_userbased1.iloc[:,:1]
userbased.head()

place_name,user_id,ARTJOG MMXIX,Affandi Museum,Agro Tourism Bhumi Merapi,Air Terjun Banyu Nibo,Air Terjun Kedung Manglu,Air Terjun Kedung Pedut,Air Terjun Sindet,Air Terjun Sri Gethuk,Aisha Tour Planner & Transport Service,...,Wisata Jaga Bendung,Wisata Kalibiru,Wisata Kaliurang,Wisata Kraton Jogja,Wisata Pangol Hill,Wisata Taman Kelinci Borobudur,Wisata Telaga Potorono,Wisata Watu Amben,XT Square,Yogyakarta Night Tours - Meeting Point Klasik
0,1,,,,,,,,,,...,,,,,,,,,,
1,2,,,,,,,,,,...,,,,,,,,,,
2,3,,,,,,,,,,...,,,,,,,,,,
3,4,,,,,,,,,,...,,,,,,,,,,
4,5,,,,,,,,,,...,,,,,,,,,,


In [None]:
# Restrict both frames to their first 540 rows (all columns are kept)
ROW_LIMIT = 540

# Slice of the (still empty) score placeholder
data_userbased11 = userbased.iloc[:ROW_LIMIT, :]

# Matching slice of the observed ratings
data_userbased12 = data_userbased1.iloc[:ROW_LIMIT, :]

In [None]:
# Predict a score for every (user, place) cell in the first rows of `userbased`
# (the rows covered by data_userbased11).
num_score_rows = len(data_userbased11.index)
num_score_cols = len(data_userbased11.columns)

for j in range(1, num_score_cols):  # column 0 holds user_id, not a place
    tourism = data_userbased11.columns[j]

    # The neighbour lookups depend only on the place, so they are done once per
    # column instead of once per (user, place) pair.
    # Positions 1..9 of the ranked lists are used: nine neighbours, skipping
    # position 0 (the top-ranked entry, normally the place itself).
    tourism_top_names = data_neighbours.loc[tourism].iloc[1:10]
    tourism_top_sims = tourismbased.loc[tourism].sort_values(ascending=False).iloc[1:10]

    for i in range(num_score_rows):
        user = data_userbased11.index[i]

        # Unrated cells were filled with 0, so any non-zero value means the user
        # already rated this place; such places get score 0. (The previous
        # `== 1` test only caught ratings that were exactly 1.)
        if data_userbased12.iloc[i, j] != 0:
            score = 0
        else:
            # Ratings this user gave to the neighbouring places
            user_rated = data_tourismbased.loc[user, tourism_top_names]
            # Similarity-weighted average of those ratings
            score = getScore(user_rated, tourism_top_sims)

        # Single-step positional assignment. Chained `df.iloc[i][j] = ...`
        # writes to a temporary object and is not guaranteed to update the frame.
        userbased.iloc[i, j] = score

# Re-derive the slice so it reflects the filled values whether pandas treats
# row slices as views or as copies.
data_userbased11 = userbased.iloc[:num_score_rows, :]

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  data_userbased11.iloc[i][j] = getScore(user_rated,tourism_top_sims)
  data_userbased11.iloc[i][j] = getScore(user_rated,tourism_top_sims)
  if data_userbased12.iloc[i][j] == 1:
  return sum(history*similarities)/sum(similarities)
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps upda

In [None]:
# Result table: user_id followed by six recommendation slots labelled '1'..'6'
recommend_columns = ['user_id'] + [str(rank) for rank in range(1, 7)]
data_recommend = pd.DataFrame(index=userbased.index, columns=recommend_columns)
# Carry the user ids over from the first column of the score table
data_recommend.iloc[0:, 0] = userbased.iloc[:, 0]

In [None]:
# Fill each user's recommendation slots with place names (not scores),
# highest predicted score first.
top_n = len(data_recommend.columns) - 1  # number of slots after user_id
for i in range(len(userbased.index)):
    # Rank the place scores only. Column 0 is user_id; sorting it together with
    # the scores and dropping the first entry let 'user_id' itself appear as a
    # recommendation whenever the id was not the largest value in the row.
    place_scores = userbased.iloc[i, 1:]
    # NOTE(review): rows that hold no predicted scores (all NaN) still receive
    # place names here, in whatever order sort_values leaves the NaNs.
    data_recommend.iloc[i, 1:] = place_scores.sort_values(ascending=False).index[:top_n]

In [None]:
# Show a sample: the first 10 rows, user_id plus the first three recommendation slots
sample_rows = data_recommend.iloc[:10, :4]
print(sample_rows)

  user_id                                    1  \
0       1                              user_id   
1       2        Kampung Wisata Sosro Menduran   
2       3                   Keraton Yogyakarta   
3       4  Museum Benteng Vredeburg Yogyakarta   
4       5                          Lava Bantal   
5       6                      Candi Prambanan   
6       7                   Bukit Lintang Sewu   
7       8                 Studio Alam Gamplong   
8       9                      Pantai Sadranan   
9      10  Desa Wisata Rumah Domes/Teletubbies   

                                     2                                   3  
0        Kampung Wisata Sosro Menduran                De Mata Museum Jogja  
1              Bukit Wisata Pulepayung                    Gembira Loka Zoo  
2           Kampung Wisata Dipowinatan                      Pantai Greweng  
3                           Goa Pindul                   Pasar Beringharjo  
4                          Ledok Sambi                Studio Ala

In [None]:
# ---------------------------------------------------------------------------
# Generate training data: (user, place, score) triples from the score matrix
# ---------------------------------------------------------------------------
SEED = 42                  # fixes the train/validation split and weight initialisation
VALIDATION_FRACTION = 0.2  # share of samples held out from training
EMBEDDING_DIM = 50

# Create a mapping from place names to numerical indices (column 0 is user_id)
item_index = {tourism: i for i, tourism in enumerate(data_userbased11.columns[1:])}

# Score matrix without the user_id column, as floats so that invalid cells can
# be detected in one vectorised pass.
score_matrix = data_userbased11.iloc[:, 1:].to_numpy(dtype=float)

# Only include meaningful scores in training: drop NaN and +/-inf cells
valid_cells = np.isfinite(score_matrix)
# np.nonzero walks the matrix row by row, i.e. the same order as a nested
# loop over users (outer) and places (inner).
row_positions, place_positions = np.nonzero(valid_cells)

train_users = data_userbased11.index.to_numpy()[row_positions]
# Column position within the place columns == item_index[place name]
train_places = place_positions
train_ratings = score_matrix[valid_cells]

# Number of users and places
n_users = len(data_userbased11.index)
n_places = len(data_userbased11.columns) - 1

# Hold out a random subset for validation. Keras' `validation_split` takes the
# LAST fraction of the samples without shuffling; the samples are ordered by
# user, so that would validate only on users whose embeddings were never
# trained. An explicit shuffled split avoids this.
tf.keras.utils.set_random_seed(SEED)
(fit_users, val_users,
 fit_places, val_places,
 fit_ratings, val_ratings) = train_test_split(
    train_users, train_places, train_ratings,
    test_size=VALIDATION_FRACTION, random_state=SEED)

# Define inputs: one integer id per sample for the user and for the place
user_input = Input(shape=(1,))
item_input = Input(shape=(1,))

# Embeddings for users and places
user_embedding = Embedding(input_dim=n_users, output_dim=EMBEDDING_DIM)(user_input)
item_embedding = Embedding(input_dim=n_places, output_dim=EMBEDDING_DIM)(item_input)

# Flatten embeddings from (1, EMBEDDING_DIM) to (EMBEDDING_DIM,)
user_vec = Flatten()(user_embedding)
item_vec = Flatten()(item_embedding)

# Combine embeddings
x = Concatenate()([user_vec, item_vec])

# Fully connected layers, ending in a single linear unit (regression output)
x = Dense(128, activation='relu')(x)
x = Dense(64, activation='relu')(x)
output = Dense(1)(x)

# Build and compile the model
model = Model(inputs=[user_input, item_input], outputs=output)
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Train the model, monitoring the held-out samples after every epoch
history = model.fit(
    [fit_users, fit_places], fit_ratings,
    epochs=10, batch_size=32,
    validation_data=([val_users, val_places], val_ratings))

# Evaluate the model on the held-out samples only, so the reported errors are
# not flattered by data the model was fitted on.
predicted_ratings = model.predict([val_users, val_places])

# Remove non-finite predictions before calculation
# This step is a precaution in case the model predicts NaN or inf
mask = np.isfinite(predicted_ratings.flatten())
predicted_ratings = predicted_ratings[mask]
eval_ratings = val_ratings[mask]

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(eval_ratings, predicted_ratings))

# Calculate MAE
mae = mean_absolute_error(eval_ratings, predicted_ratings)

print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"Mean Absolute Error (MAE): {mae}")

# Interpreting the results:
# RMSE: square root of the mean squared difference between predicted and target
# scores. Large errors are penalised more heavily; lower is better.
# MAE: mean absolute difference between predicted and target scores. It is less
# sensitive to outliers than RMSE; lower is better.
# NOTE(review): the targets are the scores stored in data_userbased11, not raw
# user ratings, so judge both metrics against the spread of those scores
# rather than a fixed "below 1 on a 1-5 scale" rule of thumb.

Epoch 1/10
[1m1701/1701[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - loss: 0.0743 - mae: 0.1829 - val_loss: 0.0460 - val_mae: 0.2129
Epoch 2/10
[1m1701/1701[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 0.0676 - mae: 0.1697 - val_loss: 0.0674 - val_mae: 0.2519
Epoch 3/10
[1m1701/1701[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 0.0573 - mae: 0.1540 - val_loss: 0.0747 - val_mae: 0.2601
Epoch 4/10
[1m1701/1701[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - loss: 0.0477 - mae: 0.1413 - val_loss: 0.0563 - val_mae: 0.2224
Epoch 5/10
[1m1701/1701[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 0.0424 - mae: 0.1339 - val_loss: 0.0534 - val_mae: 0.2183
Epoch 6/10
[1m1701/1701[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - loss: 0.0381 - mae: 0.1278 - val_loss: 0.0898 - val_mae: 0.2819
Epoch 7/10
[1m1701/1701[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s

In [None]:
from google.colab import files

# Model export is currently disabled: everything below is commented out.

# Download the model as an H5 file
# NOTE(review): nothing in this cell writes jgo.h5; presumably the model has to
# be saved to that path first (e.g. with model.save) before it can be downloaded.
# files.download('gdrive/My Drive/Capstone Project/ML Code/jgo.h5')

# Save model to Pickle file, then download it
# NOTE(review): only unpickle this file from a trusted source; loading a pickle
# can execute arbitrary code.
# import pickle
# with open('gdrive/My Drive/Capstone Project/ML Code/jgo.pkl', 'wb') as f:
#     pickle.dump(model, f)
# files.download('gdrive/My Drive/Capstone Project/ML Code/jgo.pkl')