In [4]:
import mysql.connector
# library for data analysis
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras.optimizers import Adam
import tensorflow as tf
from tensorflow.keras.models import Model 
from tensorflow.keras.layers import Input, Embedding, Dot, Flatten, Dense
from sklearn.model_selection import train_test_split 
from sklearn.metrics.pairwise import cosine_similarity

import os 

In [5]:
# Membuat koneksi ke database
conn = mysql.connector.connect(
    host='localhost',
    user='root',
    password='',
    database='absensi'
)

In [6]:
# Mengeksekusi query untuk mengambil data dari tabel

query = "SELECT * FROM tourism"
destination = pd.read_sql_query(query, conn)

query = "SELECT * FROM ratings"
ratings = pd.read_sql_query(query, conn)

  destination = pd.read_sql_query(query, conn)
  ratings = pd.read_sql_query(query, conn)


In [7]:
destination.head(2)

Unnamed: 0,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long
0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15,"{'lat': -6.1753924, 'lng': 106",-6.17539,106.8272
1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,5000,4.6,90,"{'lat': -6.137644799999999, 'l",-6.13764,106.8171


In [8]:
ratings.head(2)

Unnamed: 0,User_Id,Place_Id,Place_Ratings
0,0,0,0
1,1,179,3


In [9]:
print(f'number of tourist destination = {len(destination.Place_Id.unique())}')
print(f'number of user = {len(ratings.User_Id.unique())}')

number of tourist destination = 437
number of user = 301


In [10]:
destination = destination.drop(['Coordinate', 'Time_Minutes'], axis=1)
destination.head(2)

Unnamed: 0,Place_Id,Place_Name,Description,Category,City,Price,Rating,Lat,Long
0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,-6.17539,106.8272
1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,5000,4.6,-6.13764,106.8171


## Collaborative Filtering
Collaborative Filtering uses algorithms to filter data from user reviews to make personalized recommendations for users with similar preferences

### Split data for Training and Testing

In [11]:
train, test = train_test_split(ratings, test_size = 0.2)

In [12]:
print(train.shape)
print(test.shape)

(8000, 3)
(2001, 3)


In [13]:
number_user = len(ratings['User_Id'].unique())
number_destination = len(ratings['Place_Id'].unique())

print(f'number of user = {number_user}')
print(f'number of place name = {number_destination}')

number of user = 301
number of place name = 438


In [14]:
# build recommendation system using emmbedding layers 
EMBEDDING_DIM = 50

# input layers 
place_input = Input(shape=[1])
user_input = Input(shape=[1])

# embedding layers
place_embedding = Embedding(number_destination+1 , EMBEDDING_DIM)(place_input)
user_embedding = Embedding(number_user+1 , EMBEDDING_DIM)(user_input)

# flatte the embedddings
place_flat = Flatten()(place_embedding)
user_flat = Flatten()(user_embedding)

# add dense layers
dense1 = Dense(64, activation='relu')(place_flat)
dense2 = Dense(64, activation='relu')(user_flat)

# output layer
output = Dot(1)([dense1, dense2])

# the model
model = Model([place_input, user_input], [output])

In [15]:
model.compile(loss = 'mean_squared_error', 
              optimizer = Adam(learning_rate = 0.0005)
              )

In [16]:
history = model.fit(x= [train.Place_Id, train.User_Id], 
                    y= train.Place_Ratings, 
                    validation_data = ([test.Place_Id, test.User_Id], test.Place_Ratings), 
                    batch_size =32 , 
                    epochs =30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


## Collaborative Recomendation System

In [24]:
# recommendation system function using collaborative filtering
def collaborative_rec(User_Id, destination ,model, np_val, detail_user = []):
    # detail user : digunakan untuk menyimpan data detail user untuk mengerucutkan data yang akan di outputkan
    
    if User_Id in ratings['User_Id'].values:
            destination = destination.copy()
            user_ids = np.array([User_Id] * len(destination))
            results = model([destination.Place_Id.values, user_ids]).numpy().reshape(-1)
    
            destination['predicted_rating'] = pd.Series(results)
            destination = destination.sort_values('predicted_rating', ascending = False)
    else:
            destination = destination.copy()
            destination = destination.sort_values('Rating', ascending = False)
    
    if len(detail_user) != 0:
        destination = destination[destination['City'] == detail_user[0]]
    
    dataFinal = destination[:np_val]
    
    return dataFinal['Place_Id'].tolist()

In [18]:
detail_user = ['Bandung']

In [25]:
collaborative_rec(34, destination, model, 5, detail_user)

[261, 251, 227, 332, 264]

<H1>Recomendation system based from category</H1>

In [20]:
# fungsi untuk melakukan groouping data
def groupingCategory(df, budget, totalCategory, excepts = []):
    data = []
    
    if len(excepts) == 0:
        for idx, row in df.iterrows():
            if len(data) == totalCategory:
                break
            if row['Price'] < budget:
                data.append(row['Place_Id'])
                budget -= row['Price']    
    else:
        for x in excepts:
            if df['Place_Id'].eq(x).any():
                df = df.loc[df['Place_Id'] != x]
                
        for idx, row in df.iterrows():
            if len(data) == totalCategory:
                break
            if row['Price'] < budget:
                data.append(row['Place_Id'])
                budget -= row['Price']
        
    return data

In [21]:
# Function to recommend places based on user input
def recommend_places(df, category, city, price, rating, lat, long, top_n=50):
    # Filter dataset based on user input
    filtered_df = df[(df['Category'] == category) & (df['City'] == city) & (df['Price'] <= price) & (df['Rating'] >= rating)]
    
    # Calculate cosine similarity between user input and dataset
    user_input = [[price, rating, lat, long]]
    dataset = filtered_df[['Price', 'Rating', 'Lat', 'Long']]
    similarity_matrix = cosine_similarity(user_input, dataset)
    
    # Sort places based on similarity score
    filtered_df['Similarity'] = similarity_matrix[0]
    recommended_places = filtered_df.sort_values(by='Similarity', ascending=False).head(top_n)
    
    gold = []
    silver = []
    bronze = []
    
    gold = groupingCategory(recommended_places, price, 5)
    silver = groupingCategory(recommended_places, price, 5, gold)
    bronze = groupingCategory(recommended_places, price, 5, (silver + gold))
                
    return {"gold": gold, "silver": silver, "bronze": bronze}

In [22]:
# Example usage
category = 'Taman Hiburan'
city = 'Jakarta'
price = 50000
rating = 4.0
lat = -6.20000
long = 106.80000

In [23]:
recommendations = recommend_places(destination, category, city, price, rating, lat, long)
recommendations

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Similarity'] = similarity_matrix[0]


{'gold': [28, 51, 78, 54, 77],
 'silver': [62, 4, 58, 47, 30],
 'bronze': [6, 76, 33, 79, 57]}