# Preference Page (Content Based)

In [None]:
# !pip install Sastrawi
# !pip install fuzzywuzzy
# !pip install tensorflow

import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from fuzzywuzzy import fuzz
from nltk.tokenize import word_tokenize
from pathlib import Path
from zipfile import ZipFile

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Embedding, Flatten, Concatenate, Input

from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory

from sklearn.impute import SimpleImputer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# TF-IDF vectorizer capped at 5000 features; none of the cells visible here use `tv`.
tv = TfidfVectorizer(max_features=5000)
# Sastrawi stemmer; only referenced from a commented-out line in preprocessing().
stem = StemmerFactory().create_stemmer()
# Sastrawi stop-word remover; preprocessing() calls stopword.remove() on each text.
stopword = StopWordRemoverFactory().create_stop_word_remover()

In [None]:
# Mount Google Drive at /content/drive (google.colab API) so files stored under it can be read.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Locations of the two input CSV files on the mounted drive.
TOURISM_CSV = '/content/drive/MyDrive/datasetRevisi.csv'
RATING_CSV = '/content/drive/MyDrive/ratingDataset.csv'

# The tourism file is decoded as latin-1; the rating file uses the pandas default encoding.
tourism = pd.read_csv(TOURISM_CSV, encoding='latin-1')
rating = pd.read_csv(RATING_CSV)

In [None]:
# Preview the first rows of the tourism table.
tourism.head()

In [None]:
# Preview the first rows of the rating table.
rating.head()

In [None]:
# Left-join the rating rows onto the tourism table using the shared
# Place_Id key, so every tourism row is kept even without a matching rating.
df = pd.merge(tourism, rating, on="Place_Id", how="left")
# Show the first rows of the merged dataframe.
df.head()

Unnamed: 0,Place_Id,Place_Name,Description,Weekend_Price,Weekday_Price,Category,City,Rating,Alamat,Coordinate,Lat,Long,Gambar,Unnamed: 13,User_Id,Nama Tempat,Place_Rating
0,2,Agung Bali,Dapatkan berbagai produk oleh-oleh khas Bali b...,0,0,Belanja,Badung,4.0,"Jln. Dewi Sri No.18XX, Kuta, Badung, Bali","-8.700234336021559, 115.176534407375",-8.700234,115.176534,V,https://oleholehagungbali.com/,1,Agung Bali Oleh-Oleh,3
1,2,Agung Bali,Dapatkan berbagai produk oleh-oleh khas Bali b...,0,0,Belanja,Badung,4.0,"Jln. Dewi Sri No.18XX, Kuta, Badung, Bali","-8.700234336021559, 115.176534407375",-8.700234,115.176534,V,https://oleholehagungbali.com/,2,Agung Bali Oleh-Oleh,3
2,2,Agung Bali,Dapatkan berbagai produk oleh-oleh khas Bali b...,0,0,Belanja,Badung,4.0,"Jln. Dewi Sri No.18XX, Kuta, Badung, Bali","-8.700234336021559, 115.176534407375",-8.700234,115.176534,V,https://oleholehagungbali.com/,3,Agung Bali Oleh-Oleh,5
3,2,Agung Bali,Dapatkan berbagai produk oleh-oleh khas Bali b...,0,0,Belanja,Badung,4.0,"Jln. Dewi Sri No.18XX, Kuta, Badung, Bali","-8.700234336021559, 115.176534407375",-8.700234,115.176534,V,https://oleholehagungbali.com/,6,Agung Bali Oleh-Oleh,3
4,2,Agung Bali,Dapatkan berbagai produk oleh-oleh khas Bali b...,0,0,Belanja,Badung,4.0,"Jln. Dewi Sri No.18XX, Kuta, Badung, Bali","-8.700234336021559, 115.176534407375",-8.700234,115.176534,V,https://oleholehagungbali.com/,8,Agung Bali Oleh-Oleh,5


In [None]:
def preprocessing(data):
    """Lower-case a text and remove stop words from it.

    Args:
        data: The string to clean.

    Returns:
        The lower-cased string after ``stopword.remove`` has been applied.
        Stemming via ``stem.stem`` is currently not applied.
    """
    lowered = data.lower()
    return stopword.remove(lowered)

In [None]:
# Work on a copy so the raw `tourism` frame stays untouched and this cell
# can be re-run safely.
data_tourism = tourism.copy()

# Ticket prices above this amount are tagged 'Expensive'.
PRICE_THRESHOLD = 120000

# Translation of the source category labels into the English tags.
# Categories missing from this mapping end up without a category tag.
CATEGORY_TRANSLATIONS = {
    'Agrowisata': 'Agrotourism',
    'Belanja': 'Shopping',
    'Pantai': 'Beach',
    'Religius': 'Religious',
    'Budaya': 'Culture',
    'Rekreasi': 'Recreation',
    'Cagar Alam': 'Biodiversity',
    'Alam': 'Nature',
}

weekend_price = data_tourism['Weekend_Price']
weekday_price = data_tourism['Weekday_Price']

# 'Free'      : both prices are exactly 0.
# 'Middle'    : both prices are at or below the threshold (and not free).
# 'Expensive' : at least one price exceeds the threshold, so a place with
#               one cheap and one expensive price still gets a tag.
is_free = (weekend_price == 0) & (weekday_price == 0)
is_middle = (
    (weekend_price <= PRICE_THRESHOLD)
    & (weekday_price <= PRICE_THRESHOLD)
    & ~is_free
)
is_expensive = (weekend_price > PRICE_THRESHOLD) | (weekday_price > PRICE_THRESHOLD)

# Rows whose prices are missing match none of the masks and stay NaN.
price_tags = pd.Series(np.nan, index=data_tourism.index, dtype=object)
price_tags.loc[is_free] = 'Free'
price_tags.loc[is_middle] = 'Middle'
price_tags.loc[is_expensive] = 'Expensive'
data_tourism['Price Tags'] = price_tags

data_tourism['Category Tags'] = data_tourism['Category'].map(CATEGORY_TRANSLATIONS)

# Combine the three tag parts into one text per place. Missing parts are
# replaced by an empty string so 'Tags' is always a string and the later
# text preprocessing never receives NaN.
tag_parts = (
    data_tourism[['Category Tags', 'City', 'Price Tags']]
    .fillna('')
    .astype(str)
)
data_tourism['Tags'] = (
    tag_parts['Category Tags'] + ' ' + tag_parts['City'] + ' ' + tag_parts['Price Tags']
)

# Drop the image/location columns that the content-based model does not use.
data_tourism = data_tourism.drop(
    columns=['Gambar', 'Coordinate', 'Lat', 'Long', 'Unnamed: 13']
)

In [None]:
# Clean every tag text with preprocessing() (lower-casing and stop-word removal),
# then display the resulting frame.
data_tourism['Tags'] = data_tourism['Tags'].map(preprocessing)
data_tourism

Unnamed: 0,Place_Id,Place_Name,Description,Weekend_Price,Weekday_Price,Category,City,Rating,Alamat,Price Tags,Category Tags,Tags
0,2,Agung Bali,Dapatkan berbagai produk oleh-oleh khas Bali b...,0,0,Belanja,Badung,4.0,"Jln. Dewi Sri No.18XX, Kuta, Badung, Bali",Free,Shopping,shopping badung free
1,10,Bali Swing,Bali Swing Tegalalang yang menggantung di anta...,500000,500000,Rekreasi,Badung,4.5,"Jl. Dewi Saraswati No.7, Bongkasa Pertiwi, Kec...",Expensive,Recreation,recreation badung expensive
2,22,Garuda Wisnu Kencana,Patung Garuda Wisnu Kencana menjadi ikon pariw...,80000,70000,Budaya,Badung,3.5,"Jl. Raya Uluwatu, Ungasan, Kec. Kuta Sel., Kab...",Middle,Culture,culture badung middle
3,29,Pantai Jimbaran,Pantai Jimbaran Bali adalah salah satu pantai ...,0,0,Pantai,Badung,4.0,"Kecamatan Kuta selatan, kabupaten Badung, Prov...",Free,Beach,beach badung free
4,30,Pabrik Kata-Kata Joger,Bali Joger terkenal di kalangan wisatawan seba...,0,0,Belanja,Badung,4.0,"Jl. Raya Kuta, Kuta, Kec. Kuta, Kabupaten Badu...",Free,Shopping,shopping badung free
...,...,...,...,...,...,...,...,...,...,...,...,...
70,50,Pantai Pasih Uug,Broken Beach atau yang lebih dikenal sebagai P...,0,0,Pantai,Klungkung,4.5,"Lembongan, Kec. Nusa Penida, Kabupaten Klungku...",Free,Beach,beach klungkung free
71,16,Danau Beratan Bedugul,Danau Beratan Bedugul merupakan sebuah danau y...,30000,30000,Alam,Tabanan,4.5,"Kec. Baturiti, Kabupaten Tabanan, Bali",Middle,Nature,nature tabanan middle
72,28,Jatiluwih Green Land,Nominasi untuk penunjukan sebagai situs Warisa...,40000,40000,Agrowisata,Tabanan,4.5,"Jl. Jatiluwih Kawan No.Desa, Jatiluwih, Kec. P...",Middle,Agrotourism,agrotourism tabanan middle
73,61,Pura Luhur Batukaru,Pura Luhur Batukaru adalah sebuah pura yang te...,15000,15000,Religius,Tabanan,4.5,"Jl. Penatahan - Wongayagede, Wongaya Gede, Kec...",Middle,Religious,religious tabanan middle


In [None]:
# Vectorizer that turns the tag texts into a TF-IDF representation.
tfidf_vectorizer = TfidfVectorizer()

# Fit on the 'Tags' column and build one TF-IDF row vector per place.
tfidf_matrix = tfidf_vectorizer.fit_transform(data_tourism['Tags'])

# Min-max scale the 'Rating' column element-wise: (x - min) / (max - min).
# The result is a Series with one value per place in [0, 1]; note that the
# lowest-rated place gets weight 0.
rating_min = data_tourism['Rating'].min()
rating_range = data_tourism['Rating'].max() - rating_min
if rating_range > 0:
    normalized_rating = (data_tourism['Rating'] - rating_min) / rating_range
else:
    # All ratings are identical (or none is valid): scaling would divide by
    # zero, so give every place the same neutral weight instead.
    normalized_rating = pd.Series(1.0, index=data_tourism.index)

# Replace the 'Rating' column of data_tourism with the normalised values.
data_tourism['Rating'] = normalized_rating

# Recommendation function
def recommend_places(query, top_n=10):
    """Return the names of the places that best match a text query.

    The query is lower-cased, transformed with the fitted
    ``tfidf_vectorizer`` and compared against every row of ``tfidf_matrix``
    using cosine similarity. Each similarity is multiplied by the value in
    ``data_tourism['Rating']`` for the same row, and the places with the
    highest products are returned.

    Args:
        query: Text describing the wanted tags, e.g. "nature bangli middle".
        top_n: Maximum number of places to return. Values below 0 are
            treated as 0.

    Returns:
        A pandas Series with the ``Place_Name`` of up to ``top_n`` places,
        best score first. Places with equal scores keep their order in
        ``data_tourism``.
    """
    # Lower-case the query before vectorising it.
    processed_query = query.lower()

    # Project the query into the TF-IDF space learned from the tags.
    query_vector = tfidf_vectorizer.transform([processed_query])

    # One similarity value per place, flattened to a 1-D array.
    cosine_similarities = cosine_similarity(query_vector, tfidf_matrix).flatten()

    # Weight each similarity by the rating of the same row. Plain NumPy
    # arrays keep everything positional, matching the .iloc lookup below.
    ratings = data_tourism['Rating'].to_numpy(dtype=float)
    scores = cosine_similarities * ratings

    # A missing rating produces a NaN score; rank those places last instead
    # of letting NaN disturb the ordering.
    scores = np.where(np.isnan(scores), -np.inf, scores)

    # Stable sort on the negated scores = descending order with
    # deterministic tie-breaking; then keep only the first top_n positions.
    limit = max(top_n, 0)
    top_indices = np.argsort(-scores, kind='stable')[:limit]

    # Look up the place names for the selected row positions.
    recommended_places = data_tourism.iloc[top_indices]['Place_Name']

    return recommended_places

# Ask the user for their preferences and print the recommended places.
def get_user_input():
    """Prompt for category, city and price tag, then print recommendations.

    The category prompt accepts several comma-separated values
    (e.g. ``nature, culture``). All answers are combined into a single
    space-separated query string that is passed to ``recommend_places``;
    every returned place is printed on its own line.
    """
    raw_categories = input("Enter the category: ")
    city = input("Enter the city: ")
    price_tags = input("Enter the price category: ")

    # Split the comma-separated categories into clean words. Formatting the
    # list object itself into the query would embed its repr (brackets and
    # quotes) in the text.
    categories = [part.strip() for part in raw_categories.split(',')]
    query_terms = [*categories, city.strip(), price_tags.strip()]
    query = ' '.join(term for term in query_terms if term)

    recommendations = recommend_places(query)
    print("Recommended places:")
    for place in recommendations:
        print(place)

# Call get_user_input() to prompt for preferences and print the recommendations.
get_user_input()

Enter the category: nature, culture
Enter the city: bangli
Enter the price category: middle
Recommended places:
Desa Wisata Penglipuran 
Kintamani Highland
Gunung Batur
Danau Batur 
Air Terjun Tukad Cepung
Wisata Air Panas Toya Bungkah
Pura Kehen
Tari Barong dan Keris

Garuda Wisnu Kencana
Air Terjun Tegenungan
