In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Mount Google Drive at /content/drive so the CSV files stored there can be read (Colab-only).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the destination catalogue and the user ratings from the mounted Drive folder.
data_dir = Path('/content/drive/MyDrive/data')
destinations = pd.read_csv(
    data_dir / 'Data_Destination_Tourism_West_java.csv',
    encoding='ISO-8859-1',
)
ratings = pd.read_csv(data_dir / 'tourism_rating.csv')

# Dataset sizes: number of distinct places and number of rating rows.
n_places = len(destinations['Place_Id'].unique())
n_ratings = len(ratings)
print('Jumlah tempat wisata: ', n_places)
print('Jumlah rating: ', n_ratings)

Jumlah tempat wisata:  523
Jumlah rating:  10300


In [None]:
# Coerce 'Latitude' to a numeric dtype; values that cannot be parsed become NaN.
destinations['Latitude'] = pd.to_numeric(destinations['Latitude'], errors='coerce')

# Coerce 'Rating' to a numeric dtype; values that cannot be parsed become NaN.
destinations['Rating'] = pd.to_numeric(destinations['Rating'], errors='coerce')

# Re-inspect dtypes and non-null counts. info() prints its report itself and
# returns None, so it is called directly instead of being wrapped in print()
# (which appended a stray "None" line to the output).
destinations.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 523 entries, 0 to 522
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Place_Id     523 non-null    int64  
 1   Place_Name   523 non-null    object 
 2   Description  523 non-null    object 
 3   Category     523 non-null    object 
 4   City         523 non-null    object 
 5   Price        523 non-null    int64  
 6   Rating       431 non-null    float64
 7   No.Telepon   523 non-null    object 
 8   Coordinate   523 non-null    object 
 9   Latitude     521 non-null    float64
 10  Longitude    523 non-null    float64
dtypes: float64(3), int64(2), object(6)
memory usage: 45.1+ KB
None


In [None]:
# Preview the first rows of the destinations table.
destinations.head()

Unnamed: 0,Place_Id,Place_Name,Description,Category,City,Price,Rating,No.Telepon,Coordinate,Latitude,Longitude
0,1,Jalan Braga,Jalan Braga adalah nama sebuah jalan utama di ...,Budaya,Bandung,0,4.7,(021) 2254 5456,"{'lat': -6.9150534, 'lng': 107.6089842}",-6.915053,107.608984
1,2,Gedung Sate,"Gedung Sate, dengan ciri khasnya berupa orname...",Budaya,Bandung,5000,4.6,087880333592,"{'lat': -6.9024812, 'lng': 107.61881}",-6.902481,107.61881
2,3,Trans Studio Bandung,Trans Studio Bandung adalah kawasan wisata ter...,Taman Hiburan,Bandung,280000,4.5,08118118244,"{'lat': -6.9250943, 'lng': 107.6364944}",-6.925094,107.636494
3,4,Taman Hutan Raya Ir. H. Djuanda,Taman Hutan Raya Ir. H. Djuanda (Tahura Djuand...,Cagar Alam,Bandung,15000,4.5,(022) 2507891,"{'lat': -6.8565791, 'lng': 107.6323734}",-6.856579,107.632373
4,5,Farm House Susu Lembang,"Sebagai salah satu tempat wisata lembang baru,...",Taman Hiburan,Bandung,30000,4.4,082240207230,"{'lat': -6.832968999999999, 'lng': 107.6056183}",-6.832969,107.605618


In [None]:
# Summarise the ratings table: column dtypes and non-null counts.
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10300 entries, 0 to 10299
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype
---  ------         --------------  -----
 0   User_Id        10300 non-null  int64
 1   Place_Id       10300 non-null  int64
 2   Place_Ratings  10300 non-null  int64
dtypes: int64(3)
memory usage: 241.5 KB


In [None]:
# Preview the first rows of the ratings table.
ratings.head()

Unnamed: 0,User_Id,Place_Id,Place_Ratings
0,1,179,3
1,1,344,2
2,1,5,5
3,1,373,3
4,1,101,4


In [None]:
# Descriptive statistics for the numeric columns of the ratings table.
ratings.describe()

Unnamed: 0,User_Id,Place_Id,Place_Ratings
count,10300.0,10300.0,10300.0
mean,156.075437,227.01932,3.079903
std,89.265122,131.964686,1.37633
min,1.0,1.0,1.0
25%,79.0,112.0,2.0
50%,156.0,226.0,3.0
75%,232.0,338.0,4.0
max,330.0,523.0,5.0


### Menghapus Kolom yang tidak perlu


Data yang diperlukan hanya ada pada kolom Place_Id, Place_Name, Category, dan City.


In [None]:
# Keep only the columns used downstream (Place_Id, Place_Name, Category, City).
# errors='ignore' keeps the cell re-runnable: on a second run the columns are
# already gone and the drop becomes a no-op instead of raising KeyError.
destinations = destinations.drop(
    columns = ['Description', 'Price', 'Rating', 'No.Telepon', 'Coordinate', 'Latitude', 'Longitude'],
    errors = 'ignore',
)

In [None]:
# Count missing values per column in the destinations table
destinations.isnull().sum()

Place_Id      0
Place_Name    0
Category      0
City          0
dtype: int64

In [None]:
# Count missing values per column in the user ratings table
ratings.isnull().sum()

User_Id          0
Place_Id         0
Place_Ratings    0
dtype: int64

In [None]:
# Count fully duplicated rows in each table before reporting them.
n_dup_destinations = destinations.duplicated().sum()
n_dup_ratings = ratings.duplicated().sum()

print(f'Jumlah data destinasi wisata yang duplikat: {n_dup_destinations}')
print(f'Jumlah data rating pengguna wisata yang duplikat: {n_dup_ratings}')

Jumlah data destinasi wisata yang duplikat: 0
Jumlah data rating pengguna wisata yang duplikat: 83


In [None]:
# Remove fully duplicated rating rows (mutates `ratings` in place; the index is not reset)

ratings.drop_duplicates(inplace = True)

### Modeling Sistem Rekomendasi dengan Content Based Filtering

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Normalise city names in a single pass: underscores become spaces and any run
# of whitespace collapses to one space.
destinations['City'] = destinations['City'].map(
    lambda city: ' '.join(city.replace('_', ' ').split())
)

# Create the vectorizer and learn its vocabulary from the 'City' column.
# NOTE(review): `tf` rebinds the alias used by `import tensorflow as tf` in the
# import cell; the later cells call the vectorizer through `tf`, so the name is
# kept here. Any TensorFlow usage after this cell needs a different alias.
tf = TfidfVectorizer().fit(destinations['City'])

# Vocabulary terms (features) extracted from 'City'.
feature_names = tf.get_feature_names_out()


In [None]:
# Fit the vectorizer on 'City' and encode every destination row as a TF-IDF vector.
tfidf_matrix = tf.fit_transform(destinations['City'])
# Shape of the resulting matrix.
tfidf_matrix.shape

(523, 21)

In [None]:
# Show the TF-IDF matrix in dense form for inspection.
tfidf_matrix.todense()

matrix([[1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]])

In [None]:
# Label the TF-IDF weights: one row per place name, one column per vocabulary term.
tfidf_df = pd.DataFrame(
    data = tfidf_matrix.todense(),
    index = destinations['Place_Name'],
    columns = tf.get_feature_names_out(),
)

# Inspect a random selection of 15 rows.
tfidf_df.sample(n = 15, axis = 0)

Unnamed: 0_level_0,bandung,banjar,bekasi,bogor,ciamis,cianjur,cimahi,cirebon,depok,garut,...,karawang,kota,kuningan,majalengka,pangandaran,purwakarta,subang,sukabumi,sumedang,tasikmalaya
Place_Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Taman Wisata Karang Resik,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.757089,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.653312
Darajat Pass,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
D'Kandang Amazing Farm,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Curug Anom,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cirata Jangari,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Taman Tjimanoek Indramayu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Nangorak Camp,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
Trans Studio Bandung,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Godongijo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Agroeduwisata situbolang,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Cosine Similarity

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between the rows of the TF-IDF matrix.
cosine_simlrty = cosine_similarity(tfidf_matrix)
cosine_simlrty

array([[1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 1., 1.],
       [0., 0., 0., ..., 1., 1., 1.],
       [0., 0., 0., ..., 1., 1., 1.]])

In [None]:
# Wrap the similarity array in a DataFrame labelled by place name on both axes.
place_names = destinations['Place_Name']
cosine_sim_df = pd.DataFrame(
    data = cosine_simlrty,
    index = place_names,
    columns = place_names,
)

print('Shape: ', cosine_sim_df.shape)

# Inspect a random selection of 20 rows.
cosine_sim_df.sample(n = 20, axis = 0)

Shape:  (523, 523)


Place_Name,Jalan Braga,Gedung Sate,Trans Studio Bandung,Taman Hutan Raya Ir. H. Djuanda,Farm House Susu Lembang,Kebun Binatang Bandung,Kawah Putih,Tebing Karaton,Museum Geologi Bandung,Museum Konferensi Asia Afrika,...,Taman Lembah Gurame,Taman Herbal Insani Depok,Pondok Zidane,Godongijo,Taman Rekreasi Wiladatika,Setu Pengasinan,Taman Cibubur Ciraos Adib RS,Green Lake View Waterpark,Kampung 3D Depok,Tugu Batu Sawangan
Place_Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Situgunung camping ground,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Jendela Alam,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Curug Sodong,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Wahana Tirta Pasir Raya Panjalu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Taman Kota,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Karacak Valley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Curug Cikaso,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Curug Agung Galunggung,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Tahura Gunung Kunci,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Curug Sanghyang Taraje,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Recommendation Testing

In [None]:
def place_recommendation(place_name, similarity_data = None, items = None, k = 10):
  """Return up to `k` places most similar to `place_name`.

  Args:
    place_name: Column label in `similarity_data` identifying the query place.
    similarity_data: Similarity DataFrame whose rows and columns list the same
      places in the same order. Defaults to the notebook-level `cosine_sim_df`,
      looked up at call time so a rebuilt matrix is picked up automatically.
    items: DataFrame with the metadata attached to each recommendation; it is
      merged on the column(s) it shares with the candidate names. Defaults to
      the 'Place_Name', 'Category' and 'City' columns of `destinations`.
    k: Maximum number of rows to return.

  Returns:
    DataFrame of recommended places joined with `items`, ordered from most to
    least similar. Fewer than `k` rows are returned when fewer candidates exist.

  Raises:
    KeyError: if `place_name` is not a column of `similarity_data`.
  """
  # Resolve the defaults lazily instead of freezing them at definition time.
  if similarity_data is None:
    similarity_data = cosine_sim_df
  if items is None:
    items = destinations[['Place_Name', 'Category', 'City']]
  k = max(k, 0)

  scores = similarity_data.loc[:, place_name]
  if isinstance(scores, pd.DataFrame):
    # Several columns share this name: score each candidate by its best
    # similarity to any of them.
    scores = scores.max(axis = 1)

  # Full stable descending sort. The previous argpartition call only fixed the
  # top k-1 positions while k+1 were read, so the tail of the result was
  # arbitrary, and it raised when k was not smaller than the number of places.
  order = np.argsort(-scores.to_numpy(), kind = 'stable')
  closest = similarity_data.columns[order]

  # Exclude the query itself and list each candidate name only once.
  closest = closest[closest != place_name].drop_duplicates()
  return pd.DataFrame(closest[:k]).merge(items).head(k)

In [None]:
# Choose the query destination and display its catalogue entry.
place_name = 'Jalan Braga'
destinations.loc[destinations['Place_Name'] == place_name]

Unnamed: 0,Place_Id,Place_Name,Category,City
0,1,Jalan Braga,Budaya,Bandung


In [None]:
# Recommend places similar to the selected `place_name`, using the function's default arguments.
place_recommendation(place_name)

Unnamed: 0,Place_Name,Category,City
0,Taman Hutan Raya Ir. H. Juanda,Cagar Alam,Bandung
1,Taman Begonia,Cagar Alam,Bandung
2,Tafso Barn,Cagar Alam,Bandung
3,Curug Cimahi,Cagar Alam,Bandung
4,The Lodge Maribaya,Cagar Alam,Bandung
5,Kawah Rengganis Cibuni,Cagar Alam,Bandung
6,Taman Kupu-Kupu Cihanjuang,Cagar Alam,Bandung
7,Taman Kupu-Kupu Cihanjuang,Cagar Alam,Cimahi
8,Glamping Lakeside Rancabali,Taman Hiburan,Bandung
9,Situ Patenggang,Cagar Alam,Bandung
