# Import libraries

In [312]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.neighbors import NearestNeighbors
from sklearn.compose import ColumnTransformer
import numpy as np
from joblib import dump

# Load Dataset

In [194]:
# Read the place records from the JSON file into a DataFrame
wisata = pd.read_json("dataset.json")
# Preview the first rows
wisata.head()

Unnamed: 0,id,name,category,location,image,rating,description,coordinate
0,1,Kawah Putih,Alam,"Bandung, Jawa Barat",https://images.unsplash.com/photo-159888094008...,4.8,Danau kawah vulkanik dengan air berwarna putih...,"{'latitude': -7.1655, 'longitude': 107.3999}"
1,2,Tangkuban Perahu,Alam,"Bandung, Jawa Barat",https://images.unsplash.com/photo-158866821440...,4.7,Gunung berapi aktif dengan kawah besar berbent...,"{'latitude': -6.7708, 'longitude': 107.6005}"
2,3,Farmhouse Lembang,Wisata Keluarga,"Lembang, Bandung",https://images.unsplash.com/photo-160056675235...,4.6,Taman rekreasi bertema pedesaan Eropa dengan r...,"{'latitude': -6.8117, 'longitude': 107.6175}"
3,4,Saung Angklung Udjo,Budaya,"Bandung, Jawa Barat",https://images.unsplash.com/photo-151628044061...,4.9,Pusat budaya Sunda yang menampilkan pertunjuka...,"{'latitude': -6.9175, 'longitude': 107.6191}"
4,5,Kampung Daun,Kuliner,"Bandung, Jawa Barat",https://images.unsplash.com/photo-151724813546...,4.5,Restoran alam dengan konsep gazebo-gazebo di a...,"{'latitude': -6.8506, 'longitude': 107.6339}"


# Preprocessing Data

In [195]:
# Expand the nested `coordinate` dicts into flat columns.
# json_normalize is documented for a dict or a list of dicts, so hand it a list.
coordinates = pd.json_normalize(wisata["coordinate"].tolist())
# Pick the columns by name and assign positionally, so the result depends
# neither on the key order inside each dict nor on index alignment with `wisata`.
wisata[["latitude", "longitude"]] = coordinates[["latitude", "longitude"]].to_numpy()
wisata.head()

Unnamed: 0,id,name,category,location,image,rating,description,coordinate,latitude,longitude
0,1,Kawah Putih,Alam,"Bandung, Jawa Barat",https://images.unsplash.com/photo-159888094008...,4.8,Danau kawah vulkanik dengan air berwarna putih...,"{'latitude': -7.1655, 'longitude': 107.3999}",-7.1655,107.3999
1,2,Tangkuban Perahu,Alam,"Bandung, Jawa Barat",https://images.unsplash.com/photo-158866821440...,4.7,Gunung berapi aktif dengan kawah besar berbent...,"{'latitude': -6.7708, 'longitude': 107.6005}",-6.7708,107.6005
2,3,Farmhouse Lembang,Wisata Keluarga,"Lembang, Bandung",https://images.unsplash.com/photo-160056675235...,4.6,Taman rekreasi bertema pedesaan Eropa dengan r...,"{'latitude': -6.8117, 'longitude': 107.6175}",-6.8117,107.6175
3,4,Saung Angklung Udjo,Budaya,"Bandung, Jawa Barat",https://images.unsplash.com/photo-151628044061...,4.9,Pusat budaya Sunda yang menampilkan pertunjuka...,"{'latitude': -6.9175, 'longitude': 107.6191}",-6.9175,107.6191
4,5,Kampung Daun,Kuliner,"Bandung, Jawa Barat",https://images.unsplash.com/photo-151724813546...,4.5,Restoran alam dengan konsep gazebo-gazebo di a...,"{'latitude': -6.8506, 'longitude': 107.6339}",-6.8506,107.6339


In [196]:
# Show column dtypes and non-null counts
wisata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   id           100 non-null    int64  
 1   name         100 non-null    object 
 2   category     100 non-null    object 
 3   location     100 non-null    object 
 4   image        100 non-null    object 
 5   rating       100 non-null    float64
 6   description  100 non-null    object 
 7   coordinate   100 non-null    object 
 8   latitude     100 non-null    float64
 9   longitude    100 non-null    float64
dtypes: float64(3), int64(1), object(6)
memory usage: 7.9+ KB


In [197]:
# Summary statistics of the numeric columns
wisata.describe()

Unnamed: 0,id,rating,latitude,longitude
count,100.0,100.0,100.0,100.0
mean,50.5,4.565,-6.927086,107.563906
std,29.011492,0.155294,0.278158,0.5181
min,1.0,4.2,-7.7333,106.5466
25%,25.75,4.475,-6.957775,107.3027
50%,50.5,4.6,-6.89705,107.61735
75%,75.25,4.7,-6.733975,107.6661
max,100.0,4.9,-6.5562,108.6634


In [198]:
# Count missing values per column
wisata.isna().sum()

id             0
name           0
category       0
location       0
image          0
rating         0
description    0
coordinate     0
latitude       0
longitude      0
dtype: int64

In [199]:
# The nested dict column is no longer needed once latitude/longitude exist
wisata = wisata.drop("coordinate", axis="columns")

In [200]:
# Number of fully duplicated rows
duplicate_count = wisata.duplicated().sum()
print(f"Jumlah data duplikat : {duplicate_count}")

Jumlah data duplikat : 0


In [201]:
# Rows labelled "Taman Kota"
wisata.loc[wisata["category"].eq("Taman Kota")]

Unnamed: 0,id,name,category,location,image,rating,description,latitude,longitude
61,62,Taman Balai Kota Bandung,Taman Kota,"Bandung, Jawa Barat",https://images.unsplash.com/photo-161667629452...,4.4,"Taman kota yang sejuk dengan air mancur, area ...",-6.9147,107.6098
80,81,Alun-Alun Kota Garut,Taman Kota,"Garut, Jawa Barat",https://images.unsplash.com/photo-161870125829...,4.3,"Taman kota ikonik dengan air mancur, arena ber...",-7.2101,107.9086


In [202]:
# Merge the two park sub-categories into the single label "Taman"
is_park_variant = wisata["category"].isin(["Taman Alam", "Taman Botani"])
wisata.loc[is_park_variant, "category"] = "Taman"
# Re-label this one place, selected by name
wisata.loc[wisata["name"].eq("Alun-Alun Bandung"), "category"] = "Taman Kota"

In [203]:
# List the distinct category labels
wisata["category"].unique()

array(['Alam', 'Wisata Keluarga', 'Budaya', 'Kuliner',
       'Pemandian Air Panas', 'Pantai', 'Taman Kota', 'Kebun Binatang',
       'Pegunungan', 'Taman Hiburan', 'Taman', 'Sejarah', 'Danau'],
      dtype=object)

In [204]:
# Rows labelled "Pemandian Air Panas"
wisata.loc[wisata["category"].eq("Pemandian Air Panas")]

Unnamed: 0,id,name,category,location,image,rating,description,latitude,longitude
9,10,Pemandian Air Panas Alam Sari Ater,Pemandian Air Panas,"Subang, Jawa Barat",https://images.unsplash.com/photo-158133720487...,4.4,Kolam air panas alami dengan fasilitas lengkap...,-6.5608,107.7589
83,84,Sumber Air Panas Cibolang,Pemandian Air Panas,"Pangalengan, Bandung",https://images.unsplash.com/photo-162180151073...,4.5,Pemandian air panas alami dengan latar pegunun...,-7.1542,107.5861


In [207]:
import json

# Load the place records as plain Python objects
# (the following cell repeats this same load before geocoding)
with open('dataset.json', 'r') as dataset_file:
    places = json.load(dataset_file)

In [208]:
import json
import requests
import time

# Request settings that do not change between places (hoisted out of the loop)
url = 'https://nominatim.openstreetmap.org/search'
headers = {
    'User-Agent': 'Mozilla/5.0 (compatible; MyProject/1.0)'  # Important!
}
REQUEST_TIMEOUT_S = 10  # without a timeout a stalled connection would hang the cell
OUTPUT_PATH = 'dataset_updated.json'

# Replace with the name of your own file
with open('dataset.json', 'r') as f:
    places = json.load(f)

for place in places:
    # Query by "<name>, <location>" and keep only the best match
    full_address = f"{place['name']}, {place['location']}"
    params = {
        'q': full_address,
        'format': 'json',
        'limit': 1
    }

    try:
        response = requests.get(url, params=params, headers=headers, timeout=REQUEST_TIMEOUT_S)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as err:
        # Best effort: one failed lookup (network error, HTTP error, non-JSON
        # body) must not abort the run and lose the results gathered so far.
        print(f"Gagal cari lokasi: {place['name']} ({err})")
    else:
        if data:
            # Overwrite the stored coordinates with the geocoder's first hit
            coordinate = place.setdefault('coordinate', {})
            coordinate['latitude'] = float(data[0]['lat'])
            coordinate['longitude'] = float(data[0]['lon'])
            print(f"Updated: {place['name']} => {data[0]['lat']}, {data[0]['lon']}")
        else:
            # Empty result list: the existing coordinates are left unchanged
            print(f"Gagal cari lokasi: {place['name']}")

    time.sleep(1)  # pause between requests to avoid being rate limited

# Save the new file
with open(OUTPUT_PATH, 'w') as f:
    json.dump(places, f, indent=2)

# Report the file that was actually written
print(f"Selesai! Lihat file '{OUTPUT_PATH}'")


Updated: Kawah Putih => -7.16611815, 107.40226688508221
Updated: Tangkuban Perahu => -6.7742798, 107.6361233
Updated: Farmhouse Lembang => -6.8327478, 107.6052985
Updated: Saung Angklung Udjo => -6.898269450000001, 107.65529705969041
Updated: Kampung Daun => -6.816085, 107.589551
Updated: Tebing Keraton => -6.8348584, 107.6638952
Gagal cari lokasi: Situ Patenggang
Gagal cari lokasi: Ranca Upas
Gagal cari lokasi: Kebun Begonia Lembang
Gagal cari lokasi: Pemandian Air Panas Alam Sari Ater
Gagal cari lokasi: Curug Maribaya
Updated: Taman Hutan Raya Ir. H. Juanda => -6.8411686, 107.65037816503335
Updated: Stone Garden => -6.8247319, 107.4381247
Gagal cari lokasi: Gunung Pancar
Updated: Taman Safari Indonesia => -6.7201709, 106.95195277895067
Updated: Kebun Raya Bogor => -6.5983032, 106.79941871332014
Updated: Puncak Bogor => -6.4759723, 106.5990915
Updated: Taman Bunga Nusantara => -6.7277478, 107.0794088
Updated: Pantai Pangandaran => -7.68905865, 108.64359043619461
Updated: Green Canyon 

In [None]:
# Load the re-geocoded dataset (dataset_updated.json) and inspect its category labels
new_wisata = pd.read_json("dataset_updated.json")
new_wisata["category"].unique()

array(['Alam', 'Wisata Keluarga', 'Budaya', 'Kuliner',
       'Pemandian Air Panas', 'Pantai', 'Kebun Binatang', 'Taman Kota',
       'Pegunungan', 'Taman Hiburan', 'Taman Alam', 'Taman Botani',
       'Sejarah', 'Danau'], dtype=object)

In [217]:
# Merge the two park sub-categories into the single label "Taman"
is_park_variant = new_wisata["category"].isin(["Taman Alam", "Taman Botani"])
new_wisata.loc[is_park_variant, "category"] = "Taman"
# Re-label this one place, selected by name
new_wisata.loc[new_wisata["name"].eq("Alun-Alun Bandung"), "category"] = "Taman Kota"

In [219]:
# List the distinct category labels after relabelling
new_wisata["category"].unique()

array(['Alam', 'Wisata Keluarga', 'Budaya', 'Kuliner',
       'Pemandian Air Panas', 'Pantai', 'Taman Kota', 'Kebun Binatang',
       'Pegunungan', 'Taman Hiburan', 'Taman', 'Sejarah', 'Danau'],
      dtype=object)

# Content-Based Filtering (CBF) System


In [247]:
# Select the feature columns (no target label is needed because this is not a classification task)
feature_columns = ["category", "description", "rating"]
features = new_wisata.loc[:, feature_columns]

In [284]:
# Category labels present in the feature frame
features["category"].unique()

array(['Alam', 'Wisata Keluarga', 'Budaya', 'Kuliner',
       'Pemandian Air Panas', 'Pantai', 'Taman Kota', 'Kebun Binatang',
       'Pegunungan', 'Taman Hiburan', 'Taman', 'Sejarah', 'Danau'],
      dtype=object)

In [254]:
# Preprocessing pipeline: one transformer per feature column
tfidf_step = ("tfidf", TfidfVectorizer(max_features=100), "description")  # text -> TF-IDF terms
onehot_step = ("onehot", OneHotEncoder(), ["category"])                   # label -> indicator columns
scaler_step = ("scaler", StandardScaler(), ["rating"])                    # rating -> standardised value
preprocessor = ColumnTransformer([tfidf_step, onehot_step, scaler_step])

In [255]:
# Fit the preprocessor on all features and transform them into the feature matrix
X = preprocessor.fit_transform(features)

In [256]:
# Train the KNN model (used to find places similar to the user's preferences);
# it returns 5 neighbours by default and compares vectors by cosine distance
knn_model = NearestNeighbors(n_neighbors=5, metric="cosine")
knn_model.fit(X)

In [257]:
# Keep the place names in a plain Python list
place_names = new_wisata["name"].to_list()

# Show the final feature-matrix shape and the first five place names
(X.shape, place_names[:5])

((100, 114),
 ['Kawah Putih',
  'Tangkuban Perahu',
  'Farmhouse Lembang',
  'Saung Angklung Udjo',
  'Kampung Daun'])

In [292]:
def rekomendasi_tempat_multi(preferensi_kategori_list, deskripsi_user="", rating_user=4.5, top_k=5):
    """Recommend places whose features are closest to the user's preferences.

    One query row is built per preferred category (all rows share the same
    description and rating), the rows are encoded with the notebook-level
    ``preprocessor``, averaged into a single query vector, and the nearest
    rows of ``new_wisata`` are looked up with the notebook-level ``knn_model``.

    Parameters
    ----------
    preferensi_kategori_list : sequence of str
        Preferred categories; at least three are required.
    deskripsi_user : str, default ""
        Free-text description of what the user likes.
    rating_user : float, default 4.5
        Rating value used for the query rows.
    top_k : int, default 5
        Number of recommendations to return. Values larger than the number of
        fitted samples are clamped to that number.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``category``, ``location``, ``image``, ``rating``,
        ``description``, ``coordinate`` and ``similarity`` (``1 - distance``
        as reported by ``knn_model``), ordered as returned by the model.

    Raises
    ------
    ValueError
        If fewer than three categories are given. Errors raised by
        ``preprocessor.transform`` (for example for a category the encoder
        was not fitted on) propagate unchanged.
    """
    if len(preferensi_kategori_list) < 3:
        raise ValueError("Minimal 3 kategori harus dipilih.")

    # Encode all categories in a single transform call; the scalar
    # description and rating are broadcast to every row.
    input_df = pd.DataFrame({
        "category": list(preferensi_kategori_list),
        "description": deskripsi_user,
        "rating": rating_user,
    })
    encoded = preprocessor.transform(input_df)
    # ColumnTransformer may return either a sparse matrix or a dense array.
    if hasattr(encoded, "toarray"):
        encoded = encoded.toarray()

    # Average the per-category vectors into one (1, n_features) query vector.
    avg_vector = np.asarray(encoded).mean(axis=0, keepdims=True)

    # kneighbors rejects n_neighbors greater than the number of fitted samples.
    n_neighbors = min(top_k, knn_model.n_samples_fit_)
    distances, indices = knn_model.kneighbors(avg_vector, n_neighbors=n_neighbors)

    output_columns = [
        "name", "category", "location", "image",
        "rating", "description", "coordinate",
    ]
    # Fetch all recommended rows in one positional lookup, then attach the score.
    return (
        new_wisata.iloc[indices[0]][output_columns]
        .assign(similarity=1 - distances[0])
        .reset_index(drop=True)
    )

## Try CBF

In [None]:
# Example preferences: a free-text description plus three categories
preferensi_nama = "Perkemahan dan juga alam yang sejuk"
preferensi_kategori = ["Alam", "Pegunungan", "Pantai"]

# Ask for the ten closest places
rekomendasi_tempat_multi(preferensi_kategori, preferensi_nama, top_k=10)


Unnamed: 0,name,category,location,image,rating,description,coordinate,similarity
0,Curug Malela,Alam,"Cianjur, Jawa Barat",https://images.unsplash.com/photo-150867201904...,4.5,Air terjun besar yang dijuluki 'Niagara mini' ...,"{'latitude': -7.018068565309185, 'longitude': ...",0.47851
1,Gunung Pancar,Alam,"Sentul, Bogor",https://images.unsplash.com/photo-151149758478...,4.5,Destinasi alam dengan pemandian air panas dan ...,"{'latitude': -6.5799844277853925, 'longitude':...",0.417475
2,Pantai Batu Hiu,Pantai,"Pangandaran, Jawa Barat",https://images.unsplash.com/photo-151985247656...,4.3,Pantai dengan tebing tinggi menyerupai sirip h...,"{'latitude': -7.698389975310129, 'longitude': ...",0.414536
3,Taman Bougenville,Taman,"Bandung, Jawa Barat",https://images.unsplash.com/photo-150922846851...,4.5,Taman alam dengan udara sejuk dan bunga bougen...,"{'latitude': -6.918204931672033, 'longitude': ...",0.407325
4,Karang Nini Beach,Pantai,"Pangandaran, Jawa Barat",https://images.unsplash.com/photo-157201196379...,4.4,Pantai dengan ombak tenang dan pemandangan sun...,"{'latitude': -7.68321612280516, 'longitude': 1...",0.401667
5,Kebun Raya Kuningan,Taman,"Kuningan, Jawa Barat",https://images.unsplash.com/photo-161571941969...,4.4,"Kebun raya yang kaya flora lokal, cocok untuk ...","{'latitude': -6.813036682200968, 'longitude': ...",0.395234
6,Taman Balai Kota Bandung,Taman Kota,"Bandung, Jawa Barat",https://images.unsplash.com/photo-161667629452...,4.4,"Taman kota yang sejuk dengan air mancur, area ...","{'latitude': -6.94274435, 'longitude': 107.624...",0.386013
7,Pantai Santolo,Pantai,"Garut, Jawa Barat",https://images.unsplash.com/photo-158524193693...,4.4,"Pantai dengan pasir putih dan dermaga klasik, ...","{'latitude': -7.650036810821868, 'longitude': ...",0.382965
8,Pantai Karang Hawu,Pantai,"Sukabumi, Jawa Barat",https://images.unsplash.com/photo-158597473470...,4.4,Pantai unik dengan karang menjorok ke laut dan...,"{'latitude': -6.9571414520382735, 'longitude':...",0.382019
9,Kampung Tulip,Alam,"Bandung, Jawa Barat",https://images.unsplash.com/photo-152587302057...,4.4,Taman bunga dengan konsep Belanda yang menampi...,"{'latitude': -6.963002247455258, 'longitude': ...",0.381517


In [None]:
# Example input: list the available categories, then read the user's choices
kategori_list = new_wisata["category"].unique()
for category_name in kategori_list:
    print(category_name)
kategori_input = input("Masukkan minimal 3 kategori, pisahkan dengan koma: ")
# Split on commas, trim whitespace and skip empty pieces
kategori_user = []
for raw_part in kategori_input.split(","):
    cleaned = raw_part.strip()
    if cleaned:
        kategori_user.append(cleaned)
deskripsi_user = input("Masukan deskripsi tempat yang kamu suka")



Alam
Wisata Keluarga
Budaya
Kuliner
Pemandian Air Panas
Pantai
Taman Kota
Kebun Binatang
Pegunungan
Taman Hiburan
Taman
Sejarah
Danau


In [None]:
# Run the recommendation function with the user's input
rekomendasi_tempat_multi(kategori_user, deskripsi_user, top_k=10)

Unnamed: 0,name,category,location,image,rating,description,coordinate,similarity
0,Kebun Raya Kuningan,Taman,"Kuningan, Jawa Barat",https://images.unsplash.com/photo-161571941969...,4.4,"Kebun raya yang kaya flora lokal, cocok untuk ...","{'latitude': -6.813036682200968, 'longitude': ...",0.536438
1,Taman Bougenville,Taman,"Bandung, Jawa Barat",https://images.unsplash.com/photo-150922846851...,4.5,Taman alam dengan udara sejuk dan bunga bougen...,"{'latitude': -6.918204931672033, 'longitude': ...",0.447247
2,Taman Wisata Alam Pangandaran,Taman,"Pangandaran, Jawa Barat",https://images.unsplash.com/photo-161719151800...,4.5,Kawasan hutan konservasi di tepi pantai dengan...,"{'latitude': -7.7051725418057835, 'longitude':...",0.440536
3,Gunung Pancar,Alam,"Sentul, Bogor",https://images.unsplash.com/photo-151149758478...,4.5,Destinasi alam dengan pemandian air panas dan ...,"{'latitude': -6.5799844277853925, 'longitude':...",0.435147
4,Curug Malela,Alam,"Cianjur, Jawa Barat",https://images.unsplash.com/photo-150867201904...,4.5,Air terjun besar yang dijuluki 'Niagara mini' ...,"{'latitude': -7.018068565309185, 'longitude': ...",0.414454
5,Curug Luhur,Alam,"Bogor, Jawa Barat",https://images.unsplash.com/photo-159751386504...,4.4,Air terjun eksotis dengan kolam renang dan fas...,"{'latitude': -7.23348483177532, 'longitude': 1...",0.396149
6,Pantai Batu Hiu,Pantai,"Pangandaran, Jawa Barat",https://images.unsplash.com/photo-151985247656...,4.3,Pantai dengan tebing tinggi menyerupai sirip h...,"{'latitude': -7.698389975310129, 'longitude': ...",0.392386
7,Kampung Tulip,Alam,"Bandung, Jawa Barat",https://images.unsplash.com/photo-152587302057...,4.4,Taman bunga dengan konsep Belanda yang menampi...,"{'latitude': -6.963002247455258, 'longitude': ...",0.35933
8,Kampung Wisata Sindangbarang,Budaya,"Bogor, Jawa Barat",https://images.unsplash.com/photo-157762162139...,4.4,Kampung adat tertua di Bogor yang melestarikan...,"{'latitude': -6.630155992147657, 'longitude': ...",0.35933
9,Curug Sawer,Alam,"Garut, Jawa Barat",https://images.unsplash.com/photo-1549924231-f...,4.4,Air terjun alami tersembunyi di tengah hutan d...,"{'latitude': -7.023613487875099, 'longitude': ...",0.35933


# Menyimpan Model Dalam Bentuk Joblib

In [313]:
# Save the fitted preprocessor and the KNN model to disk with joblib
dump(preprocessor, "preprocessor.joblib")
dump(knn_model, "CBF_model.joblib")

['CBF_model.joblib']