In [None]:
# Install required packages
!pip install sastrawi tensorflowjs



# **Import Library**

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import layers, models, losses, optimizers
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
import random

# **Load Data**

In [None]:
# Read the preprocessed dataset (expected next to the notebook) and
# show it as the cell output.
df = pd.read_csv(filepath_or_buffer='dataset_jogja_pre (1).csv')
df

Unnamed: 0,no,nama,vote_average,vote_count,htm_weekday,htm_weekend,latitude,longitude,type_clean_Agrowisata,type_clean_Alam,...,type_clean_Desa Wisata,type_clean_Kuliner,type_clean_Minat Khusus,type_clean_Museum,type_clean_Pantai,type_clean_Pendidikan,type_clean_Religi,type_clean_Seni,type_clean_Wisata Air,description_clean
0,9,Candi Borobudur,4.7,81922,50000.0,50000.0,-7.607087,110.203623,0,0,...,0,0,0,0,0,0,0,0,0,candi yang pernah masuk sebagai salah satu dar...
1,10,Candi Prambanan,4.7,71751,50000.0,50000.0,-7.751835,110.491532,0,0,...,0,0,0,0,0,0,0,0,0,candi prambanan adalah kompleks candi hindu te...
2,24,Tebing Breksi,4.4,51431,10000.0,10000.0,-7.781477,110.504576,0,1,...,0,0,0,0,0,0,0,0,0,tebing breksi merupakan tempat wisata yang ber...
3,343,Gembira Loka Zoo,4.5,36337,20000.0,25000.0,-7.806234,110.396798,0,0,...,0,0,0,0,0,0,0,0,0,gambira loka adalah kebun binatang yang berada...
4,346,The Palace of Yogyakarta (Keraton Yogyakarta),4.6,30091,8000.0,8000.0,-7.805284,110.364203,0,0,...,0,0,0,0,0,0,0,0,0,kompleks keraton merupakan museum yang menyimp...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
471,139,Pemancingan Adi Winata,5.0,1,0.0,0.0,-7.704577,110.512011,0,0,...,0,0,0,0,0,0,0,0,1,tempat rekreasi keluarga yang menawarkan fasil...
472,105,Ruang Perawatan Jenderal Soedirman,5.0,1,4000.0,4000.0,-7.776474,110.376744,0,0,...,0,0,0,0,0,0,0,0,0,situs sejarah berupa ruangan tempat jenderal s...
473,110,Situs Gedong Pusoko,5.0,1,15000.0,15000.0,-7.807846,110.403758,0,0,...,0,0,0,0,0,0,0,0,0,situs arkeologi yang menyimpan tinggalan sejar...
474,164,Taman Edukasi dan Outbound Sunan Kalijaga,5.0,1,0.0,0.0,-7.809207,110.413252,0,0,...,0,0,0,0,0,0,0,0,0,area edukatif yang menyediakan kegiatan outbou...


# **Preprocessing**

In [None]:
# Text preprocessing: turn every cleaned description into a fixed-length
# sequence of integer token ids.
descriptions = df['description_clean'].astype(str).tolist()

# Vocabulary is capped via num_words=5000; unknown words map to "<OOV>".
tokenizer = Tokenizer(oov_token="<OOV>", num_words=5000)
tokenizer.fit_on_texts(descriptions)

# Every sequence ends up with exactly 100 ids; shorter ones are
# zero-padded at the end ('post').
sequences = tokenizer.texts_to_sequences(descriptions)
padded_sequences = pad_sequences(sequences, maxlen=100, padding='post')

In [None]:
# Build randomly drawn sample pairs labelled as similar (1) / not similar (0)
def create_pairs(data, names=None, rng=None, prefix_len=4):
    """Pair every sample with one random *other* sample and label the pair.

    The partner is drawn uniformly at random, so a pair is NOT similar by
    construction. The label is a heuristic: 1 when both place names start
    with the same ``prefix_len`` characters, otherwise 0.

    Args:
        data: Indexable collection of samples (e.g. the padded sequences).
        names: Place names aligned position-by-position with ``data``.
            Defaults to the ``nama`` column of the notebook-level ``df``.
        rng: Object exposing ``randrange`` (e.g. ``random.Random(seed)``).
            Defaults to the global ``random`` module, so ``random.seed``
            controls the result.
        prefix_len: Number of leading characters compared between names.

    Returns:
        Tuple ``(pairs, labels)`` of numpy arrays; ``pairs[k]`` holds
        ``[data[k], data[partner]]`` and ``labels[k]`` is 0 or 1. Both are
        empty when fewer than two samples are given, because no sample can
        then be paired with a different one.

    Raises:
        ValueError: If ``names`` and ``data`` differ in length.
    """
    n = len(data)
    if n < 2:
        return np.array([]), np.array([])

    if names is None:
        # Backward-compatible default: read the names from the global frame.
        names = df['nama'].tolist()
    names = list(names)  # positional access, whatever container was passed
    if len(names) != n:
        raise ValueError(
            f"names has {len(names)} entries but data has {n} samples"
        )

    rng = random if rng is None else rng

    pairs = []
    labels = []
    for i in range(n):
        # Uniform draw over the n-1 indices different from i, without
        # materialising a candidate list for every sample.
        j = rng.randrange(n - 1)
        if j >= i:
            j += 1
        pairs.append([data[i], data[j]])
        labels.append(1 if names[i][:prefix_len] == names[j][:prefix_len] else 0)
    return np.array(pairs), np.array(labels)

# Seed Python's global RNG so the randomly drawn partners (and therefore the
# labels and the train/test split built from them) are identical after a
# kernel restart.
random.seed(42)
pairs, labels = create_pairs(padded_sequences)

In [None]:
# ===============================
# 4. FEATURE ENGINEERING
# ===============================
# TF-IDF over the cleaned descriptions. The column is cast to str, as in the
# tokenizer cell, so a missing description does not make the vectorizer fail.
tfidf = TfidfVectorizer(max_features=1000)
tfidf_matrix = tfidf.fit_transform(df['description_clean'].astype(str))
tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=tfidf.get_feature_names_out())

# One-hot tourism-type indicator columns (every column prefixed 'type_clean_').
type_columns = [col for col in df.columns if col.startswith('type_clean_')]
type_df = df[type_columns]

# Standardised rating / ticket-price columns.
numeric_features = ['vote_average', 'vote_count', 'htm_weekday', 'htm_weekend']
numeric_df = pd.DataFrame(StandardScaler().fit_transform(df[numeric_features]), columns=numeric_features)

def get_tourism_types(row):
    """Return readable names of the tourism types flagged on one row.

    A type is included when its ``type_clean_*`` column equals 1. The
    'type_clean_' text is removed from the column name and underscores are
    turned into spaces.
    """
    active_types = []
    for column in type_columns:
        if row[column] == 1:
            label = column.replace('type_clean_', '')
            active_types.append(label.replace('_', ' '))
    return active_types

# Attach the list of readable tourism types to every row of the frame.
df['tourism_types'] = df.apply(get_tourism_types, axis=1)

# Side-by-side feature table: numeric, type indicators, then TF-IDF columns.
all_features = pd.concat(
    [numeric_df, type_df, tfidf_df],
    axis=1,
)
feature_dim = all_features.shape[1]

In [None]:
# Hold out 20% of the pairs for testing; the fixed random_state makes the
# split itself repeatable for a given set of pairs.
X_train, X_test, y_train, y_test = train_test_split(
    pairs,
    labels,
    random_state=42,
    test_size=0.2,
)

# **Build Model**

In [None]:
# Base embedding model (the encoder shared by both siamese branches)
def build_base_network(input_shape, vocab_size=5000, embedding_dim=64):
    """Build the text encoder that maps a token-id sequence to a 64-d vector.

    Architecture: Embedding -> Conv1D(64 filters, width 5, relu) ->
    GlobalMaxPooling1D -> Dense(128, relu) -> Dense(64, linear).

    Args:
        input_shape: Shape of one padded token-id sequence, e.g. ``(100,)``.
        vocab_size: Size of the embedding table. Token ids must be smaller
            than this value; the default matches the tokenizer's
            ``num_words=5000``.
        embedding_dim: Width of each token embedding.

    Returns:
        A Keras ``Model`` taking a batch of sequences and returning a batch
        of 64-dimensional vectors.
    """
    # Named `inputs` so the builtin `input` is not shadowed. The deprecated
    # `input_length` argument of Embedding is omitted; the sequence length
    # is already fixed by the Input shape.
    inputs = tf.keras.Input(shape=input_shape)
    x = layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim)(inputs)
    x = layers.Conv1D(64, 5, activation='relu')(x)
    x = layers.GlobalMaxPooling1D()(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dense(64)(x)
    return models.Model(inputs, x)

In [None]:
# Cosine distance used as the pair similarity measure
@tf.keras.saving.register_keras_serializable()
def cosine_distance(vectors):
    """Return ``1 - cosine similarity`` for a pair of batched vectors.

    Args:
        vectors: Two-element sequence ``(x, y)``; both are L2-normalised
            along axis 1 before being compared.

    Returns:
        Tensor with one value per row (the reduced axis is kept).
    """
    left, right = vectors
    left_unit = tf.math.l2_normalize(left, axis=1)
    right_unit = tf.math.l2_normalize(right, axis=1)
    cosine = tf.reduce_sum(left_unit * right_unit, axis=1, keepdims=True)
    return 1 - cosine

In [None]:
# Siamese network: the same encoder (shared weights) is applied to both inputs
input_shape = (100,)
base_network = build_base_network(input_shape)

input_a = tf.keras.Input(shape=input_shape)
input_b = tf.keras.Input(shape=input_shape)

processed_a = base_network(input_a)
processed_b = base_network(input_b)

distance = layers.Lambda(cosine_distance)([processed_a, processed_b])

# cosine_distance lies in [0, 2] and is SMALL for similar pairs, whereas the
# pair labels use 1 = similar and the model is trained with binary
# cross-entropy. Feeding the raw distance to that loss would reward large
# distances for similar pairs. Map it to a similarity score in [0, 1]
# (1 - distance / 2) so output and labels point the same way and the output
# is a valid probability-like value.
similarity = layers.Rescaling(scale=-0.5, offset=1.0)(distance)
model = models.Model([input_a, input_b], similarity)



In [None]:
# Loss and compile: binary cross-entropy between the pair labels and the
# model output, tracked together with accuracy and mean absolute error.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', 'mae'],
)


# Train the model; Keras holds out 20% of the training pairs for validation.
model.fit(
    x=[X_train[:, 0], X_train[:, 1]],
    y=y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
)


Epoch 1/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 124ms/step - accuracy: 0.9780 - loss: 0.1171 - mae: 0.0668 - val_accuracy: 0.9737 - val_loss: 0.1436 - val_mae: 0.0396
Epoch 2/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 88ms/step - accuracy: 0.9838 - loss: 0.0560 - mae: 0.0231 - val_accuracy: 0.9737 - val_loss: 0.1346 - val_mae: 0.0398
Epoch 3/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 82ms/step - accuracy: 0.9779 - loss: 0.0413 - mae: 0.0253 - val_accuracy: 0.9737 - val_loss: 0.1365 - val_mae: 0.0494
Epoch 4/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 79ms/step - accuracy: 0.9975 - loss: 0.0191 - mae: 0.0166 - val_accuracy: 0.9737 - val_loss: 0.1412 - val_mae: 0.0539
Epoch 5/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 97ms/step - accuracy: 1.0000 - loss: 0.0176 - mae: 0.0155 - val_accuracy: 0.9737 - val_loss: 0.1457 - val_mae: 0.0669
Epoch 6/10
[1m10/10[0m [32m━━━━

<keras.src.callbacks.history.History at 0x7ddad00d5a10>

In [None]:
# Evaluation on the held-out test pairs; the three values come back in the
# order loss, accuracy, mae (the metrics given at compile time).
loss, acc, mae = model.evaluate(
    x=[X_test[:, 0], X_test[:, 1]],
    y=y_test,
)
print(f"Loss: {loss}, Accuracy: {acc},mae :{mae}")

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.9948 - loss: 0.0565 - mae: 0.0369
Loss: 0.07592698186635971, Accuracy: 0.9895833134651184,mae :0.03984662517905235


# **Kata Kunci**

In [None]:
import random

def get_daily_recommendations(
    keyword, df, embedding_model, tokenizer,
    durasi_hari=3, wisata_per_hari=3,
    type_wisata=None
):
    """Rank destinations against a keyword and split the best hits into days.

    The keyword and every ``description_clean`` entry are tokenised, padded
    to 100 ids and embedded with ``embedding_model``; destinations are then
    ordered by cosine similarity to the keyword embedding.

    Args:
        keyword: Free-text query.
        df: DataFrame with at least ``nama`` and ``description_clean``
            columns (plus ``type_clean_*`` columns when filtering by type).
            It is not modified.
        embedding_model: Object whose ``predict`` maps padded sequences to
            embedding vectors.
            NOTE(review): the padding length is fixed at 100 here - confirm
            it matches the input length the model was built with.
        tokenizer: Fitted tokenizer exposing ``texts_to_sequences``.
        durasi_hari: Number of days in the itinerary.
        wisata_per_hari: Number of destinations per day.
        type_wisata: Optional tourism type (e.g. ``"Alam"``,
            ``"Desa Wisata"``); only rows flagged with that type are kept.

    Returns:
        Dict mapping ``"Hari ke-<n>"`` to a DataFrame with the columns
        ``nama``, ``description_clean`` and ``similarity``. A day's frame is
        shorter (possibly empty) when too few destinations are available.
        An empty dict is returned when ``type_wisata`` matches no column.
    """
    from sklearn.metrics.pairwise import cosine_similarity
    from tensorflow.keras.preprocessing.sequence import pad_sequences

    total_needed = durasi_hari * wisata_per_hari

    # Resolve the type column before doing any model work. The dataset's
    # column names contain spaces ('type_clean_Desa Wisata'), so the name is
    # tried verbatim first; the underscore spelling is kept as a fallback.
    type_column = None
    if type_wisata:
        wanted = str(type_wisata).strip()
        candidates = [
            f'type_clean_{wanted}',
            f'type_clean_{wanted}'.replace(' ', '_'),
            'type_clean_' + wanted.replace('_', ' '),
        ]
        type_column = next((c for c in candidates if c in df.columns), None)
        if type_column is None:
            print(f"Tipe wisata '{type_wisata}' tidak ditemukan.")
            # Same mapping type as the success path, so callers can always
            # iterate with .items().
            return {}

    def _embed(texts):
        """Tokenise, pad and embed a list of strings."""
        token_ids = tokenizer.texts_to_sequences(texts)
        padded = pad_sequences(token_ids, maxlen=100, padding='post')
        return embedding_model.predict(padded)

    keyword_embedding = _embed([keyword])
    all_embeddings = _embed(df['description_clean'].astype(str).tolist())

    # Similarity of the keyword to every description, stored on a copy so
    # the caller's DataFrame does not gain a 'similarity' column.
    similarities = cosine_similarity(keyword_embedding, all_embeddings)[0]
    ranked = df.assign(similarity=similarities)

    if type_column is not None:
        ranked = ranked[ranked[type_column] == 1]

    # Stable sort keeps the original row order among equal scores.
    top_rows = ranked.sort_values(
        by='similarity', ascending=False, kind='mergesort'
    ).head(total_needed)

    # Slice the ranked rows into consecutive per-day chunks.
    output_columns = ['nama', 'description_clean', 'similarity']
    daily_recommendations = {}
    for i in range(durasi_hari):
        start = i * wisata_per_hari
        end = start + wisata_per_hari
        daily_recommendations[f"Hari ke-{i+1}"] = (
            top_rows.iloc[start:end][output_columns].reset_index(drop=True)
        )

    return daily_recommendations


In [None]:
# `embedding_model` is never assigned anywhere in this notebook, so the call
# fails with NameError on a fresh kernel. The shared encoder `base_network`
# is the model that turns one padded token sequence into an embedding vector,
# which is what get_daily_recommendations needs.
results = get_daily_recommendations(
    keyword="keluarga",
    df=df,
    embedding_model=base_network,
    tokenizer=tokenizer,
    durasi_hari=4,
    wisata_per_hari=3,
    type_wisata="Alam"
)

# Print one table per day of the itinerary.
for day, wisata_df in results.items():
    print(f"\n{day}:")
    print(wisata_df)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 195ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step

Hari ke-1:
                     nama                                  description_clean  \
0       Bukit white stone  bukit dengan formasi batu kapur putih yang uni...   
1  Patung Kaligintung Lor  monumen berbentuk patung sebagai ikon lokal ya...   
2         Geger Menjangan  bukit karst dengan pemandangan luas dan cerita...   

   similarity  
0    0.986836  
1    0.982788  
2    0.981041  

Hari ke-2:
                                               nama  \
0                               Terasiring Sriharjo   
1  Lembah Bendo Camping Ground and Outdoor Activity   
2                                       puncak bucu   

                                   description_clean  similarity  
0  persawahan bertingkat yang indah dan hijau mir...    0.980869  
1  area camping luas yang berada di tengah alam a...    0.980704  
2  puncak bukit yang m

In [None]:
# Displays the table left in `wisata_df` by the loop of the previous cell,
# i.e. the recommendations of the last day that was printed.
wisata_df

Unnamed: 0,nama,description_clean,similarity
0,Desa Wisata Nglinggo,terletak di perbukitan menoreh desa ini menyug...,0.980311
1,Jalur Lahar Point Kaliadem,wisata petualangan menyusuri jejak aliran laha...,0.980189
2,Pinus pengger Yogyakarta,wisata hutan pinus yang terkenal dengan spot f...,0.980169


In [None]:
# Persist the trained siamese model to an HDF5 (.h5) file; a later cell
# reloads this file for conversion.
model.save('model_rekomendasi_wisata.h5')



In [None]:
!pip install tensorflowjs



In [None]:
import tensorflow as tf

# Reload the saved model. The function used inside the Lambda layer is
# handed over through custom_objects so Keras can resolve it by name.
model = tf.keras.models.load_model(
    'model_rekomendasi_wisata.h5',
    custom_objects={'cosine_distance': cosine_distance},
)

# Write a TensorFlow SavedModel directory; both converters below read it.
tf.saved_model.save(model, 'modelwisata_saved_model')

# Convert the SavedModel to a TFLite flatbuffer and store it on disk.
converter = tf.lite.TFLiteConverter.from_saved_model('modelwisata_saved_model')
tflite_model = converter.convert()

with open('model.tflite', mode='wb') as f:
    f.write(tflite_model)



In [None]:
# Convert the SavedModel directory into a TensorFlow.js graph model that is
# written to the model_tfjs/ directory.
!tensorflowjs_converter \
  --input_format=tf_saved_model \
  --output_format=tfjs_graph_model \
  modelwisata_saved_model \
  model_tfjs/


2025-05-31 13:12:16.187313: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748697136.257936    8701 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748697136.275040    8701 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[32m🌲 Try [0m[34mhttps://ydf.readthedocs.io[0m[32m, the successor of TensorFlow Decision Forests with more features and faster training![0m
2025-05-31 13:12:25.873062: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
I0000 00:00:1748697146.171381    8701 devices.cc:67] Number of eligible GPUs (core count >= 8, compute capabi

In [None]:
import shutil

# Bundle the model_tfjs/ directory into model_tfjs.zip; the archive path is
# returned and shown as the cell output.
shutil.make_archive(
    base_name='model_tfjs',
    format='zip',
    root_dir='model_tfjs',
)


'/content/model_tfjs.zip'