<a href="https://colab.research.google.com/github/PenditWiguna/Capstone/blob/main/Machine%20Learning/Notebook/Content%20Based%20Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the CSV data
data = pd.read_csv('https://raw.githubusercontent.com/PenditWiguna/Capstone/main/Machine%20Learning/Dataset/Dataset%20-%20tourismBali.csv')

# Select the columns the model needs. Take an explicit copy so that adding the
# encoded column below writes to an independent frame instead of triggering
# pandas' SettingWithCopyWarning.
df = data[['Place_Id', 'Description', 'Category']].copy()

# Encode the category labels as integer ids
category_encoder = LabelEncoder()
df['Category_Encoded'] = category_encoder.fit_transform(df['Category'])

# Tokenize the descriptions and pad them (at the end) to a common length
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['Description'])
sequences = tokenizer.texts_to_sequences(df['Description'])
padded_sequences = pad_sequences(sequences, padding='post')

# The padded width is the sequence length the model's description input expects
max_sequence_length = padded_sequences.shape[1]

# Input arrays for the model
X_category = df['Category_Encoded'].values
X_description = padded_sequences


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Category_Encoded'] = category_encoder.fit_transform(df['Category'])


In [None]:
# Shorthand for the Keras layers namespace used throughout this cell
layers = tf.keras.layers

# Sizes: one embedding width shared by both branches and the output layer
embedding_dim = 50
# +1 leaves room for index 0, which pad_sequences uses as the padding value
vocab_size = len(tokenizer.word_index) + 1
category_count = df['Category_Encoded'].nunique()

# Category branch: a single integer id -> embedding -> flat vector
category_input = layers.Input(shape=(1,), name='category_input')
category_embedding = layers.Embedding(
    input_dim=category_count,
    output_dim=embedding_dim,
    name='category_embedding',
)(category_input)
category_flatten = layers.Flatten()(category_embedding)

# Description branch: padded token ids -> embedding -> average over the sequence
# (despite its name, `description_flatten` is the pooled vector, not a Flatten)
description_input = layers.Input(shape=(max_sequence_length,), name='description_input')
description_embedding = layers.Embedding(
    input_dim=vocab_size,
    output_dim=embedding_dim,
    name='description_embedding',
)(description_input)
description_flatten = layers.GlobalAveragePooling1D()(description_embedding)

# Merge both branches and project to the final embedding
concatenated = layers.Concatenate()([category_flatten, description_flatten])
output = layers.Dense(embedding_dim, activation='relu')(concatenated)

# Assemble and compile the two-input model
model = tf.keras.Model(
    inputs=[category_input, description_input],
    outputs=output,
)
model.compile(optimizer='adam', loss='mse')

model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 category_input (InputLayer  [(None, 1)]                  0         []                            
 )                                                                                                
                                                                                                  
 description_input (InputLa  [(None, 65)]                 0         []                            
 yer)                                                                                             
                                                                                                  
 category_embedding (Embedd  (None, 1, 50)                400       ['category_input[0][0]']      
 ing)                                                                                         

In [None]:
# Fit the model for 10 epochs, using its own pre-training predictions as targets.
# NOTE(review): this is not an autoencoder -- the target is the model's output,
# not its input, so the initial MSE is presumably ~0 and little (if anything)
# is learned. Confirm the intended training objective.
model_inputs = [X_category, X_description]
initial_outputs = model.predict(model_inputs)
model.fit(model_inputs, initial_outputs, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7f5e16f77430>

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Compute the embedding (model output) for every tourist place, one row per row of df
embeddings = model.predict([X_category, X_description])

# Fungsi untuk memberikan rekomendasi
def recommend(place_id, embeddings, top_k=5):
    """Return the Place_Ids most similar to ``place_id`` by cosine similarity.

    Reads the module-level ``df``; row ``i`` of ``embeddings`` is taken to be
    the embedding of row ``i`` of ``df``.

    Args:
        place_id: Value to look up in ``df['Place_Id']``.
        embeddings: 2-D array of embeddings, one row per row of ``df``.
        top_k: Maximum number of recommendations to return.

    Returns:
        NumPy array of up to ``top_k`` Place_Id values, most similar first.
        The queried place itself is never included.

    Raises:
        IndexError: If ``place_id`` does not occur in ``df['Place_Id']``.
    """
    # Look the place up by row position (not index label) so the result stays
    # aligned with `embeddings` even when df does not have a default RangeIndex.
    matches = np.flatnonzero((df['Place_Id'] == place_id).to_numpy())
    if matches.size == 0:
        raise IndexError(f"Place_Id {place_id!r} not found in df['Place_Id']")
    place_idx = matches[0]

    similarities = cosine_similarity([embeddings[place_idx]], embeddings)[0]

    # Rank by descending similarity, then remove the query explicitly. Skipping
    # "the first result" is not reliable: with tied similarities (for example
    # duplicate embeddings) the query is not guaranteed to be ranked first.
    ranked = np.argsort(-similarities, kind='stable')
    similar_indices = ranked[ranked != place_idx][:max(top_k, 0)]

    return df.iloc[similar_indices]['Place_Id'].values



In [None]:
# Build a Place_Id -> Place_Name lookup for turning recommended ids into names.
# This reuses the `data` frame loaded at the top of the notebook instead of
# downloading the same CSV a second time.
df_convert = data[['Place_Id', 'Place_Name']]

# Plain dict keyed by Place_Id (kept under the name `df2` used by later cells)
df2 = df_convert.set_index('Place_Id')['Place_Name'].to_dict()

In [None]:
# Example: recommend places similar to the one with this Place_Id
place_id_predict = 23
recommendations = recommend(place_id_predict, embeddings)

# Map every recommended Place_Id to its place name
place_record = [df2[recommended_id] for recommended_id in recommendations]

print(f"Rekomendasi untuk Place_Id {place_id_predict}: {place_record}")

Rekomendasi untuk Place_Id 23: ['Pura Taman Ayun', 'Tirta Gangga', 'Pura Puseh Batuan', 'Pura Besakih', 'Pura Saraswati']


# Model Saving

In [None]:
# Convert the Keras model to the TensorFlow Lite format
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model to a .tflite file
tflite_path = 'recommender_model.tflite'
with open(tflite_path, mode='wb') as tflite_file:
    tflite_file.write(tflite_model)

# Variable Saving

In [None]:
# Input arrays for the model
X_category = df['Category_Encoded'].to_numpy()
X_description = padded_sequences

# Combine both inputs into one 2-D NumPy array: the category id is the first
# column, followed by the padded description token ids
X_data = np.column_stack((X_category, X_description))

# Save the combined array to CSV as integers
np.savetxt('X_data.csv', X_data, fmt='%d', delimiter=',')

# Variable Testing

In [None]:
# Display the encoded category ids (one integer per row of df)
X_category

array([0, 2, 1, 1, 1, 1, 0, 4, 3, 6, 3, 0, 1, 0, 1, 1, 1, 3, 3, 3, 2, 3,
       7, 1, 1, 7, 7, 0, 5, 2, 1, 2, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 7, 5, 5, 5, 2, 7, 7, 7, 7, 7, 7, 7, 5, 7, 7,
       2, 1, 3, 3, 0, 7, 6, 0, 0])

In [None]:
# Display the padded description token-id sequences
X_description

array([[375, 376,  60, ...,   0,   0,   0],
       [380,  30,  75, ...,   0,   0,   0],
       [219,  12, 220, ...,   0,   0,   0],
       ...,
       [966,   9,  13, ...,   0,   0,   0],
       [373, 374,   4, ...,   0,   0,   0],
       [ 49,  73,  11, ...,   0,   0,   0]], dtype=int32)