In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from sklearn.model_selection import train_test_split

# Load data
# Destination catalogue comes from a local CSV, user preferences from a remote CSV.
travel_data = pd.read_csv('travel_destinations_cleaned.csv')
user_preferences = pd.read_csv('https://raw.githubusercontent.com/AldiraPutri19/Locoveer/refs/heads/machine-learning/datasets/user_preferences.csv')

# Preprocess travel data
# Missing values are blanked while casting to text. A plain astype(str) would
# turn NaN into the literal string 'nan', which a later fillna('') can no longer
# detect and which TF-IDF would then index as if it were a real word.
description_raw = travel_data['Description']
category_raw = travel_data['Category']
travel_data['Description'] = description_raw.astype(str).where(description_raw.notna(), '')
travel_data['Category'] = category_raw.astype(str).where(category_raw.notna(), '')
travel_data['content'] = travel_data['Description'] + " " + travel_data['Category']

# Preprocess user preferences
# Same missing-value handling, then lower-case via the vectorised string accessor.
preferred_raw = user_preferences['Preferred_Category']
user_preferences['Preferred_Category'] = preferred_raw.astype(str).where(preferred_raw.notna(), '')
user_preferences['preferences'] = user_preferences['Preferred_Category'].str.lower()

# Compute TF-IDF matrices
# The vocabulary is learned from the destination text only; user preferences are
# projected into that same vocabulary so both matrices share their columns.
# NOTE(review): stop_words='english' removes English stop words only - confirm
# this matches the language the Description column is written in.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(travel_data['content'])
user_tfidf_matrix = tfidf.transform(user_preferences['preferences'])

# Generate user-destination pairs with ratings from Average_Rating
# Every user is paired with every destination in user-major order:
# (0, 0), (0, 1), ..., (0, D-1), (1, 0), ...
n_users = user_tfidf_matrix.shape[0]
n_destinations = tfidf_matrix.shape[0]
pairs = [(user_id, dest_id) for user_id in range(n_users) for dest_id in range(n_destinations)]

# The target of a pair is the destination's Average_Rating, so the
# per-destination rating vector is repeated once per user.
# NOTE(review): the target does not depend on the user, so y carries no
# user-specific signal - confirm this is the intended training objective.
ratings = np.tile(travel_data['Average_Rating'].to_numpy(), n_users)

# Create feature matrix for pairs
# Densify each sparse matrix once and expand with repeat/tile instead of slicing
# one sparse row per pair; row order matches `pairs` exactly.
user_dense = user_tfidf_matrix.toarray()
destination_dense = tfidf_matrix.toarray()
user_features = np.repeat(user_dense, n_destinations, axis=0)       # u0, u0, ..., u1, u1, ...
destination_features = np.tile(destination_dense, (n_users, 1))     # d0, d1, ..., d0, d1, ...

X = np.hstack([user_features, destination_features])
y = ratings

# Define the model
input_dim = tfidf_matrix.shape[1]
combined_input_dim = input_dim * 2  # user vector and destination vector side by side

def create_model(learning_rate=0.001):
    """Build and compile the feed-forward rating regressor.

    The network takes a vector of length ``combined_input_dim`` and passes it
    through two ReLU blocks (128 then 64 units), each followed by 50% dropout,
    before a single linear output unit.

    Args:
        learning_rate: Step size handed to the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` that minimises mean squared error.
    """
    features_in = Input(shape=(combined_input_dim,), name='input_layer')

    # Stack the hidden blocks widest-first.
    x = features_in
    for units in (128, 64):
        x = Dense(units, activation='relu')(x)
        x = Dropout(0.5)(x)

    rating_out = Dense(1, activation='linear')(x)

    network = Model(inputs=features_in, outputs=rating_out)
    network.compile(optimizer=Adam(learning_rate=learning_rate), loss=MeanSquaredError())
    return network

# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the network with its default learning rate and fit it, reporting the
# validation loss after every epoch.
model = create_model()
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=32,
    verbose=1,
)

# Define a function to get recommendations
def get_recommendations(user_id, top_n=5):
    """Return the destinations with the highest predicted score for one user.

    The user's TF-IDF vector is paired with every destination's TF-IDF vector,
    the trained ``model`` scores each pair, and the best-scoring rows of
    ``travel_data`` are returned.

    Args:
        user_id: Row position of the user in ``user_tfidf_matrix``.
        top_n: Maximum number of destinations to return; must not be negative.

    Returns:
        A slice of ``travel_data`` with at most ``top_n`` rows, ordered from the
        highest to the lowest predicted score.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    destination_matrix = tfidf_matrix.toarray()

    # Repeat the single user vector so it lines up with every destination row.
    user_pref = user_tfidf_matrix[user_id].toarray().flatten()
    user_pref = np.tile(user_pref, (destination_matrix.shape[0], 1))
    combined_features = np.hstack([user_pref, destination_matrix])

    predicted_scores = model.predict(combined_features).flatten()

    # Reverse first, then take the head: slicing the tail with [-top_n:] would
    # return every destination when top_n == 0.
    top_indices = np.argsort(predicted_scores)[::-1][:top_n]
    return travel_data.iloc[top_indices]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Show the default number of recommendations for the user stored at row 300
sample_user_id = 300
recommendations = get_recommendations(sample_user_id)
print(recommendations)

     Destination_ID            Destination_Name  \
132           133.0  Puncak Kebun Buah Mangunan   
336           337.0             Kampung Pelangi   
294           295.0         Museum Nike Ardilla   
248           249.0   Upside Down World Bandung   
21             22.0             Masjid Istiqlal   

                                           Description Category        City  \
132  Berlibur di pegunungan memang menyenangkan. Da...        4  Yogyakarta   
336  Kampung pelangi atau dalam bahasa Inggris dise...        4    Semarang   
294  Museum Nike Ardilla diresmikan atau dibuka unt...        1     Bandung   
248  Upside Down World Bandung pertama kali dibuka ...        4     Bandung   
21   Masjid Istiqlal (arti harfiah: Masjid Merdeka)...        5     Jakarta   

        Price                                       Coordinate       Lat  \
132    5000.0  {'lat': -7.941371800000001, 'lng': 110.4247345} -7.941372   
336    3000.0  {'lat': -6.988881200000001, 'lng': 110.4083781} -6.

In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler

# Load data
# Destination catalogue comes from a local CSV, user preferences from a remote CSV.
travel_data = pd.read_csv('travel_destinations_cleaned.csv')
user_preferences = pd.read_csv('https://raw.githubusercontent.com/AldiraPutri19/Locoveer/refs/heads/machine-learning/datasets/user_preferences.csv')

# Preprocess travel data
# Missing values are blanked while casting to text. A plain astype(str) would
# turn NaN into the literal string 'nan', which a later fillna('') can no longer
# detect and which TF-IDF would then index as if it were a real word.
description_raw = travel_data['Description']
category_raw = travel_data['Category']
travel_data['Description'] = description_raw.astype(str).where(description_raw.notna(), '')
travel_data['Category'] = category_raw.astype(str).where(category_raw.notna(), '')
travel_data['content'] = travel_data['Description'] + " " + travel_data['Category']

# Preprocess user preferences
# Same missing-value handling, then lower-case via the vectorised string accessor.
preferred_raw = user_preferences['Preferred_Category']
user_preferences['Preferred_Category'] = preferred_raw.astype(str).where(preferred_raw.notna(), '')
user_preferences['preferences'] = user_preferences['Preferred_Category'].str.lower()

# Compute TF-IDF matrices
# The vocabulary is learned from the destination text only; user preferences are
# projected into that same vocabulary so both matrices share their columns.
# NOTE(review): stop_words='english' removes English stop words only - confirm
# this matches the language the Description column is written in.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(travel_data['content'])
user_tfidf_matrix = tfidf.transform(user_preferences['preferences'])

# Generate user-destination pairs with ratings from Average_Rating
# Every user is paired with every destination in user-major order:
# (0, 0), (0, 1), ..., (0, D-1), (1, 0), ...
n_users = user_tfidf_matrix.shape[0]
n_destinations = tfidf_matrix.shape[0]
pairs = [(user_id, dest_id) for user_id in range(n_users) for dest_id in range(n_destinations)]

# The target of a pair is the destination's Average_Rating, so the
# per-destination rating vector is repeated once per user.
# NOTE(review): the target does not depend on the user, so y carries no
# user-specific signal - confirm this is the intended training objective.
ratings = np.tile(travel_data['Average_Rating'].to_numpy(), n_users)

# Create feature matrix for pairs
# Densify each sparse matrix once and expand with repeat/tile instead of slicing
# one sparse row per pair; row order matches `pairs` exactly.
user_dense = user_tfidf_matrix.toarray()
destination_dense = tfidf_matrix.toarray()
user_features = np.repeat(user_dense, n_destinations, axis=0)       # u0, u0, ..., u1, u1, ...
destination_features = np.tile(destination_dense, (n_users, 1))     # d0, d1, ..., d0, d1, ...

X = np.hstack([user_features, destination_features])
y = ratings

# Scale features
# Fit the scaler on the training rows only, so validation rows do not leak into
# the mean/variance estimates. train_test_split chooses rows from the sample
# count and random_state alone, so the settings here must stay identical to the
# train/validation split performed further down (test_size=0.2, random_state=42).
train_rows, _ = train_test_split(np.arange(X.shape[0]), test_size=0.2, random_state=42)
scaler = StandardScaler()
scaler.fit(X[train_rows])
X = scaler.transform(X)

# Define the model
input_dim = tfidf_matrix.shape[1]
combined_input_dim = input_dim * 2  # user vector and destination vector side by side

def create_model(learning_rate=0.0001):
    """Build and compile the feed-forward rating regressor.

    The network takes a vector of length ``combined_input_dim`` and passes it
    through two ReLU blocks (128 then 64 units), each followed by 30% dropout,
    before a single linear output unit.

    Args:
        learning_rate: Step size handed to the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` that minimises mean squared error.
    """
    features_in = Input(shape=(combined_input_dim,), name='input_layer')

    # Stack the hidden blocks widest-first.
    x = features_in
    for units in (128, 64):
        x = Dense(units, activation='relu')(x)
        x = Dropout(0.3)(x)

    rating_out = Dense(1, activation='linear')(x)

    network = Model(inputs=features_in, outputs=rating_out)
    network.compile(optimizer=Adam(learning_rate=learning_rate), loss=MeanSquaredError())
    return network

# Hyperparameter tuning function
def hyperparameter_tuning(X_train, y_train, X_val, y_val, learning_rates, epochs=10, batch_size=32):
    """Pick the learning rate whose model reaches the lowest validation loss.

    A fresh model from ``create_model`` is trained for every candidate, with
    early stopping on ``val_loss``. Each candidate is scored by the best
    validation loss it reached in any epoch, which is the epoch whose weights
    ``restore_best_weights=True`` keeps, rather than by the last epoch.

    Args:
        X_train: Training feature matrix.
        y_train: Training targets.
        X_val: Validation feature matrix.
        y_val: Validation targets.
        learning_rates: Iterable of candidate learning rates, tried in order.
        epochs: Maximum number of epochs per candidate.
        batch_size: Mini-batch size per candidate.

    Returns:
        The learning rate with the lowest validation loss (the first one wins on
        ties), or ``None`` if ``learning_rates`` is empty or every run diverged.
    """
    best_learning_rate = None
    best_loss = float('inf')

    for lr in learning_rates:
        print(f"Training model with learning rate: {lr}")
        candidate = create_model(learning_rate=lr)

        # Stop once val_loss has not improved for 5 epochs and roll back to the best weights.
        early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

        history = candidate.fit(
            X_train,
            y_train,
            epochs=epochs,
            batch_size=batch_size,
            verbose=1,
            validation_data=(X_val, y_val),
            callbacks=[early_stopping],
        )

        # Score the run by its best epoch. A NaN anywhere means training diverged
        # at this learning rate, so the run must never be selected.
        epoch_val_losses = np.asarray(history.history['val_loss'], dtype=float)
        if np.isnan(epoch_val_losses).any():
            val_loss = float('inf')
        else:
            val_loss = float(epoch_val_losses.min())
        print(f"Validation Loss: {val_loss}")

        # Strict '<' keeps the earliest candidate on ties.
        if val_loss < best_loss:
            best_loss = val_loss
            best_learning_rate = lr

    return best_learning_rate

# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate learning rates, evaluated in exactly this order
learning_rates = [0.0001, 0.005, 0.001, 0.05, 0.01, 0.5, 0.1]

# Search for the candidate with the lowest validation loss and report it
best_learning_rate = hyperparameter_tuning(
    X_train,
    y_train,
    X_val,
    y_val,
    learning_rates,
)
print(f"Best Learning Rate: {best_learning_rate}")

# Retrain from scratch with the winning learning rate, again with early stopping
model = create_model(learning_rate=best_learning_rate)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    epochs=10,
    batch_size=32,
    verbose=1,
)

# Define a function to get recommendations
def get_recommendations(user_id, top_n=5):
    """Return the destinations with the highest predicted score for one user.

    The user's TF-IDF vector is paired with every destination's TF-IDF vector,
    the pairs are standardised with the fitted ``scaler``, the trained ``model``
    scores each pair, and the best-scoring rows of ``travel_data`` are returned.

    Args:
        user_id: Row position of the user in ``user_tfidf_matrix``.
        top_n: Maximum number of destinations to return; must not be negative.

    Returns:
        A slice of ``travel_data`` with at most ``top_n`` rows, ordered from the
        highest to the lowest predicted score.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    destination_matrix = tfidf_matrix.toarray()

    # Repeat the single user vector so it lines up with every destination row.
    user_pref = user_tfidf_matrix[user_id].toarray().flatten()
    user_pref = np.tile(user_pref, (destination_matrix.shape[0], 1))
    combined_features = np.hstack([user_pref, destination_matrix])

    # Apply the same standardisation the model saw during training.
    combined_features = scaler.transform(combined_features)

    predicted_scores = model.predict(combined_features).flatten()

    # Reverse first, then take the head: slicing the tail with [-top_n:] would
    # return every destination when top_n == 0.
    top_indices = np.argsort(predicted_scores)[::-1][:top_n]
    return travel_data.iloc[top_indices]

Training model with learning rate: 0.0001
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Validation Loss: 0.0005214618286117911
Training model with learning rate: 0.005
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
 2535/10925 [=====>........................] - ETA: 5:40 - loss: 0.0096

In [None]:
# Show the default number of recommendations for the user stored at row 0
sample_user_id = 0
recommendations = get_recommendations(sample_user_id)
print(recommendations)