In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dense, Flatten, Concatenate, Dropout, BatchNormalization # Tambahkan BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers # Import regularizers

# Make sure KerasTuner is importable; if it is missing, install it with pip
# for the running interpreter and import it again.
try:
    import keras_tuner as kt
except ImportError:
    import subprocess
    import sys

    print("KerasTuner not found. Installing...")
    _pip_install_cmd = [sys.executable, "-m", "pip", "install", "keras-tuner", "-q"]
    subprocess.check_call(_pip_install_cmd)
    import keras_tuner as kt
    print("KerasTuner installed successfully.")


# --- Seed every RNG used below (Python, NumPy, TensorFlow) for reproducibility ---
SEED = 42
for _seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_rng(SEED)

# --- Load Data ---
# Adjust the two CSV paths below if your files live somewhere else.
# When either file is missing, a small synthetic dataset with the same
# columns is generated so the rest of the script can still be demonstrated.
try:
    df_wisata = pd.read_csv("Cleaned Dataset Item (tambahin feature engineering).csv")
    df_user = pd.read_csv("Cleaned Dataset User.csv")
    print("Dataset berhasil dimuat dari file CSV.")
except FileNotFoundError:
    print("PERINGATAN: File CSV tidak ditemukan. Menggunakan data dummy untuk demonstrasi.")
    print("Pastikan file 'Cleaned Dataset Item (tambahin feature engineering).csv' dan 'Cleaned Dataset User.csv' ada.")

    # Synthetic item catalogue: 109 places (100 from the repeating pattern + 9 extra).
    _dummy_item_numbers = range(109)
    data_wisata = {
        'ID Tempat': [f'item_{i}' for i in _dummy_item_numbers],
        'Nama Wisata': [f'Wisata Dummy {i}' for i in _dummy_item_numbers],
        'Kategori': ['budaya, seni', 'lingkungan', 'sejarah', 'budaya', 'teknologi'] * 20 + ['budaya'] * 9,
        'Kategori Umur': ['Semua Umur', 'Remaja', 'Anak-anak', 'Remaja', 'Semua Umur'] * 20 + ['Semua Umur'] * 9,
        'Deskripsi Cleaned': ['deskripsi dummy ' + str(i) for i in _dummy_item_numbers],
        'Aktivitas Cleaned': ['aktivitas dummy ' + str(i) for i in _dummy_item_numbers],
        'Fasilitas Cleaned': ['fasilitas dummy ' + str(i) for i in _dummy_item_numbers],
        'Overall Rating (Google Maps)': np.random.uniform(3.0, 5.0, 109).round(1),
        'Jumlah Ulasan (Google Maps)': np.random.randint(10, 1000, 109),
    }
    df_wisata = pd.DataFrame(data_wisata)

    # Synthetic ratings: 50 users, each rating 5-20 distinct random items with 1-5 stars.
    user_ids_dummy = [f'user_dummy_{i}' for i in range(50)]
    item_ids_dummy = df_wisata['ID Tempat'].tolist()
    ratings_data_dummy = []
    for _dummy_user in user_ids_dummy:
        _sampled_items = random.sample(item_ids_dummy, random.randint(5, 20))
        ratings_data_dummy.extend(
            {'ID User': _dummy_user, 'ID Tempat': _sampled_item, 'rating': random.randint(1, 5)}
            for _sampled_item in _sampled_items
        )
    df_user = pd.DataFrame(ratings_data_dummy)

# --- Preprocessing df_user ---
# Keep only the (user, item, rating) triple and drop incomplete or repeated rows.
df_user_cleaned = df_user[['ID User', 'ID Tempat', 'rating']].dropna().drop_duplicates()

user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

# The item encoder is fit on EVERY item ID in df_wisata, so items without any
# rating still get an integer ID.
item_encoder.fit(df_wisata['ID Tempat'])

# Discard ratings that point at items missing from df_wisata. The explicit
# .copy() makes df_user_cleaned an independent frame, so the column
# assignments below do not write onto a filtered slice (the chained-assignment
# pattern behind pandas' SettingWithCopyWarning).
df_user_cleaned = df_user_cleaned[df_user_cleaned['ID Tempat'].isin(item_encoder.classes_)].copy()

# The user encoder only knows users that still have at least one rating after
# the item filter. No further user filtering is needed: the encoder is fit on
# exactly the rows that are transformed below.
user_encoder.fit(df_user_cleaned['ID User'])

# Map raw user / item IDs to contiguous integers.
df_user_cleaned['user_id_int'] = user_encoder.transform(df_user_cleaned['ID User'])
df_user_cleaned['item_id_int'] = item_encoder.transform(df_user_cleaned['ID Tempat'])

num_users = len(user_encoder.classes_)
num_items = len(item_encoder.classes_)  # number of unique items in df_wisata

# Min-max normalise the ratings to [0, 1].
min_rating = df_user_cleaned['rating'].min()
max_rating = df_user_cleaned['rating'].max()
if max_rating == min_rating:  # all ratings identical: avoid dividing by zero
    df_user_cleaned['rating_norm'] = 0.5 if min_rating > 0 else 0.0
else:
    df_user_cleaned['rating_norm'] = (df_user_cleaned['rating'] - min_rating) / (max_rating - min_rating)

print(f"\nJumlah User Unik setelah cleaning: {num_users}")
print(f"Jumlah Tempat Wisata Unik di df_wisata (digunakan item_encoder): {len(item_encoder.classes_)}")
print(f"Jumlah Tempat Wisata Unik yang memiliki rating di df_user_cleaned: {df_user_cleaned['item_id_int'].nunique()}")


# --- Feature engineering and preprocessing of df_wisata ---
# Multi-hot encode the ', '-separated category labels and replace the raw
# 'Kategori' column with the generated indicator columns.
df_encoded_kategori = df_wisata['Kategori'].str.get_dummies(sep=', ')
df_wisata = pd.concat([df_wisata, df_encoded_kategori], axis=1).drop(columns=['Kategori'])

# Same treatment for the age-group labels.
df_encoded_umur = df_wisata['Kategori Umur'].str.get_dummies(sep=', ')
df_wisata = pd.concat([df_wisata, df_encoded_umur], axis=1).drop(columns=['Kategori Umur'])

# One text blob per item: description + activities + facilities (missing -> '').
_deskripsi_text = df_wisata['Deskripsi Cleaned'].fillna('')
_aktivitas_text = df_wisata['Aktivitas Cleaned'].fillna('')
_fasilitas_text = df_wisata['Fasilitas Cleaned'].fillna('')
df_wisata['text_features'] = _deskripsi_text + ' ' + _aktivitas_text + ' ' + _fasilitas_text

# TF-IDF vectors of the text blob and the item-by-item cosine similarity.
# Row i of both matrices corresponds to row i of df_wisata.
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(df_wisata['text_features'])
cosine_sim_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Indicator columns produced by the two get_dummies calls above.
ALL_GENERATED_KATEGORI_COLUMNS = list(df_encoded_kategori.columns)
ALL_GENERATED_UMUR_COLUMNS = list(df_encoded_umur.columns)

# Categories we explicitly want as model features, when present in the data.
LIST_KATEGORI_WISATA_DEFINED = [
    'budaya', 'kreativitas', 'lingkungan', 'religi', 'sains',
    'sejarah', 'seni', 'teknologi'
]
# Keep only the wanted categories that actually exist as columns.
EXISTING_LIST_KATEGORI_WISATA = [col for col in LIST_KATEGORI_WISATA_DEFINED if col in df_wisata.columns]

# Final feature set: Google rating + category indicators + age-group
# indicators, de-duplicated and sorted by column name.
_kategori_feature_cols = [col for col in EXISTING_LIST_KATEGORI_WISATA if col in df_wisata.columns]
_umur_feature_cols = [col for col in ALL_GENERATED_UMUR_COLUMNS if col in df_wisata.columns]
feature_columns = sorted(
    {'Overall Rating (Google Maps)', *_kategori_feature_cols, *_umur_feature_cols}
)

# Per-item feature table keyed by the same integer IDs the ratings use.
df_wisata_features = df_wisata[feature_columns].assign(
    item_id_int=item_encoder.transform(df_wisata['ID Tempat'])
)

# Scale the Google rating to [0, 1].
scaler = MinMaxScaler()
_rating_col = 'Overall Rating (Google Maps)'
if _rating_col not in df_wisata_features.columns:
    print("Peringatan: Kolom 'Overall Rating (Google Maps)' tidak ditemukan di df_wisata_features.")
else:
    df_wisata_features[_rating_col] = scaler.fit_transform(df_wisata_features[[_rating_col]])


# --- Merge ratings with item features ---
# Left join keeps every rating row; any missing feature value becomes 0.
df_final = df_user_cleaned.merge(df_wisata_features, on='item_id_int', how='left').fillna(0)

# --- Train / test split ---
train_df, test_df = train_test_split(df_final, test_size=0.2, random_state=SEED)


def _to_model_inputs(frame):
    """Return the three named input arrays the model expects for *frame*."""
    return {
        'user_input': frame['user_id_int'].values,
        'item_input': frame['item_id_int'].values,
        'features_input': frame[feature_columns].values.astype(np.float32),
    }


def _to_targets(frame):
    """Return the normalised ratings of *frame* as a float32 array."""
    return frame['rating_norm'].values.astype(np.float32)


X_train, y_train = _to_model_inputs(train_df), _to_targets(train_df)
X_test, y_test = _to_model_inputs(test_df), _to_targets(test_df)

num_item_features = len(feature_columns)
print(f"\nJumlah Fitur Item (setelah validasi kolom): {num_item_features}")
print(f"Nama Kolom Fitur: {feature_columns}")


# --- Model Building Function for KerasTuner (dengan L2 Regularization & Batch Norm) ---
def build_hybrid_ncf_model_for_tuner_v2(hp, num_users_static, num_items_static, num_item_features_static):
    """Build and compile one candidate hybrid NCF model for a KerasTuner trial.

    The network concatenates a user embedding, an item embedding and a small
    MLP over the item feature vector, then feeds the result through a
    three-layer MLP (L2-regularised, with batch normalisation and dropout)
    ending in a single sigmoid unit.

    Args:
        hp: KerasTuner hyperparameter container used to declare/sample values.
        num_users_static: Size of the user embedding vocabulary.
        num_items_static: Size of the item embedding vocabulary.
        num_item_features_static: Width of the ``features_input`` vector.

    Returns:
        A compiled Keras ``Model`` (Adam optimiser, MSE loss, MAE metric) with
        inputs named ``user_input``, ``item_input`` and ``features_input``.
    """
    # Search space. Hyperparameters are declared in a fixed order on purpose.
    embedding_dim = hp.Int('embedding_dim', min_value=32, max_value=96, step=16)
    tower_units = (
        hp.Int('mlp_units_1', min_value=32, max_value=96, step=16),
        hp.Int('mlp_units_2', min_value=16, max_value=64, step=16),
        hp.Int('mlp_units_3', min_value=8, max_value=32, step=8),
    )
    dropout_rate = hp.Float('dropout_rate', min_value=0.2, max_value=0.5, step=0.05)
    learning_rate = hp.Choice('learning_rate', values=[1e-3, 5e-4, 1e-4])
    feature_units = (
        hp.Int('feature_mlp_units_1', min_value=16, max_value=48, step=16),
        hp.Int('feature_mlp_units_2', min_value=8, max_value=32, step=8),
    )
    l2_reg_factor = hp.Choice('l2_reg_factor', values=[1e-4, 1e-5, 1e-6, 0.0])

    def l2_penalty():
        """Fresh L2 regulariser using the sampled factor."""
        return regularizers.l2(l2_reg_factor)

    def relu_dense(units):
        """ReLU dense layer with the shared L2 kernel penalty."""
        return Dense(units, activation='relu', kernel_regularizer=l2_penalty())

    # Inputs: one integer ID per user / item plus the dense item features.
    user_input_layer = Input(shape=(1,), name='user_input')
    item_input_layer = Input(shape=(1,), name='item_input')
    features_input_layer = Input(shape=(num_item_features_static,), name='features_input')

    # ID embeddings; both sides share the same dimensionality.
    embedded = {}
    for side, id_tensor, vocab_size in (
        ('user', user_input_layer, num_users_static),
        ('item', item_input_layer, num_items_static),
    ):
        embedded[side] = Embedding(
            input_dim=vocab_size,
            output_dim=embedding_dim,
            name=f'{side}_embedding',
            embeddings_regularizer=l2_penalty(),
        )(id_tensor)

    user_vec = Flatten(name='flatten_user')(embedded['user'])
    item_vec = Flatten(name='flatten_item')(embedded['item'])

    # Small MLP over the item feature vector.
    feature_mlp = relu_dense(feature_units[0])(features_input_layer)
    feature_mlp = Dropout(dropout_rate)(feature_mlp)
    feature_mlp = relu_dense(feature_units[1])(feature_mlp)

    # Fuse the three representations and normalise before the main tower.
    hidden = Concatenate()([user_vec, item_vec, feature_mlp])
    hidden = BatchNormalization()(hidden)

    # Main tower: the first two layers get BatchNorm + Dropout, the last does not.
    *regularised_units, last_units = tower_units
    for units in regularised_units:
        hidden = relu_dense(units)(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Dropout(dropout_rate)(hidden)
    hidden = relu_dense(last_units)(hidden)

    # Sigmoid keeps predictions in [0, 1].
    output_layer = Dense(1, activation='sigmoid', name='output')(hidden)

    model = Model(
        inputs=[user_input_layer, item_input_layer, features_input_layer],
        outputs=output_layer,
    )
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='mean_squared_error',
        metrics=['mae'],
    )
    return model

# --- Hyperparameter Tuning Setup ---
# Model selection (the tuner objective and both early-stopping callbacks) must
# not look at the test set; otherwise the "Test" numbers reported at the end
# are optimistically biased. A validation split is therefore carved out of the
# training rows, and X_test / y_test is used only for the final evaluation.
VALIDATION_FRACTION = 0.1

if num_users == 0 or num_items == 0:
    print("Peringatan: num_users atau num_items adalah nol. Embedding layer mungkin error.")
    best_model_tuned_v2 = None  # nothing can be trained
else:
    def build_fn_with_static_args_v2(hp):
        """Bind the dataset-dependent sizes so KerasTuner only has to pass ``hp``."""
        return build_hybrid_ncf_model_for_tuner_v2(
            hp,
            num_users_static=num_users,
            num_items_static=num_items,  # number of unique items known to item_encoder
            num_item_features_static=num_item_features
        )

    tuner_v2 = kt.RandomSearch(
        build_fn_with_static_args_v2,
        objective='val_mae',
        max_trials=15,
        executions_per_trial=1,
        directory='ncf_tuning_v2_final',
        project_name='hybrid_ncf_reg_bn_final',
        overwrite=True
    )

    tuner_v2.search_space_summary()

    early_stopping_cb = tf.keras.callbacks.EarlyStopping(
        monitor='val_mae',
        patience=10,
        restore_best_weights=True,
        verbose=1
    )

    print("\nMemulai Hyperparameter Tuning (Versi 2 dengan Regularisasi & Batch Norm)...")
    _n_train_rows = y_train.shape[0]
    # At least two rows are needed: one to fit on and one to validate on.
    if X_train['user_input'].shape[0] > 1 and _n_train_rows > 1:
        # Deterministic shuffle, then hold out VALIDATION_FRACTION of the rows.
        _shuffled_rows = np.random.RandomState(SEED).permutation(_n_train_rows)
        _n_val_rows = max(1, int(round(VALIDATION_FRACTION * _n_train_rows)))
        _val_rows, _fit_rows = _shuffled_rows[:_n_val_rows], _shuffled_rows[_n_val_rows:]

        X_fit = {name: values[_fit_rows] for name, values in X_train.items()}
        y_fit = y_train[_fit_rows]
        X_val = {name: values[_val_rows] for name, values in X_train.items()}
        y_val = y_train[_val_rows]

        tuner_v2.search(
            X_fit,
            y_fit,
            epochs=60,
            validation_data=(X_val, y_val),
            callbacks=[early_stopping_cb],
            batch_size=128,
            verbose=1
        )

        best_hps_v2 = tuner_v2.get_best_hyperparameters(num_trials=1)[0]

        print(f"""
        Hyperparameter terbaik (V2 Final) yang ditemukan:
        Embedding Dim: {best_hps_v2.get('embedding_dim')}
        MLP Unit 1: {best_hps_v2.get('mlp_units_1')}
        MLP Unit 2: {best_hps_v2.get('mlp_units_2')}
        MLP Unit 3: {best_hps_v2.get('mlp_units_3')}
        Feature MLP Unit 1: {best_hps_v2.get('feature_mlp_units_1')}
        Feature MLP Unit 2: {best_hps_v2.get('feature_mlp_units_2')}
        Dropout Rate: {best_hps_v2.get('dropout_rate'):.3f}
        Learning Rate: {best_hps_v2.get('learning_rate')}
        L2 Reg Factor: {best_hps_v2.get('l2_reg_factor')}
        """)

        # Rebuild a fresh model from the winning hyperparameters and train it
        # longer, still early-stopping on the validation split only.
        best_model_tuned_v2 = tuner_v2.hypermodel.build(best_hps_v2)

        print("\nMelatih model terbaik (V2 Final) dengan hyperparameter yang ditemukan...")
        final_early_stopping_cb = tf.keras.callbacks.EarlyStopping(
            monitor='val_mae', patience=15, restore_best_weights=True, verbose=1
        )

        history_best_tuned_v2 = best_model_tuned_v2.fit(
            X_fit,
            y_fit,
            batch_size=128,
            epochs=120,
            verbose=1,
            validation_data=(X_val, y_val),
            callbacks=[final_early_stopping_cb]
        )

        # The test split is touched exactly once, for the final report.
        loss_tuned_v2, mae_tuned_v2 = best_model_tuned_v2.evaluate(X_test, y_test, verbose=0)
        print(f"\nModel Tuned (V2 Final) - Test Loss: {loss_tuned_v2:.4f}")
        print(f"Model Tuned (V2 Final) - Test MAE: {mae_tuned_v2:.4f}")

        # To persist the model, uncomment:
        # best_model_tuned_v2.save("best_hybrid_ncf_model_v2_final.keras")
        # print("Model terbaik (V2 Final) disimpan sebagai 'best_hybrid_ncf_model_v2_final.keras'")
    else:
        print("Tidak ada data training yang cukup untuk memulai tuning V2.")
        best_model_tuned_v2 = None


# --- Fungsi Rekomendasi (Sama seperti sebelumnya, hanya pastikan menggunakan model yang tepat) ---
def normalize_series_min_max(series):
    """Min-max scale *series* to the range [0, 1].

    A constant series cannot be scaled; it maps to all zeros when the constant
    is 0 and to all ones otherwise. The index of the input is preserved.
    """
    lowest, highest = series.min(), series.max()
    if highest != lowest:
        return (series - lowest) / (highest - lowest)
    constant_fill = 0.0 if lowest == 0 else 1.0
    return pd.Series(np.full(len(series), constant_fill, dtype=float), index=series.index)

def _match_preferred_categories(df_source, preferred_categories, valid_categories):
    """Filter the catalogue to items tagged with at least one usable category.

    A preferred category is usable when it is both a column of *df_source* and
    listed in *valid_categories*.

    Returns:
        ``(matches, usable_categories)``. ``matches`` is an independent copy of
        the matching rows, or ``None`` (after printing the reason) when no
        category is usable or no item matches.
    """
    usable_categories = [
        cat for cat in preferred_categories
        if cat in df_source.columns and cat in valid_categories
    ]
    if not usable_categories:
        print(f"Tidak ada kategori valid dari preferensi: {preferred_categories}")
        return None, usable_categories

    matches = df_source[df_source[usable_categories].sum(axis=1) > 0].copy()
    if matches.empty:
        print(f"Tidak ditemukan tempat wisata yang cocok dengan kategori: {usable_categories}")
        return None, usable_categories
    return matches, usable_categories


def _mean_probe_scores(ncf_model, candidate_raw_ids, probe_user_ids_int,
                       item_encoder, df_item_features, feature_columns):
    """Average the model score of every candidate item over all probe users.

    All (item, probe user) pairs are scored in ONE ``predict`` call instead of
    one call per pair. Candidates unknown to *item_encoder* or without a row
    in *df_item_features* get a score of 0.0.

    Returns:
        A float array with one entry per element of *candidate_raw_ids*.
    """
    candidate_raw_ids = pd.Series(candidate_raw_ids).reset_index(drop=True)
    scores = np.zeros(len(candidate_raw_ids), dtype=float)

    known_mask = candidate_raw_ids.isin(item_encoder.classes_).to_numpy()
    if not known_mask.any():
        return scores

    # First feature row per item, addressable by integer item ID.
    feature_table = (
        df_item_features.drop_duplicates(subset='item_id_int')
        .set_index('item_id_int')[feature_columns]
    )
    item_ints = item_encoder.transform(candidate_raw_ids[known_mask])
    has_features = np.isin(item_ints, feature_table.index.to_numpy())
    scorable_positions = np.flatnonzero(known_mask)[has_features]
    item_ints = item_ints[has_features]
    if len(item_ints) == 0:
        return scores

    item_features = feature_table.loc[item_ints].to_numpy(dtype=np.float32)
    probe_ids = np.asarray(probe_user_ids_int)
    n_items, n_probes = len(item_ints), len(probe_ids)

    # Row layout: item 0 x all probes, item 1 x all probes, ...
    predictions = ncf_model.predict(
        {
            'user_input': np.tile(probe_ids, n_items),
            'item_input': np.repeat(item_ints, n_probes),
            'features_input': np.repeat(item_features, n_probes, axis=0),
        },
        verbose=0,
    )
    scores[scorable_positions] = (
        np.asarray(predictions).reshape(n_items, n_probes).mean(axis=1)
    )
    return scores


def get_unified_recommendations(
    raw_user_id=None,
    preferred_categories=None,
    n=10,
    ncf_model=None,
    user_encoder_passed=None,
    item_encoder_passed=None,
    df_user_cleaned_passed=None,
    df_wisata_features_passed=None,
    feature_columns_ncf_passed=None,
    cosine_sim_matrix_passed=None,
    df_wisata_source_passed=None,
    k_existing=20,
    liked_rating_threshold=4.0,
    ncf_weight=0.7,
    content_weight=0.3,
    probe_user_raw_ids=None,
    weight_google_rating_new=0.5,
    weight_ncf_appeal_new=0.5,
    list_kategori_wisata_valid_passed=None
):
    """Recommend places through one of three paths, chosen from the arguments.

    1. Known user with ratings: score every unrated item with the NCF model,
       keep the ``k_existing`` best, blend each score with the mean cosine
       similarity to the items the user liked, and return the top ``n``.
    2. New user with ``preferred_categories`` and ``probe_user_raw_ids``:
       filter by category, then rank by a blend of the normalised Google
       rating and the mean NCF score over the probe users.
    3. New user with ``preferred_categories`` only: filter by category and
       sort by Google rating, then by review count. No model is needed.

    Args:
        raw_user_id: Raw user ID; path 1 is taken when the encoder knows it
            and the user has ratings.
        preferred_categories: Category column names the user is interested in.
        n: Number of recommendations to return.
        ncf_model: Trained model exposing ``predict``; required for paths 1
            and 2 only.
        user_encoder_passed / item_encoder_passed: Fitted label encoders.
        df_user_cleaned_passed: Ratings with ``user_id_int``, ``item_id_int``
            and ``rating`` columns.
        df_wisata_features_passed: Item features keyed by ``item_id_int``.
        feature_columns_ncf_passed: Feature column order the model expects.
        cosine_sim_matrix_passed: Item-by-item similarity matrix, or ``None``
            to skip the content boost.
        df_wisata_source_passed: Item catalogue with ``ID Tempat``,
            ``Nama Wisata`` and the category indicator columns.
        k_existing: Size of the NCF candidate pool in path 1.
        liked_rating_threshold: Minimum raw rating that counts as "liked".
        ncf_weight / content_weight: Blend weights for path 1.
        probe_user_raw_ids: Raw IDs of existing users used as probes (path 2).
        weight_google_rating_new / weight_ncf_appeal_new: Blend weights, path 2.
        list_kategori_wisata_valid_passed: Whitelist of category names.

    Returns:
        A DataFrame of recommendations, or an empty DataFrame when inputs are
        missing or nothing can be recommended (a message is printed).
    """
    required_inputs = (
        user_encoder_passed, item_encoder_passed, df_user_cleaned_passed,
        df_wisata_features_passed, feature_columns_ncf_passed,
        df_wisata_source_passed, list_kategori_wisata_valid_passed,
    )
    if any(value is None for value in required_inputs):
        print("Error: Satu atau lebih dataframes/encoders/list kategori penting tidak disediakan.")
        return pd.DataFrame()

    # --- Is this a known user with at least one rating? ---
    is_known_user_with_reviews = False
    user_id_int = -1
    user_ratings = None
    if raw_user_id:
        try:
            user_id_int = user_encoder_passed.transform([raw_user_id])[0]
        except ValueError:
            print(f"Info: User ID '{raw_user_id}' tidak dikenal encoder.")
        else:
            user_ratings = df_user_cleaned_passed[df_user_cleaned_passed['user_id_int'] == user_id_int]
            if user_ratings.empty:
                print(f"Info: User ID '{raw_user_id}' dikenal encoder, tapi tidak ada riwayat review.")
            else:
                is_known_user_with_reviews = True

    # Only the model-based paths need a model; the category-only fallback
    # (path 3) must keep working when no model has been trained.
    needs_model = is_known_user_with_reviews or bool(preferred_categories and probe_user_raw_ids)
    if needs_model and ncf_model is None:
        print("Error: Model NCF belum dilatih atau tidak disediakan.")
        return pd.DataFrame()

    # --- Path 1: known user with rating history ---
    if is_known_user_with_reviews:
        print(f"Membuat rekomendasi untuk pengguna yang sudah dikenal: {raw_user_id}")
        # One feature row per item keeps the three model inputs aligned even
        # if the feature table contains repeated item IDs.
        item_features_unique = df_wisata_features_passed.drop_duplicates(subset='item_id_int')
        items_rated_by_user_int = user_ratings['item_id_int'].unique()
        all_possible_item_ids_int = item_features_unique['item_id_int'].unique()
        items_to_predict_int = np.setdiff1d(
            all_possible_item_ids_int, items_rated_by_user_int, assume_unique=True
        )

        if len(items_to_predict_int) == 0:
            print(f"User '{raw_user_id}' sudah memberi rating semua item atau tidak ada item lain untuk diprediksi.")
            return pd.DataFrame()

        batch_features_df = pd.merge(
            pd.DataFrame({'item_id_int': items_to_predict_int}),
            item_features_unique, on='item_id_int', how='left'
        ).fillna(0)
        features_input_batch = batch_features_df[feature_columns_ncf_passed].values.astype(np.float32)

        model_input_batch = {
            'user_input': np.full(len(items_to_predict_int), user_id_int),
            'item_input': items_to_predict_int,
            'features_input': features_input_batch
        }
        predicted_norm_ratings_batch = ncf_model.predict(model_input_batch, verbose=0).flatten()

        # .copy(): new columns are added below, so work on an independent
        # frame rather than on the result of head().
        ncf_top_k_candidates = pd.DataFrame({
            'item_id_int': items_to_predict_int,
            'ncf_score_norm': predicted_norm_ratings_batch
        }).sort_values(by='ncf_score_norm', ascending=False).head(k_existing).copy()

        liked_item_ids_int = user_ratings.loc[
            user_ratings['rating'] >= liked_rating_threshold, 'item_id_int'  # raw rating scale
        ].unique()

        # Content boost: mean similarity between each candidate and the liked items.
        # NOTE(review): the matrix is indexed with item_id_int, which assumes
        # its rows are ordered like item_encoder.classes_ (i.e. the catalogue
        # it was built from is sorted by 'ID Tempat') - confirm with the caller.
        content_affinity_scores = np.zeros(len(ncf_top_k_candidates), dtype=float)
        if len(liked_item_ids_int) > 0 and cosine_sim_matrix_passed is not None:
            n_sim_rows, n_sim_cols = cosine_sim_matrix_passed.shape[:2]
            valid_liked_indices = liked_item_ids_int[
                (liked_item_ids_int >= 0) & (liked_item_ids_int < n_sim_cols)
            ]
            if len(valid_liked_indices) > 0:
                for position, candidate_item_id_int in enumerate(ncf_top_k_candidates['item_id_int'].to_numpy()):
                    if not (0 <= candidate_item_id_int < n_sim_rows):
                        continue
                    sim_scores = cosine_sim_matrix_passed[candidate_item_id_int, valid_liked_indices]
                    sim_scores = sim_scores[np.isfinite(sim_scores)]
                    if len(sim_scores) > 0:
                        content_affinity_scores[position] = np.mean(sim_scores)

        ncf_top_k_candidates['content_affinity_score'] = content_affinity_scores
        ncf_top_k_candidates['combined_score %'] = (
            (ncf_weight * ncf_top_k_candidates['ncf_score_norm'] +
             content_weight * ncf_top_k_candidates['content_affinity_score']) * 100
        )
        top_n_final = ncf_top_k_candidates.sort_values(
            by='combined_score %', ascending=False
        ).head(n).copy()

        # Integer item IDs are positions into the encoder's classes_ array.
        top_n_final['ID Tempat'] = item_encoder_passed.classes_[top_n_final['item_id_int'].to_numpy()]

        recommendations = pd.merge(
            top_n_final, df_wisata_source_passed[['ID Tempat', 'Nama Wisata']],
            on='ID Tempat', how='left'
        )
        print_columns = ['ID Tempat', 'Nama Wisata', 'ncf_score_norm', 'content_affinity_score', 'combined_score %']
        print(f"\nTop {n} Rekomendasi Gabungan (NCF + Cosine Boost) untuk User '{raw_user_id}':")
        print(recommendations[[col for col in print_columns if col in recommendations.columns]].to_string())
        return recommendations

    # --- Path 2: new/unknown user with category preferences + NCF probe users ---
    elif preferred_categories and probe_user_raw_ids:
        print(f"Membuat rekomendasi untuk pengguna baru dengan preferensi: {preferred_categories} (NCF probe)")
        candidate_wisata_df, valid_preferred_categories = _match_preferred_categories(
            df_wisata_source_passed, preferred_categories, list_kategori_wisata_valid_passed
        )
        if candidate_wisata_df is None:
            return pd.DataFrame()

        if 'Overall Rating (Google Maps)' in candidate_wisata_df.columns:
            candidate_wisata_df['google_rating_norm'] = normalize_series_min_max(
                candidate_wisata_df['Overall Rating (Google Maps)']
            )
        else:
            candidate_wisata_df['google_rating_norm'] = 0.0

        valid_probe_user_ids_int = []
        for raw_id_probe in probe_user_raw_ids:
            try:
                valid_probe_user_ids_int.append(user_encoder_passed.transform([raw_id_probe])[0])
            except ValueError:
                print(f"Warning: Probe user ID '{raw_id_probe}' tidak dikenal oleh user_encoder.")

        if not valid_probe_user_ids_int:
            print("Warning: Tidak ada probe user ID yang valid. NCF appeal score akan 0.")
            candidate_wisata_df['avg_ncf_appeal_score'] = 0.0
        else:
            candidate_wisata_df['avg_ncf_appeal_score'] = _mean_probe_scores(
                ncf_model,
                candidate_wisata_df['ID Tempat'],
                valid_probe_user_ids_int,
                item_encoder_passed,
                df_wisata_features_passed,
                feature_columns_ncf_passed,
            )

        candidate_wisata_df['final_score %'] = (
            (weight_google_rating_new * candidate_wisata_df['google_rating_norm'].fillna(0) +
             weight_ncf_appeal_new * candidate_wisata_df['avg_ncf_appeal_score']) * 100
        )
        ranked_wisata = candidate_wisata_df.sort_values(by='final_score %', ascending=False).head(n)

        print_columns_new = ['ID Tempat', 'Nama Wisata', 'Overall Rating (Google Maps)', 'google_rating_norm', 'avg_ncf_appeal_score', 'final_score %'] + valid_preferred_categories
        recommendations = ranked_wisata[[col for col in print_columns_new if col in ranked_wisata.columns]]
        print(f"\nTop {n} Rekomendasi Hybrid (Kategori + NCF Probe) untuk Pengguna Baru:")
        print(recommendations.to_string())
        return recommendations

    # --- Path 3: new user with category preferences only (simple fallback) ---
    elif preferred_categories:
        print(f"Membuat rekomendasi untuk pengguna baru HANYA berdasarkan kategori: {preferred_categories} (tanpa NCF probe).")
        filtered_wisata, valid_preferred_categories = _match_preferred_categories(
            df_wisata_source_passed, preferred_categories, list_kategori_wisata_valid_passed
        )
        if filtered_wisata is None:
            return pd.DataFrame()

        # Sort by rating, then review count, using whichever columns exist.
        sort_cols = [
            col for col in ('Overall Rating (Google Maps)', 'Jumlah Ulasan (Google Maps)')
            if col in filtered_wisata.columns
        ]
        if sort_cols:
            ranked_wisata = filtered_wisata.sort_values(
                by=sort_cols, ascending=[False] * len(sort_cols)
            ).head(n)
        else:
            ranked_wisata = filtered_wisata.head(n)

        print_cols_to_check = ['ID Tempat', 'Nama Wisata', 'Overall Rating (Google Maps)', 'Jumlah Ulasan (Google Maps)'] + valid_preferred_categories
        recommendations = ranked_wisata[[col for col in print_cols_to_check if col in ranked_wisata.columns]]
        print(f"\nTop {n} Rekomendasi Berdasarkan Kategori (Simple Filter):")
        print(recommendations.to_string())
        return recommendations

    else:
        print("Tidak ada User ID atau preferensi kategori yang diberikan. Tidak dapat membuat rekomendasi.")
        return pd.DataFrame()


# --- Example usage with the tuned model (V2 Final) ---
# True only when the tuning section above actually produced a model.
_tuned_model_ready = 'best_model_tuned_v2' in locals() and best_model_tuned_v2 is not None

# Arguments that are identical for every scenario below.
_shared_recommender_kwargs = dict(
    user_encoder_passed=user_encoder,
    item_encoder_passed=item_encoder,
    df_user_cleaned_passed=df_user_cleaned,
    df_wisata_features_passed=df_wisata_features,
    feature_columns_ncf_passed=feature_columns,
    cosine_sim_matrix_passed=cosine_sim_matrix,
    df_wisata_source_passed=df_wisata,
    list_kategori_wisata_valid_passed=EXISTING_LIST_KATEGORI_WISATA,
)

if not _tuned_model_ready:
    print("\nModel V2 Final belum dilatih atau tidak tersedia, contoh rekomendasi tidak dapat dijalankan.")
elif df_user_cleaned.empty or num_users <= 0:
    print("\nModel V2 Final telah dilatih, tetapi tidak ada data user yang valid untuk contoh rekomendasi.")
else:
    # Scenario 1: a user the model already knows (first class of the encoder).
    if len(user_encoder.classes_) > 0:
        known_user_id_example = user_encoder.classes_[0]
        print(f"\n>>> SKENARIO 1 (V2 Final): Rekomendasi untuk pengguna dikenal '{known_user_id_example}'")
        recs_known_v2 = get_unified_recommendations(
            raw_user_id=known_user_id_example,
            n=5,
            ncf_model=best_model_tuned_v2,
            liked_rating_threshold=3.5,  # threshold on the raw rating scale
            **_shared_recommender_kwargs,
        )
    else:
        print("Tidak ada user yang dikenal di user_encoder untuk Skenario 1.")

    # Scenario 2: a new user with category preferences, scored through up to
    # three randomly sampled existing users acting as NCF probes.
    new_user_prefs_example_1 = ['budaya', 'seni']
    num_samples_for_probe = min(3, len(user_encoder.classes_))
    if num_samples_for_probe > 0:
        probe_user_ids_for_new_example = random.sample(list(user_encoder.classes_), num_samples_for_probe)
        print(f"\n>>> SKENARIO 2 (V2 Final): Rekomendasi untuk pengguna baru, preferensi: {new_user_prefs_example_1} (dengan NCF Probe)")
        print(f"Probe User IDs: {probe_user_ids_for_new_example}")
        recs_new_ncf_probe_v2 = get_unified_recommendations(
            preferred_categories=new_user_prefs_example_1,
            n=5,
            ncf_model=best_model_tuned_v2,
            probe_user_raw_ids=probe_user_ids_for_new_example,
            **_shared_recommender_kwargs,
        )
    else:
        print("Tidak cukup user di encoder untuk NCF Probe pada Skenario 2.")

# Scenario 3: a new user with category preferences only (simple fallback).
# The NCF model is not actively used on this path, so it always runs; an
# empty placeholder Model() is passed when no trained model exists.
new_user_prefs_example_2 = ['teknologi', 'sains']
print(f"\n>>> SKENARIO 3 (V2 Final): Rekomendasi untuk pengguna baru, preferensi: {new_user_prefs_example_2} (filter kategori sederhana)")
recs_new_simple_v2 = get_unified_recommendations(
    preferred_categories=new_user_prefs_example_2,
    n=3,
    ncf_model=best_model_tuned_v2 if _tuned_model_ready else Model(),
    **_shared_recommender_kwargs,
)

Trial 15 Complete [00h 00m 47s]
val_mae: 0.16359244287014008

Best val_mae So Far: 0.1254303753376007
Total elapsed time: 00h 28m 35s

        Hyperparameter terbaik (V2 Final) yang ditemukan:
        Embedding Dim: 48
        MLP Unit 1: 48
        MLP Unit 2: 16
        MLP Unit 3: 16
        Feature MLP Unit 1: 32
        Feature MLP Unit 2: 24
        Dropout Rate: 0.350
        Learning Rate: 0.0005
        L2 Reg Factor: 1e-05
        

Melatih model terbaik (V2 Final) dengan hyperparameter yang ditemukan...
Epoch 1/120
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 12ms/step - loss: 0.2002 - mae: 0.3699 - val_loss: 0.0572 - val_mae: 0.1758
Epoch 2/120
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0590 - mae: 0.1659 - val_loss: 0.0553 - val_mae: 0.1562
Epoch 3/120
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.0545 - mae: 0.1521 - val_loss: 0.0568 - val_mae: 0.1517
Epoch 4/120
[1m511

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_n_final['ID Tempat'] = top_n_final['item_id_int'].map(item_id_int_to_raw_map)



Top 5 Rekomendasi Gabungan (NCF + Cosine Boost) untuk User 'U000001':
  ID Tempat                                     Nama Wisata  ncf_score_norm  content_affinity_score  combined_score %
0      T020                              Pura Mangkunagaran        0.998111                0.053349         71.468259
1      T044                          Rumah Atsiri Indonesia        0.999681                0.045276         71.335913
2      T016  Museum Manusia Purba Sangiran Klaster Krikilan        0.999228                0.041849         71.201419
3      T009                               Taman Balekambang        0.999547                0.040549         71.184781
4      T043                                     Candi Sukuh        0.999989                0.038272         71.147385

>>> SKENARIO 2 (V2 Final): Rekomendasi untuk pengguna baru, preferensi: ['budaya', 'seni'] (dengan NCF Probe)
Probe User IDs: ['U001505', 'U015015', 'U034974']
Membuat rekomendasi untuk pengguna baru dengan preferensi: [

## Save Model H5


In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dense, Flatten, Concatenate, Dropout, BatchNormalization # Tambahkan BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers # Import regularizers

# Pastikan KerasTuner sudah terinstal
try:
    import keras_tuner as kt
except ImportError:
    print("KerasTuner not found. Installing...")
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "keras-tuner", "-q"])
    import keras_tuner as kt
    print("KerasTuner installed successfully.")


# --- Reproducibility ---
# One shared seed for every random source this notebook draws from: Python's
# `random`, NumPy's global RNG and TensorFlow's global generator (in that order).
SEED = 42
for _seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_fn(SEED)

# --- Load Data ---
# Point the two paths below at your own files if they differ; by default the
# CSV files are expected in the current working directory.
try:
    df_wisata = pd.read_csv("Cleaned Dataset Item (tambahin feature engineering).csv")
    df_user = pd.read_csv("Cleaned Dataset User.csv")
except FileNotFoundError:
    print("PERINGATAN: File CSV tidak ditemukan. Menggunakan data dummy untuk demonstrasi.")
    print("Pastikan file 'Cleaned Dataset Item (tambahin feature engineering).csv' dan 'Cleaned Dataset User.csv' ada.")

    # Synthetic catalogue of 109 places: five category / age-group patterns
    # repeated 20 times, padded with nine extra rows.
    _dummy_numbers = range(109)
    data_wisata = {
        'ID Tempat': [f'item_{i}' for i in _dummy_numbers],
        'Nama Wisata': [f'Wisata Dummy {i}' for i in _dummy_numbers],
        'Kategori': ['budaya, seni', 'lingkungan', 'sejarah', 'budaya', 'teknologi'] * 20 + ['budaya'] * 9,
        'Kategori Umur': ['Semua Umur', 'Remaja', 'Anak-anak', 'Remaja', 'Semua Umur'] * 20 + ['Semua Umur'] * 9,
        'Deskripsi Cleaned': [f'deskripsi dummy {i}' for i in _dummy_numbers],
        'Aktivitas Cleaned': [f'aktivitas dummy {i}' for i in _dummy_numbers],
        'Fasilitas Cleaned': [f'fasilitas dummy {i}' for i in _dummy_numbers],
        # The uniform draw must stay ahead of the integer draw so the seeded
        # NumPy stream yields the same values as before.
        'Overall Rating (Google Maps)': np.random.uniform(3.0, 5.0, 109).round(1),
        'Jumlah Ulasan (Google Maps)': np.random.randint(10, 1000, 109),
    }
    df_wisata = pd.DataFrame(data_wisata)

    # Synthetic ratings: 50 users, each rating 5-20 distinct places with 1-5.
    user_ids_dummy = [f'user_dummy_{i}' for i in range(50)]
    item_ids_dummy = df_wisata['ID Tempat'].tolist()
    ratings_data_dummy = []
    for _dummy_user in user_ids_dummy:
        # Draw order per user: how many places, which places, then one rating each.
        _sampled_items = random.sample(item_ids_dummy, random.randint(5, 20))
        ratings_data_dummy.extend(
            {'ID User': _dummy_user, 'ID Tempat': _sampled, 'rating': random.randint(1, 5)}
            for _sampled in _sampled_items
        )
    df_user = pd.DataFrame(ratings_data_dummy)
else:
    print("Dataset berhasil dimuat dari file CSV.")

# --- Preprocessing df_user ---
# Keep only the interaction triple; drop incomplete rows and exact duplicates.
# NOTE(review): a user who rated the same place twice with different ratings
# still contributes both rows — confirm whether that is intended.
df_user_cleaned = df_user[['ID User', 'ID Tempat', 'rating']].dropna()
df_user_cleaned = df_user_cleaned.drop_duplicates()

user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

# Fit the item encoder on EVERY place ID in df_wisata, so places without any
# rating still receive an integer ID.
item_encoder.fit(df_wisata['ID Tempat'])

# Discard ratings that point at places missing from df_wisata. The explicit
# .copy() makes the result an independent frame, so the column assignments
# below do not trigger pandas' SettingWithCopyWarning.
df_user_cleaned = df_user_cleaned[
    df_user_cleaned['ID Tempat'].isin(item_encoder.classes_)
].copy()

# Fit the user encoder only on users that still have ratings after the item
# filter. Every remaining 'ID User' is therefore known to the encoder and no
# second filtering pass is needed.
user_encoder.fit(df_user_cleaned['ID User'])

# Map raw user / place IDs to contiguous integers for the embedding layers.
df_user_cleaned['user_id_int'] = user_encoder.transform(df_user_cleaned['ID User'])
df_user_cleaned['item_id_int'] = item_encoder.transform(df_user_cleaned['ID Tempat'])

num_users = len(user_encoder.classes_)
num_items = len(item_encoder.classes_)  # number of unique places in df_wisata

# Min-max normalise the rating into [0, 1].
min_rating = df_user_cleaned['rating'].min()
max_rating = df_user_cleaned['rating'].max()
if max_rating == min_rating:  # avoid dividing by zero when every rating is equal
    df_user_cleaned['rating_norm'] = 0.5 if min_rating > 0 else 0.0
else:
    df_user_cleaned['rating_norm'] = (df_user_cleaned['rating'] - min_rating) / (max_rating - min_rating)

print(f"\nJumlah User Unik setelah cleaning: {num_users}")
print(f"Jumlah Tempat Wisata Unik di df_wisata (digunakan item_encoder): {len(item_encoder.classes_)}")
print(f"Jumlah Tempat Wisata Unik yang memiliki rating di df_user_cleaned: {df_user_cleaned['item_id_int'].nunique()}")


# --- Feature Engineering and Preprocessing of df_wisata ---
# Multi-hot encode the comma-separated category and age-group columns, then
# replace the original text columns with the generated indicator columns.
df_encoded_kategori = df_wisata['Kategori'].str.get_dummies(sep=', ')
df_wisata = pd.concat([df_wisata, df_encoded_kategori], axis=1)
df_wisata.drop(columns=['Kategori'], inplace=True)

df_encoded_umur = df_wisata['Kategori Umur'].str.get_dummies(sep=', ')
df_wisata = pd.concat([df_wisata, df_encoded_umur], axis=1)
df_wisata.drop(columns=['Kategori Umur'], inplace=True)

# One free-text field per place: description + activities + facilities.
df_wisata['text_features'] = (
    df_wisata['Deskripsi Cleaned'].fillna('') + ' ' +
    df_wisata['Aktivitas Cleaned'].fillna('') + ' ' +
    df_wisata['Fasilitas Cleaned'].fillna('')
)

# TF-IDF and cosine similarity.
# The recommender indexes cosine_sim_matrix directly with item_id_int, so row i
# must describe the place whose encoded ID is i. df_wisata's row order matches
# the encoder's (sorted) class order only by coincidence, so the corpus is
# built explicitly in encoder order with exactly one row per place ID. When
# df_wisata is already unique and sorted by 'ID Tempat' this is the same
# corpus as before.
_text_in_encoder_order = (
    df_wisata.drop_duplicates(subset='ID Tempat')
    .set_index('ID Tempat')['text_features']
    .reindex(item_encoder.classes_)
    .fillna('')
)
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(_text_in_encoder_order)
cosine_sim_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Indicator columns produced by the two get_dummies calls above.
ALL_GENERATED_KATEGORI_COLUMNS = list(df_encoded_kategori.columns)
ALL_GENERATED_UMUR_COLUMNS = list(df_encoded_umur.columns)

LIST_KATEGORI_WISATA_DEFINED = [  # categories to use explicitly, when present
    'budaya', 'kreativitas', 'lingkungan', 'religi', 'sains',
    'sejarah', 'seni', 'teknologi'
]
# Keep only the categories that really exist as columns after encoding.
EXISTING_LIST_KATEGORI_WISATA = [col for col in LIST_KATEGORI_WISATA_DEFINED if col in df_wisata.columns]

# Model features: Google rating + existing category flags + age-group flags,
# de-duplicated and sorted so the column order is deterministic.
feature_columns = sorted(
    set(['Overall Rating (Google Maps)'] + EXISTING_LIST_KATEGORI_WISATA + ALL_GENERATED_UMUR_COLUMNS)
)

df_wisata_features = df_wisata[['ID Tempat'] + feature_columns].copy()
# Encode the place ID with the same encoder used for the ratings.
df_wisata_features['item_id_int'] = item_encoder.transform(df_wisata_features['ID Tempat'])
df_wisata_features = df_wisata_features.drop(columns=['ID Tempat'])

# Scale 'Overall Rating' into [0, 1].
scaler = MinMaxScaler()
if 'Overall Rating (Google Maps)' in df_wisata_features.columns:
    df_wisata_features['Overall Rating (Google Maps)'] = scaler.fit_transform(
        df_wisata_features[['Overall Rating (Google Maps)']]
    )
else:
    print("Peringatan: Kolom 'Overall Rating (Google Maps)' tidak ditemukan di df_wisata_features.")


# --- Join ratings with item features ---
# The left join keeps every rating row; any value missing after the join is
# replaced by 0 across the whole frame.
df_final = df_user_cleaned.merge(df_wisata_features, how='left', on='item_id_int')
df_final = df_final.fillna(0)

# --- Train / test split (80 / 20, seeded) ---
train_df, test_df = train_test_split(df_final, random_state=SEED, test_size=0.2)

# Model inputs are dictionaries keyed by the names of the Keras Input layers.
X_train, X_test = (
    {
        'user_input': _part['user_id_int'].values,
        'item_input': _part['item_id_int'].values,
        'features_input': _part[feature_columns].values.astype(np.float32),
    }
    for _part in (train_df, test_df)
)
# Targets are the normalised ratings.
y_train, y_test = (
    _part['rating_norm'].values.astype(np.float32) for _part in (train_df, test_df)
)

num_item_features = len(feature_columns)
print(f"\nJumlah Fitur Item (setelah validasi kolom): {num_item_features}")
print(f"Nama Kolom Fitur: {feature_columns}")


# --- Model Building Function for KerasTuner (dengan L2 Regularization & Batch Norm) ---
def build_hybrid_ncf_model_for_tuner_v2(hp, num_users_static, num_items_static, num_item_features_static):
    """Build and compile the hybrid NCF model for one KerasTuner trial.

    The network embeds the user ID and the item ID, passes the item feature
    vector through a small two-layer MLP, concatenates the three
    representations and feeds them through a three-layer MLP (with batch
    normalisation and dropout) ending in a single sigmoid unit.

    Args:
        hp: KerasTuner hyperparameter container used to sample the search space.
        num_users_static: ``input_dim`` of the user embedding.
        num_items_static: ``input_dim`` of the item embedding.
        num_item_features_static: width of the ``features_input`` vector.

    Returns:
        A Keras ``Model`` compiled with Adam, mean-squared-error loss and the
        MAE metric.
    """
    # Search space. The hyperparameters are registered in a fixed order.
    emb_size = hp.Int('embedding_dim', min_value=32, max_value=96, step=16)
    tower_widths = [
        hp.Int('mlp_units_1', min_value=32, max_value=96, step=16),
        hp.Int('mlp_units_2', min_value=16, max_value=64, step=16),
        hp.Int('mlp_units_3', min_value=8, max_value=32, step=8),
    ]
    drop_p = hp.Float('dropout_rate', min_value=0.2, max_value=0.5, step=0.05)
    step_size = hp.Choice('learning_rate', values=[1e-3, 5e-4, 1e-4])
    side_widths = [
        hp.Int('feature_mlp_units_1', min_value=16, max_value=48, step=16),
        hp.Int('feature_mlp_units_2', min_value=8, max_value=32, step=8),
    ]
    weight_decay = hp.Choice('l2_reg_factor', values=[1e-4, 1e-5, 1e-6, 0.0])

    def l2_penalty():
        # A fresh regulariser object per layer, as in a hand-written stack.
        return regularizers.l2(weight_decay)

    def relu_dense(width):
        # L2-regularised ReLU layer used by both MLPs.
        return Dense(width, activation='relu', kernel_regularizer=l2_penalty())

    # Inputs
    user_in = Input(shape=(1,), name='user_input')
    item_in = Input(shape=(1,), name='item_input')
    feats_in = Input(shape=(num_item_features_static,), name='features_input')

    # ID embeddings, flattened to plain vectors
    user_emb = Embedding(
        input_dim=num_users_static,
        output_dim=emb_size,
        name='user_embedding',
        embeddings_regularizer=l2_penalty(),
    )(user_in)
    item_emb = Embedding(
        input_dim=num_items_static,  # number of unique items known to the encoder
        output_dim=emb_size,
        name='item_embedding',
        embeddings_regularizer=l2_penalty(),
    )(item_in)
    user_flat = Flatten(name='flatten_user')(user_emb)
    item_flat = Flatten(name='flatten_item')(item_emb)

    # Side MLP over the item features
    side = relu_dense(side_widths[0])(feats_in)
    side = Dropout(drop_p)(side)
    side = relu_dense(side_widths[1])(side)

    # Fuse the three representations and normalise them
    hidden = Concatenate()([user_flat, item_flat, side])
    hidden = BatchNormalization()(hidden)

    # Main MLP: the first two layers are followed by batch norm and dropout,
    # the last one feeds the output unit directly.
    for width in tower_widths[:2]:
        hidden = relu_dense(width)(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Dropout(drop_p)(hidden)
    hidden = relu_dense(tower_widths[2])(hidden)

    prediction = Dense(1, activation='sigmoid', name='output')(hidden)

    net = Model(inputs=[user_in, item_in, feats_in], outputs=prediction)
    net.compile(
        optimizer=Adam(learning_rate=step_size),
        loss='mean_squared_error',
        metrics=['mae'],
    )
    return net

# --- Hyperparameter Tuning Setup ---
# Runs a KerasTuner random search over build_hybrid_ncf_model_for_tuner_v2,
# then rebuilds a model from the best hyperparameters and trains it again.
# best_model_tuned_v2 ends up as None whenever tuning cannot run.
# NOTE(review): (X_test, y_test) is used as validation data for the tuner
# objective and for both early-stopping callbacks, and is also the set the
# final model is evaluated on, so the printed "Test" loss/MAE are measured on
# data that was used for model selection.
if num_users == 0 or num_items == 0:
    print("Peringatan: num_users atau num_items adalah nol. Embedding layer mungkin error.")
    best_model_tuned_v2 = None # Initialise when we cannot continue
else:
    # The tuner calls the build function with `hp` only, so the dataset-dependent
    # sizes are bound here.
    build_fn_with_static_args_v2 = lambda hp: build_hybrid_ncf_model_for_tuner_v2(
        hp,
        num_users_static=num_users,
        num_items_static=num_items, # Must be the number of unique items from item_encoder
        num_item_features_static=num_item_features
    )

    tuner_v2 = kt.RandomSearch(
        build_fn_with_static_args_v2,
        objective='val_mae',
        max_trials=15,  # Reduced slightly for faster iteration; can be raised again
        executions_per_trial=1,
        directory='ncf_tuning_v2_final',
        project_name='hybrid_ncf_reg_bn_final',
        overwrite=True
    )

    tuner_v2.search_space_summary()

    # Early stopping on validation MAE for the search trials.
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(
        monitor='val_mae',
        patience=10,
        restore_best_weights=True,
        verbose=1
    )

    print("\nMemulai Hyperparameter Tuning (Versi 2 dengan Regularisasi & Batch Norm)...")
    # Only search when there is at least one training row.
    if X_train['user_input'].shape[0] > 0 and y_train.shape[0] > 0:
        tuner_v2.search(
            X_train,
            y_train,
            epochs=60, # Slightly more epochs per trial
            validation_data=(X_test, y_test),
            callbacks=[early_stopping_cb],
            batch_size=128,
            verbose=1
        )

        best_hps_v2 = tuner_v2.get_best_hyperparameters(num_trials=1)[0]

        print(f"""
        Hyperparameter terbaik (V2 Final) yang ditemukan:
        Embedding Dim: {best_hps_v2.get('embedding_dim')}
        MLP Unit 1: {best_hps_v2.get('mlp_units_1')}
        MLP Unit 2: {best_hps_v2.get('mlp_units_2')}
        MLP Unit 3: {best_hps_v2.get('mlp_units_3')}
        Feature MLP Unit 1: {best_hps_v2.get('feature_mlp_units_1')}
        Feature MLP Unit 2: {best_hps_v2.get('feature_mlp_units_2')}
        Dropout Rate: {best_hps_v2.get('dropout_rate'):.3f}
        Learning Rate: {best_hps_v2.get('learning_rate')}
        L2 Reg Factor: {best_hps_v2.get('l2_reg_factor')}
        """)

        # Build a model from the best hyperparameters; it is trained below.
        best_model_tuned_v2 = tuner_v2.hypermodel.build(best_hps_v2)

        print("\nMelatih model terbaik (V2 Final) dengan hyperparameter yang ditemukan...")
        # The final run uses a longer patience (15) than the search trials (10).
        final_early_stopping_cb = tf.keras.callbacks.EarlyStopping(
            monitor='val_mae', patience=15, restore_best_weights=True, verbose=1
        )

        history_best_tuned_v2 = best_model_tuned_v2.fit(
            X_train,
            y_train,
            batch_size=128,
            epochs=120, # Train longer for the final model
            verbose=1,
            validation_data=(X_test, y_test),
            callbacks=[final_early_stopping_cb]
        )

        loss_tuned_v2, mae_tuned_v2 = best_model_tuned_v2.evaluate(X_test, y_test, verbose=0)
        print(f"\nModel Tuned (V2 Final) - Test Loss: {loss_tuned_v2:.4f}")
        print(f"Model Tuned (V2 Final) - Test MAE: {mae_tuned_v2:.4f}")

        # Save the model (disabled here)
        # best_model_tuned_v2.save("best_hybrid_ncf_model_v2_final.keras")
        # print("Model terbaik (V2 Final) disimpan sebagai 'best_hybrid_ncf_model_v2_final.keras'")
    else:
        print("Tidak ada data training yang cukup untuk memulai tuning V2.")
        best_model_tuned_v2 = None


# --- Fungsi Rekomendasi (Sama seperti sebelumnya, hanya pastikan menggunakan model yang tepat) ---
def normalize_series_min_max(series):
    """Min-max scale a pandas Series into the range [0, 1].

    A constant series cannot be scaled: it is mapped to all zeros when the
    constant is 0 and to all ones otherwise. The result keeps the index of
    the input.
    """
    lowest, highest = series.min(), series.max()
    if highest != lowest:
        return (series - lowest) / (highest - lowest)
    constant_fill = 0.0 if lowest == 0 else 1.0
    return pd.Series(np.full(len(series), constant_fill, dtype=float), index=series.index)

def get_unified_recommendations(
    raw_user_id=None,
    preferred_categories=None,
    n=10,
    ncf_model=None,
    user_encoder_passed=None,
    item_encoder_passed=None,
    df_user_cleaned_passed=None,
    df_wisata_features_passed=None,
    feature_columns_ncf_passed=None,
    cosine_sim_matrix_passed=None,
    df_wisata_source_passed=None,  # original df_wisata: display details and category filter
    k_existing=20,
    liked_rating_threshold=4.0,  # expressed on the original rating scale
    ncf_weight=0.7,
    content_weight=0.3,
    probe_user_raw_ids=None,
    weight_google_rating_new=0.5,
    weight_ncf_appeal_new=0.5,
    list_kategori_wisata_valid_passed=None  # whitelist of usable category columns
):
    """Recommend places for a known user or for a new user with category preferences.

    Strategies, tried in this order:

    1. ``raw_user_id`` is known to ``user_encoder_passed`` and has rows in
       ``df_user_cleaned_passed``: the NCF model scores every place the user
       has not rated; the best ``k_existing`` are re-ranked by
       ``(ncf_weight * ncf_score + content_weight * affinity) * 100``, where
       affinity is the mean cosine similarity between the candidate and the
       places the user rated at least ``liked_rating_threshold``.
    2. ``preferred_categories`` and ``probe_user_raw_ids`` are given: places
       matching any valid preferred category are scored by
       ``(weight_google_rating_new * normalised Google rating +
       weight_ncf_appeal_new * mean NCF prediction over the probe users) * 100``.
    3. Only ``preferred_categories`` is given: matching places are sorted by
       Google rating and review count (when those columns exist).

    Args:
        raw_user_id: raw user ID, or None for a new user.
        preferred_categories: category column names the new user is interested in.
        n: number of recommendations to return.
        ncf_model: object exposing ``predict(inputs, verbose=0)`` for a dict with
            keys ``user_input``, ``item_input`` and ``features_input``.
        user_encoder_passed, item_encoder_passed: fitted encoders exposing
            ``transform`` and ``classes_``.
        df_user_cleaned_passed: ratings with ``user_id_int``, ``item_id_int``
            and ``rating`` columns.
        df_wisata_features_passed: item features with ``item_id_int`` plus the
            columns named in ``feature_columns_ncf_passed``.
        feature_columns_ncf_passed: feature column names, in model input order.
        cosine_sim_matrix_passed: square similarity matrix that is indexed
            directly with ``item_id_int``; its rows/columns must therefore be
            ordered like the item encoder's classes. May be None (affinity 0).
        df_wisata_source_passed: place table with ``ID Tempat``, ``Nama Wisata``
            and the category indicator columns.
        k_existing: size of the NCF candidate pool that gets re-ranked (path 1).
        liked_rating_threshold: minimum original rating for a place to count as liked.
        ncf_weight, content_weight: blend weights for path 1.
        probe_user_raw_ids: raw IDs of existing users whose predictions stand in
            for the new user (path 2).
        weight_google_rating_new, weight_ncf_appeal_new: blend weights for path 2.
        list_kategori_wisata_valid_passed: category names accepted as preferences.

    Returns:
        A DataFrame with the top ``n`` recommendations (also printed), or an
        empty DataFrame when required inputs are missing or nothing matches.
    """
    if ncf_model is None:
        print("Error: Model NCF belum dilatih atau tidak disediakan.")
        return pd.DataFrame()
    required_inputs = (
        user_encoder_passed, item_encoder_passed, df_user_cleaned_passed,
        df_wisata_features_passed, feature_columns_ncf_passed, df_wisata_source_passed,
        list_kategori_wisata_valid_passed,
    )
    if any(value is None for value in required_inputs):
        print("Error: Satu atau lebih dataframes/encoders/list kategori penting tidak disediakan.")
        return pd.DataFrame()

    is_known_user_with_reviews = False
    user_id_int = -1

    if raw_user_id:
        try:
            user_id_int = user_encoder_passed.transform([raw_user_id])[0]
        except ValueError:
            print(f"Info: User ID '{raw_user_id}' tidak dikenal encoder.")
        else:
            if not df_user_cleaned_passed[df_user_cleaned_passed['user_id_int'] == user_id_int].empty:
                is_known_user_with_reviews = True
            else:
                print(f"Info: User ID '{raw_user_id}' dikenal encoder, tapi tidak ada riwayat review.")

    # --- Path 1: known user with rating history ---
    if is_known_user_with_reviews:
        print(f"Membuat rekomendasi untuk pengguna yang sudah dikenal: {raw_user_id}")
        user_history = df_user_cleaned_passed[df_user_cleaned_passed['user_id_int'] == user_id_int]
        items_rated_by_user_int = user_history['item_id_int'].unique()
        all_possible_item_ids_int = df_wisata_features_passed['item_id_int'].unique()
        items_to_predict_int = np.setdiff1d(all_possible_item_ids_int, items_rated_by_user_int, assume_unique=True)

        if len(items_to_predict_int) == 0:
            print(f"User '{raw_user_id}' sudah memberi rating semua item atau tidak ada item lain untuk diprediksi.")
            return pd.DataFrame()

        items_to_predict_df = pd.DataFrame({'item_id_int': items_to_predict_int})
        batch_features_df = pd.merge(items_to_predict_df, df_wisata_features_passed, on='item_id_int', how='left')
        batch_features_df = batch_features_df.fillna(0)  # features missing after the join become 0
        features_input_batch = batch_features_df[feature_columns_ncf_passed].values.astype(np.float32)

        model_input_batch = {
            'user_input': np.full(len(items_to_predict_int), user_id_int),
            'item_input': items_to_predict_int,
            'features_input': features_input_batch
        }
        predicted_norm_ratings_batch = ncf_model.predict(model_input_batch, verbose=0).flatten()

        ncf_results_df = pd.DataFrame({
            'item_id_int': items_to_predict_int,
            'ncf_score_norm': predicted_norm_ratings_batch
        })
        # .copy(): columns are added below, and the head() result must be an
        # independent frame to avoid pandas' SettingWithCopyWarning.
        ncf_top_k_candidates = (
            ncf_results_df.sort_values(by='ncf_score_norm', ascending=False).head(k_existing).copy()
        )

        # Places the user liked, judged on the original rating scale.
        liked_item_ids_int = user_history.loc[
            user_history['rating'] >= liked_rating_threshold, 'item_id_int'
        ].unique()

        content_affinity_scores = [0.0] * len(ncf_top_k_candidates)
        if len(liked_item_ids_int) > 0 and cosine_sim_matrix_passed is not None:
            n_rows, n_cols = cosine_sim_matrix_passed.shape[0], cosine_sim_matrix_passed.shape[1]
            # Loop-invariant: liked places that are valid column indices.
            valid_liked_indices = liked_item_ids_int[(liked_item_ids_int >= 0) & (liked_item_ids_int < n_cols)]
            content_affinity_scores = []
            for candidate_item_id_int in ncf_top_k_candidates['item_id_int']:
                avg_sim = 0.0
                if 0 <= candidate_item_id_int < n_rows and len(valid_liked_indices) > 0:
                    sim_scores = cosine_sim_matrix_passed[candidate_item_id_int, valid_liked_indices]
                    sim_scores = sim_scores[np.isfinite(sim_scores)]
                    if len(sim_scores) > 0:
                        avg_sim = float(np.mean(sim_scores))
                content_affinity_scores.append(avg_sim)

        ncf_top_k_candidates['content_affinity_score'] = content_affinity_scores
        ncf_top_k_candidates['combined_score %'] = (
            (ncf_weight * ncf_top_k_candidates['ncf_score_norm'] +
             content_weight * ncf_top_k_candidates['content_affinity_score']) * 100
        )
        top_n_final = (
            ncf_top_k_candidates.sort_values(by='combined_score %', ascending=False).head(n).copy()
        )

        # Map item_id_int back to the raw place ID.
        item_id_int_to_raw_map = pd.Series(
            item_encoder_passed.classes_,
            index=item_encoder_passed.transform(item_encoder_passed.classes_)
        )
        top_n_final['ID Tempat'] = top_n_final['item_id_int'].map(item_id_int_to_raw_map)

        recommendations = pd.merge(
            top_n_final, df_wisata_source_passed[['ID Tempat', 'Nama Wisata']], on='ID Tempat', how='left'
        )
        print_columns = ['ID Tempat', 'Nama Wisata', 'ncf_score_norm', 'content_affinity_score', 'combined_score %']
        print(f"\nTop {n} Rekomendasi Gabungan (NCF + Cosine Boost) untuk User '{raw_user_id}':")
        print(recommendations[[col for col in print_columns if col in recommendations.columns]].to_string())
        return recommendations

    # --- No usable user history and no preferences: nothing to go on ---
    if not preferred_categories:
        print("Tidak ada User ID atau preferensi kategori yang diberikan. Tidak dapat membuat rekomendasi.")
        return pd.DataFrame()

    # --- Paths 2 and 3 share the category filter ---
    use_ncf_probe = bool(probe_user_raw_ids)
    if use_ncf_probe:
        print(f"Membuat rekomendasi untuk pengguna baru dengan preferensi: {preferred_categories} (NCF probe)")
    else:
        print(f"Membuat rekomendasi untuk pengguna baru HANYA berdasarkan kategori: {preferred_categories} (tanpa NCF probe).")

    valid_preferred_categories = [
        cat for cat in preferred_categories
        if cat in df_wisata_source_passed.columns and cat in list_kategori_wisata_valid_passed
    ]
    if not valid_preferred_categories:
        print(f"Tidak ada kategori valid dari preferensi: {preferred_categories}")
        return pd.DataFrame()

    # A place qualifies when at least one preferred category flag is set.
    category_match_mask = df_wisata_source_passed[valid_preferred_categories].sum(axis=1) > 0
    candidate_wisata_df = df_wisata_source_passed[category_match_mask].copy()
    if candidate_wisata_df.empty:
        print(f"Tidak ditemukan tempat wisata yang cocok dengan kategori: {valid_preferred_categories}")
        return pd.DataFrame()

    # --- Path 2: new user, category preferences plus NCF probe users ---
    if use_ncf_probe:
        if 'Overall Rating (Google Maps)' in candidate_wisata_df.columns:
            candidate_wisata_df['google_rating_norm'] = normalize_series_min_max(
                candidate_wisata_df['Overall Rating (Google Maps)']
            )
        else:
            candidate_wisata_df['google_rating_norm'] = 0.0

        valid_probe_user_ids_int = []
        for raw_id_probe in probe_user_raw_ids:
            try:
                valid_probe_user_ids_int.append(user_encoder_passed.transform([raw_id_probe])[0])
            except ValueError:
                print(f"Warning: Probe user ID '{raw_id_probe}' tidak dikenal oleh user_encoder.")

        # Candidates that cannot be scored keep an appeal score of 0.
        avg_ncf_appeal_scores = np.zeros(len(candidate_wisata_df), dtype=float)
        if not valid_probe_user_ids_int:
            print("Warning: Tidak ada probe user ID yang valid. NCF appeal score akan 0.")
        else:
            raw_to_item_int = dict(zip(
                np.asarray(item_encoder_passed.classes_).tolist(),
                np.asarray(item_encoder_passed.transform(item_encoder_passed.classes_)).tolist(),
            ))
            # First feature row per item, addressable by item_id_int.
            feature_lookup = (
                df_wisata_features_passed.drop_duplicates(subset='item_id_int')
                .set_index('item_id_int')[feature_columns_ncf_passed]
            )
            candidate_item_ints = [raw_to_item_int.get(raw) for raw in candidate_wisata_df['ID Tempat']]
            scorable_positions = [
                pos for pos, item_int in enumerate(candidate_item_ints)
                if item_int is not None and item_int in feature_lookup.index
            ]
            if scorable_positions:
                scorable_items = np.array([candidate_item_ints[pos] for pos in scorable_positions])
                item_features = feature_lookup.loc[scorable_items].values.astype(np.float32)
                probe_ids = np.asarray(valid_probe_user_ids_int)
                n_probes, n_scorable = len(probe_ids), len(scorable_items)
                # One batched call for every (probe user, place) pair instead of
                # one predict() call per pair; rows are grouped by probe user.
                probe_predictions = ncf_model.predict({
                    'user_input': np.repeat(probe_ids, n_scorable),
                    'item_input': np.tile(scorable_items, n_probes),
                    'features_input': np.tile(item_features, (n_probes, 1)),
                }, verbose=0)
                avg_ncf_appeal_scores[scorable_positions] = (
                    np.asarray(probe_predictions).reshape(n_probes, n_scorable).mean(axis=0)
                )
        candidate_wisata_df['avg_ncf_appeal_score'] = avg_ncf_appeal_scores

        candidate_wisata_df['final_score %'] = (
            (weight_google_rating_new * candidate_wisata_df['google_rating_norm'].fillna(0) +
             weight_ncf_appeal_new * candidate_wisata_df['avg_ncf_appeal_score']) * 100
        )
        ranked_wisata = candidate_wisata_df.sort_values(by='final_score %', ascending=False).head(n)

        print_columns_new = [
            'ID Tempat', 'Nama Wisata', 'Overall Rating (Google Maps)',
            'google_rating_norm', 'avg_ncf_appeal_score', 'final_score %'
        ] + valid_preferred_categories
        recommendations = ranked_wisata[[col for col in print_columns_new if col in ranked_wisata.columns]]
        print(f"\nTop {n} Rekomendasi Hybrid (Kategori + NCF Probe) untuk Pengguna Baru:")
        print(recommendations.to_string())
        return recommendations

    # --- Path 3: new user, category preferences only (simple fallback) ---
    # Sort by Google rating, then review count, using whichever columns exist.
    sort_cols = [
        col for col in ('Overall Rating (Google Maps)', 'Jumlah Ulasan (Google Maps)')
        if col in candidate_wisata_df.columns
    ]
    if sort_cols:
        ranked_wisata = candidate_wisata_df.sort_values(
            by=sort_cols, ascending=[False] * len(sort_cols)
        ).head(n)
    else:
        # No rating / review columns available: keep the table order.
        ranked_wisata = candidate_wisata_df.head(n)

    print_cols_to_check = [
        'ID Tempat', 'Nama Wisata', 'Overall Rating (Google Maps)', 'Jumlah Ulasan (Google Maps)'
    ] + valid_preferred_categories
    recommendations = ranked_wisata[[col for col in print_cols_to_check if col in ranked_wisata.columns]]
    print(f"\nTop {n} Rekomendasi Berdasarkan Kategori (Simple Filter):")
    print(recommendations.to_string())
    return recommendations


# --- Example usage with the tuned model (V2 Final) ---
# Keyword arguments that every example call below passes unchanged.
_shared_reco_kwargs = dict(
    user_encoder_passed=user_encoder,
    item_encoder_passed=item_encoder,
    df_user_cleaned_passed=df_user_cleaned,
    df_wisata_features_passed=df_wisata_features,
    feature_columns_ncf_passed=feature_columns,
    cosine_sim_matrix_passed=cosine_sim_matrix,
    df_wisata_source_passed=df_wisata,
    list_kategori_wisata_valid_passed=EXISTING_LIST_KATEGORI_WISATA,
)
_tuned_model_ready = 'best_model_tuned_v2' in locals() and best_model_tuned_v2 is not None

if not _tuned_model_ready:
    print("\nModel V2 Final belum dilatih atau tidak tersedia, contoh rekomendasi tidak dapat dijalankan.")
elif df_user_cleaned.empty or num_users <= 0:
    print("\nModel V2 Final telah dilatih, tetapi tidak ada data user yang valid untuk contoh rekomendasi.")
else:
    # Scenario 1: a user the model already knows (first class of the encoder).
    if len(user_encoder.classes_) > 0:
        known_user_id_example = user_encoder.classes_[0]
        print(f"\n>>> SKENARIO 1 (V2 Final): Rekomendasi untuk pengguna dikenal '{known_user_id_example}'")
        recs_known_v2 = get_unified_recommendations(
            raw_user_id=known_user_id_example,
            n=5,
            ncf_model=best_model_tuned_v2,
            liked_rating_threshold=3.5,  # threshold on the original rating scale
            **_shared_reco_kwargs,
        )
    else:
        print("Tidak ada user yang dikenal di user_encoder untuk Skenario 1.")

    # Scenario 2: a new user with category preferences, scored through up to
    # three randomly sampled existing users ("NCF probe").
    new_user_prefs_example_1 = ['budaya', 'seni']  # names from EXISTING_LIST_KATEGORI_WISATA
    num_samples_for_probe = min(3, len(user_encoder.classes_))
    if num_samples_for_probe > 0:
        probe_user_ids_for_new_example = random.sample(list(user_encoder.classes_), num_samples_for_probe)
        print(f"\n>>> SKENARIO 2 (V2 Final): Rekomendasi untuk pengguna baru, preferensi: {new_user_prefs_example_1} (dengan NCF Probe)")
        print(f"Probe User IDs: {probe_user_ids_for_new_example}")
        recs_new_ncf_probe_v2 = get_unified_recommendations(
            preferred_categories=new_user_prefs_example_1,
            n=5,
            ncf_model=best_model_tuned_v2,
            probe_user_raw_ids=probe_user_ids_for_new_example,
            **_shared_reco_kwargs,
        )
    else:
        print("Tidak cukup user di encoder untuk NCF Probe pada Skenario 2.")

# Scenario 3: a new user with category preferences only (simple fallback).
# This path never calls the NCF model, so it runs even without a trained one;
# a placeholder Model() is passed in that case only to get past the
# function's "model is None" guard.
new_user_prefs_example_2 = ['teknologi', 'sains']  # names from EXISTING_LIST_KATEGORI_WISATA
print(f"\n>>> SKENARIO 3 (V2 Final): Rekomendasi untuk pengguna baru, preferensi: {new_user_prefs_example_2} (filter kategori sederhana)")
recs_new_simple_v2 = get_unified_recommendations(
    preferred_categories=new_user_prefs_example_2,
    n=3,
    ncf_model=best_model_tuned_v2 if _tuned_model_ready else Model(),
    **_shared_reco_kwargs,
)


# Persist the tuned model, plus everything needed to serve recommendations
# from it later (encoders, feature tables, similarity matrix, category list).
if 'best_model_tuned_v2' in locals() and best_model_tuned_v2 is not None:
    import pickle

    # Network weights and architecture in an HDF5 file.
    best_model_tuned_v2.save("best_hybrid_ncf_model.h5")
    print("Model saved as 'best_hybrid_ncf_model.h5'")

    # Auxiliary objects bundled into a single pickle file.
    model_data = dict(
        user_encoder=user_encoder,
        item_encoder=item_encoder,
        feature_columns=feature_columns,
        df_wisata=df_wisata,
        df_wisata_features=df_wisata_features,
        df_user_cleaned=df_user_cleaned,
        cosine_sim_matrix=cosine_sim_matrix,
        EXISTING_LIST_KATEGORI_WISATA=EXISTING_LIST_KATEGORI_WISATA,
    )

    with open('model_data.pkl', 'wb') as f:
        pickle.dump(model_data, f)
    print("Auxiliary model data saved as 'model_data.pkl'")

Trial 15 Complete [00h 01m 03s]
val_mae: 0.16356784105300903

Best val_mae So Far: 0.12522290647029877
Total elapsed time: 00h 29m 47s

        Hyperparameter terbaik (V2 Final) yang ditemukan:
        Embedding Dim: 96
        MLP Unit 1: 48
        MLP Unit 2: 16
        MLP Unit 3: 8
        Feature MLP Unit 1: 48
        Feature MLP Unit 2: 16
        Dropout Rate: 0.350
        Learning Rate: 0.001
        L2 Reg Factor: 0.0001
        

Melatih model terbaik (V2 Final) dengan hyperparameter yang ditemukan...
Epoch 1/120
511/511 ━━━━━━━━━━━━━━━━━━━━ 16s 17ms/step - loss: 0.2777 - mae: 0.3515 - val_loss: 0.0960 - val_mae: 0.1705
Epoch 2/120
511/511 ━━━━━━━━━━━━━━━━━━━━ 3s 5ms/step - loss: 0.0896 - mae: 0.1589 - val_loss: 0.0956 - val_mae: 0.1511
Epoch 3/120
511/511 ━━━━━━━━━━━━━━━━━━━━ 3s 5ms/step - loss: 0.0858 - mae: 0.1410 - val_loss: 0.1038 - val_mae: 0.1442
Epoch 4/120
[1m511

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_n_final['ID Tempat'] = top_n_final['item_id_int'].map(item_id_int_to_raw_map)



Top 5 Rekomendasi Gabungan (NCF + Cosine Boost) untuk User 'U000001':
  ID Tempat              Nama Wisata  ncf_score_norm  content_affinity_score  combined_score %
0      T002     Kampung Batik Kauman        0.997911                0.752798         92.437701
1      T004  Museum Batik Danar Hadi        0.997249                0.639852         89.002947
2      T020       Pura Mangkunagaran        0.999615                0.053349         71.573502
3      T081      Girimanik Waterfall        0.997724                0.052146         71.405032
4      T049        Kemuning Skyhills        0.997070                0.051111         71.328206

>>> SKENARIO 2 (V2 Final): Rekomendasi untuk pengguna baru, preferensi: ['budaya', 'seni'] (dengan NCF Probe)
Probe User IDs: ['U001505', 'U015015', 'U034974']
Membuat rekomendasi untuk pengguna baru dengan preferensi: ['budaya', 'seni'] (NCF probe)





Top 5 Rekomendasi Hybrid (Kategori + NCF Probe) untuk Pengguna Baru:
   ID Tempat                   Nama Wisata  Overall Rating (Google Maps)  google_rating_norm  avg_ncf_appeal_score  final_score %  budaya  seni
23      T024         Museum Astana Oentara                           4.9               1.000              0.961869      98.093441       1     0
10      T011        Tumurun Private Museum                           4.8               0.875              0.998430      93.671492       1     1
20      T021              Museum Lokananta                           4.8               0.875              0.973669      92.433453       1     1
90      T091  Masjid Agung Al-Aqsha Klaten                           4.8               0.875              0.952536      91.376781       0     1
19      T020            Pura Mangkunagaran                           4.7               0.750              0.993062      87.153092       1     0

>>> SKENARIO 3 (V2 Final): Rekomendasi untuk pengguna baru, prefe