In [1]:
import numpy as np
import pandas as pd
import pickle
import ast
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
hotel = pd.read_csv("/content/Hotel DATASET.csv")
booking = pd.read_csv("/content/Booking.csv")

In [3]:
user_id = 1
top_n = 10

In [4]:
#Load the pickle models at the top to avoid redundant loading
with open('scaler_model.pkl', 'rb') as f:
    scaler = pickle.load(f)

In [5]:
#NEW USER
def get_best_hotel_by_new(user_id, top_n):
    hotel_copy = hotel.copy()  # Create a copy of the hotel dataset

    # Normalize rating, penjualan_3months, and price using the pre-trained scaler
    columns_to_normalize = ['rating', 'penjualan_3months', 'price']
    hotel_copy[columns_to_normalize] = scaler.transform(hotel_copy[columns_to_normalize])

    # Normalize 'jarak' separately using a new scaler
    jarak_scaler = MinMaxScaler()
    hotel_copy['jarak'] = jarak_scaler.fit_transform(hotel_copy[['jarak']])

    # Invert 'jarak' and 'price' so that lower values are better
    hotel_copy['jarak_new'] = 1 - hotel_copy['jarak']
    hotel_copy['price_new'] = 1 - hotel_copy['price']

    # Calculate the overall score using weighted features
    weights = {'rating': 0.25, 'penjualan_3months': 0.25, 'jarak_new': 0.3, 'price_new': 0.2}
    hotel_copy['score_new'] = (
        hotel_copy['rating'] * weights['rating'] +
        hotel_copy['penjualan_3months'] * weights['penjualan_3months'] +
        hotel_copy['jarak_new'] * weights['jarak_new'] +
        hotel_copy['price_new'] * weights['price_new']
    )

    # Sort the hotels based on the new score and return the top N recommendations
    recommended_hotel = hotel_copy.sort_values(by=['score_new', 'name'], ascending=[False, True]).head(top_n)
    recommended_names = recommended_hotel['name'].tolist()  # Extract the recommended hotel names

    # Retrieve and return detailed data for the recommended hotels
    all_data_for_recommended_hotel = hotel[hotel['name'].isin(recommended_names)]
    all_data_for_recommended_hotel = all_data_for_recommended_hotel.set_index('name').loc[recommended_hotel['name']].reset_index()

    return all_data_for_recommended_hotel
    #return recommended_hotel[['name', 'city', 'score_new', 'jarak_new', 'rating', 'penjualan_3months']]

In [6]:
#OLD USER

def recommend_by_city_and_similarity(user_id, top_n):
    user_booking = booking[booking['user_id'] == user_id].copy()
    user_booking = user_booking[user_booking['hotel_id'].notnull()]
    user_booking = user_booking.sort_values(by='booking_date', ascending=False)

    # Hitung persebaran frekuensi kota favorit berdasarkan booking sebelumnya
    city_counts = user_booking.merge(hotel, on='hotel_id')['city'].value_counts(normalize=True)

    # Ambil data hotel yang belum dikunjungi
    visited_hotel_ids = user_booking['hotel_id'].tolist()
    unseen_hotel = hotel[~hotel['hotel_id'].isin(visited_hotel_ids)].copy()

    # Normalisasi fitur 'rating', 'penjualan_3months', dan 'price' menggunakan scaler yang disimpan
    unseen_hotel[['rating', 'penjualan_3months', 'price']] = scaler.transform(unseen_hotel[['rating', 'penjualan_3months', 'price']])

    # Normalisasi 'jarak' secara terpisah
    jarak_scaler = MinMaxScaler()
    unseen_hotel['jarak'] = jarak_scaler.fit_transform(unseen_hotel[['jarak']])

    # Inversi 'jarak' dan 'price'
    unseen_hotel['jarak'] = 1 - unseen_hotel['jarak']  # Jarak lebih dekat mendapatkan skor lebih tinggi
    unseen_hotel['price'] = 1 - unseen_hotel['price']  # Harga lebih rendah mendapatkan skor lebih tinggi

    # Hitung kemiripan berdasarkan cosine similarity
    unseen_features = unseen_hotel[['rating', 'penjualan_3months', 'price', 'jarak']].values
    seen_hotel = hotel[hotel['hotel_id'].isin(visited_hotel_ids)].copy()
    seen_features = seen_hotel[['rating', 'penjualan_3months', 'price', 'jarak']].values
    similarity_scores = cosine_similarity(seen_features, unseen_features)
    unseen_hotel['similarity_score'] = similarity_scores.mean(axis=0)

    # Tambahkan skor kota favorit
    unseen_hotel['city_rank'] = unseen_hotel['city'].apply(lambda city: city_counts.get(city, 0))

    # Buat rekomendasi per kota sesuai proporsi
    recommendations = []
    for city, proportion in city_counts.items():
        city_hotels = unseen_hotel[unseen_hotel['city'] == city].copy()
        city_count = int(round(proportion * top_n))
        if city_count > 0:
            city_hotels = city_hotels.sort_values(by=['similarity_score', 'rating'], ascending=[False, False])
            recommendations.append(city_hotels.head(city_count))

    # Gabungkan rekomendasi dari semua kota
    recommended_hotel = pd.concat(recommendations).head(top_n)
    recommended_names = recommended_hotel['name'].tolist()
    all_data_for_recommended_hotel = hotel[hotel['name'].isin(recommended_names)]
    all_data_for_recommended_hotel = all_data_for_recommended_hotel.set_index('name').loc[recommended_hotel['name']].reset_index()

    return all_data_for_recommended_hotel
    #return recommended_hotel[['name', 'city', 'rating', 'price', 'jarak', 'similarity_score']]

In [8]:
#PENGGABUNGAN FUNGSI

def combined_recommendation(user_id, top_n):
    #Cek apakah pengguna memiliki booking
    recent_hotel = booking[booking['user_id'] == user_id].copy()

    #Jika tidak ada riwayat, gunakan rekomendasi berdasarkan kategori
    if recent_hotel.empty:
        print("Tidak ada booking untuk user ini. Menggunakan rekomendasi berdasarkan kategori.")
        recommended_hotel = get_best_hotel_by_new(user_id, top_n)
    else:
        #Jika ada booking, gunakan rekomendasi berdasarkan deskripsi
        print("Booking ditemukan. Menggunakan rekomendasi berdasarkan kemiripan deskripsi.")
        recommended_hotel = recommend_by_city_and_similarity(user_id, top_n)

    return recommended_hotel

In [9]:
# Contoh penggunaan fungsi gabungan
recommended_hotel = combined_recommendation(user_id, top_n)
print(recommended_hotel)

Booking ditemukan. Menggunakan rekomendasi berdasarkan kemiripan deskripsi.
                                                name  hotel_id      city  \
0                  OYO 91473 Wisma Husada 1 Semarang       119  Semarang   
1                           Sleep and Sleep Semarang       126  Semarang   
2                OYO 90758 D Kost In Kandri Semarang       107  Semarang   
3               The Backpacker Simpang Lima Semarang       105  Semarang   
4                                 SPOT ON 2281 Omajo       108  Semarang   
5                                Wisma Purba Danarta       122  Semarang   
6                                  InnJoy Guesthouse       129  Semarang   
7                       OYO 1808 New Bandungan Indah       135  Semarang   
8              OYO 2562 Pondok Tajlibu Syifa Syariah        66   Bandung   
9  Lape Resort Syariah at Desa Wisata Pulau Untun...         5   Jakarta   

                                                 des     price  rating  \
0  OYO 91473 