In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse.linalg import svds
import json
from itertools import chain
from rich import print
import sys
import os
import utils

# Root folder whose sub-directories hold the mini_model helper modules; the
# expression resolves to the notebook's current working directory.
mini_model_path = os.path.abspath(os.path.join(os.getcwd(), ''))

# Append every immediate sub-directory to sys.path so the modules inside can be
# imported by bare name.
for folder in os.listdir(mini_model_path):
    folder_path = os.path.join(mini_model_path, folder)
    # Plain files and folders that are already on sys.path are skipped.
    if not os.path.isdir(folder_path) or folder_path in sys.path:
        continue
    sys.path.append(folder_path)

import function_get_score_amenities as fga
import function_get_score_rate as fgr
import CF_user_review.CF_user_scores as cfus
import recommenr_review_data as rhr
import review_quality
import recommend_location as rl
import recommend_polices as rp
import recommed_room as rr
import function_get_score_price as fgsp


In [2]:
from pathlib import Path
import config
# Project root that the model artefact paths below are joined onto.
# The HOTEL_RECSYS_BASE_DIR environment variable overrides it so the notebook
# is not tied to one machine; the original local path stays as the fallback.
DEFAULT_BASE_DIR = Path(os.environ.get("HOTEL_RECSYS_BASE_DIR", "D:/graduate_dissertation/final"))

In [3]:
def _read_json_resource(filename):
    """Return the parsed content of the UTF-8 JSON file that config.get_path(filename) points to."""
    with open(config.get_path(filename), 'r', encoding='utf-8') as handle:
        return json.load(handle)


# Feature tables used by the scoring cells below, loaded in the same order as before.
feature_sub_rate = _read_json_resource("feature_sub_rate.json")
weights_criteria_utilities = _read_json_resource("weights_criteria_utilities.json")
feature_popular_facilities = _read_json_resource("feature_popular_facilities.json")
feature_facilities = _read_json_resource("feature_facilities.json")
feature_url = _read_json_resource("feature_url.json")
feature_allRoom = _read_json_resource("feature_allRoom.json")
# review_processing.json is exposed under the name result_groupby_id.
result_groupby_id = _read_json_resource("review_processing.json")
feature_location = _read_json_resource("feature_location.json")
feature_policies = _read_json_resource("feature_policies.json")
# hotel_data_room.json is exposed under the name feature_detail_room.
feature_detail_room = _read_json_resource('hotel_data_room.json')
feature_star_rating = _read_json_resource('feature_star_rating.json')


#### Load model

In [4]:
def _load_amenity_recommender(kind):
    """
    Build an fga.HotelSimilarityRecommender for `kind` and call load_model() on it.

    Parameters:
    - kind (str): 'hotel' or 'room'; used both as the `type` argument and as the
      last component of the model directory.

    Returns:
    - The recommender instance after load_model() has been called.
    """
    recommender = fga.HotelSimilarityRecommender(
        model_name='paraphrase-multilingual-MiniLM-L12-v2',
        # NOTE(review): the recorded cell output reports "Using CPU" despite this flag.
        use_gpu=True,
        # Path components are passed separately instead of a backslash-joined
        # literal, which only resolves to nested folders on Windows.
        model_dir=DEFAULT_BASE_DIR.joinpath(
            "mini_model", "function_get_score_amenities", "model_similarity_amenities", kind
        ),
        type=kind,
        batch_size=256,
        faiss_metric='IP',
    )
    recommender.load_model()
    return recommender


# Variable names (including the "faclities" spelling) are kept because
# get_best_hotels refers to them.
recommend_faclities_hotel = _load_amenity_recommender('hotel')
recommend_faclities_room = _load_amenity_recommender('room')


Initializing Sentence-BERT model: paraphrase-multilingual-MiniLM-L12-v2
Using CPU for Sentence-BERT model: paraphrase-multilingual-MiniLM-L12-v2
SafeTensors file D:\graduate_dissertation\final\mini_model\function_get_score_amenities\model_similarity_amenities\hotel\model.safetensors not found, initializing new model
Initialized new model paraphrase-multilingual-MiniLM-L12-v2 on CPU
Loaded hotel data from D:\graduate_dissertation\final\mini_model\function_get_score_amenities\model_similarity_amenities\hotel\hotel_data.pkl
Loaded vectors from D:\graduate_dissertation\final\mini_model\function_get_score_amenities\model_similarity_amenities\hotel\hotel_vectors.npy
Loaded Faiss index from D:\graduate_dissertation\final\mini_model\function_get_score_amenities\model_similarity_amenities\hotel\faiss_index.bin
Initializing Sentence-BERT model: paraphrase-multilingual-MiniLM-L12-v2
Using CPU for Sentence-BERT model: paraphrase-multilingual-MiniLM-L12-v2
SafeTensors file D:\graduate_dissertation\

In [5]:
# NOTE(review): `cfus` is bound by `import CF_user_review.CF_user_scores as cfus`;
# confirm that this name is actually callable in the project layout.
CF = cfus()

# Path components are passed separately instead of a backslash-joined literal,
# which only resolves to nested folders on Windows.
# NOTE(review): the .pkl files are presumably unpickled by load_model — only
# load artefacts from a trusted source.
_cf_model_dir = DEFAULT_BASE_DIR.joinpath("mini_model", "CF_user_review")
state_model = CF.load_model(_cf_model_dir.joinpath("CF_state_for_user.pkl"))
country_model = CF.load_model(_cf_model_dir.joinpath("CF_country_for_user.pkl"))

In [None]:
# Build the review-statistics helper and call load_stats() on it; get_best_hotels
# later calls hotel_review_stats.calculate_final_score_local_data(...) on this object.
hotel_review_stats = review_quality.HotelReviewStats()
hotel_review_stats.load_stats()

In [4]:
# Sample user query consumed by get_best_hotels.
# NOTE(review): 'province' and 'nearby_places' appear to point at different
# regions — confirm the sample is intentional.
query = {
    # ====== location ======
    'province': 'Th√†nh ph·ªë H·ªì Ch√≠ Minh',
    'nearby_places': ["C·ªë ƒë√¥ Hoa L∆∞", "V∆∞·ªùn Chim Thung Nham", "Ninh Binh Train Station"],
    'is_near_center': True,
    'public_transport_access': ['ga t√†u'],  # about 50% handled so far
    # ======
    'price_range': (300000, 1000000),
    'rating': (3, 5),
    'stars_rating': 3,
    'services': ["D·ªãch v·ª• s·ª± ki·ªán & h·ªó tr·ª£ ƒë·∫∑c bi·ªát"],
    'criteria': ['V·ªã tr√≠ & m√¥i tr∆∞·ªùng'],
    'amenities': ["TV m√†n h√¨nh ph·∫≥ng", "k√©t an to√†n"],
    # A first 'room_type' entry (a list of single/double room labels, marked
    # "not handled yet") used to sit here. It was dead: the 'room_type' key in
    # the room section below overrode it, so it has been removed.
    'booking_flexibility': ['h·ªßy mi·ªÖn ph√≠', 'thanh to√°n khi nh·∫≠n ph√≤ng'],  # not handled yet
    'distance_to_city_center': 5,

    'country': 'Vi√™Ã£t Nam',
    'state': 'C·∫∑p ƒë√¥i',

    # ====== policies ======
    'policies': {
        "Nh·∫≠n ph√≤ng": '11:00-12:00',
        "Tr·∫£ ph√≤ng": '12:00-13:00',
        "Ch·ªâ thanh to√°n b·∫±ng ti·ªÅn m·∫∑t": None,
        "C√°c ph∆∞∆°ng th·ª©c thanh to√°n ƒë∆∞·ª£c ch·∫•p nh·∫≠n": 'Bankcard',
        "Gi·ªõi h·∫°n ƒë·ªô tu·ªïi": None,
        "Gi·ªù gi·ªõi nghi√™m": 'C·ªïng v√†o ch·ªó ngh·ªâ s·∫Ω ƒë√≥ng trong kho·∫£ng 00:00-5:00',
        "H√∫t thu·ªëc": 'Kh√¥ng cho ph√©p h√∫t thu·ªëc.',
        "H·ªßy ƒë·∫∑t ph√≤ng/ Tr·∫£ tr∆∞·ªõc": 'C√°c ch√≠nh s√°ch h·ªßy v√† thanh to√°n tr∆∞·ªõc',
        "Kh√¥ng gi·ªõi h·∫°n ƒë·ªô tu·ªïi": 'Kh√¥ng c√≥ y√™u c·∫ßu v·ªÅ ƒë·ªô tu·ªïi',
        "Nh√≥m": None,
        "Th·∫ª ƒë∆∞·ª£c ch·∫•p nh·∫≠n t·∫°i ch·ªó ngh·ªâ n√†y": None,
        "Th·∫ª ƒë∆∞·ª£c ch·∫•p nh·∫≠n t·∫°i kh√°ch s·∫°n n√†y": 'Bankcard',
        "Th·ªùi gian y√™n l·∫∑ng": None,
        "Ti·ªác t√πng": 'Cho ph√©p t·ªï ch·ª©c',
        "Tr·∫ª em v√† gi∆∞·ªùng": 'Tr·∫ª em d∆∞·ªõi 5 tu·ªïi',
        "V·∫≠t nu√¥i": 'Cho ph√©p mang v·∫≠t nu√¥i',
        "ƒê·∫∑t c·ªçc ƒë·ªÅ ph√≤ng h∆∞ h·∫°i c√≥ th·ªÉ ho√†n l·∫°i": 'Y√™u c·∫ßu VND 1.000.000 ti·ªÅn ƒë·∫∑t c·ªçc ƒë·ªÅ ph√≤ng h∆∞ h·∫°i khi ƒë·∫øn ngh·ªâ.',
        'room_service_included': 'Kh√¥ng c·∫ßn thanh to√°n tr∆∞·ªõc - thanh to√°n t·∫°i ch·ªó ngh·ªâ\nKh√¥ng c·∫ßn th·∫ª t√≠n d·ª•ng'
    },

    # ====== room ======
    'capacity': 2,
    'room_type': 'Family',
    'room_level': 'Standard',
    'area': 50,
    'bed_type': {'type': 'gi∆∞·ªùng ƒë√¥i', 'count': 1},
    'included_breakfast': True,
    # NOTE(review): 'room_amenties' looks like a misspelling of "amenities";
    # it is left as-is because consumers outside this notebook may read this exact key.
    'room_amenties':[],
    'room_facilities':[],
    'room_view': [],

}


In [5]:
# Restrict this experiment to the first entry of feature_url.
# A comprehension is used so the builtin `id` is no longer rebound at module
# level by a loop variable.
ids_result = [entry['id'] for entry in feature_url[0:1]]


In [36]:
# Scratch run of the service sub-rating step using the packaged scorer (fgr).
print(f"üîµ B·∫Øt ƒë·∫ßu t√¨m ki·∫øm c√°c kh√°ch s·∫°n ƒë∆∞·ª£c ƒë√°nh gi√° cao v·ªÅ d·ªãch v·ª• c·ªßa b·∫°n.")
if query['services']:
    print(f"‚úé D·ªãch v·ª•: [bold #80CFFF]{', '.join(query['services'])}[/bold #80CFFF].")
    # Expand every requested service group and merge the results.
    services = set()
    for criteria in query['services']:
        services |= set(utils.convert_group(criteria, "services", "expand"))
    candidate_rows = utils.filter_matching_elements(ids_result, feature_sub_rate)
    result_services = fgr.get_score_sub_rate(services, candidate_rows)
    # Re-key the scored rows as {hotel id: score}.
    score_sub_rate_services = dict((item['id'], item['score']) for item in result_services)

In [None]:
import numpy as np
import json

import sys
from pathlib import Path


import config


def _load_json_or_default(filename, default):
    """
    Return the parsed JSON file resolved by config.get_path(filename).

    When a FileNotFoundError is raised, a message is printed and `default` is
    returned instead.
    """
    try:
        with open(config.get_path(filename), 'r', encoding='utf-8') as handle:
            return json.load(handle)
    except FileNotFoundError:
        print(f"File {filename} not found. Please check the file path.")
        return default


feature_sub_rate = _load_json_or_default("feature_sub_rate.json", [])
weights_criteria_utilities = _load_json_or_default("weights_criteria_utilities.json", {})



def get_dynamic_weights(selected_utilities, utility_scores=weights_criteria_utilities):
    """
    Derive per-category weights from the utilities the user selected.

    Parameters:
    - selected_utilities (iterable of str): Utilities the user cares about.
    - utility_scores (dict): Maps a utility to a {category: weight} dict; defaults
      to the table loaded from weights_criteria_utilities.json.

    Returns:
    - dict: For each category reached through a selected utility, the np.mean of
      the weights collected for it. Utilities absent from utility_scores are ignored.
    """
    collected = {}
    for utility in selected_utilities:
        if utility not in utility_scores:
            continue
        for category, weight in utility_scores[utility].items():
            collected.setdefault(category, []).append(weight)

    averaged = {}
    for category, weights in collected.items():
        averaged[category] = np.mean(weights)
    return averaged


def normalize_hotel_scores(hotel_data):
    """
    Divide every numeric field of one hotel's rating record by 10.

    Parameters:
    - hotel_data (dict): Raw per-category values for one hotel. Values may be
      numbers or numeric strings, optionally written with a decimal comma ("8,5").

    Returns:
    - dict: key -> value / 10 for every field that could be read as a number.
      Fields that carry no number (free text, None, lists, ...) are omitted
      instead of raising TypeError.
    """
    normalized_data = {}
    for key, value in hotel_data.items():
        try:
            # Text ratings may use a decimal comma.
            normalized_data[key] = float(value.replace(",", ".")) / 10
        except (ValueError, AttributeError, TypeError):
            # Either not a string or not numeric text: fall back to plain division.
            try:
                normalized_data[key] = value / 10
            except TypeError:
                # Nothing numeric to normalise for this field; leave it out.
                continue
    return normalized_data


def compute_hotel_score(hotel_data, selected_utilities):
    """
    Combine one hotel's normalised category ratings into a single weighted score.

    Parameters:
    - hotel_data (dict): Raw per-category values for one hotel.
    - selected_utilities (iterable of str): Utilities the user cares about.

    Returns:
    - float: Sum of (normalised rating * category weight) over the categories
      present in the hotel record, divided by the sum of all category weights.
      Returns 0.0 when the selected utilities yield no weight at all, where the
      function previously raised ZeroDivisionError.
    """
    category_weights = get_dynamic_weights(selected_utilities)
    total_weight = sum(category_weights.values())
    if not total_weight:
        # No utility mapped to any category: there is nothing to score against.
        return 0.0

    normalized_data = normalize_hotel_scores(hotel_data)
    # Iterate in the hotel record's key order, as before, so the summation
    # order of the floats does not change.
    weighted_sum = sum(
        rating * category_weights[category]
        for category, rating in normalized_data.items()
        if category in category_weights
    )
    return weighted_sum / total_weight


def get_score_sub_rate(query: list[str], feature_sub_rate=feature_sub_rate):
    """
    Score every hotel record against the requested utilities and rank the results.

    Parameters:
    - query (list of str): Utilities the user wants to prioritise.
    - feature_sub_rate (list of dict): Hotel records; each has an 'id' plus the
      per-category ratings handed to compute_hotel_score.

    Returns:
    - list of dict: One {'id', 'score'} entry per hotel, highest score first.
    """
    scored = [
        {'id': hotel['id'], 'score': compute_hotel_score(hotel, query)}
        for hotel in feature_sub_rate
    ]
    scored.sort(key=lambda entry: entry['score'], reverse=True)
    return scored


In [7]:
# Scratch run of the service sub-rating step using the get_score_sub_rate
# defined in this notebook.
print(f"üîµ B·∫Øt ƒë·∫ßu t√¨m ki·∫øm c√°c kh√°ch s·∫°n ƒë∆∞·ª£c ƒë√°nh gi√° cao v·ªÅ d·ªãch v·ª• c·ªßa b·∫°n.")
if query['services']:
    print(f"‚úé D·ªãch v·ª•: [bold #80CFFF]{', '.join(query['services'])}[/bold #80CFFF].")
    # Expand every requested service group and merge the results.
    services = set()
    for criteria in query['services']:
        services |= set(utils.convert_group(criteria, "services", "expand"))
    candidate_rows = utils.filter_matching_elements(ids_result, feature_sub_rate)
    result_services = get_score_sub_rate(services, candidate_rows)

In [8]:
result_services

[{'id': '10000593', 'score': 0.9259756097560976}]

In [8]:

def _expand_query_groups(group_names, group_kind):
    """Return the union of utils.convert_group(name, group_kind, "expand") over all names."""
    expanded = set()
    for name in group_names:
        expanded.update(utils.convert_group(name, group_kind, "expand"))
    return expanded


def _criteria_review_weights(criteria):
    """Expand the criteria groups, give each criterion ratio 1, and allocate review weights."""
    ratios = {key: 1 for key in _expand_query_groups(criteria, "criteria")}
    return rhr.allocate_weights_with_ratios(ratios)


def get_best_hotels(query):
    """
    Score hotels against a user query and return the combined ranking.

    Stage 1 (hotel information):
      * selects candidate hotels by location (rl),
      * scores policies (rp), service sub-ratings (fgr), service matching (fga)
        and amenities (the two loaded similarity recommenders),
      * merges those four scores with utils.compute_total_score
        (weights 0.3 / 0.3 / 0.3 / 0.1).
    Stage 2 (reviews):
      * builds a review score, either Bayesian (rhr) or from the CF models when a
        country or state is supplied, adjusted by review quality.
    The final result is utils.compute_total_score(info, review, weights=(0.7, 0.3)).

    Parameters:
    - query (dict): User request. Keys read here: 'nearby_places', 'province',
      'is_near_center', 'stars_rating', 'price', 'policies', 'services',
      'amenities', 'criteria', 'country', 'state'. Missing keys are treated as
      not provided.

    Returns:
    - The value returned by utils.compute_total_score; the recorded notebook
      output shows a list of (hotel_id, score) tuples in descending score order.

    Notes:
    - Star-rating, price and room scores are computed but not yet part of the
      final score (kept as in the original work in progress).
    """
    # ============================================================
    # 1. Location: pick the candidate hotels.
    print(f"üîµ B·∫Øt ƒë·∫ßu t√¨m ki·∫øm kh√°ch s·∫°n thoe ƒë·ªãa ƒëi·ªÉm")
    if query.get('nearby_places') or query.get('province'):
        filter_location_hotels = rl.find_hotels_near_location(
            feature_location, query.get('nearby_places', []), query.get('province'), max_distance_km=20
        )
        result_location_hotels_score = rl.get_location_score(
            filter_location_hotels, query.get('nearby_places', []),
            query.get('province'), query.get('is_near_center', False)
        )
    else:
        # Copy each record so the module-level feature_location data is not
        # mutated by adding 'location_score' to it.
        result_location_hotels_score = [
            {**hotel, 'location_score': 0} for hotel in feature_location
        ]

    # Candidate ids in ranking order, plus a set for O(1) membership tests.
    ids_result = [hotel['id'] for hotel in result_location_hotels_score]
    ids_lookup = set(ids_result)

    print( f"üîµ ƒê√£ thu ƒë∆∞·ª£c danh s√°ch c√°c ID hotel th·ªèa ƒëi·ªÅu ki·ªán ƒë·ªãa l√Ω.")

    # ============================================================
    # 2. Star rating (restricted to the candidate hotels).
    feature_star = [row for row in feature_star_rating if row['id'] in ids_lookup]
    if query.get('stars_rating') not in [None, '', []]:
        result_stars_hotels_score = fgsp.get_score_rating(query, feature_star)
    else:
        # No star preference: every candidate gets the neutral score 1. The
        # filtered list is used so both branches cover the same hotels.
        result_stars_hotels_score = [
            {'id': row['id'], 'score_rating': 1} for row in feature_star
        ]
    # OUTPUT: [{'id': '10000593', 'score_rating': 0.5}, {'id': '10000594', 'score_rating': 0.8}, ...]
    # TODO: hotels with a rating score < 0.7 are meant to be dropped (not filtered yet).
    # NOTE(review): result_stars_hotels_score is not used in the final score yet.

    # ============================================================
    print( f"üîµ Xem nh∆∞ ƒë√£ thu ƒë∆∞·ª£c danh s√°ch c√°c ID hotel_room th·ªèa ƒëi·ªÅu ki·ªán lo·∫°i ph√≤ng v√† s·ªë l∆∞·ª£ng.")

    # Room records of the candidate hotels.
    detail_room = [room for room in feature_detail_room if room['id'] in ids_lookup]

    # 3. Price.
    # NOTE(review): the sample query carries 'price_range', not 'price', so this
    # lookup yields None for it and the neutral branch runs — confirm which key
    # fgsp.get_price_score expects before switching.
    price = query.get('price')
    if price not in [None, '', [], (None, None)]:
        result_room_price_score = fgsp.get_price_score(query, detail_room)
        score_price = fgsp.get_max_room_score_per_hotel(result_room_price_score)
    else:
        score_price = [{'id': room['id'], 'hotel_score_price': 1} for room in detail_room]
        # OUTPUT [{'id': '10000593', 'hotel_score_price': 0.9847}, ...]
        # TODO: compare the actual price with the requested one; keep cheaper
        # rooms, weigh more expensive ones.

    score_room = rr.get_room_info_score(query, detail_room)
    # OUTPUT : {'id': '10000593', 'room_id': 'RD1000059301', 'room_score': 0.6}
    # NOTE(review): score_price and score_room are not used in the final score yet.

    # ============================================================
    # 4. Policies.
    print( f" Ki·ªÉm tra ch√≠nh s√°ch ph√π h·ª£p.")
    filtered_policies = {
        hotel_id: feature_policies[hotel_id]
        for hotel_id in ids_result
        if hotel_id in feature_policies
    }

    score_policies = {}
    if query.get('policies'):
        for hotel_id, data in filtered_policies.items():
            score_policies[hotel_id] = rp.find_similar_hotel_policies(query['policies'], data)
            # OUTPUT: {'id':'score', ...}

    print( f" ƒê√£ t√¨m ki·∫øm c√°c ch√≠nh s√°ch ph√π h·ª£p v·ªõi y√™u c·∫ßu c·ªßa b·∫°n.")

    # =============================================================
    # 5. Service sub-ratings.
    services_query = query.get('services') or []
    amenities_query = query.get('amenities') or []
    services = set()

    print( f"üîµ B·∫Øt ƒë·∫ßu t√¨m ki·∫øm c√°c kh√°ch s·∫°n ƒë∆∞·ª£c ƒë√°nh gi√° cao v·ªÅ d·ªãch v·ª• c·ªßa b·∫°n.")
    if services_query:
        print(f"‚úé D·ªãch v·ª•: [bold #80CFFF]{', '.join(services_query)}[/bold #80CFFF].")
        services = _expand_query_groups(services_query, "services")
        result_services = fgr.get_score_sub_rate(
            services, utils.filter_matching_elements(ids_result, feature_sub_rate)
        )
    else:
        print(f"‚úé D·ªãch v·ª•: R·ªóng")
        print(f"‚úé D·ª±a v√†o c√°c ti·ªán √≠ch m√† b·∫°n y√™u c·∫ßu: [bold #80CFFF]{', '.join(amenities_query)}[/bold #80CFFF].")
        designated_utility = set()
        for amenity in amenities_query:
            designated_utility.update(fga.find_clusters([amenity], threshold=0.01))
        print(f"‚úé N√™n s·∫Ω ∆∞u ti√™n c√°c kh√°ch s·∫°n ƒë∆∞·ª£c ƒë√°nh cao v·ªõi c√°c ti√™u ch√≠ ƒë√£ ph·ª•c thu·ªôc c√°c ti·ªán √≠ch tr√™n.")
        if designated_utility:
            result_services = fgr.get_score_sub_rate(
                list(designated_utility),
                utils.filter_matching_elements(ids_result, feature_sub_rate)
            )
        else:
            # Nothing to weight the sub-ratings by; skip the scorer instead of
            # calling it with an empty utility list.
            result_services = []
    # Both branches now yield the same {id: score} mapping; the amenity branch
    # used to pass the raw list of rows on.
    score_sub_rate_services = {item['id']: item['score'] for item in result_services}
    # OUTPUT: {'id':'score', ...}

    print(" ƒê√£ t√¨m ki·∫øm c√°c kh√°ch s·∫°n ƒë∆∞·ª£c ƒë√°nh gi√° cao v·ªÅ d·ªãch v·ª• c·ªßa b·∫°n.")

    # ============================================================
    # 6. Service matching. Defaults to an empty mapping so the merge below no
    # longer raises NameError when the query has no services.
    # NOTE(review): assumes utils.compute_total_score accepts empty mappings — confirm.
    score_services = {}
    if services_query:
        print(f"üîµ B·∫Øt ƒë·∫ßu t√¨m ki·∫øm c√°c kh√°ch s·∫°n c√≥ d·ªãch v·ª•: [bold #80CFFF]{', '.join(services_query)}[/bold #80CFFF]")
        matched_services = fga.get_score_services(user_input = list(services),
                                                  List_ids = ids_result,
                                                  weights=[0.5, 0.5])
        score_services = {item['id']: item['final_score'] for item in matched_services}
        # OUTPUT: {'id':'score', ...} (this score is not normalised yet)

        print(f" ƒê√£ t√¨m ki·∫øm c√°c kh√°ch s·∫°n kh·ªõp d·ªãch v·ª• b·∫°n quan t√¢m.")

    # ============================================================
    # 7. Amenities. Same empty default as above, for queries without amenities.
    score_facilities = {}
    if amenities_query:
        print(f"üîµ B·∫Øt ƒë·∫ßu t√¨m ki·∫øm kh√°ch s·∫°n ch·ª©a c√°c ti·ªán nghi [bold #80CFFF]{', '.join(amenities_query)}[/bold #80CFFF]")
        result_facilities_hotel = recommend_faclities_hotel.predict_assignID(
            input_amenities= amenities_query,
            hotel_ids=ids_result,
            similarity_threshold=0.9,
            normalization_factor_base=10
        )
        result_facilities_room = recommend_faclities_room.predict_assignID(
            input_amenities= amenities_query,
            hotel_ids=ids_result,
            similarity_threshold=0.9,
            normalization_factor_base=10
        )

        score_facilities = fga.calculate_hotel_scores(result_facilities_hotel,
                                   result_facilities_room,
                                   threshold=0.9,
                                   weight=[0.5, 0.5])

        print(f" ƒê√£ t√¨m ki·∫øm c√°c kh√°ch s·∫°n c√≥ ti·ªán nghi b·∫°n quan t√¢m.")

    print(f"Danh s√°ch kh√°ch s·∫°n ƒë√£ t√¨m ki·∫øm ƒë∆∞·ª£c:")
    result_final_info_score = dict(utils.compute_total_score(score_facilities,
                                                             score_sub_rate_services,
                                                             score_services,
                                                             score_policies,
                                                             weights=(0.3, 0.3, 0.3, 0.1)))
    # OUTPUT : {'id':'score', ...}

    # ============================================================
    # Stage 2: customer reviews.
    print("============================================================")
    print("Giai ƒëo·∫°n 2:")
    print("Ki·ªÉm tra ph·∫£n h√≤i c·ªßa kh√°ch h√†ng")
    score_review_quality = hotel_review_stats.calculate_final_score_local_data(ids_result, print_warning=False)

    country = query.get('country')
    state = query.get('state')
    criteria = query.get('criteria')

    if not country and not state:
        # No traveller profile: Bayesian review score over the candidates' reviews.
        candidate_reviews = {
            hotel_id: result_groupby_id[hotel_id]
            for hotel_id in ids_result
            if hotel_id in result_groupby_id
        }
        if not criteria:
            bayesian = rhr.calculate_weighted_bayesian_score(
                result_groupby_id = candidate_reviews,
                C=100
            )
            review_threshold = 0.3
        else:
            print(f"üîµ T·∫≠p trung m·∫°nh v√†o kh√°ch s·∫°n c√≥ ti√™u ch√≠: [bold #80CFFF]{', '.join(criteria)}[/bold #80CFFF]")
            bayesian = rhr.calculate_weighted_bayesian_score(
                result_groupby_id = candidate_reviews,
                user_weights = _criteria_review_weights(criteria),
                C=100
            )
            review_threshold = 0.2
        score_review = dict(zip(bayesian['hotel_id'], bayesian['bayesian_score']))
    else:
        # Traveller profile available: use the collaborative-filtering models.
        print(f"üîµ C·∫£m ∆°n b·∫°n ƒë√£ cung c·∫•p th√¥ng tin v·ªÅ qu·ªëc gia: {country} v√† tr·∫°ng th√°i: {state}")
        result = CF.aggregate_prediction(hotel_ids = ids_result,
                                         country = country,
                                         status = state,
                                         state_model=state_model,
                                         country_model=country_model,
                                         print_result=False,
                                         print_warning=False,
                                         )
        if criteria:
            print(f"üîµ T·∫≠p trung m·∫°nh v√†o kh√°ch s·∫°n c√≥ ti√™u ch√≠: [bold #80CFFF]{', '.join(criteria)}[/bold #80CFFF]")
            weights = _criteria_review_weights(criteria)
        else:
            weights = rhr.allocate_weights_with_ratios()

        # Weighted mean of each hotel's predicted per-criterion values.
        weight_total = sum(weights.values())
        score_review = {
            hotel_id: sum(predicted[name] * weights[name] for name in predicted) / weight_total
            for hotel_id, predicted in result.items()
        }
        review_threshold = 0.2

    # Blend the review score with review quality; only the threshold differs
    # between the cases above.
    final_reviews = rhr.calculate_final_score_from_reviews(
        score_review,
        score_review_quality,
        w=0.8,
        q_default=0.5,
        s_global=0.7,
        threshold=review_threshold
    )
    result_final_review_score = dict(zip(final_reviews['id'], final_reviews['final_score']))
    # OUTPUT : {'id':'score', ...}

    # Combine the information score and the review score.
    result_final_score = utils.compute_total_score(
        result_final_info_score,
        result_final_review_score,
        weights=(0.7, 0.3)
    )

    return result_final_score

In [9]:
get_best_hotels(query)

[('4464775', 0.5354375230431713),
 ('3563537', 0.4442356053324338),
 ('2956757', 0.4334844944477081),
 ('6921137', 0.43210306904939877),
 ('4824897', 0.43202227132250176),
 ('2028376', 0.4115760876873565),
 ('6536748', 0.41047774126212344),
 ('5173273', 0.4093007479626751),
 ('3147128', 0.4022864566763647),
 ('4852870', 0.39707279918399274),
 ('3919497', 0.39273467721874117),
 ('2125472', 0.38932973522976827),
 ('4946994', 0.3861880995604653),
 ('3133832', 0.3786680902881274),
 ('3384511', 0.3747964093663718),
 ('8803524', 0.373585602929558),
 ('3249623', 0.3721161115500971),
 ('331758', 0.363682682264757),
 ('2967978', 0.36361446522073915),
 ('13191337', 0.36340379135706363),
 ('4822969', 0.3628400072934315),
 ('2812915', 0.3614096718160698),
 ('5937836', 0.35717180093625595),
 ('5051247', 0.35682178884744636),
 ('6120309', 0.3564530233462774),
 ('6588380', 0.3553599119854238),
 ('2648923', 0.35340445306897156),
 ('262297', 0.35224991873064854),
 ('4181231', 0.3516486028944006),
 ('54