In [2]:
import numpy as np
import pandas as pd
import plotly.express as px
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse.linalg import svds
import json
from itertools import chain
from rich import print


In [3]:
with open("../../dataset\\info_hotel\\feature_sub_rate.json", 'r', encoding='utf-8') as f:
    feature_sub_rate = json.load(f)

with open("../../dataset\\sub_model\\weights_criteria_utilities.json", 'r', encoding='utf-8') as f:
    weights_criteria_utilities = json.load(f)

with open("../../dataset\\info_hotel\\feature_popular_facilities.json", 'r', encoding='utf-8') as f:
    feature_popular_facilities = json.load(f)
    
with open("../../dataset\\info_hotel\\feature_facilities.json", 'r', encoding='utf-8') as f:
    feature_facilities = json.load(f)

with open('../../dataset\\info_hotel\\feature_url.json', 'r', encoding='utf-8') as f:
    feature_url = json.load(f)

with open("../../dataset\\info_hotel\\feature_allRoom.json", 'r', encoding='utf-8') as f:
    feature_allRoom = json.load(f)

with open("../../dataset\\info_hotel\\feature_location.json", 'r', encoding='utf-8') as f:
    feature_location = json.load(f)

with open("../../dataset\\info_hotel\\feature_policies.json", 'r', encoding='utf-8') as f:
    feature_policies = json.load(f)


In [3]:
print(len(feature_sub_rate))
print(len(feature_popular_facilities))
print(len(feature_facilities))
print(len(feature_url))
print(len(feature_allRoom))


In [3]:
def convert_amenity(value):
    amenities = {
        1: "Ti·ªán √≠ch ph√≤ng",
        2: "Ti·ªán √≠ch c√¥ng ngh·ªá & k·∫øt n·ªëi",
        3: "Ti·ªán √≠ch ph√≤ng t·∫Øm",
        4: "D·ªãch v·ª• ·∫©m th·ª±c",
        5: "Ti·ªán √≠ch th∆∞ gi√£n & gi·∫£i tr√≠",
        6: "Ti·ªán √≠ch d√†nh cho tr·∫ª em",
        7: "D·ªãch v·ª• kh√°ch s·∫°n",
        8: "D·ªãch v·ª• b·∫£o m·∫≠t & an to√†n",
        9: "D·ªãch v·ª• t·ªï ch·ª©c s·ª± ki·ªán",
        10: "Ti·ªán nghi h·ªó tr·ª£ ƒë·∫∑c bi·ªát",
        11: "D·ªãch v·ª• gi·∫£i tr√≠ ngo√†i tr·ªùi",
        12: "D·ªãch v·ª• b√£i ƒë·ªó xe",
        13: "Ti·ªán √≠ch thi√™n nhi√™n",
        14: "H·ªá th·ªëng ƒëi·ªÅu h√≤a & s∆∞·ªüi ·∫•m",
        15: "C∆° s·ªü v·∫≠t ch·∫•t chung",
        16: "D·ªãch v·ª• chƒÉm s√≥c s·ª©c kh·ªèe & spa",
        17: "D·ªãch v·ª• ƒë∆∞a ƒë√≥n & ph∆∞∆°ng ti·ªán di chuy·ªÉn",
        18: "Ti·ªán √≠ch c√¥ng vi·ªác"
    }
    
    if isinstance(value, int):
        return amenities.get(value, "Kh√¥ng t√¨m th·∫•y")
    elif isinstance(value, str):
        return next((k for k, v in amenities.items() if v == value), "Kh√¥ng t√¨m th·∫•y")
    return "ƒê·ªãnh d·∫°ng kh√¥ng h·ª£p l·ªá"

In [4]:
import sys
import os

# ƒê∆∞·ªùng d·∫´n ƒë·∫øn th∆∞ m·ª•c mini_model
mini_model_path = os.path.abspath(os.path.join(os.getcwd(), '..'))

# L·∫•y danh s√°ch t·∫•t c·∫£ th∆∞ m·ª•c con trong mini_model
for folder in os.listdir(mini_model_path):
    folder_path = os.path.join(mini_model_path, folder)
    # Ch·ªâ th√™m n·∫øu l√† th∆∞ m·ª•c v√† kh√¥ng ph·∫£i file
    if os.path.isdir(folder_path):
        if folder_path not in sys.path:
            sys.path.append(folder_path)

# Import module
# import get_score_sub_rate as gssr
# import get_score_amenities as gsa
# import json

In [5]:
import location_recommend 
import policies_recommend

In [7]:
best_hotels[0]

{'province/ city': 'Ninh B√¨nh',
 'nearby_places': [{'title': 'Xung quanh c√≥ g√¨?',
   'detail': {'Cuc Phuong National Park': '0 m'}},
  {'title': 'Nh√† h√†ng & qu√°n c√† ph√™',
   'detail': {'Cafe/qu√°n bar ‚ùÇ Karaoke ƒê√¥n Ph∆∞∆°ng': '12 km',
    'Nh√† h√†ng ‚ùÇ Nh√† H√†ng Qu√Ω Hi·ªáp': '13 km',
    'Nh√† h√†ng ‚ùÇ Nh√† H√†ng Ho√†ng Giang - Ninh B√¨nh': '21 km'}},
  {'title': 'C√°c s√¢n bay g·∫ßn nh·∫•t', 'detail': {'S√¢n bay Th·ªç Xu√¢n': '61 km'}},
  {'title': 'C√°c trung t√¢m g·∫ßn nh·∫•t',
   'detail': {'Th√†nh ph·ªë Ninh B√¨nh, Ninh B√¨nh': 18.27,
    'Th√†nh ph·ªë Tam ƒêi·ªáp, Ninh B√¨nh': 10.4}},
  {'title': 'C√°c b·∫øn xe kh√°ch & b·∫øn c·∫£ng g·∫ßn nh·∫•t',
   'detail': {'B·∫øn xe kh√°ch Ninh B√¨nh': 18.31,
    'B·∫øn xe Nho Quan': 14.53,
    'B·∫øn xe Kim S∆°n': 29.62,
    'B·∫øn xe Tam ƒêi·ªáp': 4.83}},
  {'title': 'C√°c trung t√¢m g·∫ßn nh·∫•t',
   'detail': {'Th√†nh ph·ªë Ninh B√¨nh, Ninh B√¨nh': 18.27,
    'Th√†nh ph·ªë Tam ƒêi·ªáp, Ninh B√¨nh': 10.4}},
  {'title': 'C

In [8]:
# Input v√≠ d·ª•
user_places = []
province = "Ninh B√¨nh"
is_near_center = True


best_hotels = location_recommend.find_hotels_near_location(feature_location, user_places, location = province, max_distance_km=20)
# Th√™m ƒëi·ªÉm v√† x·∫øp h·∫°ng
ranked_hotels =  location_recommend.get_location_score(best_hotels, user_places, province, is_near_center)

# In k·∫øt qu·∫£
for hotel in ranked_hotels[:5]:  # Top 5 kh√°ch s·∫°n
    print('Score:', hotel['location_score'])

In [6]:
user_input = {
    # Input ƒë·ªãa ƒëi·ªÉm
    'province': 'TP H·ªì Ch√≠ Minh',
    'district': None,
    'nearby_places': ["Ch·ª£ B·∫øn Th√†nh", "Dinh ƒê·ªôc L·∫≠p", "Nh√† th·ªù ƒê·ª©c B√†"], # Bao g·ªìm ƒë·ªãa ƒëi·ªÉm tham quan + ƒë·ªãa ƒëi·ªÉm di chuy·ªÉn
    'is_near_center': True,
    # Input th√¥ng tin t·ªïng qu√°t
    'stars_rating': 3,
    'price': (800000, 2000000),
    'rating': 8,
    'sub_rate': {
        "staff": 9.0,
        "facilities": 8.3,
        "cleanliness": 8.7,
        "comfort": 8.7,
        "value": 8.4,
        "location": 8.4,
        "free WiFi": 8.3
    },
    # Input th√¥ng tin v·ªÅ c∆° s·ªü v·∫≠t ch·∫•t v√† ti·ªán nghi
    'services': [],  # 1 trong 18 lo·∫°i d·ªãch v·ª• ƒë∆∞·ª£c gi·ªõi thi·ªáu tr∆∞·ªõc
    'amenities': ["TV m√†n h√¨nh ph·∫≥ng, k√©t an to√†n"],
    # Input th√¥ng tin v·ªÅ ph√≤ng
    'capacity': 2,
    'room_type': 'Family',
    'area': 50,
    'bed_type': '',
    'included_breakfast': True,
    'room_amenties':[],
    'room_facilities':[],
    'room_view': [],

    # Input th√¥ng tin v·ªÅ ch√≠nh s√°ch
    'policies': {
        "Nh·∫≠n ph√≤ng": '11:00-12:00',
        "Tr·∫£ ph√≤ng": '12:00-13:00',
        "Ch·ªâ thanh to√°n b·∫±ng ti·ªÅn m·∫∑t": None,
        "C√°c ph∆∞∆°ng th·ª©c thanh to√°n ƒë∆∞·ª£c ch·∫•p nh·∫≠n": 'Bankcard',
        "Gi·ªõi h·∫°n ƒë·ªô tu·ªïi": None,
        "Gi·ªù gi·ªõi nghi√™m": 'C·ªïng v√†o ch·ªó ngh·ªâ s·∫Ω ƒë√≥ng trong kho·∫£ng 00:00-5:00',
        "H√∫t thu·ªëc": 'Kh√¥ng cho ph√©p h√∫t thu·ªëc.',
        "H·ªßy ƒë·∫∑t ph√≤ng/ Tr·∫£ tr∆∞·ªõc": 'C√°c ch√≠nh s√°ch h·ªßy v√† thanh to√°n tr∆∞·ªõc',
        "Kh√¥ng gi·ªõi h·∫°n ƒë·ªô tu·ªïi": 'Kh√¥ng c√≥ y√™u c·∫ßu v·ªÅ ƒë·ªô tu·ªïi',
        "Nh√≥m": None,
        "Th·∫ª ƒë∆∞·ª£c ch·∫•p nh·∫≠n t·∫°i ch·ªó ngh·ªâ n√†y": None,
        "Th·∫ª ƒë∆∞·ª£c ch·∫•p nh·∫≠n t·∫°i kh√°ch s·∫°n n√†y": 'Bankcard',
        "Th·ªùi gian y√™n l·∫∑ng": None,
        "Ti·ªác t√πng": 'Cho ph√©p t·ªï ch·ª©c',
        "Tr·∫ª em v√† gi∆∞·ªùng": 'Tr·∫ª em d∆∞·ªõi 5 tu·ªïi',
        "V·∫≠t nu√¥i": 'Cho ph√©p mang v·∫≠t nu√¥i',
        "ƒê·∫∑t c·ªçc ƒë·ªÅ ph√≤ng h∆∞ h·∫°i c√≥ th·ªÉ ho√†n l·∫°i": 'Y√™u c·∫ßu VND 1.000.000 ti·ªÅn ƒë·∫∑t c·ªçc ƒë·ªÅ ph√≤ng h∆∞ h·∫°i khi ƒë·∫øn ngh·ªâ.',
        'room_service_included': 'Kh√¥ng c·∫ßn thanh to√°n tr∆∞·ªõc - thanh to√°n t·∫°i ch·ªó ngh·ªâ\nKh√¥ng c·∫ßn th·∫ª t√≠n d·ª•ng'
    }

    # Input th√¥ng tin quan t√¢m v·ªÅ review

}

In [7]:
# 1. Filter ƒë·ªãa ƒëi·ªÉm
if user_input.get('nearby_places') or user_input.get('province'):
    filter_location_hotels = location_recommend.find_hotels_near_location(
        feature_location, user_input.get('nearby_places', []), user_input.get('province'), max_distance_km=20
    )
    result_location_hotels_score = location_recommend.get_location_score(
        filter_location_hotels, user_input.get('nearby_places', []),
        user_input.get('province'), user_input.get('is_near_center', False)
    )
else:
    result_location_hotels_score = feature_location
    for hotel in result_location_hotels_score:
        hotel['location_score'] = 0

In [8]:
len(result_location_hotels_score)

890

In [None]:
policies_recommend

In [9]:
ids_result = []
for id in result_location_hotels_score:
    ids_result.append(id['id'])

In [10]:
ids_result

['257543',
 '264198',
 '10128837',
 '294771',
 '2597342',
 '11971962',
 '264986',
 '1312425',
 '10012516',
 '10052199',
 '10059536',
 '10059845',
 '10112773',
 '10121408',
 '10175818',
 '10230604',
 '10257028',
 '10257358',
 '10322238',
 '10335019',
 '10367451',
 '10367890',
 '10391254',
 '10410565',
 '10424009',
 '10474624',
 '10505705',
 '10527088',
 '10533390',
 '10577278',
 '10581778',
 '10600192',
 '10618677',
 '10636046',
 '1063721',
 '10668578',
 '10734309',
 '10868832',
 '1087399',
 '10874751',
 '10915959',
 '10941335',
 '10967940',
 '10973516',
 '10978493',
 '1097973',
 '10980611',
 '1102388',
 '11058407',
 '11061532',
 '1108901',
 '11099373',
 '11108037',
 '11110404',
 '11161251',
 '11186552',
 '11192005',
 '11192379',
 '11203515',
 '11205316',
 '11205740',
 '11212692',
 '11219033',
 '11223889',
 '11239396',
 '11242837',
 '11257100',
 '11259332',
 '11265827',
 '11278843',
 '11279232',
 '11280666',
 '11284666',
 '11286914',
 '11303700',
 '11320708',
 '11334623',
 '11336786',
 

In [11]:
filtered_policies = {}
for id in ids_result:
    if id in feature_policies:
        filtered_policies[id] = feature_policies[id]
    else:
        print(f"ID {id} kh√¥ng c√≥ trong feature_policies")

In [12]:
result = {}# 4. Ch√≠nh s√°ch
if user_input.get('policies'):
    for id, data in filtered_policies.items():
        result[id] = policies_recommend.find_similar_hotel_policies(user_input['policies'], data)


In [13]:
result

{'257543': 0.17857142857142858,
 '264198': 0.03571428571428571,
 '10128837': 0.03571428571428571,
 '294771': 0.03571428571428571,
 '2597342': 0.03571428571428571,
 '11971962': 0.050400251966700664,
 '264986': 0.03571428571428571,
 '1312425': 0.03571428571428571,
 '10012516': 0.03571428571428571,
 '10052199': 0.03571428571428571,
 '10059536': 0.33611453768098637,
 '10059845': 0.03571428571428571,
 '10112773': 0.05040025196670068,
 '10121408': 0.014685966252414964,
 '10175818': 0.19325739482384355,
 '10230604': 0.22897168053812925,
 '10257028': 0.05040025196670068,
 '10257358': 0.014685966252414964,
 '10322238': 0.050400251966700664,
 '10335019': 0.07142857142857142,
 '10367451': 0.014685966252414953,
 '10367890': 0.014685966252414953,
 '10391254': 0.014685966252414953,
 '10410565': 0.05040025196670068,
 '10424009': 0.014685966252414953,
 '10474624': 0.014685966252414953,
 '10505705': 0.014685966252414953,
 '10527088': 0.014685966252414953,
 '10533390': 0.014685966252414953,
 '10577278':

In [14]:
def filter_matching_elements(ids_A, B):
    try:
        return [item for item in B if item['id'] in ids_A]  # L·ªçc c√°c ph·∫ßn t·ª≠ B c√≥ id trong A
    except:
        return [item for item in B if item['id_room'] in ids_A]

In [4]:
query = {
    'location': ['Th√†nh ph·ªë H·ªì Ch√≠ Minh'],
    'nearby_places': [],
    'price_range': (300000, 1000000),  
    'rating': (3, 5),
    'services': ["D·ªãch v·ª• t·ªï ch·ª©c s·ª± ki·ªán"], 
    'amenities': ["TV m√†n h√¨nh ph·∫≥ng, k√©t an to√†n"],
    'room_type': ['ph√≤ng ƒë∆°n', 'ph√≤ng ƒë√¥i'], # t·∫°m ch∆∞a x·ª≠ l√Ω
    'booking_flexibility': ['h·ªßy mi·ªÖn ph√≠', 'thanh to√°n khi nh·∫≠n ph√≤ng'], # t·∫°m ch∆∞a x·ª≠ l√Ω
    'distance_to_city_center': 5,
    'public_transport_access': ['ga t√†u'], # ƒë√£ x·ª≠ l√Ω 50%, 
    'capacity': 2, 
}


In [7]:
designated_utility = []
for amenity in query['amenities']:
    cluster = gsa.find_clusters(amenity, threshold=0.01)
    print(f"Ti·ªán nghi: [bold #80CFFF]{amenity}[/bold #80CFFF] => D·ªãch v·ª•: [bold #80CFFF]{' v√† '.join(cluster)}[/bold #80CFFF]")
    designated_utility.append(cluster)

In [7]:
def processing_query(*results, weights):
    """
    T·ªïng h·ª£p v√† chu·∫©n h√≥a ƒëi·ªÉm s·ªë t·ª´ nhi·ªÅu nh√≥m k·∫øt qu·∫£ v·ªõi tr·ªçng s·ªë t∆∞∆°ng ·ª©ng.

    Parameters:
    - results: Danh s√°ch kh√¥ng gi·ªõi h·∫°n s·ªë l∆∞·ª£ng k·∫øt qu·∫£ [{ 'id': str, 'score': float ho·∫∑c tuple }]
    - weights: Danh s√°ch tr·ªçng s·ªë t∆∞∆°ng ·ª©ng

    Returns:
    - Danh s√°ch k·∫øt qu·∫£ cu·ªëi c√πng [{'id': str, 'final_score': float}], s·∫Øp x·∫øp gi·∫£m d·∫ßn theo ƒëi·ªÉm
    """
    if len(results) != len(weights):
        raise ValueError("S·ªë l∆∞·ª£ng k·∫øt qu·∫£ v√† tr·ªçng s·ªë ph·∫£i b·∫±ng nhau!")

    all_ids = set()
    normalized_scores = []
    extracted_ids = []

    # Tr√≠ch xu·∫•t d·ªØ li·ªáu & chu·∫©n h√≥a ƒëi·ªÉm s·ªë cho t·ª´ng nh√≥m
    for result in results:
        ids, scores = gsa.extract_scores(result)
        extracted_ids.append(ids)
        normalized_scores.append(gsa.normalize_scores(scores))
        all_ids.update(ids)

    # T√≠nh t·ªïng ƒëi·ªÉm c√≥ tr·ªçng s·ªë
    final_scores = {id_: 0 for id_ in all_ids}

    for i, ids in enumerate(extracted_ids):
        for idx, id_ in enumerate(ids):
            final_scores[id_] += normalized_scores[i][idx] * weights[i]

    # Chuy·ªÉn k·∫øt qu·∫£ th√†nh danh s√°ch v√† s·∫Øp x·∫øp theo ƒëi·ªÉm gi·∫£m d·∫ßn
    final_results = [{'id': k, 'score': v} for k, v in final_scores.items()]
    final_results.sort(key=lambda x: x['score'], reverse=True)

    return final_results

In [8]:
recommend_faclities_hotel = gsa.HotelSimilarityRecommender(    
    model_name='paraphrase-multilingual-MiniLM-L12-v2',
    use_gpu=True,
    model_dir="D:\\graduate_dissertation\\final\\mini_model\\function_get_score_amenities\\model_similarity_amenities\\hotel",
    type='hotel',
    batch_size=256,
    faiss_metric='IP'
)
recommend_faclities_hotel.load_model()

recommend_faclities_room = gsa.HotelSimilarityRecommender(
    model_name='paraphrase-multilingual-MiniLM-L12-v2',
    use_gpu=True,
    model_dir="D:\\graduate_dissertation\\final\\mini_model\\function_get_score_amenities\\model_similarity_amenities\\room",
    type='room',
    batch_size=256,
    faiss_metric='IP'
)
recommend_faclities_room.load_model()



Loading Sentence-BERT model from D:\graduate_dissertation\final\mini_model\function_get_score_amenities\model_similarity_amenities\hotel\model.safetensors
Loaded model on CPU
Loaded model on CPU
Loaded hotel data from D:\graduate_dissertation\final\mini_model\function_get_score_amenities\model_similarity_amenities\hotel\hotel_data.pkl
Loaded vectors from D:\graduate_dissertation\final\mini_model\function_get_score_amenities\model_similarity_amenities\hotel\hotel_vectors.npy
Loaded Faiss index from D:\graduate_dissertation\final\mini_model\function_get_score_amenities\model_similarity_amenities\hotel\faiss_index.bin
Loading Sentence-BERT model from D:\graduate_dissertation\final\mini_model\function_get_score_amenities\model_similarity_amenities\room\model.safetensors
Loaded model on CPU
Loaded model on CPU
Loaded hotel data from D:\graduate_dissertation\final\mini_model\function_get_score_amenities\model_similarity_amenities\room\hotel_data.pkl
Loaded vectors from D:\graduate_dissertati

In [26]:
def compute_total_score(source1, source2, source3, weights=(1/3, 1/3, 1/3)):
    from collections import defaultdict

    def normalize_minmax(values_dict):
        values = list(values_dict.values())
        min_v, max_v = min(values), max(values)
        if max_v == min_v:
            return {k: 0.0 for k in values_dict}  # tr√°nh chia 0
        return {k: (v - min_v) / (max_v - min_v) for k, v in values_dict.items()}

    # B∆∞·ªõc 1: Chuy·ªÉn c√°c source v·ªÅ dict {id: score}
    def to_dict_source(source, key='score'):
        if isinstance(source, dict):
            return source
        elif isinstance(source, list):
            return {item['id']: item.get(key, 0.0) for item in source}
        else:
            raise ValueError("Unsupported data format")

    s1_dict = to_dict_source(source1, key='score')
    s2_dict = to_dict_source(source2, key='final_score')
    s3_dict = to_dict_source(source3)

    # B∆∞·ªõc 2: Chu·∫©n h√≥a t·ª´ng lo·∫°i score
    s1_norm = normalize_minmax(s1_dict)
    s2_norm = normalize_minmax(s2_dict)
    s3_norm = normalize_minmax(s3_dict)

    # B∆∞·ªõc 3: G·ªôp ƒëi·ªÉm theo id, t√≠nh ƒëi·ªÉm t·ªïng
    total_scores = defaultdict(float)
    all_ids = set(s1_norm) | set(s2_norm) | set(s3_norm)
    w1, w2, w3 = weights

    for id_ in all_ids:
        total = (
            s1_norm.get(id_, 0.0) * w1 +
            s2_norm.get(id_, 0.0) * w2 +
            s3_norm.get(id_, 0.0) * w3
        )
        total_scores[id_] = total

    # S·∫Øp x·∫øp theo ƒëi·ªÉm gi·∫£m d·∫ßn
    sorted_scores = sorted(total_scores.items(), key=lambda x: x[1], reverse=True)
    return sorted_scores


In [10]:

def get_best_hotels(query): 
    """    
    from geopy.geocoders import Nominatim
    from geopy.distance import geodesic
    import numpy as np

    max_distance_km = 18

    if query['location'] and query['distance_to_city_center']:
        max_distance_km = query['distance_to_city_center']
        print( f"üîµ Th·ª±c hi·ªán t√¨m ki·∫øm kh√°ch s·∫°n c√°ch trung t√¢m th√†nh ph·ªë {', '.join(query['location'])} {max_distance_km} km ")
        result_location = address_nearplaces_recommendation.find_hotels_near_location(places = query['location'], max_distance_km = max_distance_km + 2)
    else:
        result_location = data_list

    if query['nearby_places']:
        print( f"üîµ Th·ª±c hi·ªán t√¨m ki·∫øm kh√°ch s·∫°n g·∫ßn {', '.join(query['nearby_places'])}")
        result_location = address_nearplaces_recommendation.find_hotels_near_location(data_list = result_location, places = query['nearby_places'], max_distance_km = 20)

    ids_resulf = {item['id'] for item in result_location}  # T·∫°o t·∫≠p h·ª£p ch·ª©a c√°c id t·ª´ A
    
    if query['public_transport_access']:
        print( f"üîµ Th·ª±c hi·ªán l·ªçc kh√°ch s·∫°n g·∫ßn {', '.join(query['public_transport_access'])}")
    result_location = filter_hotels_by_distance(filter_matching_elements(ids_resulf, feature_address_nearplaces), 10)
    ids_resulf = {item['id'] for item in result_location} 
    print( f"‚úÖ Ho√†n th√†nh thu th·∫≠p kh√°ch s·∫°n th·ªèa v·ªã tr√≠")

    del geodesic
    del Nominatim"""

    # =====================================================================
    print( f"üîµ Xem nh∆∞ ƒë√£ thu ƒë∆∞·ª£c danh s√°ch c√°c ID hotel th·ªèa ƒëi·ªÅu ki·ªán ƒë·ªãa l√Ω.")
    print( f"üîµ Xem nh∆∞ ƒë√£ thu ƒë∆∞·ª£c danh s√°ch c√°c ID hotel_room th·ªèa ƒëi·ªÅu ki·ªán lo·∫°i ph√≤ng v√† s·ªë l∆∞·ª£ng.")
    print( f"üîµ Xem nh∆∞ ƒë√£ Th·ª±c hi·ªán t√≠nh to√°n c√°c kh√°ch s·∫°n ph√π h·ª£p v·ªõi ph√¢n kh√∫c gi√°")
    # result_price = get_score_price(filter_matching_elements(ids_resulf, feature_allRoom), 
    #                                 (float(query['price_range'][1]) - float(query['price_range'][0])) / 2)
    
    print( f"üîµ B·∫Øt ƒë·∫ßu t√¨m ki·∫øm c√°c kh√°ch s·∫°n ƒë∆∞·ª£c ƒë√°nh gi√° cao v·ªÅ d·ªãch v·ª• c·ªßa b·∫°n.") 
    if query['services']:
        print(f"‚úé D·ªãch v·ª•: [bold #80CFFF]{', '.join(query['services'])}[/bold #80CFFF].")
        result_services = gssr.get_score_sub_rate(query['services'], 
                                                  filter_matching_elements(ids_resulf, feature_sub_rate))
        # OUTPUT: [{'id': '1', 'score': 0.8}, {'id': '2', 'score': 0.7}, ...]

    else:
        print(f"‚úé D·ªãch v·ª•: R·ªóng")
        print(f"‚úé D·ª±a v√†o c√°c ti·ªán √≠ch m√† b·∫°n y√™u c·∫ßu: [bold #80CFFF]{', '.join(query['amenities'])}[/bold #80CFFF].")
        designated_utility = []
        for amenity in query['amenities']:
            cluster = gsa.find_clusters(amenity, threshold=0.01)
            print(f"Ti·ªán nghi: [bold #80CFFF]{amenity}[/bold #80CFFF] => D·ªãch v·ª•: [bold #80CFFF]{' v√† '.join(cluster)}[/bold #80CFFF]")
            designated_utility.append(cluster)
        designated_utility = list(set(chain(*designated_utility)))
        print(f"‚úé N√™n s·∫Ω ∆∞u ti√™n c√°c kh√°ch s·∫°n ƒë∆∞·ª£c ƒë√°nh cao v·ªõi c√°c ti√™u ch√≠ ƒë√£ ph·ª•c thu·ªôc c√°c ti·ªán √≠ch tr√™n.")
        result_services = gssr.get_score_sub_rate(designated_utility, 
                                                  filter_matching_elements(ids_resulf, feature_sub_rate))
        # OUTPUT: [{'id': '1', 'score': 0.8}, {'id': '2', 'score': 0.7}, ...]
    
    print(" ƒê√£ t√¨m ki·∫øm c√°c kh√°ch s·∫°n ƒë∆∞·ª£c ƒë√°nh gi√° cao v·ªÅ d·ªãch v·ª• c·ªßa b·∫°n.")

    if query['services']:
        print(f"üîµ B·∫Øt ƒë·∫ßu t√¨m ki·∫øm c√°c kh√°ch s·∫°n c√≥ d·ªãch v·ª•: [bold #80CFFF]{', '.join(query['services'])}[/bold #80CFFF]")
        score_services = gsa.get_score_services(user_input = query['services'], 
                                    List_ids = ids_resulf, 
                                    weights=[0.5, 0.5])
        
        print(f" ƒê√£ t√¨m ki·∫øm c√°c kh√°ch s·∫°n kh·ªõp d·ªãch v·ª• b·∫°n quan t√¢m.")

    if query['amenities']:
        print(f"üîµ B·∫Øt ƒë·∫ßu t√¨m ki·∫øm kh√°ch s·∫°n ch·ª©a c√°c ti·ªán nghi [bold #80CFFF]{', '.join(query['amenities'])}[/bold #80CFFF]")
        result_facilities_hotel = recommend_faclities_hotel.predict_assignID(
            input_amenities= query['amenities'],
            hotel_ids=ids_resulf,
            similarity_threshold=0.9, 
            normalization_factor_base=10
        )
        result_facilities_room = recommend_faclities_room.predict_assignID(
            input_amenities= query['amenities'],
            hotel_ids=ids_resulf,
            similarity_threshold=0.9, 
            normalization_factor_base=10
        )

        facilities = gsa.calculate_hotel_scores(result_facilities_hotel, 
                                   result_facilities_room, 
                                   threshold=0.9, 
                                   weight=[0.5, 0.5])
        
        print(f" ƒê√£ t√¨m ki·∫øm c√°c kh√°ch s·∫°n c√≥ ti·ªán nghi b·∫°n quan t√¢m.")

    print(f"Danh s√°ch kh√°ch s·∫°n ƒë√£ t√¨m ki·∫øm ƒë∆∞·ª£c:")
    result = compute_total_score(facilities, result_services, score_services, weights=(0.33, 0.33, 0.33))
    return result

In [22]:
result = get_best_hotels(query)

C·∫£nh b√°o: room '10000593' kh√¥ng c√≥ trong d·ªØ li·ªáu, tr·∫£ v·ªÅ k·∫øt qu·∫£ r·ªóng
C·∫£nh b√°o: room '10000937' kh√¥ng c√≥ trong d·ªØ li·ªáu, tr·∫£ v·ªÅ k·∫øt qu·∫£ r·ªóng
C·∫£nh b√°o: room '10001167' kh√¥ng c√≥ trong d·ªØ li·ªáu, tr·∫£ v·ªÅ k·∫øt qu·∫£ r·ªóng
C·∫£nh b√°o: room '10001434' kh√¥ng c√≥ trong d·ªØ li·ªáu, tr·∫£ v·ªÅ k·∫øt qu·∫£ r·ªóng
C·∫£nh b√°o: room '10001679' kh√¥ng c√≥ trong d·ªØ li·ªáu, tr·∫£ v·ªÅ k·∫øt qu·∫£ r·ªóng
C·∫£nh b√°o: room '10003086' kh√¥ng c√≥ trong d·ªØ li·ªáu, tr·∫£ v·ªÅ k·∫øt qu·∫£ r·ªóng
C·∫£nh b√°o: room '10003813' kh√¥ng c√≥ trong d·ªØ li·ªáu, tr·∫£ v·ªÅ k·∫øt qu·∫£ r·ªóng
C·∫£nh b√°o: room '10004054' kh√¥ng c√≥ trong d·ªØ li·ªáu, tr·∫£ v·ªÅ k·∫øt qu·∫£ r·ªóng
C·∫£nh b√°o: room '10006504' kh√¥ng c√≥ trong d·ªØ li·ªáu, tr·∫£ v·ªÅ k·∫øt qu·∫£ r·ªóng
C·∫£nh b√°o: room '10006885' kh√¥ng c√≥ trong d·ªØ li·ªáu, tr·∫£ v·ªÅ k·∫øt qu·∫£ r·ªóng
C·∫£nh b√°o: room '10006969' kh√¥ng c√≥ trong d·ªØ li·ªáu, tr·∫£ v·ªÅ k·∫øt qu·∫£ r·ªóng
C·∫£nh b√°o: room '10

In [27]:
sorted_scores = sorted(result.items(), key=lambda x: x[1], reverse=True)

In [28]:
sorted_scores

[('10034269', 0.33),
 ('1005432', 0.32783257991013676),
 ('10086551', 0.32655797017267485),
 ('10012325', 0.31211705032255627),
 ('10031751', 0.3107297347404215),
 ('10073188', 0.3072295513648391),
 ('10012226', 0.30518077251620257),
 ('10006885', 0.3044690251036159),
 ('10039464', 0.29674270600204466),
 ('10014032', 0.29623663008352386),
 ('10045249', 0.28736624662374954),
 ('10067823', 0.28728495169791746),
 ('1002711', 0.28603224957582535),
 ('10031734', 0.2846866484941894),
 ('1005271', 0.28400271074860184),
 ('10003813', 0.2793582594566764),
 ('10072786', 0.279232886029302),
 ('1001739', 0.2751487329916616),
 ('10006969', 0.2721001399280332),
 ('1003155', 0.2694922333860609),
 ('1007578', 0.265231252214012),
 ('10053483', 0.2640223653226597),
 ('10018040', 0.26392736563287467),
 ('10052842', 0.2635114543900922),
 ('10020191', 0.2610461974349572),
 ('1002738', 0.26066239735442914),
 ('10014903', 0.25977292149447373),
 ('10051917', 0.2576168053340023),
 ('10004054', 0.25735301363660