In [2]:
import os

import pandas as pd
import numpy as np
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Ensure you have downloaded the necessary NLTK data
# nltk.download('punkt')
# nltk.download('stopwords')
# nltk.download('wordnet')

# Location of the hotel CSV. Set the HOTEL_INFO_CSV environment variable to
# point at your own copy of the data; when it is unset, the original local
# path below is used, so existing behaviour is unchanged.
HOTEL_DATA_PATH = os.environ.get(
    'HOTEL_INFO_CSV',
    r'C:\Users\itw\Downloads\all csv\hotel_info.csv',
)

# Hotel table that recommender() reads as a module-level global.
df = pd.read_csv(HOTEL_DATA_PATH)

def preprocess_facilities(facilities):
    """Normalise facility words into a set of lowercase lemmas.

    Parameters
    ----------
    facilities : iterable of str
        Individual facility words (already tokenized).

    Returns
    -------
    set of str
        Each word lowercased and passed through ``WordNetLemmatizer.lemmatize``;
        duplicates collapse because the result is a set.
    """
    lemmatizer = WordNetLemmatizer()
    normalised = set()
    for word in facilities:
        normalised.add(lemmatizer.lemmatize(word.lower()))
    return normalised

def _facility_tokens(text):
    """Return the set of lowercased, lemmatized tokens found in ``text``."""
    # Missing CSV cells arrive as NaN/None rather than str; treat them as
    # "no facilities listed" instead of letting word_tokenize raise.
    if not isinstance(text, str):
        return set()
    return preprocess_facilities(word_tokenize(text.lower()))


def recommender(location, price, facilities):
    """Recommend up to ten hotels in a city, within budget, offering the requested facilities.

    Reads the module-level ``df`` (columns used: ``hotel_name``, ``hotel_link``,
    ``hotel_type``, ``review_score``, ``hotel_price``, ``hotel_city``,
    ``facilities``) without modifying it.

    Parameters
    ----------
    location : str
        City name; compared case-insensitively with ``hotel_city``.
    price : number
        Upper bound (inclusive) on ``hotel_price``.
    facilities : str or iterable of str
        Required facilities, either as one free-text string or as a collection
        of strings. Every string is tokenized, lowercased and lemmatized;
        non-string entries are ignored. An empty requirement places no
        restriction on facilities.

    Returns
    -------
    pandas.DataFrame
        At most ten rows, best ``review_score`` first, one row per
        ``hotel_name``, containing only hotels whose facility tokens include
        every requested token. ``hotel_city`` and ``facilities`` are returned
        lowercased. The index is each hotel's rank among all hotels matching
        the location and price filters (so it may have gaps).
    """
    # Normalise the request into a set of tokens. Each entry is tokenized so
    # that a multi-word item such as 'free parking' can match the hotel's
    # single-word tokens.
    requested = [facilities] if isinstance(facilities, str) else list(facilities)
    f_set = set()
    for item in requested:
        f_set |= _facility_tokens(item)

    # Lowercase on a copy: assigning back into the global df would silently
    # change the shared table for every later cell.
    hotels = df.assign(
        hotel_city=df['hotel_city'].str.lower(),
        facilities=df['facilities'].str.lower(),
    )

    # Filter by location and price, best-reviewed first. A stable sort keeps
    # ties in their original order, making the ranking deterministic.
    in_scope = (hotels['hotel_city'] == location.lower()) & (hotels['hotel_price'] <= price)
    candidates = (
        hotels[in_scope]
        .sort_values(by='review_score', ascending=False, kind='stable')
        .reset_index(drop=True)
    )

    # Keep hotels that offer every requested facility. Hotel text goes through
    # the same tokenize/lemmatize pipeline as the request so both sides compare
    # like with like. astype(bool) keeps the mask a boolean indexer even when
    # there are no candidates.
    has_all = candidates['facilities'].map(
        lambda text: f_set.issubset(_facility_tokens(text))
    ).astype(bool)

    # Every surviving hotel satisfies the full requirement, so the review-score
    # order is the ranking; drop_duplicates keeps the best-reviewed duplicate.
    matches = candidates[has_all].drop_duplicates(subset=['hotel_name'])

    return matches[['hotel_name', 'hotel_link', 'hotel_type', 'review_score', 'hotel_price', 'hotel_city', 'facilities']].head(10)

# Test the function with list input


In [3]:
facilities_list = ['parking']
facilities = 'spa wi-fi smoke-free'
# Store the result under its own name: rebinding `df` here would replace the
# hotel table that recommender() reads, so re-running this cell would search
# only the previous results.
recommendations = recommender('Islamabad', 23000, facilities_list)
recommendations

Unnamed: 0,hotel_name,hotel_link,hotel_type,review_score,hotel_price,hotel_city,facilities
0,Avari Xpress Residences islamabad,https://www.google.com/maps/place/Avari+Xpress...,Hotel,4.6,22900,islamabad,wifi free breakfast free parking air-condit...
2,Grand Islamabad Hotel,https://www.google.com/maps/place/Grand+Islama...,4-star hotel,4.4,18700,islamabad,free wifi free breakfast free parking air-c...
3,margalla hotel islamabad,https://www.google.com/maps/place/The+Margala/...,3-star hotel,4.2,22100,islamabad,free wifi free breakfast free parking air-...
