In [22]:
import numpy as np
import pandas as pd
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import warnings
import joblib
# Setup
warnings.filterwarnings('ignore')

In [2]:
# Load the hotels CSV into a DataFrame. The relative path is resolved against the
# kernel's current working directory, so the notebook must be run from its own folder.
hotels_data = pd.read_csv('../Data/hotels.csv')

In [3]:
# Remove the travelCode/userCode columns; they are not used when building the
# feature text. Rebinding (instead of inplace=True) together with errors='ignore'
# keeps this cell safe to re-run: a second execution no longer raises KeyError
# because the columns are already gone.
hotels_data = hotels_data.drop(columns=['travelCode', 'userCode'], errors='ignore')

In [4]:
# Build one text document per row: name, place, then days/price/total, each of the
# last three followed by its label word. Every column is cast to str first.
# Note: `sep` is inserted *before* the piece being appended, so each label word
# appears as the separator of the following .str.cat call.
hotels_data['features'] = (
    hotels_data['name'].astype(str)
    .str.cat(hotels_data['place'].astype(str), sep=' ')
    .str.cat(hotels_data['days'].astype(str), sep=' ')
    .str.cat(hotels_data['price'].astype(str), sep=' days ')
    .str.cat(hotels_data['total'].astype(str), sep=' price ')
    + ' total'
)

In [5]:
# Vectorize features: bag-of-words counts over the combined text column,
# capped at 5000 vocabulary terms with English stop words removed.
cv = CountVectorizer(stop_words='english', max_features=5000)
features_matrix = cv.fit_transform(hotels_data['features'])
# Convert the vectorizer's output to a dense NumPy array.
features_matrix = features_matrix.toarray()

In [6]:
# Cosine similarity
# Pairwise cosine similarity between all rows of features_matrix. Because
# features_matrix was built from hotels_data['features'] in row order, row/column i
# of cos_sim corresponds to the i-th row (by position) of hotels_data.
cos_sim = cosine_similarity(features_matrix)

In [7]:
# Relabel the name/place columns with the headers shown in the recommendation output.
# The frame is modified in place; columns missing from the mapping are left untouched.
hotels_data.rename({'name': 'Hotel_name', 'place': 'Places'}, axis='columns', inplace=True)

In [None]:
# Final unified recommendation function
def _first_unique_positions(ranked_positions, keys, excluded_key, limit):
    """Pick up to `limit` row positions, in rank order, one per distinct key.

    Rows whose key equals `excluded_key` are skipped, as are rows whose key has
    already been picked earlier in the ranking.
    """
    picked = []
    if limit <= 0:
        return picked
    # Seeding `seen` with the excluded key filters it out in the same pass.
    seen = {excluded_key}
    for position in ranked_positions:
        key = keys[position]
        if key in seen:
            continue
        seen.add(key)
        picked.append(int(position))
        if len(picked) >= limit:
            break
    return picked


def recommend_hotels(input_value, num_recommendations=5):
    """Recommend hotels similar to a given hotel name or place.

    The input is matched exactly (after stripping surrounding whitespace), first
    against the ``Hotel_name`` column and then against the ``Places`` column of the
    module-level ``hotels_data`` frame. Similarities come from the module-level
    ``cos_sim`` matrix, whose rows/columns are aligned *by position* with the rows
    of ``hotels_data``.

    * Hotel name: ranks all rows by similarity to the first row with that name and
      returns the best row for each *other* distinct hotel name.
    * Place: ranks all rows by their mean similarity to every row of that place
      and returns the best row for each *other* distinct place.

    Ties in similarity keep the original row order.

    Parameters
    ----------
    input_value : str
        Hotel name or place to look up. Non-string values are converted with
        ``str()`` before matching.
    num_recommendations : int, default 5
        Maximum number of rows to return. Values <= 0 yield an empty frame.

    Returns
    -------
    pandas.DataFrame or str
        A frame with columns ``Hotel_name`` and ``Places`` (at most
        ``num_recommendations`` rows), or a message string when the input matches
        neither a hotel name nor a place.
    """
    input_value = str(input_value).strip()

    hotel_names = hotels_data['Hotel_name']
    places = hotels_data['Places']

    # Work with row *positions* throughout: cos_sim is a plain array aligned with
    # the frame's row order, so index labels must never be used to address it.
    name_hits = np.flatnonzero(
        (hotel_names == input_value).to_numpy(dtype=bool, na_value=False)
    )

    if name_hits.size:
        # Case 1: Input is a Hotel Name
        print(f"\nYou entered a Hotel Name: '{input_value}'\n")
        scores = cos_sim[name_hits[0]]
        keys = hotel_names.to_numpy()
    else:
        place_hits = np.flatnonzero(
            (places == input_value).to_numpy(dtype=bool, na_value=False)
        )
        if not place_hits.size:
            return f" No hotel or place found for: '{input_value}'"

        # Case 2: Input is a Place
        print(f"\n You entered a Place: '{input_value}'\n")
        scores = np.mean(cos_sim[place_hits], axis=0)
        keys = places.to_numpy()

    # Stable sort on the negated scores == descending order with ties kept in
    # original row order (same ordering as sorted(..., reverse=True)).
    ranked_positions = np.argsort(-np.asarray(scores), kind='stable')

    top_positions = _first_unique_positions(
        ranked_positions, keys, input_value, num_recommendations
    )
    return hotels_data.iloc[top_positions][['Hotel_name', 'Places']]

In [20]:
# Hotel-based recommendation: look up by exact hotel name and print up to five matches
print(recommend_hotels(input_value="Hotel K", num_recommendations=5))


🔍 You entered a Hotel Name: 'Hotel K'

  Hotel_name              Places
0    Hotel A  Florianopolis (SC)
6    Hotel Z        Aracaju (SE)
8   Hotel AF      Sao Paulo (SP)
5   Hotel BD          Natal (RN)
7   Hotel AU         Recife (PE)


In [21]:
# Place-based recommendation: look up by exact place string and print up to five matches
print(recommend_hotels(input_value="Salvador (BH)", num_recommendations=5))


🌍 You entered a Place: 'Salvador (BH)'

   Hotel_name              Places
48   Hotel BD          Natal (RN)
0     Hotel A  Florianopolis (SC)
6     Hotel Z        Aracaju (SE)
8    Hotel AF      Sao Paulo (SP)
7    Hotel AU         Recife (PE)
