In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
#Read the hotel listings CSV into a DataFrame and display it
file_path = '/content/hotels_in_nepali_city.csv'
hotels_df = pd.read_csv(filepath_or_buffer=file_path)
hotels_df

Unnamed: 0,city,name,address,latitude,longitude,rating,total_ratings,place_id,phone_number
0,Kathmandu,Nepal Pavilion Inn,"Amrit Marg, Thamel Post Box 6062, Kathmandu 44...",27.711529,85.312063,4.2,209,ChIJeUwGJHIZ6zkReIMr3o1MB_M,01-5320383
1,Kathmandu,Hotel Shanker Kathmandu,"Kathmandu 44600, Nepal",27.718895,85.319468,4.3,2904,ChIJw3uqaBsZ6zkRjS-DDyXqWyk,01-4510151
2,Kathmandu,Hotel Nepalaya,"Maitripura Mahavihara, Thamel Marg, Kathmandu ...",27.711982,85.311349,4.4,1093,ChIJIS1HLPwY6zkRfpeS9we8v-k,01-5369141
3,Kathmandu,Royal Empire Boutique Hotel,"Kathmandu 44600, Nepal",27.722913,85.328600,4.2,656,ChIJ0SM5UhEZ6zkRjWY16XUeu2E,01-4000542
4,Kathmandu,Ramada Encore by Wyndham Kathmandu Thamel,"P885+GFG, Z Street 1, Kathmandu 44600, Nepal",27.716730,85.309217,4.4,644,ChIJn8E2t5AZ6zkRMn-jYUOMKbY,01-4700404
...,...,...,...,...,...,...,...,...,...
597,Bharatpur,Hotel Everest Inn Chitwan,"CMS ROAD, Bharatpur Opposite of CMS Hospital, ...",27.684554,84.435130,2.9,30,ChIJWXvoxKP7lDkRl5ixxl0F4f0,985-5067464
598,Bharatpur,Hotel Image Palace,"Bharatpur 44200, Nepal",27.703148,84.431410,3.8,95,ChIJV-vAwE77lDkRA3dtgUYPuKo,056-530731
599,Bharatpur,Hotel Garden of Dreams,"Ratnanagar 44200, Nepal",27.577139,84.504164,4.7,26,ChIJ9RxXid7ulDkR7TmBDzWakrU,984-5049155
600,Bharatpur,Into The Wild Eco Resort - bharatpur 22 Patihani,"Bharatpur 44200, Nepal",27.569688,84.360463,4.7,60,ChIJ6x2PVyHxlDkRbALhwzMY2ts,984-1230163


In [3]:
#Preview the first five rows of the dataset
hotels_df.head(n=5)

Unnamed: 0,city,name,address,latitude,longitude,rating,total_ratings,place_id,phone_number
0,Kathmandu,Nepal Pavilion Inn,"Amrit Marg, Thamel Post Box 6062, Kathmandu 44...",27.711529,85.312063,4.2,209,ChIJeUwGJHIZ6zkReIMr3o1MB_M,01-5320383
1,Kathmandu,Hotel Shanker Kathmandu,"Kathmandu 44600, Nepal",27.718895,85.319468,4.3,2904,ChIJw3uqaBsZ6zkRjS-DDyXqWyk,01-4510151
2,Kathmandu,Hotel Nepalaya,"Maitripura Mahavihara, Thamel Marg, Kathmandu ...",27.711982,85.311349,4.4,1093,ChIJIS1HLPwY6zkRfpeS9we8v-k,01-5369141
3,Kathmandu,Royal Empire Boutique Hotel,"Kathmandu 44600, Nepal",27.722913,85.3286,4.2,656,ChIJ0SM5UhEZ6zkRjWY16XUeu2E,01-4000542
4,Kathmandu,Ramada Encore by Wyndham Kathmandu Thamel,"P885+GFG, Z Street 1, Kathmandu 44600, Nepal",27.71673,85.309217,4.4,644,ChIJn8E2t5AZ6zkRMn-jYUOMKbY,01-4700404


In [4]:
#Print each column's dtype and non-null count to spot missing values
hotels_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 602 entries, 0 to 601
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   city           602 non-null    object 
 1   name           602 non-null    object 
 2   address        602 non-null    object 
 3   latitude       602 non-null    float64
 4   longitude      602 non-null    float64
 5   rating         602 non-null    float64
 6   total_ratings  602 non-null    int64  
 7   place_id       602 non-null    object 
 8   phone_number   602 non-null    object 
dtypes: float64(3), int64(1), object(5)
memory usage: 42.5+ KB


In [5]:
#Show summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
hotels_df.describe()

Unnamed: 0,latitude,longitude,rating,total_ratings
count,602.0,602.0,602.0,602.0
mean,27.516576,84.936909,4.143189,255.594684
std,0.465876,1.289137,0.680312,430.367194
min,26.42708,81.233691,0.0,0.0
25%,27.499646,83.960603,3.9,40.0
50%,27.670401,85.301185,4.3,120.0
75%,27.699458,85.429137,4.5,297.0
max,28.454614,87.413285,5.0,4972.0


<h2>Content-Based Filtering</h2>

In [6]:
#Min-max scale the raw ratings and store them in a new column
scaler = MinMaxScaler()
scaler.fit(hotels_df[['rating']])
hotels_df['normalized_rating'] = scaler.transform(hotels_df[['rating']])

In [7]:
#Compress the skewed review counts with log(1 + x); log1p keeps a count of 0 at 0
hotels_df['log_total_ratings'] = hotels_df['total_ratings'].pipe(np.log1p)

In [8]:
#Preview the first five rows, now including the engineered feature columns
hotels_df.head(n=5)

Unnamed: 0,city,name,address,latitude,longitude,rating,total_ratings,place_id,phone_number,normalized_rating,log_total_ratings
0,Kathmandu,Nepal Pavilion Inn,"Amrit Marg, Thamel Post Box 6062, Kathmandu 44...",27.711529,85.312063,4.2,209,ChIJeUwGJHIZ6zkReIMr3o1MB_M,01-5320383,0.84,5.347108
1,Kathmandu,Hotel Shanker Kathmandu,"Kathmandu 44600, Nepal",27.718895,85.319468,4.3,2904,ChIJw3uqaBsZ6zkRjS-DDyXqWyk,01-4510151,0.86,7.974189
2,Kathmandu,Hotel Nepalaya,"Maitripura Mahavihara, Thamel Marg, Kathmandu ...",27.711982,85.311349,4.4,1093,ChIJIS1HLPwY6zkRfpeS9we8v-k,01-5369141,0.88,6.997596
3,Kathmandu,Royal Empire Boutique Hotel,"Kathmandu 44600, Nepal",27.722913,85.3286,4.2,656,ChIJ0SM5UhEZ6zkRjWY16XUeu2E,01-4000542,0.84,6.487684
4,Kathmandu,Ramada Encore by Wyndham Kathmandu Thamel,"P885+GFG, Z Street 1, Kathmandu 44600, Nepal",27.71673,85.309217,4.4,644,ChIJn8E2t5AZ6zkRMn-jYUOMKbY,01-4700404,0.88,6.46925


<h2>Collaborative Filtering</h2>

In [9]:
#Define a function to recommend hotels based on content-based filtering
def recommend_hotels_content_based(hotel_name, df, top_n=5):
    """Return the ``top_n`` hotels whose rating features are most similar to ``hotel_name``.

    Similarity is the cosine similarity between each hotel's
    ``(normalized_rating, log_total_ratings)`` vector and the query hotel's vector.

    Parameters
    ----------
    hotel_name : str
        Value looked up in ``df['name']``. If several rows share the name,
        the first matching row is used as the query.
    df : pandas.DataFrame
        Must contain the columns ``name``, ``city``, ``rating``,
        ``total_ratings``, ``normalized_rating`` and ``log_total_ratings``.
        Any index is accepted; rows are addressed by position internally.
    top_n : int, default 5
        Maximum number of recommendations; values <= 0 give an empty result.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``city``, ``rating``, ``total_ratings`` of the most
        similar hotels, most similar first. The query row is never included.

    Raises
    ------
    IndexError
        If no row of ``df`` has ``name == hotel_name``.
    """
    # Extract relevant features for similarity computation
    features = df[['normalized_rating', 'log_total_ratings']]

    # Locate the query row by *position*: the similarity scores and df.iloc are
    # positional, so using the index label would pick the wrong row whenever
    # df does not carry a default 0..n-1 index.
    matches = np.flatnonzero((df['name'] == hotel_name).to_numpy())
    if matches.size == 0:
        raise IndexError(f"hotel {hotel_name!r} not found in df['name']")
    query_pos = matches[0]

    # Only the query's row of the similarity matrix is needed, so compute a
    # 1 x N result instead of the full N x N matrix.
    sim_scores = cosine_similarity(features.iloc[[query_pos]], features)[0]

    # Rank by descending similarity; the stable sort keeps the original row
    # order among ties.
    ranked = np.argsort(-sim_scores, kind='stable')

    # Drop the query explicitly. It is not guaranteed to rank first, because
    # other hotels can tie with it at similarity 1.0.
    ranked = ranked[ranked != query_pos]

    #Return the top-n most similar hotels
    return df.iloc[ranked[:max(top_n, 0)]][['name', 'city', 'rating', 'total_ratings']]

In [10]:
#Example: Recommend hotels similar to "Hotel Shanker Kathmandu"
recommendations = recommend_hotels_content_based("Hotel Shanker Kathmandu", hotels_df)
recommendations

Unnamed: 0,name,city,rating,total_ratings
403,Hotel Manaki International,Janakpur,3.4,492
587,Hotel Red Karpet,Bharatpur,3.6,708
7,Hyatt Regency Kathmandu,Kathmandu,4.5,4972
419,"Pukar Hotel & Guest House, Lalgadh",Janakpur,3.3,556
33,"Hotel Shangri~La, Kathmandu",Kathmandu,4.2,1881


In [11]:
def recommend_hotels_item_based(hotel_name, df, top_n=5):
    """Return the ``top_n`` hotels most similar to ``hotel_name`` by location and ratings.

    Similarity is the cosine similarity between each hotel's raw
    ``(latitude, longitude, rating, total_ratings)`` vector and the query
    hotel's vector. Only hotel attributes are used; no per-user interaction
    data is involved.

    Parameters
    ----------
    hotel_name : str
        Value looked up in ``df['name']``. If several rows share the name,
        the first matching row is used as the query.
    df : pandas.DataFrame
        Must contain the columns ``name``, ``city``, ``latitude``,
        ``longitude``, ``rating`` and ``total_ratings``. Any index is
        accepted; rows are addressed by position internally.
    top_n : int, default 5
        Maximum number of recommendations; values <= 0 give an empty result.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``city``, ``rating``, ``total_ratings`` of the most
        similar hotels, most similar first. The query row is never included.

    Raises
    ------
    IndexError
        If no row of ``df`` has ``name == hotel_name``.
    """
    # Extract relevant features for similarity computation
    # NOTE(review): these columns are used unscaled, so columns with larger
    # magnitudes weigh more in the cosine similarity; consider standardising
    # them first. Behaviour is left unchanged here.
    features = df[['latitude', 'longitude', 'rating', 'total_ratings']]

    # Locate the query row by *position*: the similarity scores and df.iloc are
    # positional, so using the index label would pick the wrong row whenever
    # df does not carry a default 0..n-1 index.
    matches = np.flatnonzero((df['name'] == hotel_name).to_numpy())
    if matches.size == 0:
        raise IndexError(f"hotel {hotel_name!r} not found in df['name']")
    query_pos = matches[0]

    # Only the query's row of the similarity matrix is needed, so compute a
    # 1 x N result instead of the full N x N matrix.
    sim_scores = cosine_similarity(features.iloc[[query_pos]], features)[0]

    # Rank by descending similarity; the stable sort keeps the original row
    # order among ties.
    ranked = np.argsort(-sim_scores, kind='stable')

    # Drop the query explicitly. It is not guaranteed to rank first, because
    # other hotels can tie with it at similarity 1.0.
    ranked = ranked[ranked != query_pos]

    #Return the top-n most similar hotels
    return df.iloc[ranked[:max(top_n, 0)]][['name', 'city', 'rating', 'total_ratings']]

#Example: Recommend hotels similar to "Hotel Shanker Kathmandu" using item-based collaborative filtering

In [12]:
#Item-based recommendations for "Hotel Shanker Kathmandu"
item_based_recommendations = recommend_hotels_item_based(
    hotel_name="Hotel Shanker Kathmandu",
    df=hotels_df,
)
item_based_recommendations

Unnamed: 0,name,city,rating,total_ratings
180,Hotel Himalaya,Lalitpur,4.1,2869
10,Kathmandu Marriott Hotel,Kathmandu,4.6,2538
337,"Tiger Palace Resort, Bhairahawa",Lumbini,4.5,2357
517,"Tiger Palace Resort, Bhairahawa, Lumbini, Nepal",Butwal,4.5,2357
12,Radisson Hotel Kathmandu,Kathmandu,4.3,4229
