In [1]:
import re
import traceback
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns
from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize 
from sklearn.metrics import accuracy_score
import math
import os
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer
import sqlite3
from collections import Counter
import ast
from ast import literal_eval

In [68]:
class FilterRecommender:
    def __init__(self,database):
        self.database=database
        pass

    def query(self,sql):
        try:
            conn = sqlite3.connect(self.database)
            cursor = conn.cursor()
            cursor.execute(sql)
            rows = cursor.fetchall()
            columns = [desc[0] for desc in cursor.description]
            df = pd.DataFrame(rows, columns=columns)
            return df
        except Exception as e:
            print(f"An error occurred: {e}")
        finally:
            conn.close()

    def get_data_hotel(self):
        hotel_details = self.query('select * from airbnb_data')
        df_hotel_details = pd.DataFrame(hotel_details)
        # Xử lý dữ liệu
        df_hotel_details.dropna()
        df_hotel_details.drop_duplicates(subset='listing_id', keep=False, inplace=True)
        data_hotel = pd.DataFrame(df_hotel_details)
        return data_hotel
    
    def city_based(self, city):
        data_hotel = self.get_data_hotel()
        data_hotel['city'] = data_hotel['city'].str.lower()
        # Lọc dữ liệu theo thành phố
        citybased = data_hotel[data_hotel['city'] == city.lower()]
        citybased = citybased.sort_values(by='review_scores_rating', ascending=False)
        # Loại bỏ các bản ghi trùng lặp
        citybased.drop_duplicates(subset='listing_id', keep='first', inplace=True)
        if not citybased.empty:
            hname = citybased[['listing_id','name', 'review_scores_rating', 'room_type', 'amenities','minimum_nights', 'listing_url']]
            return hname.head()
        else:
            print('No hotels available')

    def roomtype_based(self, roomtype):
        data_hotel = self.get_data_hotel()
        data_hotel['room_type'] = data_hotel['room_type'].str.lower()
        roomtypebased = data_hotel[data_hotel['room_type'] == roomtype.lower()]
        roomtypebased = roomtypebased.sort_values(by='review_scores_rating', ascending=False)
        roomtypebased.drop_duplicates(subset='listing_id', keep='first', inplace=True)
        if not roomtypebased.empty:
            hname = roomtypebased[['listing_id','name', 'review_scores_rating', 'room_type', 'amenities', 'minimum_nights','listing_url']]
            return hname.head()
        else:
            print('No hotels available')
            return pd.DataFrame()

    def pop_citybased(self, city, roomtype):
        data_hotel = self.get_data_hotel()

        data_hotel['city'] = data_hotel['city'].str.lower()
        data_hotel['room_type'] = data_hotel['room_type'].str.lower()

        popbased = data_hotel[data_hotel['city'] == city.lower()]
        popbased = popbased[popbased['room_type'] == roomtype.lower()].sort_values(by='review_scores_rating', ascending=False)

        popbased.drop_duplicates(subset='listing_id', keep='first', inplace=True)
        if not popbased.empty:
            hname = popbased[['listing_id','name', 'review_scores_rating', 'room_type', 'amenities','minimum_nights', 'listing_url']]
            return hname.head()
        else:
            print('No hotels available')
            return pd.DataFrame()
    def get_all_amenities(self):
            df_amenities = self.query('select amenities from airbnb_data')
            df_amenities['amenities'] = df_amenities['amenities'].apply(ast.literal_eval)
            result_amenities=pd.DataFrame(df_amenities)
            return result_amenities
    def amenities_based(self, amenities):
        data_hotel = self.get_data_hotel()
        has_amenities = data_hotel[data_hotel['amenities'].apply(lambda x: set(amenities).issubset(x))]

        if not has_amenities.empty:
            # Return the hotels that have all specified amenities
            return has_amenities[['listing_id','name', 'review_scores_rating', 'room_type', 'amenities', 'minimum_nights', 'listing_url']]
        else:
            # If no hotels have all specified amenities, print a message and return an empty DataFrame
            print('No hotels available with specified amenities')
            return pd.DataFrame()
    def price_range_based(self, min_price, max_price):
        # Ensure min_price is less than or equal to max_price
        if min_price > max_price:
            print('Error: min_price should be less than or equal to max_price')
            return pd.DataFrame()

        # Get hotel data
        data_hotel = self.get_data_hotel()
        # Remove '$' and convert 'price' column to numeric
        data_hotel['price'] = data_hotel['price'].replace('[\$,]', '', regex=True)
        data_hotel['price'] = pd.to_numeric(data_hotel['price'], errors='coerce')

        # Filter hotels within the price range
        price_filtered_hotels = data_hotel[(data_hotel['price'] >= min_price) & (data_hotel['price'] <= max_price)]
        # Sort filtered hotels by price
        price_filtered_hotels_sorted = price_filtered_hotels.sort_values(by='review_scores_rating')
        # Remove duplicates based on 'listing_id'
        price_filtered_hotels_sorted.drop_duplicates(subset='listing_id', keep='first', inplace=True)

        # Select relevant columns to return
        if not price_filtered_hotels_sorted.empty:
            hname = price_filtered_hotels_sorted[
                ['listing_id','name', 'review_scores_rating', 'room_type', 'amenities','minimum_nights', 'listing_url', 'price']]
            return hname
        else:
            print('No hotels available')
            return pd.DataFrame()
    def days_based(self, days):
        # Get hotel data
        data_hotel = self.get_data_hotel()
        data_hotel['minimum_nights'] = pd.to_numeric(data_hotel['minimum_nights'], errors='coerce')
        days_filtered_hotels = data_hotel[data_hotel['minimum_nights'] <= days]

        # Sort filtered hotels by price
        days_filtered_hotels_sorted = days_filtered_hotels.sort_values(by='review_scores_rating',ascending=False)
        # Remove duplicates based on 'listing_id'
        days_filtered_hotels_sorted.drop_duplicates(subset='listing_id', keep='first', inplace=True)

        # Select relevant columns to return
        if not days_filtered_hotels_sorted.empty:
            hname = days_filtered_hotels_sorted[
                ['listing_id','name', 'review_scores_rating', 'room_type', 'amenities','minimum_nights', 'listing_url', 'price']]
            return hname
        else:
            print('No hotels available')
            return pd.DataFrame()
        
    def distance(self, coord1, coord2):
        R = 6373.0  # Radius of Earth in kilometers
        lat1, lon1 = coord1
        lat2, lon2 = coord2
        lat1, lon1 = np.radians(lat1), np.radians(lon1)
        lat2, lon2 = np.radians(lat2), np.radians(lon2)
        dlon = lon2 - lon1
        dlat = lat2 - lat1

        a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
        c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

        dist = R * c / 1.6
        return dist

    def distance_based(self, city, max_distance):
        city_coords = {
            'New Jersey': (40.0583, -74.4057),
            'Washington DC': (38.9072, -77.0369),
            'Seattle': (47.6062, -122.3321),
            'Chicago': (41.8781, -87.6298),
            'Texas': (31.9686, -99.9018),
            'Boston': (42.3601, -71.0589),
            'San Diego': (32.7157, -117.1611),
            'Dallas': (32.7767, -96.7970)
        }

        if city not in city_coords:
            print(f"Coordinates for {city} not found.")
            return pd.DataFrame()

        target_coord = city_coords[city]
        data_hotel = self.get_data_hotel()

        # Convert latitude and longitude to numeric values, forcing errors to NaN
        data_hotel['latitude'] = pd.to_numeric(data_hotel['latitude'], errors='coerce')
        data_hotel['longitude'] = pd.to_numeric(data_hotel['longitude'], errors='coerce')

        # Drop rows with missing latitude or longitude
        data_hotel.dropna(subset=['latitude', 'longitude'], inplace=True)

        # Calculate the distance of each hotel to the target city coordinates
        data_hotel['distance'] = data_hotel.apply(
            lambda row: self.distance(target_coord, (row['latitude'], row['longitude'])),
            axis=1
        )

        # Filter hotels by the specified city and distance
        data_hotel_with_distances = data_hotel[data_hotel['city'].str.lower() == city.lower()]
        nearby_hotels = data_hotel_with_distances[data_hotel_with_distances['distance'] <= max_distance]
        nearby_hotels_sorted=nearby_hotels.sort_values(by='distance')
        if not nearby_hotels_sorted.empty:
            hname = nearby_hotels_sorted[
                ['listing_id','name', 'review_scores_rating', 'room_type', 'amenities','minimum_nights', 'listing_url', 'price','distance']]
            return hname
        else:
            print('No hotels available')
            return pd.DataFrame()
        
    def filter_by_all(self, city, roomtype, amenities, minprice, maxprice, days):
        # Convert inputs to lowercase for case-insensitive matching
        data_hotel=self.get_data_hotel()
        city = city.lower()
        roomtype = roomtype.lower()
        amenities = [amenity.lower() for amenity in amenities]
        data_hotel['city'] = data_hotel['city'].str.lower()
        data_hotel['room_type'] = data_hotel['room_type'].str.lower()
        
        data_hotel['price'] = data_hotel['price'].replace('[\$,]', '', regex=True)
        data_hotel['price'] = pd.to_numeric(data_hotel['price'], errors='coerce')
        data_hotel['minimum_nights'] = pd.to_numeric(data_hotel['minimum_nights'], errors='coerce')
        # Filter by price range

        price_filtered = data_hotel[(data_hotel['price'] >= minprice) & (data_hotel['price'] <= maxprice)]
        
        # Filter by the number of days
        days_filtered = price_filtered[price_filtered['minimum_nights'] <= days]
        
        # Filter by city
        city_filtered = days_filtered[days_filtered['city'].str.lower() == city]
        
        # Filter by room type
        roomtype_filtered = city_filtered[city_filtered['room_type'].str.lower() == roomtype]
        
        # Filter by amenities
        amenities_filtered = roomtype_filtered[roomtype_filtered['amenities'].apply(lambda x: set(amenities).issubset(x))]
        
        # Sort by review scores rating
        sorted_hotels = amenities_filtered.sort_values(by='review_scores_rating', ascending=False)
        
        # Remove duplicates based on listing_id
        unique_hotels = sorted_hotels.drop_duplicates(subset='listing_id', keep='first')
        
        # Return the filtered hotels
        if not unique_hotels.empty:
            return unique_hotels[['listing_id', 'city','name', 'review_scores_rating', 'room_type', 'amenities', 'minimum_nights', 'listing_url', 'price']]
        else:
            print('No hotels available')
            return pd.DataFrame()
        

In [76]:
database='airbnb_data.db'
fr = FilterRecommender(database)


In [47]:
result1=fr.city_based('chicago')
result1

Unnamed: 0,listing_id,name,review_scores_rating,room_type,amenities,minimum_nights,listing_url
2257,8841713,Private bath. 1/2 block pink train at 18th st.,5.0,Private room,"[""Books and reading material"", ""Microwave"", ""F...",32,https://www.airbnb.com/rooms/883501499698672837
2258,8841714,Master bedrm. 1/2 blk pink line. Parking.,5.0,Private room,"[""Microwave"", ""Fire extinguisher"", ""Iron"", ""Co...",32,https://www.airbnb.com/rooms/883545643482655632
2263,8841719,Private room south loop LC,5.0,Shared room,"[""First aid kit"", ""Paid parking on premises"", ...",2,https://www.airbnb.com/rooms/884035085276804686
2264,8841720,Modern Spacious Studio in Lakeview,5.0,Entire home/apt,"[""Fire pit"", ""Fire extinguisher"", ""Iron"", ""Cof...",32,https://www.airbnb.com/rooms/884049933896242006
2262,8841718,Logan Square Cozy Apartment - Games+Arcade - P...,4.88,Entire home/apt,"[""Free washer \u2013 In unit"", ""Coffee"", ""Smar...",2,https://www.airbnb.com/rooms/883904768479998447


In [48]:
result2=fr.roomtype_based('Private room')
result2.head(5)

Unnamed: 0,listing_id,name,review_scores_rating,room_type,amenities,minimum_nights,listing_url
2055,521307060,"Fabulous Private Bedroom !!!! Shaw, DC",5,private room,"[""Microwave"", ""Dining table"", ""Hair dryer"", ""B...",31,https://www.airbnb.com/rooms/52130706
2084,524989200,Penthouse condo 2BR/2.5BA w/ Rooftop & Parking,5,private room,"[""Dining table"", ""Hair dryer"", ""Clothing stora...",2,https://www.airbnb.com/rooms/52498920
1998,512141900,Cozy BR and Private Bath in Quiet TH,5,private room,"[""Wifi"", ""Kitchen"", ""Heating"", ""Smoke alarm"", ...",31,https://www.airbnb.com/rooms/51214190
685,183468490,"Season? decor, palm reading 4 2024, walk to metro",5,private room,"[""Microwave"", ""Dining table"", ""Hair dryer"", ""B...",31,https://www.airbnb.com/rooms/18346849
2012,514480420,"Private, Peaceful & Affordable Accommodation (A)",5,private room,"[""Microwave"", ""Dining table"", ""Hair dryer"", ""F...",31,https://www.airbnb.com/rooms/51448042


In [49]:
result3=fr.pop_citybased('washington dc','private room')
result3.head(5)       

Unnamed: 0,listing_id,name,review_scores_rating,room_type,amenities,minimum_nights,listing_url
1514,419929700,"Eaton DC, Cabin King",5,private room,"[""Hair dryer"", ""Bluetooth sound system"", ""Safe...",1,https://www.airbnb.com/rooms/41992970
1543,426332090,"Lovely 2 bd, 1 ba with first floor seating area",5,private room,"[""Essentials"", ""Bed linens"", ""First aid kit"", ...",2,https://www.airbnb.com/rooms/42633209
1608,445217510,"Stay b/w Dupont, Logan Cir, U St, CoHi, & AdMo!",5,private room,"[""Hair dryer"", ""Essentials"", ""Hot water"", ""Fir...",31,https://www.airbnb.com/rooms/44521751
1449,407129720,Private Suite & Roofdeck,5,private room,"[""Paid parking garage on premises \u2013 1 spa...",31,https://www.airbnb.com/rooms/40712972
1894,496712190,Room in 4BD apt across from Stadium Armory metro,5,private room,"[""Hair dryer"", ""Free street parking"", ""Essenti...",31,https://www.airbnb.com/rooms/49671219


In [50]:
city='Washington DC'  
max_distance = 20  
result4= fr.distance_based(city, max_distance)
result4.head(5)

Unnamed: 0,listing_id,name,review_scores_rating,room_type,amenities,minimum_nights,listing_url,price,distance
101,21154210,"Nice room for rent on 18th st, few mins to Dupont",4.67,Private room,"[""Carbon monoxide alarm"", ""Wifi"", ""Cleaning pr...",31,https://www.airbnb.com/rooms/2115421,$55.00,0.052184
1615,449980590,Sojourn on 15th - 2br/ba,4.75,Entire home/apt,"[""Microwave"", ""Hair dryer"", ""Essentials"", ""Bed...",31,https://www.airbnb.com/rooms/44998059,,0.08157
1689,464191320,Sojourn on 15th,4.81,Entire home/apt,"[""Microwave"", ""Hair dryer"", ""Essentials"", ""Bed...",2,https://www.airbnb.com/rooms/46419132,$332.00,0.092595
1690,464193420,Sojourn on 15th,4.33,Entire home/apt,"[""Microwave"", ""Hair dryer"", ""Essentials"", ""Bed...",31,https://www.airbnb.com/rooms/46419342,$98.00,0.152908
1303,362582810,"Blueground | Dupont Circle, nr Embassy Row",5.0,Entire home/apt,"[""Microwave"", ""Hair dryer"", ""Essentials"", ""Bed...",32,https://www.airbnb.com/rooms/36258281,$133.00,0.158668


In [51]:
result5=fr.days_based(5)
result5.head(5)

Unnamed: 0,listing_id,name,review_scores_rating,room_type,amenities,minimum_nights,listing_url,price
2301,8841783,Spanish Style Cozy Studio,5,Entire home/apt,"[""GE stainless steel electric stove"", ""HDTV wi...",1,https://www.airbnb.com/rooms/884327413218157644,$135.00
1599,443896770,"Private Basement Unit - Free Parking, Pet Frie...",5,Entire home/apt,"[""Microwave"", ""Hair dryer"", ""Blender"", ""Free s...",2,https://www.airbnb.com/rooms/44389677,$132.00
1622,451915100,Capitol Hill - Easy Metro Access & Free Parking,5,Entire home/apt,"[""Microwave"", ""Hair dryer"", ""Free street parki...",1,https://www.airbnb.com/rooms/45191510,$135.00
1099,299236480,Upscale 3 Bedroom Modern Georgetown Home,5,Entire home/apt,"[""Viking 36\"", 4-burner gas stove top with gri...",4,https://www.airbnb.com/rooms/29923648,$620.00
434,127589310,"Contemporary, Spacious 1-Bedroom",5,Entire home/apt,"[""Carbon monoxide alarm"", ""Fire extinguisher"",...",5,https://www.airbnb.com/rooms/12758931,$200.00


In [52]:
result6=fr.amenities_based("Free street parking")
result6.head(5)

Unnamed: 0,listing_id,name,review_scores_rating,room_type,amenities,minimum_nights,listing_url
2,36860,Vita's Hideaway,4.64,Private room,"[""Microwave"", ""Free street parking"", ""Essentia...",31,https://www.airbnb.com/rooms/3686
3,39430,Historic Rowhouse Near Monuments,4.83,Private room,"[""Microwave"", ""Dining table"", ""Hair dryer"", ""M...",1,https://www.airbnb.com/rooms/3943
4,41970,Capitol Hill Bedroom walk to Metro,4.86,Private room,"[""Microwave"", ""Hair dryer"", ""Free street parki...",7,https://www.airbnb.com/rooms/4197
5,55890,Cozy apt in Adams Morgan,4.5,Entire home/apt,"[""Microwave"", ""Hair dryer"", ""Free street parki...",31,https://www.airbnb.com/rooms/5589
6,61650,Private Bath & Laundry + Huge Lower Level Suite,4.95,Private room,"[""Microwave"", ""Hair dryer"", ""Free street parki...",31,https://www.airbnb.com/rooms/6165


In [53]:
result7=fr.price_range_based(50,100)
result7.head(5)

Unnamed: 0,listing_id,name,review_scores_rating,room_type,amenities,minimum_nights,listing_url,price
153,38220150,Open Style Shared 3 Walls & Divider,3.17,Shared room,"[""Hair dryer"", ""Free street parking"", ""Essenti...",1,https://www.airbnb.com/rooms/3822015,97.0
624,172409940,Capitol View Room,3.67,Private room,"[""Microwave"", ""Hair dryer"", ""Free street parki...",31,https://www.airbnb.com/rooms/17240994,55.0
1558,431240080,Queen Bedroom 2A in #179: Capitol Hill,4.0,Private room,"[""Patio or balcony"", ""Self check-in"", ""Carbon ...",90,https://www.airbnb.com/rooms/43124008,53.0
1743,471604360,Sojourn | Foggy Bottom | #4,4.0,Entire home/apt,"[""Microwave"", ""Hair dryer"", ""Essentials"", ""Bed...",31,https://www.airbnb.com/rooms/47160436,98.0
2095,526520800,Lovely 1 bedroom,4.0,Entire home/apt,"[""Essentials"", ""First aid kit"", ""Air condition...",31,https://www.airbnb.com/rooms/52652080,84.0


In [77]:
minprice = 30
maxprice = 50
city = "washington dc"
roomtype = "private room"
amenities = "Wifi"
days = 14

result8=fr.filter_by_all(city,roomtype,amenities,minprice,maxprice,days)
result8.head(5)

Unnamed: 0,listing_id,city,name,review_scores_rating,room_type,amenities,minimum_nights,listing_url,price
2176,536992420,washington dc,One-Room Getaway within Shared DC Urban Oasis,5.0,private room,"[""Microwave"", ""Dining table"", ""Free street par...",5,https://www.airbnb.com/rooms/53699242,44.0
1214,334327780,washington dc,"very quiet, clean and safe.",5.0,private room,"[""Hair dryer"", ""Essentials"", ""First aid kit"", ...",7,https://www.airbnb.com/rooms/33432778,50.0
1948,503333340,washington dc,Cozy room washing Dc near metro (9),4.94,private room,"[""Microwave"", ""Dining table"", ""Hair dryer"", ""F...",1,https://www.airbnb.com/rooms/50333334,44.0
393,116897500,washington dc,"Clean,Cozy, and Affordable-1",4.93,private room,"[""Microwave"", ""Dining table"", ""Free street par...",14,https://www.airbnb.com/rooms/11689750,49.0
764,201255500,washington dc,Private Room #2 15 mins to White House,4.92,private room,"[""Hair dryer"", ""Free street parking"", ""Private...",1,https://www.airbnb.com/rooms/20125550,31.0
