In [14]:
import re
import traceback
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns
from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize 
from sklearn.metrics import accuracy_score
import math
import os
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer
import sqlite3
from collections import Counter
import ast
from ast import literal_eval

In [15]:
class ContentRecommender:
    """Content-based recommender over the ``airbnb_data`` table of a SQLite file.

    Listings are compared by the TF-IDF representation of their amenity
    lists; the listings most similar to each requested listing are returned.
    """

    def __init__(self, database):
        """Store the location of the SQLite database.

        Args:
            database: Value handed unchanged to ``sqlite3.connect``.
        """
        self.database = database

    def query(self, sql):
        """Execute ``sql`` and return the complete result set.

        Args:
            sql: SQL statement to run against ``self.database``.

        Returns:
            A DataFrame with one column per result column, or ``None`` when
            connecting, executing or fetching fails. Failures are printed
            rather than raised.
        """
        # Bound before the try so the finally clause never touches an
        # undefined name when sqlite3.connect itself raises.
        conn = None
        try:
            conn = sqlite3.connect(self.database)
            cursor = conn.cursor()
            cursor.execute(sql)
            rows = cursor.fetchall()
            columns = [desc[0] for desc in cursor.description]
            return pd.DataFrame(rows, columns=columns)
        except Exception as e:
            print(f"An error occurred: {e}")
            return None
        finally:
            if conn is not None:
                conn.close()

    def get_data_hotel(self):
        """Load every listing and discard ambiguous ``listing_id`` values.

        Returns:
            DataFrame of the ``airbnb_data`` table in which every
            ``listing_id`` that occurred more than once has been removed
            entirely (``keep=False``). The index is a fresh 0..n-1 range so
            that index labels and row positions coincide.

        Raises:
            RuntimeError: If the table could not be read.
        """
        hotel_details = self.query('select * from airbnb_data')
        if hotel_details is None:
            raise RuntimeError(
                f"Could not load table 'airbnb_data' from {self.database!r}; "
                "see the error printed above."
            )

        # Data cleaning.
        # An earlier revision called dropna() here but threw the result away,
        # so rows with missing values were never removed. That effective
        # behaviour is kept on purpose.
        return (
            hotel_details
            .drop_duplicates(subset='listing_id', keep=False)
            # Without this, the surviving rows keep their old labels and any
            # label-as-position lookup downstream hits the wrong row.
            .reset_index(drop=True)
        )

    def recommend_by_amenities_topic(self, listing_ids, top_n=20):
        """Recommend listings whose amenities resemble the given listings.

        The ``amenities`` and ``Topics`` columns are parsed from their string
        form into Python objects. Only the amenities feed the similarity
        score; ``Topics`` is parsed so it is usable in the returned frame.

        Args:
            listing_ids: Iterable of ``listing_id`` values to find neighbours
                for. Unknown ids are reported with a printed message and
                skipped.
            top_n: Maximum number of neighbours returned per listing.

        Returns:
            DataFrame with the neighbours of each known listing, stacked in
            the order the ids were given and, within each id, from most to
            least similar. Empty (same columns) when no id was known.

        Raises:
            ValueError: If ``top_n`` is negative.
            RuntimeError: If the listings table could not be read.
        """
        if top_n < 0:
            raise ValueError("top_n must be non-negative")

        data = self.get_data_hotel()
        data['amenities'] = data['amenities'].apply(ast.literal_eval)
        data['Topics'] = data['Topics'].apply(ast.literal_eval)
        data['combined_features'] = data['amenities'].apply(' '.join)

        tfidf = TfidfVectorizer()
        tfidf_matrix = tfidf.fit_transform(data['combined_features'])

        # Map listing_id -> row position. Positions are generated explicitly
        # instead of reusing the frame's index labels, so the mapping always
        # agrees with the rows of tfidf_matrix and with data.iloc.
        positions = pd.Series(np.arange(len(data)), index=data['listing_id'])

        neighbour_frames = []
        for listing_id in listing_ids:
            if listing_id not in positions:
                print(f"Listing ID {listing_id} not found in the dataset.")
                continue

            pos = int(positions.loc[listing_id])
            # Score only the requested row against all rows; the full
            # N x N matrix is never needed. The slice keeps the operand 2-D.
            scores = linear_kernel(tfidf_matrix[pos:pos + 1], tfidf_matrix).ravel()
            # Stable sort: equal scores stay in ascending row order.
            ranked = np.argsort(-scores, kind='stable')
            # Remove the listing itself by position. Relying on it being
            # ranked first breaks when another listing ties with it.
            ranked = ranked[ranked != pos][:top_n]
            neighbour_frames.append(data.iloc[ranked])

        if not neighbour_frames:
            return data.iloc[0:0]
        return pd.concat(neighbour_frames)


In [16]:
# SQLite file that the recommender reads its listings from.
database = 'airbnb_data.db'
ct = ContentRecommender(database=database)

# Ask for neighbours of two seed listings; ids missing from the data are
# reported by the recommender and skipped.
recommendation_func = ct.recommend_by_amenities_topic(
    listing_ids=[115433430, 36860],
)
# Bare last expression so the notebook renders the resulting DataFrame.
recommendation_func

Unnamed: 0,city,listing_id,listing_url,name,description,picture_url,latitude,longitude,room_type,bathrooms,...,beds,amenities,price,minimum_nights,maximum_nights,review_scores_rating,Dominant Ids,Percent,Topics,combined_features
359,Washington DC,105439850,https://www.airbnb.com/rooms/10543985,Gorgeous 1 BD in Capitol Hill,Luxury 1 bedroom apartment is the perfect plac...,https://a0.muscache.com/pictures/63a88192-efab...,38.8874721,-76.999219,Entire home/apt,1.0,...,2.0,"[Microwave, Hair dryer, Essentials, Bed linens...",$235.00,31,1125,4.83,"[1, 2, 3]","[0.34529725, 0.32087782, 0.25665197]","[Nice host, Highly recommended / Comfortable, ...",Microwave Hair dryer Essentials Bed linens Pac...
360,Washington DC,105443630,https://www.airbnb.com/rooms/10544363,Newly renovated 1 BD near Capitol,You will love staying in this historic area. W...,https://a0.muscache.com/pictures/c7f9937e-fe13...,38.88745,-76.99925,Entire home/apt,1.0,...,2.0,"[Microwave, Hair dryer, Essentials, Bed linens...",$508.00,2,1125,4.87,"[3, 0, 2]","[0.32657593, 0.30299467, 0.2090249]","[Convenient location, Nice, clean room, Highly...",Microwave Hair dryer Essentials Bed linens Pac...
354,Washington DC,104055450,https://www.airbnb.com/rooms/10405545,Bright Sunny Capitol Hill Studio,This bright and modern studio is the perfect p...,https://a0.muscache.com/pictures/prohost-api/H...,38.8874605,-76.9992831,Hotel room,1.0,...,1.0,"[Microwave, Hair dryer, Essentials, Bed linens...",$167.00,2,365,4.9,"[1, 2, 3]","[0.29924107, 0.29694805, 0.24584079]","[Nice host, Highly recommended / Comfortable, ...",Microwave Hair dryer Essentials Bed linens Pac...
358,Washington DC,105437510,https://www.airbnb.com/rooms/10543751,Newly renovated Capitol Hill studio #302,Charming studio is the perfect place to stay d...,https://a0.muscache.com/pictures/prohost-api/H...,38.88759,-76.99931,Entire home/apt,1.0,...,1.0,"[Microwave, Hair dryer, Essentials, Bed linens...",$127.00,31,1125,4.96,"[0, 3, 1]","[0.8679537, 0.065138265, 0.034095332]","[Nice, clean room, Convenient location, Nice h...",Microwave Hair dryer Essentials Bed linens Pac...
891,Washington DC,228077150,https://www.airbnb.com/rooms/22807715,Luxury Apartment steps to The Dupont Circle Metro,This is true Dupont Circle living. Come home t...,https://a0.muscache.com/pictures/61de7d53-3953...,38.91045,-77.04478,Entire home/apt,,...,,"[Microwave, Hair dryer, Essentials, Bed linens...",,30,1125,4.76,"[0, 1, 3]","[0.73964745, 0.10667318, 0.07968985]","[Nice, clean room, Nice host, Convenient locat...",Microwave Hair dryer Essentials Bed linens Pac...
1553,Washington DC,429412550,https://www.airbnb.com/rooms/42941255,Sojourn the 13th Street Flats,"Bright, comfortable and modern describe this f...",https://a0.muscache.com/pictures/prohost-api/H...,38.91508,-77.02847,Entire home/apt,1.0,...,2.0,"[Microwave, Hair dryer, Essentials, Bed linens...",$232.00,3,365,4.76,"[3, 1, 2]","[0.40889564, 0.35782915, 0.15706165]","[Convenient location, Nice host, Highly recomm...",Microwave Hair dryer Essentials Bed linens Pac...
1645,Washington DC,456697350,https://www.airbnb.com/rooms/45669735,Sojourn at The Paden,A flawlessly renovated 1 BR in Capitol Hill. C...,https://a0.muscache.com/pictures/1d8666d0-ac98...,38.8815,-76.99547,Entire home/apt,,...,,"[Microwave, Hair dryer, Essentials, Bed linens...",,31,1125,5.0,"[0, 1, 2]","[0.5529721, 0.19121194, 0.155236]","[Nice, clean room, Nice host, Highly recommend...",Microwave Hair dryer Essentials Bed linens Pac...
1376,Washington DC,382162800,https://www.airbnb.com/rooms/38216280,Sojourn the 13th Street Flats,"Luminous, comfortable and modern describe this...",https://a0.muscache.com/pictures/prohost-api/H...,38.91497,-77.02888,Entire home/apt,,...,,"[Microwave, Hair dryer, Essentials, Bed linens...",,1,1125,4.77,"[3, 1, 0]","[0.4585229, 0.28954878, 0.1651748]","[Convenient location, Nice host, Nice, clean r...",Microwave Hair dryer Essentials Bed linens Pac...
1812,Washington DC,483835420,https://www.airbnb.com/rooms/48383542,Sojourn at The Paden | Capitol Hill | #B1,"Sojourn presents The Paden, turnkey luxury in ...",https://a0.muscache.com/pictures/1245a114-0bcc...,38.88067,-76.99645,Entire home/apt,2.0,...,3.0,"[Microwave, Hair dryer, Essentials, Bed linens...",$189.00,2,365,4.61,"[0, 3]","[0.9733049, 0.01241712]","[Nice, clean room, Convenient location]",Microwave Hair dryer Essentials Bed linens Pac...
1561,Washington DC,431867130,https://www.airbnb.com/rooms/43186713,Sojourn at The Paden | Capitol Hill | #102,"Sojourn presents to you, The Paden, a flawless...",https://a0.muscache.com/pictures/prohost-api/H...,38.88182,-76.99458,Entire home/apt,1.0,...,1.0,"[Microwave, Hair dryer, Essentials, Bed linens...",$171.00,2,365,4.75,"[1, 2, 0]","[0.32890236, 0.32033604, 0.22367185]","[Nice host, Highly recommended / Comfortable, ...",Microwave Hair dryer Essentials Bed linens Pac...
