# INDENG 243 Project : Airbnb Recommendation System - Module 2

In [1]:
import pandas as pd
import numpy as np

In [4]:
df = pd.read_csv('Airbnb_cleaned(1).csv')
df.head()

Unnamed: 0,url,name,stars,pricing/rate/amount,address,city,country,roomTypeCategory,bedroomLabel,bedLabel,...,Patio or balcony,Pool,Private entrance,Refrigerator,Shampoo,Smoke alarm,Stove,TV,Washer,Wifi
0,https://www.airbnb.com/rooms/10007690,Period Home in Dublin 9,5.0,495,"Dublin 9, County Dublin, Ireland",Dublin 9,Ireland,entire_home,3.0,5.0,...,0,0,1,1,1,1,1,1,1,1
1,https://www.airbnb.com/rooms/10035766,"Apartment in the Heart of Dublin ,Best Location",4.54,541,"Dublin, Ireland",Dublin,Ireland,entire_home,1.0,2.0,...,0,0,0,1,1,1,0,1,1,1
2,https://www.airbnb.com/rooms/10044794,"Entire Apt in Tung Chung, near Airport, Great ...",4.88,113,"Hong Kong, New Territories, Hong Kong",Hong Kong,Hong Kong,entire_home,1.0,1.0,...,1,0,0,1,0,0,1,1,1,1
3,https://www.airbnb.com/rooms/10052652,New luxury Puerta del Sol,4.94,338,"Madrid, Comunidad de Madrid, Spain",Madrid,Spain,entire_home,3.0,5.0,...,0,0,0,1,1,1,0,1,1,1
4,https://www.airbnb.com/rooms/10055942,Villa Colors Of South Beach.,4.92,1269,"Miami Beach, Florida, United States",Miami Beach,United States,entire_home,5.0,9.0,...,1,1,1,1,1,1,1,1,1,1


In [5]:
df['guestControls/allowsEvents'] = df['guestControls/allowsEvents'].astype('int')
df['guestControls/allowsPets'] = df['guestControls/allowsPets'].astype('int')

## Popularity Model: Recommend 10 airbnb listing

In [6]:
df['Total_amenities'] = df.iloc[:, -40:].sum(axis=1)
df['Price_per_guest'] = df['pricing/rate/amount'] / df['numberOfGuests']

mean_price = df['Price_per_guest'].mean()
std_price = df['Price_per_guest'].std()
df['Adjusted_price_per_guest'] = (df['Price_per_guest'] - mean_price) / std_price

df = df.drop(columns = df.columns[16:56])

In [7]:
weights = {
    'stars': 5,
    'Adjusted_price_per_guest': 1,
    'roomTypeCategory': {'entire_home': 3, 'hotel_room': 3, 'Private_room':3, 'shared_room': 1},
    'guestControls/allowsEvents': 1,
    'guestControls/allowsPets': 2,
    'Total_amenities': 5
}

df['weighted_score'] = (
    df['stars'] * weights['stars'] +
    df['Adjusted_price_per_guest'] * weights['Adjusted_price_per_guest'] +
    df['roomTypeCategory'].map(weights['roomTypeCategory']) +
    df['guestControls/allowsEvents'] * weights['guestControls/allowsEvents'] +
    df['guestControls/allowsPets'] * weights['guestControls/allowsPets'] +
    df['Total_amenities'] * weights['Total_amenities']
)

In [8]:
# # def recommend_listings_popularity(city, country, room_type, number_of_bedrooms, number_of_beds, number_of_bathrooms, number_of_guests, max_nights, min_nights, price_lower, price_upper, star):
#     # filtered_data = df[(df['city'] == city) & (df['country'] == country) & (df['roomTypeCategory'] == room_type) & (df['bedroomLabel'] >= number_of_bedrooms)
#     #                 & (df['bedLabel'] >= number_of_beds) & (df['bathroomLabel'] >= number_of_bathrooms) & (df['numberOfGuests'] >= number_of_guests)
#     #                 & (df['minNights'] <= min_nights) & (df['maxNights'] >= max_nights) & (df['pricing/rate/amount'] >= price_lower)
#     #                 & (df['pricing/rate/amount'] <= price_upper) & (df['stars'] >= star)]
#     # top_10_listings = filtered_data.nlargest(10, 'weighted_score')
# def recommend_listings_popularity(user_input):
#     top_10_listings = df.nlargest(10, 'weighted_score')
#     return top_10_listings

# user_input = {
#     'stars': 4.5,
#     'city': 'Los Angeles',
#     'country': 'United States',
#     'roomTypeCategory': 'entire_home',
#     'bedroomLabel': 2,
#     'bedLabel': 2,
#     'bathroomLabel': 1,
#     'minNights': 2,
#     'maxNights': 10,
#     'numberOfGuests': 4
# }

# # recommend_listings_popularity('Los Angeles', 'United States', 'entire_home', 2, 3.0, 2.0, 4, 4, 800, 1000, 4.5)
# recommended_pop = recommend_listings_popularity(user_input)
# recommended_pop

In [9]:
from sklearn.metrics.pairwise import cosine_similarity

class PopularityRecommender:

    MODEL_NAME = 'Popularity'

    def __init__(self, df):
        self.df = df

    def get_model_name(self):
        return self.MODEL_NAME

    def recommend_items(self, user_input, num_recommendations=10):
        # Recommend the more popular items that the user hasn't seen yet.

        recommendations_df = self.df.nlargest(num_recommendations, 'weighted_score')

        return recommendations_df

user_input = {
    'stars': 4.5,
    'lowest_price': 0,
    'highest_price': 10000,
    'city': 'Los Angeles',
    'country': 'United States',
    'roomTypeCategory': 'entire_home',
    'bedroomLabel': 2,
    'bedLabel': 2,
    'bathroomLabel': 1,
    'minNights': 2,
    'maxNights': 10,
    'numberOfGuests': 4
}

filtered_data = df[(df['city'] == user_input['city']) & (df['country'] == user_input['country'])
                   & (df['roomTypeCategory'] == user_input['roomTypeCategory'])
                   & (df['bedroomLabel'] >= user_input['bedroomLabel'])
                    & (df['bedLabel'] >= user_input['bedLabel'])
                   & (df['bathroomLabel'] >= user_input['bathroomLabel'])
                   & (df['numberOfGuests'] >= user_input['numberOfGuests'])
                   & (df['minNights'] <= user_input['minNights'])
                   & (df['maxNights'] >= user_input['maxNights'])
                   & (df['pricing/rate/amount'] >= user_input['lowest_price'])
                    & (df['pricing/rate/amount'] <= user_input['highest_price'])
                   & (df['stars'] >= user_input['stars'])]

popularity_model = PopularityRecommender(filtered_data)

recommended_pop = popularity_model.recommend_items(user_input, num_recommendations=10)

item_popularity = np.array(recommended_pop['weighted_score']).reshape(1, -1)
cosine_sim = cosine_similarity(item_popularity, item_popularity)

largest_similarity_pop = np.max(cosine_sim)
average_similarity_pop = np.mean(cosine_sim)
print(f"Average similarity to user preferences: {average_similarity_pop}")
print(f"Largest similarity to input listing: {largest_similarity_pop}")

Average similarity to user preferences: 0.9999999999999999
Largest similarity to input listing: 0.9999999999999999


In [10]:
recommended_pop

Unnamed: 0,url,name,stars,pricing/rate/amount,address,city,country,roomTypeCategory,bedroomLabel,bedLabel,bathroomLabel,minNights,maxNights,numberOfGuests,guestControls/allowsEvents,guestControls/allowsPets,Total_amenities,Price_per_guest,Adjusted_price_per_guest,weighted_score
4683,https://www.airbnb.com/rooms/49369698,Sunset Villa | Pool Spa Theatre Sauna Roof Deck,4.94,1652,"Los Angeles, California, United States",Los Angeles,United States,entire_home,6.0,6.0,8.5,2,365,14,0,1,38,118.0,-0.08804,219.61196
3260,https://www.airbnb.com/rooms/39556513,"Luxurious 6-BR Villa: Pool, and Stunning Views",4.91,2495,"Los Angeles, California, United States",Los Angeles,United States,entire_home,6.0,7.0,5.0,1,365,12,0,1,38,207.916667,-0.054766,219.495234
4679,https://www.airbnb.com/rooms/49343068,Hollywood Hills Villa | Pool/Theatre/Roof Deck,4.89,1673,"Los Angeles, California, United States",Los Angeles,United States,entire_home,7.0,7.0,4.5,2,365,16,0,1,38,104.5625,-0.093013,219.356987
4540,https://www.airbnb.com/rooms/48200094,Sleek & Chic Melrose Villa,4.78,1800,"Los Angeles, California, United States",Los Angeles,United States,entire_home,6.0,7.0,6.5,1,1125,12,0,1,38,150.0,-0.076198,218.823802
4965,https://www.airbnb.com/rooms/51149967,Stunning Entertainer's Palisades Home with Pool,4.91,1750,"Los Angeles, California, United States",Los Angeles,United States,entire_home,6.0,6.0,5.5,2,30,12,0,0,38,145.833333,-0.07774,217.47226
5482,https://www.airbnb.com/rooms/53560177,peaceful luxury retreat☁️pool+view+yard+studio...,5.0,1989,"Los Angeles, California, United States",Los Angeles,United States,entire_home,4.0,3.0,4.5,2,1125,8,1,1,37,248.625,-0.039701,215.960299
5036,https://www.airbnb.com/rooms/51640774,Marbella by Stay Awhile Villas,5.0,1569,"Los Angeles, California, United States",Los Angeles,United States,entire_home,4.0,4.0,3.5,2,365,8,0,0,37,196.125,-0.059129,212.940871
1526,https://www.airbnb.com/rooms/24398599,"THE ADALYN - Outdoor Living, Sparkling Pool+Spa",4.97,942,"Los Angeles, California, United States",Los Angeles,United States,entire_home,3.0,5.0,2.0,2,28,7,0,0,37,134.571429,-0.081908,212.768092
5560,https://www.airbnb.com/rooms/53832521,Modern Luxury Home | Indoor-Outdoor | Pool and...,4.95,1056,"Los Angeles, California, United States",Los Angeles,United States,entire_home,4.0,4.0,3.0,2,365,8,0,1,36,132.0,-0.08286,209.66714
4115,https://www.airbnb.com/rooms/45158683,ArtSpace Venice Beach Compound @ the best loca...,4.89,1571,"Los Angeles, California, United States",Los Angeles,United States,entire_home,4.0,5.0,4.0,1,1125,8,1,0,36,196.375,-0.059037,208.390963


In [11]:
from sklearn.model_selection import train_test_split

df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)

print('Train set: %d' % len(df_train))
print('Test set: %d' % len(df_test))

Train set: 4928
Test set: 1232


# Content-based filtering model

In [12]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split

# Concatenate features into a description-like column
df['features'] = df['name'] + ' ' + df['roomTypeCategory'] + ' ' + df['city']

# User input
user_keywords = 'luxury'
city = 'Los Angeles'
roomTypeCategory = 'entire_home'

# Split data into training and test sets,  with 80% of the data used for training and 20% for testing.
X_train, X_test = train_test_split(df, test_size=0.2, random_state=42)

# TF-IDF vectorization
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix_train = tfidf_vectorizer.fit_transform(X_train['features'])
tfidf_matrix_test = tfidf_vectorizer.transform(X_test['features'])

# Calculate similarity based on user input
user_input = f"{user_keywords} {roomTypeCategory} {city}"
user_input_vector = tfidf_vectorizer.transform([user_input])
similarities = cosine_similarity(user_input_vector, tfidf_matrix_test).flatten()

# Get top 10 recommendations
recommended_indices = similarities.argsort()[::-1][1:11]  # Exclude user input
recommended_listings = X_test.iloc[recommended_indices]
recommended_listings.sort_values(by='weighted_score', ascending=False)
recommended_listings

Unnamed: 0,url,name,stars,pricing/rate/amount,address,city,country,roomTypeCategory,bedroomLabel,bedLabel,...,minNights,maxNights,numberOfGuests,guestControls/allowsEvents,guestControls/allowsPets,Total_amenities,Price_per_guest,Adjusted_price_per_guest,weighted_score,features
5682,https://www.airbnb.com/rooms/54279607,OCEAN VIEW ESTATE,4.94,1699,"Los Angeles, California, United States",Los Angeles,United States,entire_home,7.0,13.0,...,2,365,15,0,0,36,113.266667,-0.089792,207.610208,OCEAN VIEW ESTATE entire_home Los Angeles
4635,https://www.airbnb.com/rooms/48938871,Hollywood Hills Glam Paradise w/Pool & Views,4.68,2300,"Los Angeles, California, United States",Los Angeles,United States,entire_home,5.0,5.0,...,3,60,8,0,0,14,287.5,-0.025315,96.374685,Hollywood Hills Glam Paradise w/Pool & Views e...
994,https://www.airbnb.com/rooms/20308137,Private Modern Venice Compound with Pool/Spa,4.96,1850,"Los Angeles, California, United States",Los Angeles,United States,entire_home,5.0,5.0,...,6,1125,10,0,0,27,185.0,-0.063246,162.736754,Private Modern Venice Compound with Pool/Spa e...
259,https://www.airbnb.com/rooms/13377441,Modern Moroccan Pool Paradise near WEHO/Bev Ce...,4.84,895,"Los Angeles, California, United States",Los Angeles,United States,entire_home,4.0,4.0,...,3,1125,8,0,0,33,111.875,-0.090307,192.109693,Modern Moroccan Pool Paradise near WEHO/Bev Ce...
3207,https://www.airbnb.com/rooms/39191606,Private Resort Style: Pavilion/Pool/Theater,4.96,1093,"Los Angeles, California, United States",Los Angeles,United States,entire_home,5.0,5.0,...,30,1125,10,0,0,34,109.3,-0.09126,197.70874,Private Resort Style: Pavilion/Pool/Theater en...
5036,https://www.airbnb.com/rooms/51640774,Marbella by Stay Awhile Villas,5.0,1569,"Los Angeles, California, United States",Los Angeles,United States,entire_home,4.0,4.0,...,2,365,8,0,0,37,196.125,-0.059129,212.940871,Marbella by Stay Awhile Villas entire_home Los...
5229,https://www.airbnb.com/rooms/52598669,"Stunning Villa, Skyline Views Hollywood Hills",4.87,900,"Los Angeles, California, United States",Los Angeles,United States,entire_home,3.0,3.0,...,3,180,6,0,0,37,150.0,-0.076198,212.273802,"Stunning Villa, Skyline Views Hollywood Hills ..."
1867,https://www.airbnb.com/rooms/28009239,Private Clean Creative Living Space,4.57,14,"Los Angeles, California, United States",Los Angeles,United States,private_room,1.0,1.0,...,30,365,1,0,0,28,14.0,-0.126526,,Private Clean Creative Living Space private_ro...
4480,https://www.airbnb.com/rooms/47787674,3 Story 4 bd Hilltop Mansion with View of the ...,5.0,999,"Los Angeles, California, United States",Los Angeles,United States,entire_home,4.0,4.0,...,5,120,8,0,0,35,124.875,-0.085496,202.914504,3 Story 4 bd Hilltop Mansion with View of the ...
3661,https://www.airbnb.com/rooms/42140439,Your Own Private RESORT!🏝9BR/10BA Gated Compound✰,4.98,1786,"Los Angeles, California, United States",Los Angeles,United States,entire_home,9.0,9.0,...,30,730,16,1,1,36,111.625,-0.090399,210.809601,Your Own Private RESORT!🏝9BR/10BA Gated Compou...


In [13]:
# Evaluation

# Get the 10 recommended
data=similarities
largest_indices = np.argpartition(data, -10)[-10:]
largest_elements = data[largest_indices]

# Calculate the average and max
average_simi = np.mean(largest_elements)
largest_element = np.max(data)

# Print the result
print("Largest similarity:", largest_element)
print("Average similarity:", average_simi)

Largest similarity: 0.6186119929841327
Average similarity: 0.4902072320026217


# Hybrid Recommender(Popularity Model + Content-Based Model)

In [14]:
def recommend_listings_hybrid(city, country, room_type, number_of_bedrooms, number_of_beds, number_of_bathrooms, number_of_guests, number_of_nights, price_lower, price_upper, star, user_keywords):
    # Filter data based on user preferences
    filtered_data = df[(df['city'] == city) & (df['country'] == country) & (df['roomTypeCategory'] == room_type) & (df['bedroomLabel'] >= number_of_bedrooms)
                        & (df['bedLabel'] >= number_of_beds) & (df['bathroomLabel'] >= number_of_bathrooms) & (df['numberOfGuests'] >= number_of_guests)
                        & (df['minNights'] <= number_of_nights) & (df['maxNights'] >= number_of_nights) & (df['pricing/rate/amount'] >= price_lower)
                        & (df['pricing/rate/amount'] <= price_upper) & (df['stars'] >= star)]

    # Calculate weighted score from model 1
    filtered_data['weighted_score'] = (
        filtered_data['stars'] * weights['stars'] +
        filtered_data['Adjusted_price_per_guest'] * weights['Adjusted_price_per_guest'] +
        filtered_data['roomTypeCategory'].map(weights['roomTypeCategory']) +
        filtered_data['guestControls/allowsEvents'] * weights['guestControls/allowsEvents'] +
        filtered_data['guestControls/allowsPets'] * weights['guestControls/allowsPets'] +
        filtered_data['Total_amenities'] * weights['Total_amenities']
    )

    # TF-IDF vectorization
    tfidf_vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf_vectorizer.fit_transform(filtered_data['features'])

    # Calculate similarity based on user keywords
    user_input = f"{user_keywords} {room_type} {city}"
    user_input_vector = tfidf_vectorizer.transform([user_input])
    similarities = cosine_similarity(user_input_vector, tfidf_matrix).flatten()

    # Get top 10 recommendations based on weighted score and similarity
    filtered_data['final_score'] = filtered_data['weighted_score'] + similarities
    top_10_listings = filtered_data.nlargest(10, 'final_score')

    return top_10_listings

# Example usage
recommend_listings_hybrid('Los Angeles', 'United States', 'entire_home', 2, 3.0, 2.0, 4, 4, 0, 10000, 4.5, 'Lux')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['weighted_score'] = (
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['final_score'] = filtered_data['weighted_score'] + similarities


Unnamed: 0,url,name,stars,pricing/rate/amount,address,city,country,roomTypeCategory,bedroomLabel,bedLabel,...,maxNights,numberOfGuests,guestControls/allowsEvents,guestControls/allowsPets,Total_amenities,Price_per_guest,Adjusted_price_per_guest,weighted_score,features,final_score
4125,https://www.airbnb.com/rooms/4525124,OLYMPIA LUXE ESTATE,5.0,2400,"Los Angeles, California, United States",Los Angeles,United States,entire_home,4.0,4.0,...,365,10,1,1,38,240.0,-0.042893,220.957107,OLYMPIA LUXE ESTATE entire_home Los Angeles,221.230028
4683,https://www.airbnb.com/rooms/49369698,Sunset Villa | Pool Spa Theatre Sauna Roof Deck,4.94,1652,"Los Angeles, California, United States",Los Angeles,United States,entire_home,6.0,6.0,...,365,14,0,1,38,118.0,-0.08804,219.61196,Sunset Villa | Pool Spa Theatre Sauna Roof Dec...,219.806679
3785,https://www.airbnb.com/rooms/42973853,VILLA FENDI by LUXJB | Beverly Hills Villa | Gym,4.93,1950,"Los Angeles, California, United States",Los Angeles,United States,entire_home,5.0,6.0,...,1125,10,0,1,38,195.0,-0.059546,219.590454,VILLA FENDI by LUXJB | Beverly Hills Villa | G...,219.794229
3260,https://www.airbnb.com/rooms/39556513,"Luxurious 6-BR Villa: Pool, and Stunning Views",4.91,2495,"Los Angeles, California, United States",Los Angeles,United States,entire_home,6.0,7.0,...,365,12,0,1,38,207.916667,-0.054766,219.495234,"Luxurious 6-BR Villa: Pool, and Stunning Views...",219.722847
4679,https://www.airbnb.com/rooms/49343068,Hollywood Hills Villa | Pool/Theatre/Roof Deck,4.89,1673,"Los Angeles, California, United States",Los Angeles,United States,entire_home,7.0,7.0,...,365,16,0,1,38,104.5625,-0.093013,219.356987,Hollywood Hills Villa | Pool/Theatre/Roof Deck...,219.585919
4540,https://www.airbnb.com/rooms/48200094,Sleek & Chic Melrose Villa,4.78,1800,"Los Angeles, California, United States",Los Angeles,United States,entire_home,6.0,7.0,...,1125,12,0,1,38,150.0,-0.076198,218.823802,Sleek & Chic Melrose Villa entire_home Los Ang...,219.069924
2903,https://www.airbnb.com/rooms/36693929,VILLA BOND by LUXJB | Luxury Beverly Hills Villa,5.0,1950,"Los Angeles, California, United States",Los Angeles,United States,entire_home,5.0,5.0,...,1125,10,0,0,38,195.0,-0.059546,217.940454,VILLA BOND by LUXJB | Luxury Beverly Hills Vil...,218.148743
4852,https://www.airbnb.com/rooms/50454240,Hollywood Hills Luxury Resort Style Villa,4.98,1682,"Los Angeles, California, United States",Los Angeles,United States,entire_home,5.0,6.0,...,30,10,0,0,38,168.2,-0.069463,217.830537,Hollywood Hills Luxury Resort Style Villa enti...,218.065703
4965,https://www.airbnb.com/rooms/51149967,Stunning Entertainer's Palisades Home with Pool,4.91,1750,"Los Angeles, California, United States",Los Angeles,United States,entire_home,6.0,6.0,...,30,12,0,0,38,145.833333,-0.07774,217.47226,Stunning Entertainer's Palisades Home with Poo...,217.712329
5482,https://www.airbnb.com/rooms/53560177,peaceful luxury retreat☁️pool+view+yard+studio...,5.0,1989,"Los Angeles, California, United States",Los Angeles,United States,entire_home,4.0,3.0,...,1125,8,1,1,37,248.625,-0.039701,215.960299,peaceful luxury retreat☁️pool+view+yard+studio...,216.14652


In [15]:
def recommend_listings_hybrid(city, country, room_type, number_of_bedrooms, number_of_beds, number_of_bathrooms, number_of_guests, number_of_nights, price_lower, price_upper, star, user_keywords):
    # Filter data based on user preferences
    filtered_data = df[(df['city'] == city) & (df['country'] == country) & (df['roomTypeCategory'] == room_type) & (df['bedroomLabel'] >= number_of_bedrooms)
                        & (df['bedLabel'] >= number_of_beds) & (df['bathroomLabel'] >= number_of_bathrooms) & (df['numberOfGuests'] >= number_of_guests)
                        & (df['minNights'] <= number_of_nights) & (df['maxNights'] >= number_of_nights) & (df['pricing/rate/amount'] >= price_lower)
                        & (df['pricing/rate/amount'] <= price_upper) & (df['stars'] >= star)]

    # Calculate weighted score from model 1
    filtered_data['weighted_score'] = (
        filtered_data['stars'] * weights['stars'] +
        filtered_data['Adjusted_price_per_guest'] * weights['Adjusted_price_per_guest'] +
        filtered_data['roomTypeCategory'].map(weights['roomTypeCategory']) +
        filtered_data['guestControls/allowsEvents'] * weights['guestControls/allowsEvents'] +
        filtered_data['guestControls/allowsPets'] * weights['guestControls/allowsPets'] +
        filtered_data['Total_amenities'] * weights['Total_amenities']
    )

    # TF-IDF vectorization
    tfidf_vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf_vectorizer.fit_transform(filtered_data['features'])

    # Calculate similarity based on user keywords
    user_input = f"{user_keywords} {room_type} {city}"
    user_input_vector = tfidf_vectorizer.transform([user_input])
    similarities = cosine_similarity(user_input_vector, tfidf_matrix).flatten()

    # Store the similarities in a new column
    filtered_data['cosine_similarity'] = similarities

    # Get top 10 recommendations based on weighted score and similarity
    filtered_data['final_score'] = filtered_data['weighted_score'] + filtered_data['cosine_similarity']
    top_10_listings = filtered_data.nlargest(10, 'final_score')

    # Calculate and return max and average of cosine similarities for top 10 listings
    max_similarity = top_10_listings['cosine_similarity'].max()
    avg_similarity = top_10_listings['cosine_similarity'].mean()

    return top_10_listings[['name', 'final_score', 'cosine_similarity']],max_similarity,avg_similarity

# Example usage
result = recommend_listings_hybrid('Los Angeles', 'United States', 'entire_home', 2, 3.0, 2.0, 4, 4, 0, 10000, 4.5, 'Lux')
print(result)


(                                                   name  final_score  \
4125                                OLYMPIA LUXE ESTATE   221.230028   
4683    Sunset Villa | Pool Spa Theatre Sauna Roof Deck   219.806679   
3785   VILLA FENDI by LUXJB | Beverly Hills Villa | Gym   219.794229   
3260     Luxurious 6-BR Villa: Pool, and Stunning Views   219.722847   
4679     Hollywood Hills Villa | Pool/Theatre/Roof Deck   219.585919   
4540                         Sleek & Chic Melrose Villa   219.069924   
2903   VILLA BOND by LUXJB | Luxury Beverly Hills Villa   218.148743   
4852          Hollywood Hills Luxury Resort Style Villa   218.065703   
4965    Stunning Entertainer's Palisades Home with Pool   217.712329   
5482  peaceful luxury retreat☁️pool+view+yard+studio...   216.146520   

      cosine_similarity  
4125           0.272921  
4683           0.194719  
3785           0.203775  
3260           0.227613  
4679           0.228932  
4540           0.246122  
2903           0.208289 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['weighted_score'] = (
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['cosine_similarity'] = similarities
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['final_score'] = filtered_data['weighted_score'] + filtered_data['cosine_similarity']


## Collaborative Filtering Model

1. Users input url (previous stay)

In [16]:
from sklearn.neighbors import NearestNeighbors

class cf_url_AirbnbRecommendationSystem:
    def __init__(self, n_neighbors=10, metric='cosine'):
        """
        Initializes the model with the number of neighbors and similarity metric.
        """
        self.model = NearestNeighbors(n_neighbors=n_neighbors, metric=metric)
        self.listings_features = None
        self.listings_ids = None

    def preprocess_data(self, df):
        """
        Preprocesses the DataFrame by handling missing values, converting categorical variables to numerical,
        and normalizing numerical variables.
        """
        # Handle missing values
        # For numerical columns, replace NaN with the mean of the column
        numerical_cols = ['stars', 'pricing/rate/amount', 'bedroomLabel', 'bedLabel', 'bathroomLabel',
                          'minNights', 'maxNights', 'numberOfGuests', 'Total_amenities',
                          'Price_per_guest', 'Adjusted_price_per_guest', 'weighted_score']
        for col in numerical_cols:
            df[col].fillna(df[col].mean(), inplace=True)

        # For categorical columns, replace NaN with a placeholder string, e.g., 'Unknown'
        categorical_cols = ['city', 'country', 'roomTypeCategory']
        df[categorical_cols] = df[categorical_cols].fillna('Unknown')

        # Convert categorical variables using one-hot encoding
        df = pd.get_dummies(df, columns=categorical_cols)

        # Normalize numerical variables
        df[numerical_cols] = (df[numerical_cols] - df[numerical_cols].mean()) / df[numerical_cols].std()

        return df

    def fit(self, df):
        """
        Fits the model to the Airbnb listings data.
        df: A pandas DataFrame with Airbnb listings data.
        """
        self.listings_ids = df['url'].values
        df_preprocessed = self.preprocess_data(df.drop(['url', 'name', 'address'], axis=1))
        self.listings_features = df_preprocessed.values
        self.model.fit(self.listings_features)

    def get_recommendations(self, listing_url, num_recommendations=5):
        """
        Given a listing URL, finds similar listings.
        """
        if self.listings_features is None or self.listings_ids is None:
            raise ValueError("Model has not been fitted.")

        # Find the index of the listing in the listings_ids array
        listing_idx = df[df['url'] == listing_url].index[0]

        listing_features_2d = self.listings_features[listing_idx].reshape(1, -1)
        # Find nearest neighbors for the listing
        distances, indices = self.model.kneighbors([self.listings_features[listing_idx]], n_neighbors=num_recommendations + 1)

        # Get URLs of recommended listings, excluding the input listing itself
        recommended_urls = self.listings_ids[indices.flatten()][1:]

        cl_list = df['url'].isin(recommended_urls)
        cl_lists = df[cl_list]

        return cl_lists

# Example usage:
modelcf = cf_url_AirbnbRecommendationSystem(n_neighbors=10)
modelcf.fit(df_train)

listing_url = "https://www.airbnb.com/rooms/44931"  # Example listing URL
recommended_urls = modelcf.get_recommendations(listing_url, num_recommendations=10)
print(f"Recommended listings for {listing_url}: {recommended_urls}")

Recommended listings for https://www.airbnb.com/rooms/44931:                                                   url  \
212             https://www.airbnb.com/rooms/12780684   
685             https://www.airbnb.com/rooms/17429603   
1205            https://www.airbnb.com/rooms/21913884   
1405            https://www.airbnb.com/rooms/23428546   
1653            https://www.airbnb.com/rooms/25698586   
1687            https://www.airbnb.com/rooms/26115064   
1884            https://www.airbnb.com/rooms/28105027   
2178              https://www.airbnb.com/rooms/307497   
3292            https://www.airbnb.com/rooms/39703469   
5921  https://www.airbnb.com/rooms/751710639285450000   

                                                   name  stars  \
212    Can Verdaguer, ideal para compartir experiencias   4.80   
685              종로구 부암동 G.HOUSE(단체,독채) 워크샵,세미나,가족모임,행사   4.91   
1205                   [NEW]Whole House/up to 48 people   4.88   
1405        Sleep Up To 54 at The Great Escape 

In [17]:
from sklearn.metrics.pairwise import cosine_similarity

def evaluate_similarity_to_input(model, input_listing_url, recommended_listings):
    # Find the index of the input listing
    input_idx = np.where(model.listings_ids == input_listing_url)[0][0]

    # Extract the feature vector for the input listing
    input_features = model.listings_features[input_idx].reshape(1, -1)

    # Extract the feature vectors for the recommended listings
    recommended_indices = [np.where(model.listings_ids == url)[0][0] for url in recommended_listings['url']]
    recommended_features = model.listings_features[recommended_indices]

    # Calculate the largest and average cosine similarity between the input and each recommended listing
    similarities = cosine_similarity(input_features, recommended_features)
    largest_similarity = np.max(similarities)
    average_similarity = np.mean(similarities)

    return largest_similarity, average_similarity

# Use the function
largest_similarity, average_similarity = evaluate_similarity_to_input(modelcf, listing_url, recommended_urls)

print(f"Average similarity to input listing: {average_similarity}")
print(f"Largest similarity to input listing: {largest_similarity}")

Average similarity to input listing: 0.5282878535200413
Largest similarity to input listing: 0.6734404113689729


2. Users input detailed feature (user preference)

In [18]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors

class AirbnbRecommendationSystem_url:
    def __init__(self, n_neighbors=10, metric='cosine'):
        self.model = NearestNeighbors(n_neighbors=n_neighbors, metric=metric)
        self.pipeline = None
        self.listings_ids = None

    def create_preprocessing_pipeline(self, df):
        numerical_cols = ['stars', 'bedroomLabel', 'bedLabel', 'bathroomLabel',
                          'minNights', 'maxNights', 'numberOfGuests']
        categorical_cols = ['city', 'country', 'roomTypeCategory']

        # Define preprocessing for numerical columns
        numeric_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler())])

        # Define preprocessing for categorical columns
        categorical_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='constant', fill_value='Unknown')),
            ('onehot', OneHotEncoder(handle_unknown='ignore'))])

        # Bundle preprocessing for numerical and categorical data
        preprocessor = ColumnTransformer(
            transformers=[
                ('num', numeric_transformer, numerical_cols),
                ('cat', categorical_transformer, categorical_cols)])

        self.pipeline = Pipeline(steps=[('preprocessor', preprocessor)])

        # Fit the pipeline to the data
        self.pipeline.fit(df)

    def fit(self, df):
        self.df = df.copy()
        self.listings_ids = df['url'].values
        self.create_preprocessing_pipeline(df.drop(['url', 'name', 'address'], axis=1))
        df_processed = self.pipeline.transform(df.drop(['url', 'name', 'address'], axis=1))
        self.model.fit(df_processed)

    def get_recommendations_from_features(self, user_input, num_recommendations=10):

        if self.pipeline is None:
            raise ValueError("Model has not been fitted.")

        # Extract price range from user input and remove it before processing
        lowest_price = user_input.pop('lowest_price', None)
        highest_price = user_input.pop('highest_price', None)

        user_input_df = pd.DataFrame([user_input])
        user_input_processed = self.pipeline.transform(user_input_df)
        distances, indices = self.model.kneighbors(user_input_processed, n_neighbors=num_recommendations)

        recommended_ids = self.listings_ids[indices.flatten()]

        cl_list = df['url'].isin(recommended_ids)
        cl_lists = df[cl_list]

        return cl_lists


# Example usage

# Define user_input without 'Total_amenities', 'weighted_score', 'Price_per_guest', 'Adjusted_price_per_guest'
user_input = {
    'stars': 4.5,
    'lowest_price': 0,  # New input for the lowest acceptable price
    'highest_price': 10000,
    'city': 'Los Angeles',
    'country': 'United States',
    'roomTypeCategory': 'entire_home',
    'bedroomLabel': 2,
    'bedLabel': 2,
    'bathroomLabel': 1,
    'minNights': 2,
    'maxNights': 10,
    'numberOfGuests': 4
}

lowest_price = user_input.pop('lowest_price', None)
highest_price = user_input.pop('highest_price', None)
#city = user_input.pop('city', None)

model = AirbnbRecommendationSystem_url(n_neighbors=10)
modiified_df_train = df_train[(df_train['pricing/rate/amount'] >= lowest_price) &
                              (df_train['pricing/rate/amount'] <= highest_price) &
                               (df_train['city'] == user_input['city'])]
model.fit(modiified_df_train)  # Assume df_train is your DataFrame containing the Airbnb listings

recommended_listings = model.get_recommendations_from_features(user_input, num_recommendations=10)
print("Recommended listings based on user input:")
print(recommended_listings)

Recommended listings based on user input:
                                        url  \
51    https://www.airbnb.com/rooms/10569659   
649   https://www.airbnb.com/rooms/17122853   
1371  https://www.airbnb.com/rooms/23126991   
1526  https://www.airbnb.com/rooms/24398599   
3141  https://www.airbnb.com/rooms/38755677   
4114  https://www.airbnb.com/rooms/45156803   
4455  https://www.airbnb.com/rooms/47564997   
4766  https://www.airbnb.com/rooms/49949604   
4983  https://www.airbnb.com/rooms/51282870   
5560  https://www.airbnb.com/rooms/53832521   

                                                   name  stars  \
51             Private Luxury Villa with waterfall pool   4.74   
649                        Exquisite Midcentury Retreat   4.94   
1371  The Ultimate Creative Oasis in the Heart of LA 33   4.51   
1526    THE ADALYN - Outdoor Living, Sparkling Pool+Spa   4.97   
3141   Designer apt at luxury resort in the heart of LA   4.95   
4114                 The Angeleno by Stay Aw

In [19]:
from sklearn.metrics.pairwise import cosine_similarity

def evaluate_recommendations(model, user_input, recommended_listings):
    if model.pipeline is None:
        raise ValueError("Model has not been fitted.")

    # Process the user input through the pipeline
    user_input_df = pd.DataFrame([user_input])
    user_input_processed = model.pipeline.transform(user_input_df)

    # Extract the feature vectors for the recommended listings
    recommended_indices = [np.where(model.listings_ids == url)[0][0] for url in recommended_listings['url']]
    recommended_features = model.pipeline.transform(model.df.iloc[recommended_indices].drop(['url', 'name', 'address'], axis=1))

    # Calculate the average cosine similarity between the processed user input and each recommended listing
    similarities = cosine_similarity(user_input_processed, recommended_features)
    largest_similarity = np.max(similarities)
    average_similarity = np.mean(similarities)

    return largest_similarity, average_similarity

# Prepare user input
user_input = {
    'stars': 4.5,
    'city': 'Los Angeles',
    'country': 'United States',
    'roomTypeCategory': 'entire_home',
    'bedroomLabel': 2,
    'bedLabel': 2,
    'bathroomLabel': 1,
    'minNights': 2,
    'maxNights': 10,
    'numberOfGuests': 4
}

# Assuming 'recommended_listings' is the DataFrame containing your recommended listings
largest_similarity, average_similarity = evaluate_recommendations(model, user_input, recommended_listings)
print(f"Average similarity to user preferences: {average_similarity}")
print(f"Largest similarity to input listing: {largest_similarity}")

Average similarity to user preferences: 0.7488302569625273
Largest similarity to input listing: 0.8451315765691716


## Hybrid CB+CL


In [20]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.neighbors import NearestNeighbors
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd

In [21]:
# Content-Based (C-B) Model
def content_based_model(user_keywords, city, roomTypeCategory, X_train, X_test):
    # Concatenate features into a description-like column
    X_train['features'] = X_train['name'] + ' ' + X_train['roomTypeCategory'] + ' ' + X_train['city']
    X_test['features'] = X_test['name'] + ' ' + X_test['roomTypeCategory'] + ' ' + X_test['city']

    # TF-IDF vectorization
    tfidf_vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix_train = tfidf_vectorizer.fit_transform(X_train['features'])
    tfidf_matrix_test = tfidf_vectorizer.transform(X_test['features'])

    # Calculate similarity based on user input
    user_input = f"{user_keywords} {roomTypeCategory} {city}"
    user_input_vector = tfidf_vectorizer.transform([user_input])
    similarities = cosine_similarity(user_input_vector, tfidf_matrix_test).flatten()

    # Get top 10 recommendations
    recommended_indices = similarities.argsort()[::-1][1:11]  # Exclude user input
    recommended_listings = X_test.iloc[recommended_indices]

    return recommended_listings


In [22]:
# Collaborative Filtering (CF) Model
class AirbnbRecommendationSystem:
    def __init__(self, n_neighbors=10, metric='cosine'):
        self.model = NearestNeighbors(n_neighbors=n_neighbors, metric=metric)
        self.pipeline = None
        self.listings_ids = None

    def create_preprocessing_pipeline(self, df):
        numerical_cols = ['stars', 'bedroomLabel', 'bedLabel', 'bathroomLabel',
                          'minNights', 'maxNights', 'numberOfGuests']
        categorical_cols = ['city', 'country', 'roomTypeCategory']

        # Define preprocessing for numerical columns
        numeric_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler())])

        # Define preprocessing for categorical columns
        categorical_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='constant', fill_value='Unknown')),
            ('onehot', OneHotEncoder(handle_unknown='ignore'))])

        # Bundle preprocessing for numerical and categorical data
        preprocessor = ColumnTransformer(
            transformers=[
                ('num', numeric_transformer, numerical_cols),
                ('cat', categorical_transformer, categorical_cols)])

        self.pipeline = Pipeline(steps=[('preprocessor', preprocessor)])

        # Fit the pipeline to the data
        self.pipeline.fit(df)

    def fit(self, df):
        self.df = df.copy()
        self.listings_ids = df['url'].values
        self.create_preprocessing_pipeline(df.drop(['url', 'name', 'address'], axis=1))
        df_processed = self.pipeline.transform(df.drop(['url', 'name', 'address'], axis=1))
        self.model.fit(df_processed)

    def get_recommendations_from_features(self, user_input, num_recommendations=10):

        if self.pipeline is None:
            raise ValueError("Model has not been fitted.")

        user_input_df = pd.DataFrame([user_input])
        user_input_processed = self.pipeline.transform(user_input_df)
        distances, indices = self.model.kneighbors(user_input_processed, n_neighbors=num_recommendations)

        recommended_ids = self.listings_ids[indices.flatten()]

        cl_list = df['url'].isin(recommended_ids)
        cl_lists = df[cl_list]

        return cl_lists


In [23]:
# Hybrid Model
def hybrid_model(user_keywords, city, roomTypeCategory, user_input, X_train, df):
    # Content-Based Model
    cb_model_results = content_based_model(user_keywords, city, roomTypeCategory, X_train, df)

    # Collaborative Filtering Model
    cf_model = AirbnbRecommendationSystem(n_neighbors=5)
    cf_model.fit(df)

    # Get recommendations from CF model
    cf_recommendations = cf_model.get_recommendations_from_features(user_input, num_recommendations=10)

    # Merge results from both models
    hybrid_recommendations = pd.concat([cb_model_results, cf_recommendations]).drop_duplicates().head(10)

    return hybrid_recommendations

# Example usage
user_keywords = 'luxury'
city = 'Los Angeles'
roomTypeCategory = 'entire_home'
user_input = {
    'stars': 4.5,
    'lowest_price': 0,
    'highest_price': 10000,
    'city': 'Los Angeles',
    'country': 'United States',
    'roomTypeCategory': 'entire_home',
    'bedroomLabel': 2,
    'bedLabel': 2,
    'bathroomLabel': 1,
    'minNights': 2,
    'maxNights': 10,
    'numberOfGuests': 4
}

# Assuming df_train is your DataFrame containing the Airbnb listings for training
X_train, X_test = train_test_split(df_train, test_size=0.2, random_state=42)

recommended_listings = hybrid_model(user_keywords, city, roomTypeCategory, user_input, X_train, df_train)
recommended_listings

Unnamed: 0,url,name,stars,pricing/rate/amount,address,city,country,roomTypeCategory,bedroomLabel,bedLabel,...,minNights,maxNights,numberOfGuests,guestControls/allowsEvents,guestControls/allowsPets,Total_amenities,Price_per_guest,Adjusted_price_per_guest,weighted_score,features
4965,https://www.airbnb.com/rooms/51149967,Stunning Entertainer's Palisades Home with Pool,4.91,1750,"Los Angeles, California, United States",Los Angeles,United States,entire_home,6.0,6.0,...,2,30,12,0,0,38,145.833333,-0.07774,217.47226,Stunning Entertainer's Palisades Home with Poo...
51,https://www.airbnb.com/rooms/10569659,Private Luxury Villa with waterfall pool,4.74,1749,"Los Angeles, California, United States",Los Angeles,United States,entire_home,3.0,3.0,...,30,90,8,0,1,36,218.625,-0.050803,208.649197,Private Luxury Villa with waterfall pool entir...
5486,https://www.airbnb.com/rooms/53576948,Luxury Grand Residence With Pool & Jacuzzi,5.0,8427,"Los Angeles, California, United States",Los Angeles,United States,entire_home,4.0,5.0,...,30,1125,10,1,1,37,842.7,0.180141,216.180141,Luxury Grand Residence With Pool & Jacuzzi ent...
5017,https://www.airbnb.com/rooms/51483165,Luxury Modern Hollywood Hills Villa,5.0,2217,"Los Angeles, California, United States",Los Angeles,United States,entire_home,5.0,6.0,...,3,1125,10,0,0,17,221.7,-0.049665,112.950335,Luxury Modern Hollywood Hills Villa entire_hom...
5188,https://www.airbnb.com/rooms/52389016,Villa Lareira,4.8,2200,"Los Angeles, California, United States",Los Angeles,United States,entire_home,5.0,5.0,...,30,365,10,0,1,37,220.0,-0.050294,213.949706,Villa Lareira entire_home Los Angeles
3141,https://www.airbnb.com/rooms/38755677,Designer apt at luxury resort in the heart of LA,4.95,5000,"Los Angeles, California, United States",Los Angeles,United States,entire_home,1.0,1.0,...,1,28,2,0,0,26,2500.0,0.79344,158.54344,Designer apt at luxury resort in the heart of ...
4852,https://www.airbnb.com/rooms/50454240,Hollywood Hills Luxury Resort Style Villa,4.98,1682,"Los Angeles, California, United States",Los Angeles,United States,entire_home,5.0,6.0,...,3,30,10,0,0,38,168.2,-0.069463,217.830537,Hollywood Hills Luxury Resort Style Villa enti...
5024,https://www.airbnb.com/rooms/51545675,Bellagio Estate,4.67,1800,"Los Angeles, California, United States",Los Angeles,United States,entire_home,8.0,9.0,...,3,1125,16,0,1,33,112.5,-0.090076,193.259924,Bellagio Estate entire_home Los Angeles
4315,https://www.airbnb.com/rooms/46569587,Luxury 3-Story | Pool & Spa | Theater | City V...,5.0,1779,"Los Angeles, California, United States",Los Angeles,United States,entire_home,4.0,4.0,...,30,365,8,0,0,35,222.375,-0.049415,202.950585,Luxury 3-Story | Pool & Spa | Theater | City V...
5560,https://www.airbnb.com/rooms/53832521,Modern Luxury Home | Indoor-Outdoor | Pool and...,4.95,1056,"Los Angeles, California, United States",Los Angeles,United States,entire_home,4.0,4.0,...,2,365,8,0,1,36,132.0,-0.08286,209.66714,Modern Luxury Home | Indoor-Outdoor | Pool and...


In [24]:
recommended_listings['url'].reset_index(drop=True)


0    https://www.airbnb.com/rooms/51149967
1    https://www.airbnb.com/rooms/10569659
2    https://www.airbnb.com/rooms/53576948
3    https://www.airbnb.com/rooms/51483165
4    https://www.airbnb.com/rooms/52389016
5    https://www.airbnb.com/rooms/38755677
6    https://www.airbnb.com/rooms/50454240
7    https://www.airbnb.com/rooms/51545675
8    https://www.airbnb.com/rooms/46569587
9    https://www.airbnb.com/rooms/53832521
Name: url, dtype: object

In [25]:
pip install dash

Collecting dash
  Downloading dash-2.16.1-py3-none-any.whl (10.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m25.4 MB/s[0m eta [36m0:00:00[0m
Collecting dash-html-components==2.0.0 (from dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Collecting dash-core-components==2.0.0 (from dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Collecting dash-table==5.0.0 (from dash)
  Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Installing collected packages: dash-table, dash-html-components, dash-core-components, retrying, dash
Successfully installed dash-2.16.1 dash-core-components-2.0.0 dash-html-components-2.0.0 dash-table-5.0.0 retrying-1.3.4


In [26]:
pip install dash_ag_grid

Collecting dash_ag_grid
  Downloading dash_ag_grid-31.0.1-py3-none-any.whl (4.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.9/4.9 MB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: dash_ag_grid
Successfully installed dash_ag_grid-31.0.1


In [27]:
pip install dash_bootstrap_components

Collecting dash_bootstrap_components
  Downloading dash_bootstrap_components-1.6.0-py3-none-any.whl (222 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m222.5/222.5 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: dash_bootstrap_components
Successfully installed dash_bootstrap_components-1.6.0


In [28]:
from dash import Dash, Input, Output, callback, dcc, html
import copy
import dash_bootstrap_components as dbc
import plotly.express as px
import pandas as pd
import dash_ag_grid as dag
from dash import Dash, html, dcc, Input, Output

# df = copy.deepcopy(df_train)

app = Dash(__name__,external_stylesheets=[dbc.themes.BOOTSTRAP])

tab_1_layout = dbc.Row(
            [
                dbc.Col(
                    [
                        html.Div(
                            [
                                html.Br(),
                                html.H6("Stars"),
                                dcc.Slider(2, 5, 0.1, value=4.5, marks=None, id="stars1",
                                    tooltip={"placement": "bottom", "always_visible": True,"style": {"fontSize": "10px"}}),

                                html.H6("Price Range"),
                                dcc.RangeSlider(
                                    id="pricing/rate/amount1",
                                    min=df['pricing/rate/amount'].min(),
                                    max=df['pricing/rate/amount'].max(),
                                    step=1,
                                    marks={0: 0, 101778: "100k"},
                                    value=[7, 101778],
                                    tooltip={"placement": "bottom", "always_visible": True,"style": {"fontSize": "10px"}}
                                ),

                                html.H6("City"),
                                dcc.Dropdown(id="city1", value="Los Angeles",options=sorted(df["city"].unique())),

                                html.H6("Country"),
                                dcc.Dropdown(id="country1", value="United States",options=sorted(df["country"].unique()),),

                                html.H6("Room Type"),
                                dcc.Dropdown(id="roomTypeCategory1", value="entire_home",
                                             options=sorted(df["roomTypeCategory"].unique()),),
                                html.Br(),
                                dbc.Row(
                                    [
                                        dbc.Col(
                                            [
                                                html.H6("Number of Bedroom"),
                                                dcc.Input(id='bedroomLabel1', type='number', value=2, min=1, max=50, step=1),
                                            ]),
                                        dbc.Col(
                                            [
                                                html.H6("Number of Bed"),
                                                dcc.Input(id='bedLabel1', type='number', value=2, min=0, max=53, step=1),
                                            ]),
                                    ]),


                                dbc.Row(
                                    [
                                        dbc.Col(
                                            [
                                                html.H6("Number of Bathroom"),
                                                dcc.Input(id='bathroomLabel1', type='number', value=1, min=0, max=26.5, step=0.5),
                                            ]),
                                        dbc.Col(
                                            [
                                                html.H6("Number Of Guests"),
                                                dcc.Input(id='numberOfGuests1', type='number', value=4, min=1, max=16, step=1),
                                            ]),
                                    ]),


                                dbc.Row(
                                    [
                                        dbc.Col(
                                            [
                                                html.H6("Minimum Nights"),
                                                dcc.Input(id='minNights1', type='number', value=3, min=1, max=400, step=1),
                                            ]),
                                        dbc.Col(
                                            [
                                                html.H6("Maximum Nights"),
                                                dcc.Input(id='maxNights1', type='number', value=10, min=1, max=3000, step=1),
                                            ]),
                                    ]),


                            ])
                    ],
                    width=3,
            ),
            dbc.Col(
                    [
                        html.Br(),
                        dag.AgGrid(
                            id="url1",
                            columnDefs=[{"field": i} for i in df.columns],
                            columnSize="sizeToFit",
                            defaultColDef={"type": "rightAligned"},
                            dashGridOptions = {"domLayout": "autoHeight"},
                            style = {"height": None},
                        )],
            )]

        )

tab_2_layout = dbc.Row(
            [
                dbc.Col(
                    [
                        html.Div(
                            [
                                html.H6("User Keywords"),
                                dcc.Input(id="user_keywords", type="text", value="luxury", placeholder="Enter keywords"),

                                html.H6("Stars"),
                                dcc.Slider(2, 5, 0.1, value=4.5, marks=None, id="stars2",
                                    tooltip={"placement": "bottom", "always_visible": True,"style": {"fontSize": "10px"}}),

                                html.H6("Price Range"),
                                dcc.RangeSlider(
                                    id="pricing/rate/amount2",
                                    min=df['pricing/rate/amount'].min(),
                                    max=df['pricing/rate/amount'].max(),
                                    step=1,
                                    marks={0: 0, 101778: "100k"},
                                    value=[7, 101778],
                                    tooltip={"placement": "bottom", "always_visible": True,"style": {"fontSize": "10px"}}
                                ),

                                html.H6("City"),
                                dcc.Dropdown(id="city2", value="Los Angeles",options=sorted(df["city"].unique())),

                                html.H6("Country"),
                                dcc.Dropdown(id="country2", value="United States",options=sorted(df["country"].unique()),),

                                html.H6("Room Type"),
                                dcc.Dropdown(id="roomTypeCategory2", value="entire_home",
                                             options=sorted(df["roomTypeCategory"].unique()),),

                                html.Br(),
                                dbc.Row(
                                    [
                                        dbc.Col(
                                            [
                                                html.H6("Number of Bedroom"),
                                                dcc.Input(id='bedroomLabel2', type='number', value=2, min=1, max=50, step=1),
                                            ]),
                                        dbc.Col(
                                            [
                                                html.H6("Number of Bed"),
                                                dcc.Input(id='bedLabel2', type='number', value=2, min=0, max=53, step=1),
                                            ]),
                                    ]),


                                dbc.Row(
                                    [
                                        dbc.Col(
                                            [
                                                html.H6("Number of Bathroom"),
                                                dcc.Input(id='bathroomLabel2', type='number', value=1, min=0, max=26.5, step=0.5),
                                            ]),
                                        dbc.Col(
                                            [
                                                html.H6("Number Of Guests"),
                                                dcc.Input(id='numberOfGuests2', type='number', value=4, min=1, max=16, step=1),
                                            ]),
                                    ]),


                                dbc.Row(
                                    [
                                        dbc.Col(
                                            [
                                                html.H6("Minimum Nights"),
                                                dcc.Input(id='minNights2', type='number', value=3, min=1, max=400, step=1),
                                            ]),
                                        dbc.Col(
                                            [
                                                html.H6("Maximum Nights"),
                                                dcc.Input(id='maxNights2', type='number', value=10, min=1, max=3000, step=1),
                                            ]),
                                    ]),

                            ])
                    ],
                    width=3,
            ),
            dbc.Col(
                    [
                        html.Br(),
                        dag.AgGrid(
                            id="url2",
                            columnDefs=[{"field": i} for i in df.columns],
                            columnSize="sizeToFit",
                            defaultColDef={"type": "rightAligned"},
                            dashGridOptions = {"domLayout": "autoHeight"},
                            style = {"height": None},
                        )],
            )]

        )

tab_3_layout = html.Div(
                            [
                                html.Br(),
                                html.H6("User URL"),
                                dcc.Input(id="listing_url", type="text", placeholder="Enter listing URL",
                                         value="https://www.airbnb.com/rooms/44931",className="w-100"),

                                dag.AgGrid(
                                    id="url3",
                                    columnDefs=[{"field": i} for i in df.columns],
                                    columnSize="sizeToFit",
                                    defaultColDef={"type": "rightAligned"},
                                    dashGridOptions = {"domLayout": "autoHeight"},
                                    style={"height": None}
                                ),

                            ])


# columnDefs = [
#     {"headerName": "url", "field": "url"},
#     {"headerName": "Name", "field": "name"},
#     {"headerName": "Stars", "field": "stars"},
#     {"headerName": "Price", "field": "pricing/rate/amount"},
#     {"headerName": "Country", "field": "country"},
#     {"headerName": "City", "field": "city"},
#     {"headerName": "Room Type Category", "field": "roomTypeCategory"},
#     {"headerName": "Bedroom Label", "field": "bedroomLabel"},
#     {"headerName": "Bed Label", "field": "bedLabel"},
#     {"headerName": "Bathroom Label", "field": "bathroomLabel"},
#     {"headerName": "Min Nights", "field": "minNights"},
#     {"headerName": "Max Nights", "field": "maxNights"},
#     {"headerName": "Number Of Guests", "field": "numberOfGuests"}
# ]

app.layout = dbc.Container(
    [
        dcc.Markdown(
            "### Airbnb Recommendation System",
            style={"textAlign": "center"},
            className="my-4",
        ),
        dcc.Tabs(id="tabs", value="tab-1", children=[
            dcc.Tab(label='Popularity Model', value="tab-1", children=tab_1_layout),
            dcc.Tab(label='Hybrid Model', value="tab-2", children=tab_2_layout),
            dcc.Tab(label='CF Model', value="tab-3", children=tab_3_layout),
        ]),

    ])

@callback(
    Output("city", "options"),
    Input("country", "value")
)
def chained_callback_country(country):

    dff = copy.deepcopy(df)

    if country is not None:
        dff = dff.query("country == @country")

    return sorted(dff["city"].unique())


@callback(
    Output("country", "options"),
    Input("city", "value")
)
def chained_callback_city(city):

    dff = copy.deepcopy(df)

    if city is not None:
        dff = dff.query("city == @city")

    return sorted(dff["country"].unique())

@callback(
    Output("url1", "rowData"),
#     Output("url2", "rowData"),
    Input("stars1", "value"),
#     Input("user_keywords", "value"),
    Input("pricing/rate/amount1", "value"),
    Input("city1", "value"),
    Input("country1", "value"),
    Input("roomTypeCategory1", "value"),
    Input("bedroomLabel1", "value"),
    Input("bedLabel1", "value"),
    Input("bathroomLabel1", "value"),
    Input("minNights1", "value"),
    Input("maxNights1", "value"),
    Input("numberOfGuests1", "value")
)
def render_content_1(stars, range_slider, city, country, roomTypeCategory, bedroomLabel, bedLabel, bathroomLabel, minNights, maxNights, numberOfGuests):
    low, high = range_slider
    user_input = {
        'stars': stars,
        'lowest_price': low,
        'highest_price': high,
        'city': city,
        'country': country,
        'roomTypeCategory': roomTypeCategory,
        'bedroomLabel': bedroomLabel,
        'bedLabel': bedLabel,
        'bathroomLabel': bathroomLabel,
        'minNights': minNights,
        'maxNights': maxNights,
        'numberOfGuests': numberOfGuests
    }

    filtered_data = df[(df['city'] == user_input['city']) & (df['country'] == user_input['country'])
                   & (df['roomTypeCategory'] == user_input['roomTypeCategory'])
                   & (df['bedroomLabel'] >= user_input['bedroomLabel'])
                    & (df['bedLabel'] >= user_input['bedLabel'])
                   & (df['bathroomLabel'] >= user_input['bathroomLabel'])
                   & (df['numberOfGuests'] >= user_input['numberOfGuests'])
                   & (df['minNights'] <= user_input['minNights'])
                   & (df['maxNights'] >= user_input['maxNights'])
                   & (df['pricing/rate/amount'] >= user_input['lowest_price'])
                    & (df['pricing/rate/amount'] <= user_input['highest_price'])
                   & (df['stars'] >= user_input['stars'])]

    popularity_model = PopularityRecommender(filtered_data)

    recommended_pop = popularity_model.recommend_items(user_input, num_recommendations=10)

    return recommended_pop.to_dict("records")



@callback(
#     Output("url1", "rowData"),
    Output("url2", "rowData"),
    Input("stars2", "value"),
    Input("user_keywords", "value"),
    Input("pricing/rate/amount2", "value"),
    Input("city2", "value"),
    Input("country2", "value"),
    Input("roomTypeCategory2", "value"),
    Input("bedroomLabel2", "value"),
    Input("bedLabel2", "value"),
    Input("bathroomLabel2", "value"),
    Input("minNights2", "value"),
    Input("maxNights2", "value"),
    Input("numberOfGuests2", "value")
)
def render_content_2(stars, user_keywords, range_slider, city, country, roomTypeCategory, bedroomLabel, bedLabel, bathroomLabel, minNights, maxNights, numberOfGuests):
    low, high = range_slider
    user_input = {
        'stars': stars,
        'lowest_price': low,
        'highest_price': high,
        'city': city,
        'country': country,
        'roomTypeCategory': roomTypeCategory,
        'bedroomLabel': bedroomLabel,
        'bedLabel': bedLabel,
        'bathroomLabel': bathroomLabel,
        'minNights': minNights,
        'maxNights': maxNights,
        'numberOfGuests': numberOfGuests
    }

    recommended_hybrid = hybrid_model(user_keywords, city, roomTypeCategory, user_input, X_train, df_train)

    return recommended_hybrid.to_dict("records")

@callback(
#     Output("url1", "rowData"),
    Output("url3", "rowData"),
    Input("listing_url", "value"),
)
def render_content_3(listing_url):

    recommended_urls = modelcf.get_recommendations(listing_url, num_recommendations=10)

    return recommended_urls.to_dict("records")


if __name__ == "__main__":
    #app.run(debug=True)
    app.run(jupyter_mode="external")

Dash app running on:


<IPython.core.display.Javascript object>

In [29]:
# from dash import Dash, Input, Output, callback, dcc, html
# import copy
# import dash_bootstrap_components as dbc
# import plotly.express as px
# import pandas as pd
# import dash_ag_grid as dag
# from dash import Dash, html, dcc, Input, Output

# df = copy.deepcopy(df_train)

# app = Dash(__name__,external_stylesheets=[dbc.themes.BOOTSTRAP])

# columnDefs = [
#     {"headerName": "url", "field": "url"},
#     {"headerName": "Name", "field": "name"},
#     {"headerName": "Stars", "field": "stars"},
#     {"headerName": "Price", "field": "pricing/rate/amount"},
# #     {"headerName": "Lowest Price", "field": "lowest_price"},
# #     {"headerName": "Highest Price", "field": "highest_price"},
#     {"headerName": "Country", "field": "country"},
#     {"headerName": "City", "field": "city"},
#     {"headerName": "Room Type Category", "field": "roomTypeCategory"},
#     {"headerName": "Bedroom Label", "field": "bedroomLabel"},
#     {"headerName": "Bed Label", "field": "bedLabel"},
#     {"headerName": "Bathroom Label", "field": "bathroomLabel"},
#     {"headerName": "Min Nights", "field": "minNights"},
#     {"headerName": "Max Nights", "field": "maxNights"},
#     {"headerName": "Number Of Guests", "field": "numberOfGuests"}
# ]

# app.layout = dbc.Container(
#     [
#         dcc.Markdown(
#             "#### Airbnb Recommendation System",
#             style={"textAlign": "center"},
#             className="my-4",
#         ),
#         dbc.Row(
#             [
#                 dbc.Col(
#                     [
#                         html.Div(
#                             [
#                                 html.H6("User Keywords"),
#                                 dcc.Input(id="user_keywords", type="text", placeholder="Enter keywords"),

#                                 html.H6("Stars"),
#                                 dcc.Slider(2, 5, 0.1, value=4.5, marks=None, id="stars",
#                                     tooltip={"placement": "bottom", "always_visible": True}),

#                                 html.H6("Price Range"),
#                                 dcc.RangeSlider(
#                                     id="pricing/rate/amount",
#                                     min=df['pricing/rate/amount'].min(),
#                                     max=df['pricing/rate/amount'].max(),
#                                     step=1,
#                                     marks={0: 0, 101778: "100k"},
#                                     value=[7, 101778],
#                                     tooltip={"placement": "bottom", "always_visible": True}
#                                 ),

#                                 html.H6("Country"),
#                                 dcc.Dropdown(id="country", value="United States",options=sorted(df["country"].unique()),),

#                                 html.H6("City"),
#                                 dcc.Dropdown(id="city", value="Los Angeles",options=sorted(df["city"].unique())),

#                                 html.H6("Room Type Category"),
#                                 dcc.Dropdown(id="roomTypeCategory", value="entire_home",
#                                              options=sorted(df["roomTypeCategory"].unique()),),

#                                 html.H6("Number of Bedroom"),
#                                 dcc.Input(id='bedroomLabel', type='number', value=2, min=1, max=50, step=1),

#                                 html.H6("Number of Bed"),
#                                 dcc.Input(id='bedLabel', type='number', value=2, min=0, max=53, step=1),

#                                 html.H6("Number of Bathroom"),
#                                 dcc.Input(id='bathroomLabel', type='number', value=1, min=0, max=26.5, step=0.5),

#                                 html.H6("Minimum Nights"),
#                                 dcc.Input(id='minNights', type='number', value=3, min=1, max=400, step=1),

#                                 html.H6("Maximum Nights"),
#                                 dcc.Input(id='maxNights', type='number', value=10, min=1, max=3000, step=1),

#                                 html.H6("Number Of Guests"),
#                                 dcc.Input(id='numberOfGuests', type='number', value=4, min=1, max=16, step=1),


#                             ])
#                     ],
#                     width=3,
#             ),
#             dbc.Col(
#                     [
#                         dcc.Tabs(
#                             id="tabs",
#                             value="tab-1",
#                             children=[
#                                 dcc.Tab(
#                                     label="Popularity Model",
#                                     value="tab-1",
#                                     children=[dag.AgGrid(
#                                                 id="url1",
#                                                 columnDefs=columnDefs,
#                                                 columnSize="sizeToFit",
#                                                 defaultColDef={"type": "rightAligned"},
#                                                 dashGridOptions = {"domLayout": "autoHeight"},
#                                                 style = {"height": None}
#                                           )],
#                                 ),
#                                 dcc.Tab(
#                                     label="Hybrid Model",
#                                     value="tab-2",
#                                     children=[dag.AgGrid(
#                                                 id="url2",
#                                                 columnDefs=columnDefs,
#                                                 columnSize="sizeToFit",
#                                                 defaultColDef={"type": "rightAligned"},
#                                                 dashGridOptions = {"domLayout": "autoHeight"},
#                                                 style = {"height": None}
#                                             )],
#                                 ),
#                                 dcc.Tab(
#                                     label="url Model",
#                                     value="tab-3",
#                                     children=[dag.AgGrid(
#                                                 id="url3",
#                                                 columnDefs=columnDefs,
#                                                 columnSize="sizeToFit",
#                                                 defaultColDef={"type": "rightAligned"},
#                                                 dashGridOptions = {"domLayout": "autoHeight"},
#                                                 style = {"height": None}
#                                           )],
#                                 ),
#                             ],
#                         )
#                 ]
#             )]

#         )
#     ])

# @callback(
#     Output("city", "options"),
#     Input("country", "value")
# )
# def chained_callback_country(country):

#     dff = copy.deepcopy(df)

#     if country is not None:
#         dff = dff.query("country == @country")

#     return sorted(dff["city"].unique())


# @callback(
#     Output("country", "options"),
#     Input("city", "value")
# )
# def chained_callback_city(city):

#     dff = copy.deepcopy(df)

#     if city is not None:
#         dff = dff.query("city == @city")

#     return sorted(dff["country"].unique())


# @callback(
#     Output("url1", "rowData"),
#     Output("url2", "rowData"),
#     Input("stars", "value"),
#     Input("user_keywords", "value"),
#     Input("pricing/rate/amount", "value"),
#     Input("city", "value"),
#     Input("country", "value"),
#     Input("roomTypeCategory", "value"),
#     Input("bedroomLabel", "value"),
#     Input("bedLabel", "value"),
#     Input("bathroomLabel", "value"),
#     Input("minNights", "value"),
#     Input("maxNights", "value"),
#     Input("numberOfGuests", "value")
# )

# def render_content(stars, user_keywords, range_slider, city, country, roomTypeCategory, bedroomLabel, bedLabel, bathroomLabel, minNights, maxNights, numberOfGuests):
#     low, high = range_slider
#     user_input = {
#         'stars': stars,
#         'lowest_price': low,
#         'highest_price': high,
#         'city': city,
#         'country': country,
#         'roomTypeCategory': roomTypeCategory,
#         'bedroomLabel': bedroomLabel,
#         'bedLabel': bedLabel,
#         'bathroomLabel': bathroomLabel,
#         'minNights': minNights,
#         'maxNights': maxNights,
#         'numberOfGuests': numberOfGuests
#     }

#     filtered_data = df[(df['city'] == user_input['city']) & (df['country'] == user_input['country'])
#                    & (df['roomTypeCategory'] == user_input['roomTypeCategory'])
#                    & (df['bedroomLabel'] >= user_input['bedroomLabel'])
#                     & (df['bedLabel'] >= user_input['bedLabel'])
#                    & (df['bathroomLabel'] >= user_input['bathroomLabel'])
#                    & (df['numberOfGuests'] >= user_input['numberOfGuests'])
#                    & (df['minNights'] <= user_input['minNights'])
#                    & (df['maxNights'] >= user_input['maxNights'])
#                    & (df['pricing/rate/amount'] >= user_input['lowest_price'])
#                     & (df['pricing/rate/amount'] <= user_input['highest_price'])
#                    & (df['stars'] >= user_input['stars'])]

#     popularity_model = PopularityRecommender(filtered_data)

#     recommended_pop = popularity_model.recommend_items(user_input, num_recommendations=5)

#     recommended_hybrid = hybrid_model(user_keywords, city, roomTypeCategory, user_input, X_train, df_train)

#     recommended_urls = modelcf.get_recommendations(listing_url, num_recommendations=5)

#     return (recommended_pop.to_dict("records"),recommended_hybrid.to_dict("records"), recommended_urls.to_dict("records"))


# if __name__ == "__main__":
#     #app.run(debug=True)
#     app.run(jupyter_mode="external")