## Introduction

Notebook ini digunakan untuk membuat recommender system berdasarkan preferensi pelanggan AirBnB

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import ast
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import hstack
from sklearn.preprocessing import MinMaxScaler, MultiLabelBinarizer

## Exploratory Data Analysis

In [2]:
df = pd.read_csv('listings_clustered.csv')
df.head()

Unnamed: 0,id,listing_url,picture_url,name,description,property_type,room_type,accommodates,number_of_reviews,bedrooms,beds,price,review_scores_rating,city,latitude,longitude,bathrooms,cluster
0,52438122,https://www.airbnb.com/rooms/52438122,https://a0.muscache.com/pictures/miso/Hosting-...,Stunning New Cottage minutes to downtown Ashev...,Come relax in this brand new cottage with tast...,Entire cottage,Entire home/apt,4,63,2.0,2.0,225.0,4.98,Asheville,35.56967,-82.63193,1.5,0
1,22119778,https://www.airbnb.com/rooms/22119778,https://a0.muscache.com/pictures/bac6ce5d-d2ff...,Large king suite with private balcony and soak...,Whether you're looking for a romantic Ashevill...,Private room in bed and breakfast,Private room,3,1,2.0,2.0,306.0,5.0,Asheville,35.60284,-82.56727,1.0,0
2,47812966,https://www.airbnb.com/rooms/47812966,https://a0.muscache.com/pictures/324713f3-ea1c...,Blue Ridge Magic: Artist's Hideaway (Brand New),New listing: Blue Ridge Magic named for our pa...,Entire rental unit,Entire home/apt,4,25,1.0,2.0,108.0,5.0,Asheville,35.58475,-82.57182,1.0,0
3,46284932,https://www.airbnb.com/rooms/46284932,https://a0.muscache.com/pictures/miso/Hosting-...,Simple and Cozy Bedroom in Central Area,Looking for a simple bedroom and bathroom to r...,Private room in bungalow,Private room,2,3,1.0,1.0,70.0,5.0,Asheville,35.5787,-82.61582,1.0,0
4,48366092,https://www.airbnb.com/rooms/48366092,https://a0.muscache.com/pictures/prohost-api/H...,"Hot tub, Fire pit, 5 miles to downtown Asheville","Beautiful 2 bedroom oasis. Custom built, fully...",Entire cottage,Entire home/apt,4,143,2.0,2.0,114.0,4.92,Asheville,35.55106,-82.51424,1.0,0


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28923 entries, 0 to 28922
Data columns (total 18 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   id                    28923 non-null  int64  
 1   listing_url           28923 non-null  object 
 2   picture_url           28923 non-null  object 
 3   name                  28923 non-null  object 
 4   description           28923 non-null  object 
 5   property_type         28923 non-null  object 
 6   room_type             28923 non-null  object 
 7   accommodates          28923 non-null  int64  
 8   number_of_reviews     28923 non-null  int64  
 9   bedrooms              28923 non-null  float64
 10  beds                  28923 non-null  float64
 11  price                 28923 non-null  float64
 12  review_scores_rating  28923 non-null  float64
 13  city                  28923 non-null  object 
 14  latitude              28923 non-null  float64
 15  longitude          

In [4]:
raw_df = pd.read_csv('listings_raw.csv', low_memory=False)

In [5]:
df = df.merge(raw_df[['id', 'amenities']], on='id')
df.head()

Unnamed: 0,id,listing_url,picture_url,name,description,property_type,room_type,accommodates,number_of_reviews,bedrooms,beds,price,review_scores_rating,city,latitude,longitude,bathrooms,cluster,amenities
0,52438122,https://www.airbnb.com/rooms/52438122,https://a0.muscache.com/pictures/miso/Hosting-...,Stunning New Cottage minutes to downtown Ashev...,Come relax in this brand new cottage with tast...,Entire cottage,Entire home/apt,4,63,2.0,2.0,225.0,4.98,Asheville,35.56967,-82.63193,1.5,0,"[""Puracy Natural Body Wash. Gentle, plant-base..."
1,22119778,https://www.airbnb.com/rooms/22119778,https://a0.muscache.com/pictures/bac6ce5d-d2ff...,Large king suite with private balcony and soak...,Whether you're looking for a romantic Ashevill...,Private room in bed and breakfast,Private room,3,1,2.0,2.0,306.0,5.0,Asheville,35.60284,-82.56727,1.0,0,"[""Shampoo"", ""Hair dryer"", ""Extra pillows and b..."
2,47812966,https://www.airbnb.com/rooms/47812966,https://a0.muscache.com/pictures/324713f3-ea1c...,Blue Ridge Magic: Artist's Hideaway (Brand New),New listing: Blue Ridge Magic named for our pa...,Entire rental unit,Entire home/apt,4,25,1.0,2.0,108.0,5.0,Asheville,35.58475,-82.57182,1.0,0,"[""Shampoo"", ""Private backyard \u2013 Not fully..."
3,46284932,https://www.airbnb.com/rooms/46284932,https://a0.muscache.com/pictures/miso/Hosting-...,Simple and Cozy Bedroom in Central Area,Looking for a simple bedroom and bathroom to r...,Private room in bungalow,Private room,2,3,1.0,1.0,70.0,5.0,Asheville,35.5787,-82.61582,1.0,0,"[""Hair dryer"", ""Cleaning products"", ""Extra pil..."
4,48366092,https://www.airbnb.com/rooms/48366092,https://a0.muscache.com/pictures/prohost-api/H...,"Hot tub, Fire pit, 5 miles to downtown Asheville","Beautiful 2 bedroom oasis. Custom built, fully...",Entire cottage,Entire home/apt,4,143,2.0,2.0,114.0,4.92,Asheville,35.55106,-82.51424,1.0,0,"[""Shampoo"", ""Coffee"", ""Private backyard \u2013..."


In [6]:
# Assuming df['amenities'] is your Series
str_list = df['amenities']

# Convert string representation of list to list
list_data = str_list.apply(lambda x: ast.literal_eval(x))

# Apply one-hot encoding
mlb = MultiLabelBinarizer()
res = pd.DataFrame(mlb.fit_transform(list_data), columns=mlb.classes_, index=list_data.index)

In [7]:
df_encoded = pd.get_dummies(df, columns=['amenities'])

In [8]:
res

Unnamed: 0,\t Brookstone® Big Blue Portable Bluetooth® Go Speaker sound system with Bluetooth and aux,Various conditioner,6-burner GE Monogram stovetop stainless steel electric stove,Bose sound system with Bluetooth and aux,Dr Teals body soap,Ginger Lily Farms conditioner,Miscellaneous Brands conditioner,Refresh Citrus with Vitamin C body soap,Tea Tree Tingle conditioner,Vizio sound system with Bluetooth and aux,...,其他 stainless steel gas stove,国产 refrigerator,国产 stainless steel oven,房间有冰箱 refrigerator,普通 conditioner,普通 refrigerator,普通 shampoo,海飞丝 shampoo,美国生产 body soap,美国生产 shampoo
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28918,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
28919,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
28920,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
28921,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [9]:
def cosine_sim(vect1,vect2):
  norm_1 = np.linalg.norm(vect1)
  norm_2 = np.linalg.norm(vect2)

  cos_sim = (vect1 @ vect2) / (norm_1 * norm_2)
  return cos_sim

In [10]:
df

Unnamed: 0,id,listing_url,picture_url,name,description,property_type,room_type,accommodates,number_of_reviews,bedrooms,beds,price,review_scores_rating,city,latitude,longitude,bathrooms,cluster,amenities
0,52438122,https://www.airbnb.com/rooms/52438122,https://a0.muscache.com/pictures/miso/Hosting-...,Stunning New Cottage minutes to downtown Ashev...,Come relax in this brand new cottage with tast...,Entire cottage,Entire home/apt,4,63,2.0,2.0,225.0,4.98,Asheville,35.56967,-82.63193,1.5,0,"[""Puracy Natural Body Wash. Gentle, plant-base..."
1,22119778,https://www.airbnb.com/rooms/22119778,https://a0.muscache.com/pictures/bac6ce5d-d2ff...,Large king suite with private balcony and soak...,Whether you're looking for a romantic Ashevill...,Private room in bed and breakfast,Private room,3,1,2.0,2.0,306.0,5.00,Asheville,35.60284,-82.56727,1.0,0,"[""Shampoo"", ""Hair dryer"", ""Extra pillows and b..."
2,47812966,https://www.airbnb.com/rooms/47812966,https://a0.muscache.com/pictures/324713f3-ea1c...,Blue Ridge Magic: Artist's Hideaway (Brand New),New listing: Blue Ridge Magic named for our pa...,Entire rental unit,Entire home/apt,4,25,1.0,2.0,108.0,5.00,Asheville,35.58475,-82.57182,1.0,0,"[""Shampoo"", ""Private backyard \u2013 Not fully..."
3,46284932,https://www.airbnb.com/rooms/46284932,https://a0.muscache.com/pictures/miso/Hosting-...,Simple and Cozy Bedroom in Central Area,Looking for a simple bedroom and bathroom to r...,Private room in bungalow,Private room,2,3,1.0,1.0,70.0,5.00,Asheville,35.57870,-82.61582,1.0,0,"[""Hair dryer"", ""Cleaning products"", ""Extra pil..."
4,48366092,https://www.airbnb.com/rooms/48366092,https://a0.muscache.com/pictures/prohost-api/H...,"Hot tub, Fire pit, 5 miles to downtown Asheville","Beautiful 2 bedroom oasis. Custom built, fully...",Entire cottage,Entire home/apt,4,143,2.0,2.0,114.0,4.92,Asheville,35.55106,-82.51424,1.0,0,"[""Shampoo"", ""Coffee"", ""Private backyard \u2013..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28918,27775041,https://www.airbnb.com/rooms/27775041,https://a0.muscache.com/pictures/a843d538-6891...,All Bunked Up! New bathroom & washer/dryer!,This is unbelievably DC! Oldest house in Chevy...,Private room in home,Private room,3,112,1.0,3.0,49.0,4.91,Washington DC,38.96679,-77.07738,1.0,0,"[""Central air conditioning"", ""Record player"", ..."
28919,15248451,https://www.airbnb.com/rooms/15248451,https://a0.muscache.com/pictures/6c3a8c0f-121b...,2BR/1BA Capitol Hill Apt (Potomac Ave Metro)!,We are back after renting to renovating neighb...,Entire rental unit,Entire home/apt,4,42,2.0,2.0,112.0,5.00,Washington DC,38.88408,-76.98483,1.0,0,"[""Bathtub"", ""Air conditioning"", ""Private entra..."
28920,17248804,https://www.airbnb.com/rooms/17248804,https://a0.muscache.com/pictures/bdcf71be-560a...,203【Private Room - Queen bed in CoHi DC!】,Looking to explore DC and live comfortably on ...,Private room in townhouse,Private room,2,97,1.0,1.0,141.0,4.37,Washington DC,38.92742,-77.02941,2.0,0,"[""Paid parking off premises"", ""Central air con..."
28921,30775571,https://www.airbnb.com/rooms/30775571,https://a0.muscache.com/pictures/ade48059-2136...,Ultra Chic & Modern Garden Condo by Union Station,Located in Judiciary Square with spectacular v...,Entire serviced apartment,Entire home/apt,8,124,2.0,3.0,310.0,4.89,Washington DC,38.89994,-77.01103,2.0,1,"[""Bathtub"", ""Paid parking off premises"", ""Sing..."


In [11]:
tfidf_vectorizer1 = TfidfVectorizer(stop_words='english')
tfidf_matrix1 = tfidf_vectorizer1.fit_transform(df['description'])

tfidf_vectorizer2 = TfidfVectorizer(stop_words='english')
tfidf_matrix2 = tfidf_vectorizer2.fit_transform(df['city'])

tfidf_matrix_combined = hstack([tfidf_matrix1, tfidf_matrix2])

In [12]:
scaler = MinMaxScaler()
beds_scaled = scaler.fit_transform(df['beds'].values.reshape(-1, 1))

# Add to the feature matrix
feature_matrix = hstack([tfidf_matrix_combined, beds_scaled])

In [13]:
df['Stunning New Cottage minutes to downtown Asheville'].drop(index='Stunning New Cottage minutes to downtown Asheville').sort_values(ascending=False).iloc[:5]

KeyError: 'Stunning New Cottage minutes to downtown Asheville'

In [32]:
def sorting(name):
  tmp = df[name].drop(index=name).sort_values(ascending=False).iloc[:5]
  print(f'You like {name}, so based on our recommender system, We recommend you to stay in:')
  for i,name in enumerate(tmp.index):
    print(f'{i+1}. {name}')

In [34]:
sorting('Cozy Escape in the Mountains')

You like Cozy Escape in the Mountains, so based on our recommender system, We recommend you to stay in:
1. Sweet North Asheville Home Close to Downtown
2. Cozy 1 Bedroom
3. Clean Contemporary House Minutes from Downtown
4. Guest Suite of New,Modern House 5 min to Downtown
5. Downtown Inn Asheville, NC #10-Double
