# 1- What are the aspects of a listing that best correlate to price ?

# 2- What are the aspects of a listing that best correlate to availabilty (lack of bookings), and if found (those aspects), do they necessarily correlate  with fully booked listings ?

# 3- What are the aspects of a listing that best correlate to a positive review, or a negative one ?


In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from bs4 import BeautifulSoup
import urllib


In [2]:
Boston_calendar = pd.read_csv('Boston_calendar.csv')
Boston_listings = pd.read_csv('Boston_listings.csv')
Boston_reviews = pd.read_csv('Boston_reviews.csv')

In [3]:
Boston_reviews[Boston_reviews['listing_id']==3781]

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments
0,3781,37776825,2015-07-10,36059247,Greg,The apartment was as advertised and Frank was ...
1,3781,41842494,2015-08-09,10459388,Tai,It was a pleasure to stay at Frank's place. Th...
2,3781,45282151,2015-09-01,12264652,Damien,The apartment description is entirely faithful...
3,3781,49022647,2015-09-30,41426327,Mike,Thoroughly enjoyed my time at Frank's home. Ha...
4,3781,52503327,2015-10-30,15151513,Ivan,Great value for the money! This location has e...
5,3781,63371931,2016-02-22,55456499,John-Mark,Frank was very accommodating throughout my sta...
6,3781,67909504,2016-04-01,55456499,John-Mark,Excellent! This was my second stay at the Jeff...
7,3781,97906006,2016-08-28,88528884,Elisa,Frank was a wonderful and accommodating host. ...
8,3781,105143774,2016-09-30,1342806,Nicole,"Frank was great, the apartment has everything ..."
9,3781,129692749,2017-02-01,55126634,Carlos,Very nice. Comfortable apartment. Good locatio...


In [4]:
ol = Boston_calendar[Boston_calendar['listing_id']==3168]
(ol['available']=='t').mean()

0.7890410958904109

In [5]:
ol[ol['available']=='t']

Unnamed: 0,listing_id,date,available,price,adjusted_price,minimum_nights,maximum_nights
1354592,3168,2022-12-01,t,$99.00,$99.00,91,730
1354593,3168,2022-12-02,t,$99.00,$99.00,91,730
1354594,3168,2022-12-03,t,$99.00,$99.00,91,730
1354595,3168,2022-12-04,t,$99.00,$99.00,91,730
1354596,3168,2022-12-05,t,$99.00,$99.00,91,730
...,...,...,...,...,...,...,...
1354875,3168,2023-09-10,t,$99.00,$99.00,91,730
1354876,3168,2023-09-11,t,$99.00,$99.00,91,730
1354877,3168,2023-09-12,t,$99.00,$99.00,91,730
1354878,3168,2023-09-13,t,$99.00,$99.00,91,730


In [6]:
# Boston_calendar['price'] = Boston_calendar['price'].str.strip('$').str.replace(',', '').astype('float')

# Boston_calendar['adjusted_price'] = Boston_calendar['adjusted_price'].str.strip('$').str.replace(',', '').astype('float')


In [7]:
# ol = Boston_calendar[Boston_calendar['listing_id']==3781]

In [8]:
# ol[ol['available']=='f'].mean()

### After a quick data exploration, we need to drop a couple of columns, which are:
> 1- Columns that are all nulls <br>
2- Personal info (Id) <br>
3- Non-relevant info (urls) <br>
4- Meta-data (source) 

In [9]:
# Boston_listings.drop(columns=['id','listing_url','scrape_id','last_scraped','source','picture_url',# ids & urls & meta
#                              'host_id', 'host_url','host_thumbnail_url','host_picture_url',# id & urls
#                              'calendar_updated','bathrooms','neighbourhood_group_cleansed',# all_nan
#                              'neighbourhood', 'neighborhood_overview'],
#                     inplace=True)


> The *neighbourhood* column does not have a lot of information, because 2255 instances are **Boston, Massachusetts, United States**, which does not specify the exact region of a listing, unfortunatley *neighborhood_overview* dependes on *neighbourhood*, not to mention the missing values, dropping the two columns seems fine. <br><br> An alternative to the *neighbourhood* column would be *neighbourhood_cleansed*. 

In [10]:
Boston_listings.drop(columns=['scrape_id','last_scraped','source','picture_url',# ids & urls & meta
                             'host_id', 'host_url','host_thumbnail_url','host_picture_url',# id & urls
                             'calendar_updated','bathrooms','neighbourhood_group_cleansed',# all_nan
                             'neighbourhood', 'neighborhood_overview'],
                    inplace=True)


In [11]:
Boston_listings.drop(columns=['neighbourhood_cleansed','host_location','host_response_time'],
                    inplace=True)

# one_hot = pd.get_dummies(Boston_listings['neighbourhood_cleansed'])

# Boston_listings = Boston_listings.drop('neighbourhood_cleansed',axis = 1)

# Boston_listings = Boston_listings.join(one_hot)

# =================================================================

# one_hot = pd.get_dummies(Boston_listings['host_location'], dummy_na=True, prefix='host_location')

# Boston_listings = Boston_listings.drop('host_location',axis = 1)

# Boston_listings = Boston_listings.join(one_hot)

# # =================================================================


# one_hot = pd.get_dummies(Boston_listings['host_response_time'], dummy_na=True, , prefix='host_response_time')

# Boston_listings = Boston_listings.drop('host_response_time',axis = 1)

# Boston_listings = Boston_listings.join(one_hot)

# =================================================================


# Boston_listings['host_response_rate'] = Boston_listings['host_response_rate'].str.rstrip('%').astype('float')


# =================================================================



Boston_listings['price'] = Boston_listings['price'].str.strip('$').str.replace(',', '').astype('float')


In [12]:
Boston_listings[Boston_listings['bedrooms'].isnull()]['listing_url']

0                     https://www.airbnb.com/rooms/3168
3                     https://www.airbnb.com/rooms/6695
7                    https://www.airbnb.com/rooms/10813
8                    https://www.airbnb.com/rooms/10986
34                  https://www.airbnb.com/rooms/210097
                             ...                       
5137    https://www.airbnb.com/rooms/708066864505175780
5158    https://www.airbnb.com/rooms/711804721312473870
5159    https://www.airbnb.com/rooms/712092718787212242
5174    https://www.airbnb.com/rooms/714906239224334877
5176    https://www.airbnb.com/rooms/715658190467254169
Name: listing_url, Length: 559, dtype: object

In [13]:
Boston_listings['bedrooms'].isnull().mean()

0.10781099324975892

In [14]:
bedrooms_mode = Boston_listings['bedrooms'].mode()

Boston_listings = Boston_listings.fillna({'bedrooms':0})

In [15]:
Boston_listings['bedrooms'].isnull().mean()

0.0

In [16]:
# Boston_listings.head(n=1)

In [17]:
# #<span class="ll4r2nl dir dir-ltr">100%</span>
# from selenium import webdriver
# import time
# from bs4 import BeautifulSoup
# import re
# import requests




# for iteration, (ind, row) in enumerate(Boston_listings[Boston_listings['host_response_rate'].isnull()].iterrows()):
    
#     print('iteration: ', iteration)
    
#     url = row['listing_url']
#     response = requests.get(url)
#     if response.status_code != 200:
#         continue
        
#     driver = webdriver.Chrome()
#     driver.get(url)

#     time.sleep(20)
    
#     content = driver.page_source.encode('utf-8').strip()
#     driver.quit() 
#     soup = BeautifulSoup(content,"html.parser")
    
        
        
#     officials = soup.findAll("div",{"class":"_1k8vduze"}) 
#     res_rate = re.findall(r'(?:\d+%)|(?:\d+\.\d+%)', str(officials))
#     if len(res_rate) == 0 :
#         continue
        
#     res_time =  str(officials).split('ul>')[0].split('li>')[-2].split('span>')[0].split('>')[2].split('<')[0].strip()

#     Boston_listings.at[ind, 'host_response_rate'] = float(res_rate[0][:-1])
#     Boston_listings.at[ind, 'host_response_time'] = res_time
#     print(iteration, "\t", ind, "\t", url)



In [18]:
Boston_listings = Boston_listings.fillna({'description':'', 'host_about':''})

In [19]:
Boston_listings[Boston_listings['description'] == '']

Unnamed: 0,id,listing_url,name,description,host_name,host_since,host_about,host_response_rate,host_acceptance_rate,host_is_superhost,...,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
1816,29860624,https://www.airbnb.com/rooms/29860624,Roxbury living,,Gavin,2014-03-13,30 year old High School Science teacher.,,,f,...,,,,,f,1,0,1,0,
1877,30964172,https://www.airbnb.com/rooms/30964172,Spacious duplex in historic Charlestown w/ par...,,Teona,2014-02-24,Hi there! I am a publicist in Manhattan workin...,,,f,...,,,,,f,1,1,0,0,
1883,31140376,https://www.airbnb.com/rooms/31140376,Spacious Room 5 min walk to T 10 mins to BC,,Mo,2018-07-06,,,,f,...,5.0,5.0,4.0,,f,5,0,5,0,0.07
1932,32366109,https://www.airbnb.com/rooms/32366109,Wonderful apartment located in the heart of Bo...,,Kondy,2019-02-13,,,,f,...,,,,,f,1,1,0,0,
2012,33487000,https://www.airbnb.com/rooms/33487000,Large Room,,Mo,2018-07-06,,,,f,...,5.0,5.0,5.0,,f,5,0,5,0,0.05
2100,35579707,https://www.airbnb.com/rooms/35579707,The blue room,,Fehr,2018-05-24,,,,f,...,5.0,5.0,5.0,,f,2,0,2,0,0.08
2115,35807024,https://www.airbnb.com/rooms/35807024,Spacious 1 bedroom apartment on commonwealth ave,,Alekya,2016-08-16,,,,f,...,,,,,f,1,1,0,0,
2177,36687285,https://www.airbnb.com/rooms/36687285,Private condo great for work trips,,Janice,2011-03-25,I'm an independent consultant who travels as o...,,,f,...,5.0,5.0,5.0,,f,1,1,0,0,0.08
2221,37749165,https://www.airbnb.com/rooms/37749165,LUxury condo in downtown Boston best location,,Karl,2019-07-13,,0%,,f,...,,,,,t,2,2,0,0,
2269,38388360,https://www.airbnb.com/rooms/38388360,Large room 5 mins to greenT 10 mins to BC,,Mo,2018-07-06,,,,f,...,,,,,f,5,0,5,0,


> For host_acceptance_rate null values, most of the actual listings either <br>
1- have been inactive for a couple of years. <br>
2- have very little reviews (i.e. they just started listing) <br>
a possible solution would to subtitute those null values with zero due to the defintion of host_acceptance_rate form the airbnb website: <br><br>
*Your acceptance rate measures how often you accept or decline reservations. Guest inquiries are not included in the calculation of your acceptance rate. You can see your acceptance rate from the last **365** days by clicking on the Performance tab, then clicking Basic Requirements.*  <br><br>
Also there is a way to verify what we just did, which is to check host_is_superhost for those missing values, because you can't be a superhost unless you satisfy those three requirements: <br>
1- Completed at least 10 trips or 3 reservations that total at least 100 nights. <br>
2- Maintained a 90% response rate or higher. <br>
3- Maintained a less than 1% cancellation rate, with exceptions made for those that fall under our Extenuating Circumstances policy. <br>



In [33]:
Boston_listings['host_response_rate'].unique()

array([nan, '100%', '90%', '94%', '40%', '83%', '92%', '70%', '50%', '0%',
       '60%', '75%', '80%', '98%', '97%', '91%', '86%', '10%', '99%',
       '88%', '33%', '93%', '95%', '56%', '89%', '87%', '57%', '20%',
       '96%', '67%', '84%', '72%', '81%', '13%', '79%', '25%', '76%',
       '65%', '82%', '71%', '64%'], dtype=object)

In [55]:
# df_and = df[(df['age'] < 35) & ~(df['state'] == 'NY')]

Boston_listings[(Boston_listings['host_response_rate'].isnull()) & (Boston_listings['host_is_superhost']=='t')].fillna({'host_response_rate':'90%'}, inplace=True)

Unnamed: 0,id,listing_url,name,description,host_name,host_since,host_about,host_response_rate,host_acceptance_rate,host_is_superhost,...,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
150,1711476,https://www.airbnb.com/rooms/1711476,"Serene & cozy 1BR oasis, walk to Harvard Square!",Tastefully decorated apartment on tree-lined s...,Case,2012-08-28,I'm an artist/educator splitting my time betwe...,0%,100%,t,...,4.92,4.89,4.71,,f,2,2,0,0,0.83
354,4995033,https://www.airbnb.com/rooms/4995033,Modern Beacon Hill 2 Bedroom,Beautiful newly renovated apartment. Full kitc...,Natasha,2014-01-11,I am originally from London. I am a nurse prac...,0%,76%,t,...,4.98,4.99,4.87,STR-411167,f,1,1,0,0,1.34
379,5581575,https://www.airbnb.com/rooms/5581575,Private Entrance * King Beds! * Kid Friendly,Maintain your personal space with a separate e...,Kathy,2015-03-04,I started out as an AirBnB guest and am now a ...,0%,100%,t,...,4.99,4.96,4.79,STR-400453,t,1,1,0,0,2.72
528,7729257,https://www.airbnb.com/rooms/7729257,Big sunny artist's 1BR near Harvard,Beautifully decorated apartment on tree-lined ...,Case,2012-08-28,I'm an artist/educator splitting my time betwe...,0%,100%,t,...,4.92,4.9,4.63,,f,2,2,0,0,0.72
741,12316683,https://www.airbnb.com/rooms/12316683,Luxury 2 bdr apartment Brookline Coolidge Corner.,Lots of natural light and great views. <br />P...,Boris,2011-11-12,Boston,0%,67%,t,...,5.0,5.0,4.83,,f,1,1,0,0,0.09
1042,17209703,https://www.airbnb.com/rooms/17209703,West Cambridge 1 BR entire apartment near Harvard,Ground floor fully furnished one-bedroom apart...,Anatole,2017-02-10,After retiring from hi-tech and telecommunicat...,0%,100%,t,...,5.0,4.83,4.9,STR-17697,t,1,1,0,0,0.44
1436,23109832,https://www.airbnb.com/rooms/23109832,Jamaica Plain Gem!,Elegant yet casual and inviting. Comfortable ...,Anna,2018-02-04,This is lovely welcoming home!\nIts furnishing...,0%,100%,t,...,5.0,5.0,5.0,,f,1,1,0,0,0.13
1540,24348595,https://www.airbnb.com/rooms/24348595,UNION SQ historic character with modern amenities,Fully renovated top floor of an 1871 Mansard V...,Robin,2016-07-04,,0%,100%,t,...,4.83,5.0,4.83,,f,1,1,0,0,0.17
1624,26005227,https://www.airbnb.com/rooms/26005227,Full Condo: Heart of the South End!,Update: condo has been repainted. New enormou...,Julie,2011-03-09,I’m Julie and my condo is in the most perfect ...,0%,100%,t,...,4.91,4.94,4.79,STR-420982,f,1,1,0,0,0.96
1666,26634032,https://www.airbnb.com/rooms/26634032,Experienced Host- 2 Bedroom Charm Close to MIT,"First floor of a charming, historic mansard in...",Caroline And David,2014-10-21,"Architect husband, photographer wife, three ki...",0%,100%,t,...,5.0,4.92,4.96,STR-136028,f,1,1,0,0,2.2


In [35]:
(Boston_listings[Boston_listings['host_response_rate'].isnull()]['host_is_superhost']=='f').mean()

0.9770491803278688

In [36]:
(Boston_listings[Boston_listings['host_response_rate'].isnull()]['host_is_superhost'].isnull()).mean()

0.001639344262295082

In [32]:
Boston_listings = Boston_listings.fillna({'host_acceptance_rate':'0%'})

> for host_is_superhost, we have th

In [27]:
Boston_listings[Boston_listings['host_is_superhost'].isnull()]

Unnamed: 0,id,listing_url,name,description,host_name,host_since,host_about,host_response_rate,host_acceptance_rate,host_is_superhost,...,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
2485,41740613,https://www.airbnb.com/rooms/41740613,The Revolution Hotel,,The Revolution Hotel,2018-12-03,,100%,74%,,...,,,,,f,1,0,0,0,
2515,42065556,https://www.airbnb.com/rooms/42065556,Inn @ St. Botolph,,Inn @,2016-12-14,Enjoy contemporary charm and outstanding value...,,0%,,...,4.67,4.56,4.5,,f,1,0,0,0,0.87
2836,46232976,https://www.airbnb.com/rooms/46232976,citizenM Boston North Station,,CitizenM,2020-11-04,You’ll find us right around the corner from th...,100%,74%,,...,,,,,f,1,0,0,0,


In [None]:
# pd.pandas.set_option('display.max_rows', None)

# pd.reset_option('all', silent=True)

In [25]:

from selenium import webdriver
import time
from bs4 import BeautifulSoup


driver = webdriver.Chrome()
url= "https://www.airbnb.com/rooms/10813?_set_bev_on_new_domain=1672487471_YmRkYzRjZmE4OTI5&source_impression_id=p3_1673713643_DcJa9orfz3w1CQku&locale=en"
driver.maximize_window()
driver.get(url)

time.sleep(5)
content = driver.page_source.encode('utf-8').strip()
soup = BeautifulSoup(content,"html.parser")
officials = soup.findAll("span",{"class":"_9xiloll"})

for entry in officials:
    print(str(entry))


driver.quit()

<span aria-hidden="false" class="_9xiloll">Boston, Massachusetts, United States</span>


> The *listing url* of the null values of the column `bedrooms` suggest that those nulls mean *zero* or *no actuall bedroom*.

In [8]:
5185 - 4626

559

In [12]:
print(Boston_listings[Boston_listings['bedrooms'].isnull()]['beds'].isnull().mean())
len(Boston_listings[Boston_listings['bedrooms'].isnull()]['beds']) * Boston_listings[Boston_listings['bedrooms'].isnull()]['beds'].isnull().mean()



0.023255813953488372


13.0

In [5]:
Boston_listings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5185 entries, 0 to 5184
Data columns (total 75 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   id                                            5185 non-null   int64  
 1   listing_url                                   5185 non-null   object 
 2   scrape_id                                     5185 non-null   int64  
 3   last_scraped                                  5185 non-null   object 
 4   source                                        5185 non-null   object 
 5   name                                          5185 non-null   object 
 6   description                                   5140 non-null   object 
 7   neighborhood_overview                         3435 non-null   object 
 8   picture_url                                   5185 non-null   object 
 9   host_id                                       5185 non-null   i

In [None]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import FunctionTransformer
from sklearn.model_selection import GridSearchCV, StratifiedKFold

# host_about, description

# Create Function Transformer to use Feature Union
def get_numeric_data(x):
    return [record[:-2].astype(float) for record in x]

def get_text_data(x):
    return [record[-1] for record in x]

transfomer_numeric = FunctionTransformer(get_numeric_data)
transformer_text = FunctionTransformer(get_text_data)

# Create a pipeline to concatenate Tfidf Vector and Numeric data
# Use RandomForestClassifier as an example
pipeline = Pipeline([
    ('features', FeatureUnion([
            ('numeric_features', Pipeline([
                ('selector', transfomer_numeric)
            ])),
             ('text_features', Pipeline([
                ('selector', transformer_text),
                ('vec', TfidfVectorizer(analyzer='word'))
            ]))
         ])),
    ('clf', RandomForestClassifier())
])

# Grid Search Parameters for RandomForest
param_grid = {'clf__n_estimators': np.linspace(1, 100, 10, dtype=int),
              'clf__min_samples_split': [3, 10],
              'clf__min_samples_leaf': [3],
              'clf__max_features': [7],
              'clf__max_depth': [None],
              'clf__criterion': ['gini'],
              'clf__bootstrap': [False]}

# Training config
kfold = StratifiedKFold(n_splits=7)
scoring = {'Accuracy': 'accuracy', 'F1': 'f1_macro'}
refit = 'F1'

# Perform GridSearch
rf_model = GridSearchCV(pipeline, param_grid=param_grid, cv=kfold, scoring=scoring, 
                         refit=refit, n_jobs=-1, return_train_score=True, verbose=1)
rf_model.fit(X_train, Y_train)
rf_best = rf_model.best_estimator_

In [3]:
Boston_listings

Unnamed: 0,id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,...,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,3168,https://www.airbnb.com/rooms/3168,20220915162158,2022-09-15,city scrape,TudorStudio,"The ""Studio at 14 Weldon"" is located in Newton...","Newton has 13 unique villages, and gives off a...",https://a0.muscache.com/pictures/ff7952dc-ef0b...,3697,...,,,,,f,1,0,1,0,
1,3781,https://www.airbnb.com/rooms/3781,20220915162158,2022-09-15,city scrape,HARBORSIDE-Walk to subway,Fully separate apartment in a two apartment bu...,"Mostly quiet ( no loud music, no crowed sidewa...",https://a0.muscache.com/pictures/24670/b2de044...,4804,...,4.96,4.87,4.91,,f,1,1,0,0,0.26
2,5506,https://www.airbnb.com/rooms/5506,20220915162158,2022-09-15,city scrape,** Fort Hill Inn Private! Minutes to center!**,"Private guest room with private bath, You do n...","Peaceful, Architecturally interesting, histori...",https://a0.muscache.com/pictures/miso/Hosting-...,8229,...,4.89,4.54,4.73,Approved by the government,f,10,10,0,0,0.69
3,6695,https://www.airbnb.com/rooms/6695,20220915162158,2022-09-15,city scrape,"Fort Hill Inn *Sunny* 1 bedroom, condo duplex","Comfortable, Fully Equipped private apartment...","Peaceful, Architecturally interesting, histori...",https://a0.muscache.com/pictures/38ac4797-e7a4...,8229,...,4.95,4.50,4.71,STR446650,f,10,10,0,0,0.75
4,7903,https://www.airbnb.com/rooms/7903,20220915162158,2022-09-15,city scrape,"Colorful, modern 2 BR apt shared with host",I'm a high school teacher and frequent travele...,"The apartment is in Somerville, located direct...",https://a0.muscache.com/pictures/miso/Hosting-...,14169,...,4.95,4.56,4.80,,f,1,0,1,0,1.84
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5180,716081443145047239,https://www.airbnb.com/rooms/716081443145047239,20220915162158,2022-09-15,city scrape,Private Room with Shared Bath in Quiet Street,*Please Note: You are booking a private room i...,South Boston is a very large neighborhood comp...,https://a0.muscache.com/pictures/prohost-api/H...,2356643,...,,,,STR-460218,f,71,25,46,0,
5181,716081469166085329,https://www.airbnb.com/rooms/716081469166085329,20220915162158,2022-09-15,city scrape,Cozy Bedroom in Convenient Downtown Location,*Please Note: You are booking a private room i...,South Boston is a very large neighborhood comp...,https://a0.muscache.com/pictures/prohost-api/H...,2356643,...,,,,STR-460218,f,71,25,46,0,
5182,716081495310456299,https://www.airbnb.com/rooms/716081495310456299,20220915162158,2022-09-15,city scrape,"Peaceful Bedroom w/ Shared Bath - AC, Wifi inc...",*Please Note: You are booking a private room i...,South Boston is a very large neighborhood comp...,https://a0.muscache.com/pictures/prohost-api/H...,2356643,...,,,,STR-460218,f,71,25,46,0,
5183,716235197792512391,https://www.airbnb.com/rooms/716235197792512391,20220915162158,2022-09-15,city scrape,Sunny Room w/ Shared Bath in Modest Brighton Home,"Perfect for Hospital Stays, Medical Students, ...",The apartment is located in a walkable neighbo...,https://a0.muscache.com/pictures/prohost-api/H...,2356643,...,,,,STR-484106,t,71,25,46,0,


In [4]:
Boston_listings['last_scraped'].unique()

array(['2022-09-15', '2022-10-02'], dtype=object)

In [9]:
Boston_listings['name'].unique()

array(['TudorStudio', 'HARBORSIDE-Walk to subway',
       '** Fort Hill Inn Private! Minutes to center!**', ...,
       'Peaceful Bedroom w/ Shared Bath - AC, Wifi included',
       'Sunny Room w/ Shared Bath in Modest Brighton Home',
       'Charming Room in Modern Shared Spacious Apt'], dtype=object)

In [6]:
Boston_listings['price']

0        $99.00
1       $132.00
2       $149.00
3       $179.00
4       $116.00
         ...   
5180     $51.00
5181     $51.00
5182     $51.00
5183     $51.00
5184     $51.00
Name: price, Length: 5185, dtype: object

In [7]:
Boston_listings['price'] = Boston_listings['price'].str.replace('$', '', regex=True)
Boston_listings['price'] = Boston_listings['price'].str.replace(',', '', regex=True)

In [8]:
Boston_listings = Boston_listings.astype({'price': 'float64'})

In [9]:
am_list = []
for i,amen in enumerate(Boston_listings['amenities']):
    temp_am = []
    for i,amenities in enumerate(amen.split('"')):
        if i % 2 == 1:
            temp_am.append(amenities)
    am.append(len(temp_am))
max(am), min(am), sum(am)/len(am)

NameError: name 'am' is not defined

In [None]:
Boston_listings[Boston_listings['amenities']=='["Long term stays allowed"]']

In [37]:
Boston_listings['amenities']

0       ["Dishes and silverware", "Long term stays all...
1       ["Bed linens", "Dishes and silverware", "Long ...
2       ["Bed linens", "Dishes and silverware", "Long ...
3       ["Dishes and silverware", "Long term stays all...
4       ["Bed linens", "Rice maker", "Dishes and silve...
                              ...                        
5180    ["Long term stays allowed", "Stove", "Wifi", "...
5181    ["Long term stays allowed", "Stove", "Wifi", "...
5182    ["Long term stays allowed", "Stove", "Wifi", "...
5183    ["Bed linens", "Dishes and silverware", "Long ...
5184    ["Bed linens", "Dishes and silverware", "Long ...
Name: amenities, Length: 5185, dtype: object