# 1- What are the aspects of a listing that best correlate to price ?

# 2- What are the aspects of a listing that best correlate to availabilty (lack of bookings), and if found (those aspects), do they necessarily correlate  with fully booked listings ?

# 3- What are the aspects of a listing that best correlate to a positive review, or a negative one ?


In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from bs4 import BeautifulSoup
import urllib


In [2]:
Boston_calendar = pd.read_csv('Boston_calendar.csv')
Boston_listings = pd.read_csv('Boston_listings.csv')
Boston_reviews = pd.read_csv('Boston_reviews.csv')
Boston_neighbourhoods = pd.read_csv('Boston_neighbourhoods.csv')

In [None]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import FunctionTransformer
from sklearn.model_selection import GridSearchCV, StratifiedKFold

# Create Function Transformer to use Feature Union
def get_numeric_data(x):
    return [record[:-2].astype(float) for record in x]

def get_text_data(x):
    return [record[-1] for record in x]

transfomer_numeric = FunctionTransformer(get_numeric_data)
transformer_text = FunctionTransformer(get_text_data)

# Create a pipeline to concatenate Tfidf Vector and Numeric data
# Use RandomForestClassifier as an example
pipeline = Pipeline([
    ('features', FeatureUnion([
            ('numeric_features', Pipeline([
                ('selector', transfomer_numeric)
            ])),
             ('text_features', Pipeline([
                ('selector', transformer_text),
                ('vec', TfidfVectorizer(analyzer='word'))
            ]))
         ])),
    ('clf', RandomForestClassifier())
])

# Grid Search Parameters for RandomForest
param_grid = {'clf__n_estimators': np.linspace(1, 100, 10, dtype=int),
              'clf__min_samples_split': [3, 10],
              'clf__min_samples_leaf': [3],
              'clf__max_features': [7],
              'clf__max_depth': [None],
              'clf__criterion': ['gini'],
              'clf__bootstrap': [False]}

# Training config
kfold = StratifiedKFold(n_splits=7)
scoring = {'Accuracy': 'accuracy', 'F1': 'f1_macro'}
refit = 'F1'

# Perform GridSearch
rf_model = GridSearchCV(pipeline, param_grid=param_grid, cv=kfold, scoring=scoring, 
                         refit=refit, n_jobs=-1, return_train_score=True, verbose=1)
rf_model.fit(X_train, Y_train)
rf_best = rf_model.best_estimator_

In [None]:
# Get one hot encoding of columns B
one_hot = pd.get_dummies(Boston_listings['neighbourhood_cleansed'])
# Drop column B as it is now encoded
Boston_listings = Boston_listings.drop('neighbourhood_cleansed',axis = 1)
# Join the encoded df
Boston_listings = Boston_listings.join(one_hot)

In [8]:
Boston_listings['neighborhood_overview']

0       Newton has 13 unique villages, and gives off a...
1       Mostly quiet ( no loud music, no crowed sidewa...
2       Peaceful, Architecturally interesting, histori...
3       Peaceful, Architecturally interesting, histori...
4       The apartment is in Somerville, located direct...
                              ...                        
5180    South Boston is a very large neighborhood comp...
5181    South Boston is a very large neighborhood comp...
5182    South Boston is a very large neighborhood comp...
5183    The apartment is located in a walkable neighbo...
5184                                                  NaN
Name: neighborhood_overview, Length: 5185, dtype: object

> The *neighbourhood* column does not have a lot of information, because 2255 instances are **Boston, Massachusetts, United States**, which does not specify the exact region of a listing, unfortunatley *neighborhood_overview* dependes on *neighbourhood*, not to mention the missing values, dropping the two columns seems fine. <br><br> An alternative to the *neighbourhood* column would be *neighbourhood_cleansed*. 

In [16]:
Boston_listings[Boston_listings['neighbourhood'] == 'Boston, Massachusetts, United States']['neighborhood_overview']

1       Mostly quiet ( no loud music, no crowed sidewa...
2       Peaceful, Architecturally interesting, histori...
3       Peaceful, Architecturally interesting, histori...
6       Beacon Hill is a historic neighborhood filled ...
7       Wander around this quintessential neighborhood...
                              ...                        
5179    South Boston is a very large neighborhood comp...
5180    South Boston is a very large neighborhood comp...
5181    South Boston is a very large neighborhood comp...
5182    South Boston is a very large neighborhood comp...
5183    The apartment is located in a walkable neighbo...
Name: neighborhood_overview, Length: 2255, dtype: object

In [17]:
Boston_listings['neighbourhood_cleansed'].value_counts()

Allston                    755
Dorchester                 457
Charlestown                429
Brighton                   421
Downtown                   354
East Boston                341
Roxbury                    296
Back Bay                   278
South End                  258
Fenway                     244
Jamaica Plain              225
Beacon Hill                161
South Boston               147
West End                   116
Mission Hill                93
North End                   91
Chinatown                   89
West Roxbury                74
Hyde Park                   65
Roslindale                  63
Longwood Medical Area       61
Bay Village                 52
Mattapan                    52
South Boston Waterfront     46
Harbor Islands              12
Leather District             5
Name: neighbourhood_cleansed, dtype: int64

In [14]:
Boston_listings[['listing_url', 'neighbourhood_cleansed', 'neighbourhood']].head(n=9)

Unnamed: 0,listing_url,neighbourhood_cleansed,neighbourhood
0,https://www.airbnb.com/rooms/3168,Brighton,"Newton, Massachusetts, United States"
1,https://www.airbnb.com/rooms/3781,East Boston,"Boston, Massachusetts, United States"
2,https://www.airbnb.com/rooms/5506,Roxbury,"Boston, Massachusetts, United States"
3,https://www.airbnb.com/rooms/6695,Roxbury,"Boston, Massachusetts, United States"
4,https://www.airbnb.com/rooms/7903,Charlestown,"Somerville, Massachusetts, United States"
5,https://www.airbnb.com/rooms/8521,Allston,"Cambridge, Massachusetts, United States"
6,https://www.airbnb.com/rooms/8789,Beacon Hill,"Boston, Massachusetts, United States"
7,https://www.airbnb.com/rooms/10813,Back Bay,"Boston, Massachusetts, United States"
8,https://www.airbnb.com/rooms/10986,North End,


In [21]:
Boston_listings['neighbourhood_cleansed']
Boston_listings[Boston_listings['neighbourhood'].isnull()]['neighbourhood_cleansed']

8         North End
10           Fenway
11           Fenway
22           Fenway
25      Charlestown
           ...     
5164        Roxbury
5167       Downtown
5168    Charlestown
5177      North End
5184        Roxbury
Name: neighbourhood_cleansed, Length: 1750, dtype: object

In [9]:
Boston_listings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5185 entries, 0 to 5184
Data columns (total 75 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   id                                            5185 non-null   int64  
 1   listing_url                                   5185 non-null   object 
 2   scrape_id                                     5185 non-null   int64  
 3   last_scraped                                  5185 non-null   object 
 4   source                                        5185 non-null   object 
 5   name                                          5185 non-null   object 
 6   description                                   5140 non-null   object 
 7   neighborhood_overview                         3435 non-null   object 
 8   picture_url                                   5185 non-null   object 
 9   host_id                                       5185 non-null   i

In [20]:
Boston_listings[['host_location','listing_url']]

Unnamed: 0,host_location,listing_url
0,"Boston, MA",https://www.airbnb.com/rooms/3168
1,"Massachusetts, United States",https://www.airbnb.com/rooms/3781
2,"Boston, MA",https://www.airbnb.com/rooms/5506
3,"Boston, MA",https://www.airbnb.com/rooms/6695
4,"Somerville, MA",https://www.airbnb.com/rooms/7903
...,...,...
5180,"Boston, MA",https://www.airbnb.com/rooms/716081443145047239
5181,"Boston, MA",https://www.airbnb.com/rooms/716081469166085329
5182,"Boston, MA",https://www.airbnb.com/rooms/716081495310456299
5183,"Boston, MA",https://www.airbnb.com/rooms/716235197792512391


In [19]:
Boston_listings[Boston_listings['host_location'].isnull()]['listing_url']

7                    https://www.airbnb.com/rooms/10813
8                    https://www.airbnb.com/rooms/10986
57                  https://www.airbnb.com/rooms/611081
66                  https://www.airbnb.com/rooms/743759
96                 https://www.airbnb.com/rooms/1077105
                             ...                       
5130    https://www.airbnb.com/rooms/706689347742891165
5158    https://www.airbnb.com/rooms/711804721312473870
5164    https://www.airbnb.com/rooms/712856955049497663
5165    https://www.airbnb.com/rooms/712953753793616795
5168    https://www.airbnb.com/rooms/713937638206317229
Name: listing_url, Length: 700, dtype: object

In [3]:
Boston_listings

Unnamed: 0,id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,...,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,3168,https://www.airbnb.com/rooms/3168,20220915162158,2022-09-15,city scrape,TudorStudio,"The ""Studio at 14 Weldon"" is located in Newton...","Newton has 13 unique villages, and gives off a...",https://a0.muscache.com/pictures/ff7952dc-ef0b...,3697,...,,,,,f,1,0,1,0,
1,3781,https://www.airbnb.com/rooms/3781,20220915162158,2022-09-15,city scrape,HARBORSIDE-Walk to subway,Fully separate apartment in a two apartment bu...,"Mostly quiet ( no loud music, no crowed sidewa...",https://a0.muscache.com/pictures/24670/b2de044...,4804,...,4.96,4.87,4.91,,f,1,1,0,0,0.26
2,5506,https://www.airbnb.com/rooms/5506,20220915162158,2022-09-15,city scrape,** Fort Hill Inn Private! Minutes to center!**,"Private guest room with private bath, You do n...","Peaceful, Architecturally interesting, histori...",https://a0.muscache.com/pictures/miso/Hosting-...,8229,...,4.89,4.54,4.73,Approved by the government,f,10,10,0,0,0.69
3,6695,https://www.airbnb.com/rooms/6695,20220915162158,2022-09-15,city scrape,"Fort Hill Inn *Sunny* 1 bedroom, condo duplex","Comfortable, Fully Equipped private apartment...","Peaceful, Architecturally interesting, histori...",https://a0.muscache.com/pictures/38ac4797-e7a4...,8229,...,4.95,4.50,4.71,STR446650,f,10,10,0,0,0.75
4,7903,https://www.airbnb.com/rooms/7903,20220915162158,2022-09-15,city scrape,"Colorful, modern 2 BR apt shared with host",I'm a high school teacher and frequent travele...,"The apartment is in Somerville, located direct...",https://a0.muscache.com/pictures/miso/Hosting-...,14169,...,4.95,4.56,4.80,,f,1,0,1,0,1.84
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5180,716081443145047239,https://www.airbnb.com/rooms/716081443145047239,20220915162158,2022-09-15,city scrape,Private Room with Shared Bath in Quiet Street,*Please Note: You are booking a private room i...,South Boston is a very large neighborhood comp...,https://a0.muscache.com/pictures/prohost-api/H...,2356643,...,,,,STR-460218,f,71,25,46,0,
5181,716081469166085329,https://www.airbnb.com/rooms/716081469166085329,20220915162158,2022-09-15,city scrape,Cozy Bedroom in Convenient Downtown Location,*Please Note: You are booking a private room i...,South Boston is a very large neighborhood comp...,https://a0.muscache.com/pictures/prohost-api/H...,2356643,...,,,,STR-460218,f,71,25,46,0,
5182,716081495310456299,https://www.airbnb.com/rooms/716081495310456299,20220915162158,2022-09-15,city scrape,"Peaceful Bedroom w/ Shared Bath - AC, Wifi inc...",*Please Note: You are booking a private room i...,South Boston is a very large neighborhood comp...,https://a0.muscache.com/pictures/prohost-api/H...,2356643,...,,,,STR-460218,f,71,25,46,0,
5183,716235197792512391,https://www.airbnb.com/rooms/716235197792512391,20220915162158,2022-09-15,city scrape,Sunny Room w/ Shared Bath in Modest Brighton Home,"Perfect for Hospital Stays, Medical Students, ...",The apartment is located in a walkable neighbo...,https://a0.muscache.com/pictures/prohost-api/H...,2356643,...,,,,STR-484106,t,71,25,46,0,


In [25]:

from selenium import webdriver
import time
from bs4 import BeautifulSoup


driver = webdriver.Chrome()
url= "https://www.airbnb.com/rooms/10813?_set_bev_on_new_domain=1672487471_YmRkYzRjZmE4OTI5&source_impression_id=p3_1673713643_DcJa9orfz3w1CQku&locale=en"
driver.maximize_window()
driver.get(url)

time.sleep(5)
content = driver.page_source.encode('utf-8').strip()
soup = BeautifulSoup(content,"html.parser")
officials = soup.findAll("span",{"class":"_9xiloll"})

for entry in officials:
    print(str(entry))


driver.quit()

<span aria-hidden="false" class="_9xiloll">Boston, Massachusetts, United States</span>


In [11]:
Boston_listings[Boston_listings['host_location'].isnull()]['listing_url']

7                    https://www.airbnb.com/rooms/10813
8                    https://www.airbnb.com/rooms/10986
57                  https://www.airbnb.com/rooms/611081
66                  https://www.airbnb.com/rooms/743759
96                 https://www.airbnb.com/rooms/1077105
                             ...                       
5130    https://www.airbnb.com/rooms/706689347742891165
5158    https://www.airbnb.com/rooms/711804721312473870
5164    https://www.airbnb.com/rooms/712856955049497663
5165    https://www.airbnb.com/rooms/712953753793616795
5168    https://www.airbnb.com/rooms/713937638206317229
Name: listing_url, Length: 700, dtype: object

> The *listing url* of the null values of the column `bedrooms` suggest that those nulls mean *zero* or *no actuall bedroom*.

In [8]:
Boston_listings[Boston_listings['bedrooms'].isnull()]['listing_url']

0                     https://www.airbnb.com/rooms/3168
3                     https://www.airbnb.com/rooms/6695
7                    https://www.airbnb.com/rooms/10813
8                    https://www.airbnb.com/rooms/10986
34                  https://www.airbnb.com/rooms/210097
                             ...                       
5137    https://www.airbnb.com/rooms/708066864505175780
5158    https://www.airbnb.com/rooms/711804721312473870
5159    https://www.airbnb.com/rooms/712092718787212242
5174    https://www.airbnb.com/rooms/714906239224334877
5176    https://www.airbnb.com/rooms/715658190467254169
Name: listing_url, Length: 559, dtype: object

In [5]:
Boston_listings['bedrooms'].isnull().mean()

0.10781099324975892

In [7]:
Boston_listings[Boston_listings['bedrooms'].isnull()]

Unnamed: 0,id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,...,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,3168,https://www.airbnb.com/rooms/3168,20220915162158,2022-09-15,city scrape,TudorStudio,"The ""Studio at 14 Weldon"" is located in Newton...","Newton has 13 unique villages, and gives off a...",https://a0.muscache.com/pictures/ff7952dc-ef0b...,3697,...,,,,,f,1,0,1,0,
3,6695,https://www.airbnb.com/rooms/6695,20220915162158,2022-09-15,city scrape,"Fort Hill Inn *Sunny* 1 bedroom, condo duplex","Comfortable, Fully Equipped private apartment...","Peaceful, Architecturally interesting, histori...",https://a0.muscache.com/pictures/38ac4797-e7a4...,8229,...,4.95,4.5,4.71,STR446650,f,10,10,0,0,0.75
7,10813,https://www.airbnb.com/rooms/10813,20220915162158,2022-09-15,city scrape,"Back Bay Apt-blocks to subway, Newbury St, The...",Stunning Back Bay furnished studio apartment. ...,Wander around this quintessential neighborhood...,https://a0.muscache.com/pictures/20b5b9c9-e1f4...,38997,...,5.00,5.0,4.75,,f,11,11,0,0,0.07
8,10986,https://www.airbnb.com/rooms/10986,20220915162158,2022-09-15,city scrape,North End (Waterfront area) CLOSE TO MGH & SU...,Chic furnished studio apartment is located on ...,,https://a0.muscache.com/pictures/46994/567b606...,38997,...,,,,,f,11,11,0,0,0.03
34,210097,https://www.airbnb.com/rooms/210097,20220915162158,2022-09-15,city scrape,Treetop Haven in the City,Our space is unique in that it's completely pr...,"Jamaica Plain is a trendy, lovely neighborhood...",https://a0.muscache.com/pictures/3726439/e4e35...,1035747,...,4.95,4.9,4.82,STR-414064,f,1,1,0,0,1.83
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5137,708066864505175780,https://www.airbnb.com/rooms/708066864505175780,20220915162158,2022-09-15,city scrape,"Downtown Studio Apt, short walk to hospitals",Modern studio in the heart of Boston. Located ...,,https://a0.muscache.com/pictures/prohost-api/H...,2356643,...,,,,STR-423038,t,71,25,46,0,
5158,711804721312473870,https://www.airbnb.com/rooms/711804721312473870,20220915162158,2022-09-15,city scrape,(6) lovely room Free wi-fi. 5min Logan Airport,Take a break and unwind at this peaceful oasis...,,https://a0.muscache.com/pictures/97367ee8-9f46...,220434755,...,,,,STR-480353,t,7,1,6,0,
5159,712092718787212242,https://www.airbnb.com/rooms/712092718787212242,20220915162158,2022-09-15,city scrape,Bay Studio nr. Charles River & Esplanade,Feel at home wherever you choose to live with ...,This furnished apartment is situated in Back B...,https://a0.muscache.com/pictures/prohost-api/H...,107434423,...,,,,,t,477,477,0,0,
5174,714906239224334877,https://www.airbnb.com/rooms/714906239224334877,20220915162158,2022-09-15,city scrape,"Somerville studio w/ gym & W/D, nr Green Line",Feel at home wherever you choose to live with ...,This furnished apartment is located in Somervi...,https://a0.muscache.com/pictures/prohost-api/H...,107434423,...,,,,,t,477,477,0,0,


In [None]:
bedrooms_mode = Boston_listings['bedrooms'].mode()

Boston_listings = Boston_listings.fillna({'bedrooms':0})

In [7]:
Boston_listings['bedrooms'].isnull().mean()

0.0

### After a quick data exploration, we need to drop a couple of columns, which are:
> 1- Columns that are all nulls <br>
2- Personal info (Id) <br>
3- Non-relevant info (urls) <br>
4- Meta-data (source) 

In [6]:
Boston_listings['calendar_updated'].unique()

array([nan])

In [3]:
Boston_listings.drop(columns=['id','listing_url','scrape_id','last_scraped','source','picture_url',# ids & urls & meta
                             'host_id', 'host_url','host_thumbnail_url','host_picture_url',# id & urls
                             'calendar_updated','bathrooms','neighbourhood_group_cleansed',# all_nan
                             'neighbourhood', 'neighborhood_overview'],
                    inplace=True)


In [8]:
5185 - 4626

559

In [12]:
print(Boston_listings[Boston_listings['bedrooms'].isnull()]['beds'].isnull().mean())
len(Boston_listings[Boston_listings['bedrooms'].isnull()]['beds']) * Boston_listings[Boston_listings['bedrooms'].isnull()]['beds'].isnull().mean()



0.023255813953488372


13.0

In [5]:
Boston_listings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5185 entries, 0 to 5184
Data columns (total 75 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   id                                            5185 non-null   int64  
 1   listing_url                                   5185 non-null   object 
 2   scrape_id                                     5185 non-null   int64  
 3   last_scraped                                  5185 non-null   object 
 4   source                                        5185 non-null   object 
 5   name                                          5185 non-null   object 
 6   description                                   5140 non-null   object 
 7   neighborhood_overview                         3435 non-null   object 
 8   picture_url                                   5185 non-null   object 
 9   host_id                                       5185 non-null   i

In [3]:
Boston_listings

Unnamed: 0,id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,...,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,3168,https://www.airbnb.com/rooms/3168,20220915162158,2022-09-15,city scrape,TudorStudio,"The ""Studio at 14 Weldon"" is located in Newton...","Newton has 13 unique villages, and gives off a...",https://a0.muscache.com/pictures/ff7952dc-ef0b...,3697,...,,,,,f,1,0,1,0,
1,3781,https://www.airbnb.com/rooms/3781,20220915162158,2022-09-15,city scrape,HARBORSIDE-Walk to subway,Fully separate apartment in a two apartment bu...,"Mostly quiet ( no loud music, no crowed sidewa...",https://a0.muscache.com/pictures/24670/b2de044...,4804,...,4.96,4.87,4.91,,f,1,1,0,0,0.26
2,5506,https://www.airbnb.com/rooms/5506,20220915162158,2022-09-15,city scrape,** Fort Hill Inn Private! Minutes to center!**,"Private guest room with private bath, You do n...","Peaceful, Architecturally interesting, histori...",https://a0.muscache.com/pictures/miso/Hosting-...,8229,...,4.89,4.54,4.73,Approved by the government,f,10,10,0,0,0.69
3,6695,https://www.airbnb.com/rooms/6695,20220915162158,2022-09-15,city scrape,"Fort Hill Inn *Sunny* 1 bedroom, condo duplex","Comfortable, Fully Equipped private apartment...","Peaceful, Architecturally interesting, histori...",https://a0.muscache.com/pictures/38ac4797-e7a4...,8229,...,4.95,4.50,4.71,STR446650,f,10,10,0,0,0.75
4,7903,https://www.airbnb.com/rooms/7903,20220915162158,2022-09-15,city scrape,"Colorful, modern 2 BR apt shared with host",I'm a high school teacher and frequent travele...,"The apartment is in Somerville, located direct...",https://a0.muscache.com/pictures/miso/Hosting-...,14169,...,4.95,4.56,4.80,,f,1,0,1,0,1.84
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5180,716081443145047239,https://www.airbnb.com/rooms/716081443145047239,20220915162158,2022-09-15,city scrape,Private Room with Shared Bath in Quiet Street,*Please Note: You are booking a private room i...,South Boston is a very large neighborhood comp...,https://a0.muscache.com/pictures/prohost-api/H...,2356643,...,,,,STR-460218,f,71,25,46,0,
5181,716081469166085329,https://www.airbnb.com/rooms/716081469166085329,20220915162158,2022-09-15,city scrape,Cozy Bedroom in Convenient Downtown Location,*Please Note: You are booking a private room i...,South Boston is a very large neighborhood comp...,https://a0.muscache.com/pictures/prohost-api/H...,2356643,...,,,,STR-460218,f,71,25,46,0,
5182,716081495310456299,https://www.airbnb.com/rooms/716081495310456299,20220915162158,2022-09-15,city scrape,"Peaceful Bedroom w/ Shared Bath - AC, Wifi inc...",*Please Note: You are booking a private room i...,South Boston is a very large neighborhood comp...,https://a0.muscache.com/pictures/prohost-api/H...,2356643,...,,,,STR-460218,f,71,25,46,0,
5183,716235197792512391,https://www.airbnb.com/rooms/716235197792512391,20220915162158,2022-09-15,city scrape,Sunny Room w/ Shared Bath in Modest Brighton Home,"Perfect for Hospital Stays, Medical Students, ...",The apartment is located in a walkable neighbo...,https://a0.muscache.com/pictures/prohost-api/H...,2356643,...,,,,STR-484106,t,71,25,46,0,


In [4]:
Boston_listings['last_scraped'].unique()

array(['2022-09-15', '2022-10-02'], dtype=object)

In [9]:
Boston_listings['name'].unique()

array(['TudorStudio', 'HARBORSIDE-Walk to subway',
       '** Fort Hill Inn Private! Minutes to center!**', ...,
       'Peaceful Bedroom w/ Shared Bath - AC, Wifi included',
       'Sunny Room w/ Shared Bath in Modest Brighton Home',
       'Charming Room in Modern Shared Spacious Apt'], dtype=object)

In [6]:
Boston_listings['price']

0        $99.00
1       $132.00
2       $149.00
3       $179.00
4       $116.00
         ...   
5180     $51.00
5181     $51.00
5182     $51.00
5183     $51.00
5184     $51.00
Name: price, Length: 5185, dtype: object

In [7]:
Boston_listings['price'] = Boston_listings['price'].str.replace('$', '', regex=True)
Boston_listings['price'] = Boston_listings['price'].str.replace(',', '', regex=True)

In [8]:
Boston_listings = Boston_listings.astype({'price': 'float64'})

In [9]:
am_list = []
for i,amen in enumerate(Boston_listings['amenities']):
    temp_am = []
    for i,amenities in enumerate(amen.split('"')):
        if i % 2 == 1:
            temp_am.append(amenities)
    am.append(len(temp_am))
max(am), min(am), sum(am)/len(am)

NameError: name 'am' is not defined

In [None]:
Boston_listings[Boston_listings['amenities']=='["Long term stays allowed"]']

In [37]:
Boston_listings['amenities']

0       ["Dishes and silverware", "Long term stays all...
1       ["Bed linens", "Dishes and silverware", "Long ...
2       ["Bed linens", "Dishes and silverware", "Long ...
3       ["Dishes and silverware", "Long term stays all...
4       ["Bed linens", "Rice maker", "Dishes and silve...
                              ...                        
5180    ["Long term stays allowed", "Stove", "Wifi", "...
5181    ["Long term stays allowed", "Stove", "Wifi", "...
5182    ["Long term stays allowed", "Stove", "Wifi", "...
5183    ["Bed linens", "Dishes and silverware", "Long ...
5184    ["Bed linens", "Dishes and silverware", "Long ...
Name: amenities, Length: 5185, dtype: object