## Importing modules

In [237]:
import pandas as pd
import numpy as np
import re
import nltk
    
from nltk.stem import WordNetLemmatizer
from nltk.stem.porter import *
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer

from textblob import TextBlob

# show all columns
pd.set_option('display.max_columns', None)

In [None]:
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('vader_lexicon')

## Importing data


#### Original

In [2]:
listings = pd.read_csv('./Airbnb/listings.csv')

In [3]:
listings_details = pd.read_csv('./Airbnb/listings_details.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
reviews_details = pd.read_csv('./Airbnb/reviews_details.csv')

In [5]:
calendar = pd.read_csv('./Airbnb/calendar.csv')

#### Listings cleaned

In [49]:
listings_clean = pd.read_csv('./Airbnb/listings_clean.csv')

In [50]:
listings_details_clean = pd.read_csv('./Airbnb/listings_details_clean.csv')

#### Current

In [252]:
listings_new = pd.read_csv('./Airbnb/listings_new.csv')
listings_new = listings_new.drop(columns=['Unnamed: 0'])

In [67]:
calendar_clean = pd.read_csv('./Airbnb/calendar_clean.csv')

## Airbnb

In [52]:
listings.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,2818,Quiet Garden View Room & Super Fast WiFi,3159,Daniel,,Oostelijk Havengebied - Indische Buurt,52.365755,4.941419,Private room,59,3,248,2018-11-28,2.1,1,44
1,3209,"Quiet apt near center, great view",3806,Maartje,,Westerpark,52.390225,4.873924,Entire home/apt,160,4,42,2018-08-29,1.03,1,47
2,20168,100%Centre-Studio 1 Private Floor/Bathroom,59484,Alex,,Centrum-Oost,52.365087,4.893541,Entire home/apt,80,1,233,2018-11-30,2.18,2,198
3,25428,Lovely apt in City Centre (Jordaan),56142,Joan,,Centrum-West,52.373114,4.883668,Entire home/apt,125,14,1,2018-01-21,0.09,2,141
4,27886,"Romantic, stylish B&B houseboat in canal district",97647,Flip,,Centrum-West,52.386727,4.892078,Private room,150,2,171,2018-11-25,2.03,1,199


In [6]:
listings.shape

(20030, 16)

In [55]:
listings_details.head(2)

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,summary,space,description,experiences_offered,neighborhood_overview,notes,transit,access,interaction,house_rules,thumbnail_url,medium_url,picture_url,xl_picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,street,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,city,state,zipcode,market,smart_location,country_code,country,latitude,longitude,is_location_exact,property_type,room_type,accommodates,bathrooms,bedrooms,beds,bed_type,amenities,square_feet,price,weekly_price,monthly_price,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,maximum_nights,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,requires_license,license,jurisdiction_names,instant_bookable,is_business_travel_ready,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,calculated_host_listings_count,reviews_per_month
0,2818,https://www.airbnb.com/rooms/2818,20181206172549,2018-12-06,Quiet Garden View Room & Super Fast WiFi,Quiet Garden View Room & Super Fast WiFi,I'm renting a bedroom (room overlooking the ga...,Quiet Garden View Room & Super Fast WiFi I'm r...,none,"Indische Buurt (""Indies Neighborhood"") is a ne...",From week 38 to week 47 maintenance work to th...,The neighbourhood is well served by 24 hours p...,,,Please: - Leave your shoes in the entrance - ...,,,https://a0.muscache.com/im/pictures/10272854/8...,,3159,https://www.airbnb.com/users/show/3159,Daniel,2008-09-24,"Amsterdam, Noord-Holland, The Netherlands","Upon arriving in Amsterdam, one can imagine as...",within an hour,100%,,t,https://a0.muscache.com/im/users/3159/profile_...,https://a0.muscache.com/im/users/3159/profile_...,Indische Buurt,1.0,1.0,"['email', 'phone', 'reviews', 'jumio', 'offlin...",t,f,"Amsterdam, North Holland, Netherlands",Indische Buurt,Oostelijk Havengebied - Indische Buurt,,Amsterdam,North Holland,,Amsterdam,"Amsterdam, Netherlands",NL,Netherlands,52.365755,4.941419,f,Apartment,Private room,2,1.5,1.0,2.0,Real Bed,"{Internet,Wifi,""Paid parking off premises"",""Bu...",,$59.00,,"$1,500.00",$100.00,$50.00,1,$20.00,3,15,today,t,17,44,44,44,2018-12-06,248,2009-03-30,2018-11-28,97.0,10.0,10.0,10.0,10.0,9.0,10.0,f,,{Amsterdam},t,f,strict_14_with_grace_period,f,f,1,2.1
1,3209,https://www.airbnb.com/rooms/3209,20181206172549,2018-12-06,"Quiet apt near center, great view",You will love our spacious (90 m2) bright apar...,"Our apartment has lots of light, a balcony and...",You will love our spacious (90 m2) bright apar...,none,Welcome to the Spaarndammerbuurt! From the beg...,,"From Central Station, walk towards the busstop...",You will have the entire house to yourself.,We will meet you in person for check in whenev...,"Our house comes with our very sweet, but old (...",,,https://a0.muscache.com/im/pictures/88955424/4...,,3806,https://www.airbnb.com/users/show/3806,Maartje,2008-10-24,"Amsterdam, Noord-Holland, The Netherlands",I am a freelance radio producer and journalist...,within an hour,100%,,f,https://a0.muscache.com/im/users/3806/profile_...,https://a0.muscache.com/im/users/3806/profile_...,Spaarndammer en Zeeheldenbuurt,1.0,1.0,"['email', 'phone', 'reviews', 'jumio']",t,t,"Amsterdam, Noord-Holland, Netherlands",Spaarndammer en Zeeheldenbuurt,Westerpark,,Amsterdam,Noord-Holland,1013 XE,Amsterdam,"Amsterdam, Netherlands",NL,Netherlands,52.390225,4.873924,t,Apartment,Entire home/apt,5,1.0,2.0,2.0,Real Bed,"{Internet,Wifi,Kitchen,""Paid parking off premi...",,$160.00,$543.00,"$2,000.00",$300.00,$40.00,2,$15.00,4,20,7 weeks ago,t,0,0,0,47,2018-12-06,42,2015-07-31,2018-08-29,96.0,10.0,9.0,10.0,10.0,9.0,9.0,f,,{Amsterdam},f,f,moderate,f,f,1,1.03


In [242]:
listings_new.shape

(20030, 54)

In [127]:
reviews_details.head()

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments
0,2818,1191,2009-03-30,10952,Lam,Daniel is really cool. The place was nice and ...
1,2818,1771,2009-04-24,12798,Alice,Daniel is the most amazing host! His place is ...
2,2818,1989,2009-05-03,11869,Natalja,We had such a great time in Amsterdam. Daniel ...
3,2818,2797,2009-05-18,14064,Enrique,Very professional operation. Room is very clea...
4,2818,3151,2009-05-25,17977,Sherwin,Daniel is highly recommended. He provided all...


In [131]:
reviews_details.shape

(431830, 6)

# Questions

In [None]:
"""
What to consider if you want to become a host at Airbnb?

- Predict the price based on all factors
- Price estimator for different neighborhoods

- What should be the price if you want to book now / in a week / in a month / in half a year? (per neighborhood)
- How is the total rating of an apartment affected by the factors?
- Comments:
     - what are the most informative features per neighborhood? What people like / don't like?
     - Naive Bayes
     
1. Comments:
    - Add a column with neighborhood
    - clean_up(), tokenize(), stem_and_lemmatize(), remove_stopwords()
    



"""

# Data cleaning

#### Calendar data

In [39]:
# Remove rows where apartments are not available

calendar_clean = calendar.dropna()
calendar_clean.dtypes

listing_id     int64
date          object
available     object
price         object
dtype: object

In [40]:
# Remove the dollar sign from 'price' column and conver to float
calendar_clean['price'] = calendar_clean['price'].replace({'\$': '', ',': ''}, regex=True)
calendar_clean['price'] = calendar_clean['price'].astype(float)

# Change the 'date' column to datetime
calendar_clean['date'] = pd.to_datetime(calendar_clean['date'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [49]:
#calendar_clean.to_csv('calendar_clean.csv')

#### Listings

In [29]:
listings.isnull().sum()

##################################################################
# where 'last_review' isnull - replace?
# where 'reviews_per_month' isnull - replace with 'no reviews'

id                                    0
name                                 38
host_id                               0
host_name                             4
neighbourhood_group               20030
neighbourhood                         0
latitude                              0
longitude                             0
room_type                             0
price                                 0
minimum_nights                        0
number_of_reviews                     0
last_review                        2406
reviews_per_month                  2406
calculated_host_listings_count        0
availability_365                      0
dtype: int64

In [27]:
# drop ['neighbourhood_group', 'last_review', 'host_name', 'calculated_host_listings_count']
listings_clean = listings.drop(columns=['neighbourhood_group', 'last_review', 'host_name', 
                                        'calculated_host_listings_count', 'reviews_per_month', 'room_type',
                                       'minimum_nights', 'number_of_reviews'])

In [28]:
#listings_clean.to_csv('listings_clean.csv')

#### Listings (details)

In [42]:
listings_details_clean = listings_details.drop(columns=['space','summary','listing_url', 'scrape_id', 'last_scraped', 'name', 
                                               'experiences_offered', 'neighborhood_overview', 'notes',
                                               'transit', 'access', 'interaction', 'house_rules', 'thumbnail_url', 'medium_url', 
                                               'picture_url', 'xl_picture_url', 'host_id', 'host_url', 'host_name', 
                                               'host_acceptance_rate', 'host_thumbnail_url', 'host_picture_url', 'host_neighbourhood',
                                               'street', 'neighbourhood', 'neighbourhood_group_cleansed', 'city', 'state', 'market', 
                                               'smart_location', 'country_code', 'country', 'latitude', 'longitude', 'square_feet', 
                                               'price', 'calendar_last_scraped', 'has_availability', 'jurisdiction_names', 'requires_license', 
                                               'license', 'is_business_travel_ready', 'zipcode', 'weekly_price', 'monthly_price',
                                                'host_about', 'host_location'])
listings_details_clean

Unnamed: 0,id,description,host_since,host_response_time,host_response_rate,host_is_superhost,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,neighbourhood_cleansed,is_location_exact,property_type,room_type,accommodates,bathrooms,bedrooms,beds,bed_type,amenities,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,maximum_nights,calendar_updated,availability_30,availability_60,availability_90,availability_365,number_of_reviews,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,calculated_host_listings_count,reviews_per_month
0,2818,Quiet Garden View Room & Super Fast WiFi I'm r...,2008-09-24,within an hour,100%,t,1.0,1.0,"['email', 'phone', 'reviews', 'jumio', 'offlin...",t,f,Oostelijk Havengebied - Indische Buurt,f,Apartment,Private room,2,1.5,1.0,2.0,Real Bed,"{Internet,Wifi,""Paid parking off premises"",""Bu...",$100.00,$50.00,1,$20.00,3,15,today,17,44,44,44,248,2009-03-30,2018-11-28,97.0,10.0,10.0,10.0,10.0,9.0,10.0,t,strict_14_with_grace_period,f,f,1,2.10
1,3209,You will love our spacious (90 m2) bright apar...,2008-10-24,within an hour,100%,f,1.0,1.0,"['email', 'phone', 'reviews', 'jumio']",t,t,Westerpark,t,Apartment,Entire home/apt,5,1.0,2.0,2.0,Real Bed,"{Internet,Wifi,Kitchen,""Paid parking off premi...",$300.00,$40.00,2,$15.00,4,20,7 weeks ago,0,0,0,47,42,2015-07-31,2018-08-29,96.0,10.0,9.0,10.0,10.0,9.0,9.0,f,moderate,f,f,1,1.03
2,20168,"Cozy studio on your own private floor, 100% in...",2009-12-02,within a few hours,100%,f,2.0,2.0,"['email', 'phone', 'reviews', 'jumio', 'offlin...",t,f,Centrum-Oost,t,Townhouse,Entire home/apt,2,1.0,1.0,1.0,Real Bed,"{TV,Internet,Wifi,""Paid parking off premises"",...",,,2,$0.00,1,1000,today,0,7,24,198,233,2010-03-02,2018-11-30,87.0,9.0,10.0,9.0,9.0,10.0,9.0,f,strict_14_with_grace_period,f,f,2,2.18
3,25428,"This nicely furnished, newly renovated apt is...",2009-11-20,within a few hours,100%,f,2.0,2.0,"['email', 'phone', 'reviews']",t,f,Centrum-West,f,Apartment,Entire home/apt,3,1.0,1.0,1.0,Real Bed,"{TV,""Cable TV"",Internet,Wifi,Kitchen,Elevator,...",$300.00,$40.00,2,$10.00,14,60,2 days ago,2,32,44,141,1,2018-01-21,2018-01-21,100.0,10.0,10.0,10.0,10.0,10.0,10.0,f,strict_14_with_grace_period,f,f,2,0.09
4,27886,Stylish and romantic houseboat on fantastic hi...,2010-03-23,within an hour,100%,t,1.0,1.0,"['email', 'phone', 'reviews', 'jumio']",t,t,Centrum-West,t,Houseboat,Private room,2,1.0,1.0,1.0,Real Bed,"{TV,Internet,Wifi,Breakfast,Heating,""Smoke det...",$0.00,$0.00,1,$0.00,2,730,today,16,37,54,199,171,2012-01-09,2018-11-25,99.0,10.0,10.0,10.0,10.0,10.0,10.0,t,strict_14_with_grace_period,f,f,1,2.03
5,28658,2 beds guest room in Amsterdam West near Erasm...,2010-05-12,within an hour,100%,f,2.0,2.0,"['email', 'phone', 'facebook', 'reviews', 'jum...",t,t,Bos en Lommer,t,Apartment,Private room,2,1.0,1.0,2.0,Real Bed,"{TV,""Cable TV"",Internet,Wifi,""Paid parking off...",$150.00,$15.00,2,$0.00,3,15,2 months ago,11,29,52,295,434,2010-05-16,2018-11-19,93.0,9.0,10.0,10.0,9.0,9.0,9.0,f,moderate,t,t,2,4.16
6,28871,In a monumental house right in the center of A...,2010-05-13,within an hour,100%,t,3.0,3.0,"['email', 'phone', 'reviews', 'jumio']",t,t,Centrum-West,t,Apartment,Private room,2,,1.0,1.0,Real Bed,"{TV,""Cable TV"",Internet,Wifi,""Pets live on thi...",,,1,$0.00,2,1825,yesterday,3,10,17,137,215,2010-08-22,2018-12-03,97.0,10.0,10.0,10.0,10.0,10.0,10.0,f,moderate,f,f,3,2.13
7,29051,because of the city imposing a 4 paying guest ...,2010-05-13,within an hour,100%,t,3.0,3.0,"['email', 'phone', 'reviews', 'jumio']",t,t,Centrum-West,t,Apartment,Private room,1,1.0,1.0,1.0,Real Bed,"{TV,""Cable TV"",Internet,Wifi,""Smoking allowed""...",,,1,$0.00,2,730,today,1,10,18,188,383,2011-03-16,2018-12-05,95.0,10.0,10.0,10.0,10.0,10.0,10.0,f,moderate,f,f,3,4.07
8,31080,My apartment is light and cosy. The three bed...,2010-05-27,a few days or more,0%,f,1.0,1.0,"['email', 'phone', 'facebook', 'reviews', 'jum...",t,t,Zuid,f,Apartment,Entire home/apt,4,1.0,3.0,3.0,Real Bed,"{TV,""Cable TV"",Internet,Wifi,Kitchen,""Buzzer/w...",,$60.00,4,$30.00,3,365,16 months ago,1,31,61,336,32,2011-08-06,2017-10-16,95.0,9.0,10.0,10.0,10.0,9.0,9.0,f,moderate,f,f,1,0.36
9,41125,"A nice, sunny and spacious apartment in the ce...",2010-07-23,within a few hours,100%,f,1.0,1.0,"['email', 'phone', 'facebook', 'reviews', 'jum...",t,t,Centrum-West,f,Apartment,Entire home/apt,2,0.0,1.0,1.0,Real Bed,"{TV,""Cable TV"",Internet,Wifi,Kitchen,""Paid par...",$150.00,$40.00,2,$75.00,3,21,today,8,8,11,11,76,2010-11-25,2018-10-07,95.0,10.0,9.0,10.0,10.0,10.0,9.0,f,moderate,f,f,1,0.78


In [45]:
listings_details_clean['cancellation_policy'].isnull().sum()

0

In [47]:
listings_details_clean.host_response_rate.unique()

array(['100%', '0%', '91%', nan, '80%', '67%', '70%', '78%', '90%', '92%',
       '50%', '95%', '71%', '40%', '75%', '33%', '93%', '96%', '98%',
       '83%', '82%', '43%', '94%', '86%', '88%', '10%', '60%', '32%',
       '25%', '46%', '76%', '57%', '45%', '73%', '63%', '97%', '89%',
       '20%', '56%', '44%', '30%', '29%', '99%', '69%', '87%', '65%',
       '17%', '77%', '27%', '38%', '62%', '58%', '74%', '47%', '55%',
       '85%', '53%', '79%', '66%'], dtype=object)

In [48]:
listings_details_clean.isnull().sum()

id                                     0
description                          124
host_since                             4
host_response_time                  9483
host_response_rate                  9483
host_is_superhost                      4
host_listings_count                    4
host_total_listings_count              4
host_verifications                     0
host_has_profile_pic                   4
host_identity_verified                 4
neighbourhood_cleansed                 0
is_location_exact                      0
property_type                          0
room_type                              0
accommodates                           0
bathrooms                             10
bedrooms                               8
beds                                   7
bed_type                               0
amenities                              0
security_deposit                    6166
cleaning_fee                        3629
guests_included                        0
extra_people    

In [43]:
#listings_details_clean.to_csv('listings_details_clean.csv')

#### Reviews

In [24]:
reviews_details.head()

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments
0,2818,1191,2009-03-30,10952,Lam,Daniel is really cool. The place was nice and ...
1,2818,1771,2009-04-24,12798,Alice,Daniel is the most amazing host! His place is ...
2,2818,1989,2009-05-03,11869,Natalja,We had such a great time in Amsterdam. Daniel ...
3,2818,2797,2009-05-18,14064,Enrique,Very professional operation. Room is very clea...
4,2818,3151,2009-05-25,17977,Sherwin,Daniel is highly recommended. He provided all...


In [25]:
reviews_details.dtypes

listing_id        int64
id                int64
date             object
reviewer_id       int64
reviewer_name    object
comments         object
dtype: object

In [8]:
reviews_details.shape

(431830, 6)

In [10]:
reviews_details.listing_id.nunique()

17624

#### Merge 'listings_clean' with 'listings_details_clean'

In [61]:
listings_clean = listings_clean.drop(columns=['Unnamed: 0', 'availability_365'])
listings_details_clean = listings_details_clean.drop(columns=['Unnamed: 0'])

In [62]:
# Merge 'listings_clean' with 'listings_details_clean' (on='id')

listings_new = pd.merge(listings_clean, listings_details_clean, on='id')

In [63]:
listings_new = pd.merge(listings_clean, listings_details_clean, on='id')

In [65]:
#listings_new.to_csv('listings_new.csv')

#### Clean 'listings_new'

In [None]:
# CONVERT:

# where 'security_deposit' is NaN check if can replace with 0 (no deposit OR NO INFO AVAILABLE?)
# where 'cleaning_fee' is NaN check if can replace with 0 (no fee for cleaning OR NO INFO AVAILABLE?)

# Convert to integers:
#    ['review_scores_rating', 'review_scores_accuracy', 'review_scores_cleanliness', 'review_scores_checkin']
#    ['review_scores_checkin', 'review_scores_communication', 'review_scores_location', 'review_scores_value']

################################

# CATEGORIES:

# ['host_response_time', 'host_is_superhost', 'host_verifications', 'host_has_profile_pic', 'host_identity_verified']
# ['is_location_exact', 'property_type', 'room_type', 'bed_type', 'amenities', 'requires_license', 'instant_bookable']
# ['cancellation_policy', 'require_guest_profile_picture']


In [253]:
listings_new.head()

Unnamed: 0,id,name,host_id,neighbourhood,latitude,longitude,price,description,host_since,host_response_time,host_response_rate,host_is_superhost,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,neighbourhood_cleansed,is_location_exact,property_type,room_type,accommodates,bathrooms,bedrooms,beds,bed_type,amenities,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,maximum_nights,calendar_updated,availability_30,availability_60,availability_90,availability_365,number_of_reviews,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,calculated_host_listings_count,reviews_per_month
0,2818,Quiet Garden View Room & Super Fast WiFi,3159,Oostelijk Havengebied - Indische Buurt,52.365755,4.941419,59,Quiet Garden View Room & Super Fast WiFi I'm r...,2008-09-24,within an hour,100%,t,1.0,1.0,"['email', 'phone', 'reviews', 'jumio', 'offlin...",t,f,Oostelijk Havengebied - Indische Buurt,f,Apartment,Private room,2,1.5,1.0,2.0,Real Bed,"{Internet,Wifi,""Paid parking off premises"",""Bu...",$100.00,$50.00,1,$20.00,3,15,today,17,44,44,44,248,2009-03-30,2018-11-28,97.0,10.0,10.0,10.0,10.0,9.0,10.0,t,strict_14_with_grace_period,f,f,1,2.1
1,3209,"Quiet apt near center, great view",3806,Westerpark,52.390225,4.873924,160,You will love our spacious (90 m2) bright apar...,2008-10-24,within an hour,100%,f,1.0,1.0,"['email', 'phone', 'reviews', 'jumio']",t,t,Westerpark,t,Apartment,Entire home/apt,5,1.0,2.0,2.0,Real Bed,"{Internet,Wifi,Kitchen,""Paid parking off premi...",$300.00,$40.00,2,$15.00,4,20,7 weeks ago,0,0,0,47,42,2015-07-31,2018-08-29,96.0,10.0,9.0,10.0,10.0,9.0,9.0,f,moderate,f,f,1,1.03
2,20168,100%Centre-Studio 1 Private Floor/Bathroom,59484,Centrum-Oost,52.365087,4.893541,80,"Cozy studio on your own private floor, 100% in...",2009-12-02,within a few hours,100%,f,2.0,2.0,"['email', 'phone', 'reviews', 'jumio', 'offlin...",t,f,Centrum-Oost,t,Townhouse,Entire home/apt,2,1.0,1.0,1.0,Real Bed,"{TV,Internet,Wifi,""Paid parking off premises"",...",,,2,$0.00,1,1000,today,0,7,24,198,233,2010-03-02,2018-11-30,87.0,9.0,10.0,9.0,9.0,10.0,9.0,f,strict_14_with_grace_period,f,f,2,2.18
3,25428,Lovely apt in City Centre (Jordaan),56142,Centrum-West,52.373114,4.883668,125,"This nicely furnished, newly renovated apt is...",2009-11-20,within a few hours,100%,f,2.0,2.0,"['email', 'phone', 'reviews']",t,f,Centrum-West,f,Apartment,Entire home/apt,3,1.0,1.0,1.0,Real Bed,"{TV,""Cable TV"",Internet,Wifi,Kitchen,Elevator,...",$300.00,$40.00,2,$10.00,14,60,2 days ago,2,32,44,141,1,2018-01-21,2018-01-21,100.0,10.0,10.0,10.0,10.0,10.0,10.0,f,strict_14_with_grace_period,f,f,2,0.09
4,27886,"Romantic, stylish B&B houseboat in canal district",97647,Centrum-West,52.386727,4.892078,150,Stylish and romantic houseboat on fantastic hi...,2010-03-23,within an hour,100%,t,1.0,1.0,"['email', 'phone', 'reviews', 'jumio']",t,t,Centrum-West,t,Houseboat,Private room,2,1.0,1.0,1.0,Real Bed,"{TV,Internet,Wifi,Breakfast,Heating,""Smoke det...",$0.00,$0.00,1,$0.00,2,730,today,16,37,54,199,171,2012-01-09,2018-11-25,99.0,10.0,10.0,10.0,10.0,10.0,10.0,t,strict_14_with_grace_period,f,f,1,2.03


In [254]:
# ['first_review', 'last_review'] - convert to datetime

listings_new[['first_review', 'last_review']] = listings_new[['first_review', 'last_review']].apply(pd.to_datetime) 

In [255]:
# Remove percentige sign from: 'host_response_rate'

listings_new['host_response_rate'] = listings_new['host_response_rate'].replace({'%': ''}, regex=True)
listings_new['host_response_rate'] = listings_new['host_response_rate'].astype(float)

In [256]:
# ['security_deposit', 'cleaning_fee', 'extra_people'] - remove dollar sign and convert to floats

listings_new[['security_deposit', 'cleaning_fee', 'extra_people']] = listings_new[[ 'security_deposit', 'cleaning_fee', 'extra_people']].replace({'\$': '', ',': ''}, regex=True)
listings_new[['security_deposit', 'cleaning_fee', 'extra_people']] = listings_new[['security_deposit', 'cleaning_fee', 'extra_people']].astype(float)

In [258]:
listings_new = listings_new.drop(columns = ['name', 'neighbourhood_cleansed', 'description'])

In [259]:
listings_new

Unnamed: 0,id,host_id,neighbourhood,latitude,longitude,price,host_since,host_response_time,host_response_rate,host_is_superhost,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,is_location_exact,property_type,room_type,accommodates,bathrooms,bedrooms,beds,bed_type,amenities,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,maximum_nights,calendar_updated,availability_30,availability_60,availability_90,availability_365,number_of_reviews,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,calculated_host_listings_count,reviews_per_month
0,2818,3159,Oostelijk Havengebied - Indische Buurt,52.365755,4.941419,59,2008-09-24,within an hour,100.0,t,1.0,1.0,"['email', 'phone', 'reviews', 'jumio', 'offlin...",t,f,f,Apartment,Private room,2,1.5,1.0,2.0,Real Bed,"{Internet,Wifi,""Paid parking off premises"",""Bu...",100.0,50.0,1,20.0,3,15,today,17,44,44,44,248,2009-03-30,2018-11-28,97.0,10.0,10.0,10.0,10.0,9.0,10.0,t,strict_14_with_grace_period,f,f,1,2.10
1,3209,3806,Westerpark,52.390225,4.873924,160,2008-10-24,within an hour,100.0,f,1.0,1.0,"['email', 'phone', 'reviews', 'jumio']",t,t,t,Apartment,Entire home/apt,5,1.0,2.0,2.0,Real Bed,"{Internet,Wifi,Kitchen,""Paid parking off premi...",300.0,40.0,2,15.0,4,20,7 weeks ago,0,0,0,47,42,2015-07-31,2018-08-29,96.0,10.0,9.0,10.0,10.0,9.0,9.0,f,moderate,f,f,1,1.03
2,20168,59484,Centrum-Oost,52.365087,4.893541,80,2009-12-02,within a few hours,100.0,f,2.0,2.0,"['email', 'phone', 'reviews', 'jumio', 'offlin...",t,f,t,Townhouse,Entire home/apt,2,1.0,1.0,1.0,Real Bed,"{TV,Internet,Wifi,""Paid parking off premises"",...",,,2,0.0,1,1000,today,0,7,24,198,233,2010-03-02,2018-11-30,87.0,9.0,10.0,9.0,9.0,10.0,9.0,f,strict_14_with_grace_period,f,f,2,2.18
3,25428,56142,Centrum-West,52.373114,4.883668,125,2009-11-20,within a few hours,100.0,f,2.0,2.0,"['email', 'phone', 'reviews']",t,f,f,Apartment,Entire home/apt,3,1.0,1.0,1.0,Real Bed,"{TV,""Cable TV"",Internet,Wifi,Kitchen,Elevator,...",300.0,40.0,2,10.0,14,60,2 days ago,2,32,44,141,1,2018-01-21,2018-01-21,100.0,10.0,10.0,10.0,10.0,10.0,10.0,f,strict_14_with_grace_period,f,f,2,0.09
4,27886,97647,Centrum-West,52.386727,4.892078,150,2010-03-23,within an hour,100.0,t,1.0,1.0,"['email', 'phone', 'reviews', 'jumio']",t,t,t,Houseboat,Private room,2,1.0,1.0,1.0,Real Bed,"{TV,Internet,Wifi,Breakfast,Heating,""Smoke det...",0.0,0.0,1,0.0,2,730,today,16,37,54,199,171,2012-01-09,2018-11-25,99.0,10.0,10.0,10.0,10.0,10.0,10.0,t,strict_14_with_grace_period,f,f,1,2.03
5,28658,123414,Bos en Lommer,52.375342,4.857289,65,2010-05-12,within an hour,100.0,f,2.0,2.0,"['email', 'phone', 'facebook', 'reviews', 'jum...",t,t,t,Apartment,Private room,2,1.0,1.0,2.0,Real Bed,"{TV,""Cable TV"",Internet,Wifi,""Paid parking off...",150.0,15.0,2,0.0,3,15,2 months ago,11,29,52,295,434,2010-05-16,2018-11-19,93.0,9.0,10.0,10.0,9.0,9.0,9.0,f,moderate,t,t,2,4.16
6,28871,124245,Centrum-West,52.367187,4.890918,75,2010-05-13,within an hour,100.0,t,3.0,3.0,"['email', 'phone', 'reviews', 'jumio']",t,t,t,Apartment,Private room,2,,1.0,1.0,Real Bed,"{TV,""Cable TV"",Internet,Wifi,""Pets live on thi...",,,1,0.0,2,1825,yesterday,3,10,17,137,215,2010-08-22,2018-12-03,97.0,10.0,10.0,10.0,10.0,10.0,10.0,f,moderate,f,f,3,2.13
7,29051,124245,Centrum-West,52.367725,4.891512,55,2010-05-13,within an hour,100.0,t,3.0,3.0,"['email', 'phone', 'reviews', 'jumio']",t,t,t,Apartment,Private room,1,1.0,1.0,1.0,Real Bed,"{TV,""Cable TV"",Internet,Wifi,""Smoking allowed""...",,,1,0.0,2,730,today,1,10,18,188,383,2011-03-16,2018-12-05,95.0,10.0,10.0,10.0,10.0,10.0,10.0,f,moderate,f,f,3,4.07
8,31080,133488,Zuid,52.351321,4.848383,219,2010-05-27,a few days or more,0.0,f,1.0,1.0,"['email', 'phone', 'facebook', 'reviews', 'jum...",t,t,f,Apartment,Entire home/apt,4,1.0,3.0,3.0,Real Bed,"{TV,""Cable TV"",Internet,Wifi,Kitchen,""Buzzer/w...",,60.0,4,30.0,3,365,16 months ago,1,31,61,336,32,2011-08-06,2017-10-16,95.0,9.0,10.0,10.0,10.0,9.0,9.0,f,moderate,f,f,1,0.36
9,41125,178515,Centrum-West,52.378915,4.883205,180,2010-07-23,within a few hours,100.0,f,1.0,1.0,"['email', 'phone', 'facebook', 'reviews', 'jum...",t,t,f,Apartment,Entire home/apt,2,0.0,1.0,1.0,Real Bed,"{TV,""Cable TV"",Internet,Wifi,Kitchen,""Paid par...",150.0,40.0,2,75.0,3,21,today,8,8,11,11,76,2010-11-25,2018-10-07,95.0,10.0,9.0,10.0,10.0,10.0,9.0,f,moderate,f,f,1,0.78


In [248]:
listings_new.host_response_time.unique()

array(['within an hour', 'within a few hours', 'a few days or more',
       'within a day', nan], dtype=object)

In [260]:
#listings_new.to_csv('listings_new_2.csv')


In [236]:
listings_new.isnull().sum()

id                                     0
name                                  38
host_id                                0
neighbourhood                          0
latitude                               0
longitude                              0
price                                  0
description                          124
host_since                             4
host_response_time                  9483
host_response_rate                  9483
host_is_superhost                      4
host_listings_count                    4
host_total_listings_count              4
host_verifications                     0
host_has_profile_pic                   4
host_identity_verified                 4
neighbourhood_cleansed                 0
is_location_exact                      0
property_type                          0
room_type                              0
accommodates                           0
bathrooms                             10
bedrooms                               8
beds            

#### Reviews

In [99]:
reviews_details.head()

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments
0,2818,1191,2009-03-30,10952,Lam,Daniel is really cool. The place was nice and ...
1,2818,1771,2009-04-24,12798,Alice,Daniel is the most amazing host! His place is ...
2,2818,1989,2009-05-03,11869,Natalja,We had such a great time in Amsterdam. Daniel ...
3,2818,2797,2009-05-18,14064,Enrique,Very professional operation. Room is very clea...
4,2818,3151,2009-05-25,17977,Sherwin,Daniel is highly recommended. He provided all...


In [101]:
reviews_details = reviews_details.drop(columns='reviewer_name')

In [102]:
reviews_details['date'] = pd.to_datetime(reviews_details['date'])

In [104]:
reviews_details.dtypes

listing_id              int64
id                      int64
date           datetime64[ns]
reviewer_id             int64
comments               object
dtype: object

In [241]:
reviews_details.shape

(431830, 5)

In [238]:
test = reviews_details.iloc[:5,]
test

Unnamed: 0,listing_id,id,date,reviewer_id,comments
0,2818,1191,2009-03-30,10952,Daniel is really cool. The place was nice and ...
1,2818,1771,2009-04-24,12798,Daniel is the most amazing host! His place is ...
2,2818,1989,2009-05-03,11869,We had such a great time in Amsterdam. Daniel ...
3,2818,2797,2009-05-18,14064,Very professional operation. Room is very clea...
4,2818,3151,2009-05-25,17977,Daniel is highly recommended. He provided all...


In [239]:
analyzer = SentimentIntensityAnalyzer()

test['polarity_nltk'] = test['comments'].apply(lambda x: analyzer.polarity_scores(x))
test

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,listing_id,id,date,reviewer_id,comments,polarity_nltk
0,2818,1191,2009-03-30,10952,Daniel is really cool. The place was nice and ...,"{'neg': 0.047, 'neu': 0.694, 'pos': 0.259, 'co..."
1,2818,1771,2009-04-24,12798,Daniel is the most amazing host! His place is ...,"{'neg': 0.0, 'neu': 0.684, 'pos': 0.316, 'comp..."
2,2818,1989,2009-05-03,11869,We had such a great time in Amsterdam. Daniel ...,"{'neg': 0.027, 'neu': 0.676, 'pos': 0.297, 'co..."
3,2818,2797,2009-05-18,14064,Very professional operation. Room is very clea...,"{'neg': 0.0, 'neu': 0.53, 'pos': 0.47, 'compou..."
4,2818,3151,2009-05-25,17977,Daniel is highly recommended. He provided all...,"{'neg': 0.0, 'neu': 0.742, 'pos': 0.258, 'comp..."


In [240]:
test['polarity_tb'] = test['comments'].apply(lambda x: TextBlob(x).sentiment.polarity)
test

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,listing_id,id,date,reviewer_id,comments,polarity_nltk,polarity_tb
0,2818,1191,2009-03-30,10952,Daniel is really cool. The place was nice and ...,"{'neg': 0.047, 'neu': 0.694, 'pos': 0.259, 'co...",0.140741
1,2818,1771,2009-04-24,12798,Daniel is the most amazing host! His place is ...,"{'neg': 0.0, 'neu': 0.684, 'pos': 0.316, 'comp...",0.365278
2,2818,1989,2009-05-03,11869,We had such a great time in Amsterdam. Daniel ...,"{'neg': 0.027, 'neu': 0.676, 'pos': 0.297, 'co...",0.313681
3,2818,2797,2009-05-18,14064,Very professional operation. Room is very clea...,"{'neg': 0.0, 'neu': 0.53, 'pos': 0.47, 'compou...",0.451111
4,2818,3151,2009-05-25,17977,Daniel is highly recommended. He provided all...,"{'neg': 0.0, 'neu': 0.742, 'pos': 0.258, 'comp...",0.222778


In [None]:
# boston housing prices dataset - how to predict price
# word cloud