In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import seaborn as sns
import math
import re
import warnings

import time
import datetime
from datetime import datetime
from scipy import stats
sns.set()

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

#Set max display columns to max
pd.set_option('display.max_columns', None)

In [2]:
#Pipeline and Grids
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

#Hyperparameters
from sklearn.model_selection import cross_val_score

#Outliers
from sklearn.ensemble import IsolationForest

#Imputions
from sklearn.impute import SimpleImputer
from sklearn.impute import KNNImputer

#Dimenion Reductions
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

#Data Preparations
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

#Ensemble
from sklearn.ensemble import ExtraTreesClassifier

#Models
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier

#Clustering
from sklearn.cluster import KMeans
from sklearn import cluster, datasets, mixture
from sklearn.neighbors import kneighbors_graph
from itertools import cycle, islice

#Statistic
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [3]:
#Word sentiment anylysis
import nltk
nltk.download(['punkt', 'wordnet', 'averaged_perceptron_tagger'])

from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

from sklearn.metrics import confusion_matrix
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

from bs4 import BeautifulSoup
from nltk.tokenize import WordPunctTokenizer
tok = WordPunctTokenizer()

[nltk_data] Downloading package punkt to /Users/alicis/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/alicis/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/alicis/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


# Loading data functions

In [None]:
#List of the name that represent the missing value 
missing_values = ["n/a", "na", "--"]

def load_data():
    """ 
    Load boston and seattle dataset.
    
    Returns:
    boston_data: The Airbnb activity in Boston, MA, USA.
    seattle_data: The Airbnb activity in Seattle, WA, USA.
    """
    #Load data and convert missing value to Nan type
    boston_data = pd.read_csv("datasets/boston/listings.csv", na_values = missing_values)
    seattle_data = pd.read_csv("datasets/seattle/listings.csv", na_values = missing_values)
    
    return boston_data, seattle_data

def load_tweet_data():
    """ 
    Load twitter tweets dataset.
    
    Returns:
    merged_tweets: Tweet activity that contains positive and negative tweets.
    
    """
    #Change tweet columns name; To make columns easier to read
    cols = ['sentiment','id','date','query_string','user','text']
    tweet_df = pd.read_csv("datasets/training.1600000.processed.noemoticon.csv",
                            header=None, names=cols, encoding="ISO-8859-1", na_values = missing_values)
    
    #Select the positive and negative tweet data in equal ratio (negative = 0, positive = 4).
    neg_tweets = tweet_df[0:10000]
    pos_tweets = tweet_df[800000:810000]
    
    #Concatenate the negative and postive tweets
    merged_tweets = pd.concat([neg_tweets, pos_tweets], axis=0, join='outer', ignore_index=True, keys=None,
                          levels=None, names=None, verify_integrity=False, copy=True)
    return merged_tweets

def load_sentiment_data(boston_data, seattle_data):
    """ 
    Load user comments and reviews scores from boston and seattle dataset.
    These datasets are prepared for sentiment analysis.
    
    Args:.
    boston_data: dataframe. The Airbnb activity in Boston, MA, USA.
    seattle_data: dataframe. The Airbnb activity in Seattle, WA, USA. 
    
    Returns:
    merged_comments: The dataframe that contains comment and review scores from boston and seattle datasets.
    
    """
    
    cols = ['listing_id','comments']
    #Load data then convert missing value to Nan type
    boston_reviews = pd.read_csv("boston/reviews.csv", usecols=cols, na_values = missing_values)
    seattle_reviews = pd.read_csv("seattle/reviews.csv", usecols=cols, na_values = missing_values)
    
    #Fetch the desired columns
    boston_rating = boston_data[['id','review_scores_rating']]
    seattle_rating = seattle_data[['id','review_scores_rating']]
    
    #Merge two columns with two keys (id and listing id)
    boston_merged_comments = pd.merge(boston_rating, boston_reviews, 
                                      left_on='id', right_on='listing_id', how='left').drop('listing_id', axis=1)
    seattle_merged_comments = pd.merge(seattle_rating, seattle_reviews, 
                                      left_on='id', right_on='listing_id', how='left').drop('listing_id', axis=1)
    
    #Concatenate boston and seattle rows
    merged_comments = pd.concat([boston_merged_comments, seattle_merged_comments], axis=0, join='outer', 
                                ignore_index=True, keys=None, levels=None, names=None, verify_integrity=False, copy=True)
    
    #Drop missing values then reset the df index
    merged_comments = merged_comments.dropna()
    merged_comments.reset_index(inplace=True,drop=True)
  
    return merged_comments

# Standard Cleaning functions

In [144]:
def drop_unbalanced_ratio(df):
    """ 
    Drop low diversity or too high diversity columns:
    Drop column that has an unbalanced unique value ratio (one unique value or excessive unique values).
    
    Notes:
    One unique value or excessive unique values columns doesn't give a decent analysis because 
    it doesn't provide meaningful informaiton.
    
    Args:
    df: dataframe. The pandas dataframe.
    
    Returns:
    cleaned_data: The dataframe that dropped column that has one unique value or excessive unique values.
    
    """
    data = df.copy()
    count_unique_col= []
    num_feats = data.select_dtypes(include='number')
    # Drop column that has only one unique value
    ### For numeric variables
    # Check by using standard deviation (if std != 0 then drop)
    if len(num_feats.columns[num_feats.std()==0]) != 0:
        data = data.drop(num_feats.columns[data.std()==0],axis=1)
            
    ### For categorical variables
    # The number of unique values in each columns
    unique_val = data.describe(include='all').loc['unique']
    # The number of values in each columns
    count_val = data.describe(include='all').loc['count']    
    
    # Remove the columns that contain one unique value or 
    # The column that have unique variables equal to the number of values. (Too much unqiue value in one column)
    for idx, val in enumerate(unique_val):
        if unique_val[idx] == count_val[idx] or unique_val[idx] == 1:
            count_unique_col.append(unique_val.index[idx])
            
    # Drop the columns that contain one unique value or excessive unique value
    cleaned_data = data.drop(count_unique_col,axis=1)
    # Show Dropped columns 
    if len(count_unique_col):
        print("Dropped unbalanced ratio columns:")
        print (count_unique_col)

    return cleaned_data

def string2bool(df):
    """ 
    Convert column that has likely boolean string to boolean.
    
    Args:
    df: dataframe. The pandas dataframe.
    
    Returns:
    merged_df: The dataframe after converted column that has likely boolean string to boolean.
    
    """
    
    # Select categorical columns
    non_num_feats = df.select_dtypes(exclude=['number','bool']).copy()
    # Select numeric columns
    num_feats = df.select_dtypes(include='number').copy()

    print ("\nConverted boolean string to boolean type:")

    for col_name in non_num_feats.columns:
        # Select a serie from iteration the column name
        tmp_serie = non_num_feats[col_name].dropna()
        # Find the unique value names from above serie
        unique_set = set(tmp_serie.value_counts().index.str.lower())
        # Map boolean
        if unique_set == {'t','f'}:
            non_num_feats.loc[:, col_name] = non_num_feats.loc[:, col_name].map({'t': True, 'f': False})
            print (col_name)
        elif unique_set == {'true','false'}:
            non_num_feats.loc[:, col_name] = non_num_feats.loc[:, col_name].map({'true': True, 'false': False})
            print (col_name)

    # Combine numeric_features and non_num_features
    merged_df = pd.concat([non_num_feats, num_feats], axis=1)
    
    return merged_df

def remove_spl_char(df):
    """ 
    Remove special characters.
    
    Args:
    df: dataframe. The pandas dataframe.
    
    Returns:
    data: The removed special characters dataframe.
    
    """
    
    print("\nRemoving special character...")
    
    data = df.copy()
    # Regex: string must contain at least one ,.$% and contains only 0-9,.$%
    pattern = re.compile("^(?=^[0-9,.$%]+$)(?=.*?[,.$%]).+$")
    for col_name in data.columns:
        if data[col_name].dtype == "object":
            # Remove the text that contains only special character.
            data[col_name].replace(r"^[^A-Za-z0-9]+$", np.nan, regex=True, inplace=True)
            if data[col_name].str.match(pattern).all():
                # Replace the text that not contain A-Za-z0-9. characters
                data[col_name].replace(r"[^A-Za-z0-9.]+", "", regex=True, inplace=True)
                # Convert object dtype (that has numeric value inside)to numeric dtype
                data[col_name] = pd.to_numeric(data[col_name])
                #Store columns name
                print(col_name)
            else:
                # Replace the text that not contain A-Za-z0-9 ^,!?.\/ characters 
                data[col_name].replace(r"[^A-Za-z0-9 ^,!?.\/'+]", "", regex=True, inplace=True)
                    
    return data

def abnormal_str2nan(df):
    """ 
    Replace one character in stirng to missing value (NaN).
    
    Args:
    df: dataframe. The pandas dataframe.
    
    Returns:
    data: The dataframe after repalce abnormal strings to NaN.
    
    """
    data = df.copy()
    # Remove the columns that has only one character
    for col_name in data.columns:
        data.replace("^.{0,1}$", np.nan, regex=True, inplace=True)
    return data

def string2date(df):
    """ 
    Convert likely date string to date type.
    
    Note: 
    This function creates NaT value. Proceed with Caution!
    
    Args:
    df: dataframe. The pandas dataframe.
    
    Returns:
    data: The dataframe after converted likely date string to date type.
    
    """
    data = df.copy()
    
    print('\nCoverting date string to date type...')
    
    #Set Regex date format
    pattern = re.compile(r"\d{4}-\d{2}-\d{2}")
    
    for col_name in data.columns:
        if data[col_name].dtype == "object":
            if data[col_name].str.match(pattern).all():
                print(col_name)
                # Convert date string to date type format: %Y%m%d
                data.loc[:, col_name] = pd.to_datetime(data.loc[:, col_name], format='%Y-%m-%d', errors='coerce')
    return data

def standard_data_cleansing(df, target_name, nan_threshold = 0.75):
    """ 
    Standard data cleansing process.
    
    Args:
    df: dataframe. The pandas dataframe.
    target_name: string. The target column name.
    nan_threshold: float. The NaN ratio threshold that do not allows in one column.
    
    Returns:
    standard_cleaned_df: The cleaned dataframe after performed standard cleaning process.
    
    """
    # Filter out ID and URL columns
    data = df.loc[:, ~df.columns.str.contains('id|url')].copy()
    
    print ('Dropped url and id columns: ')
    print (df.loc[:, df.columns.str.contains('id|url')].columns.values)

    # Drop column that has an unbalanced unique value ratio
    unique_filtered = drop_unbalanced_ratio(data)

    # Transform boolean string to boolean type
    cleared_bool= string2bool(unique_filtered)

    # Date string to Datetime type
    converted_date = string2date(cleared_bool)

    # Remove abnormal string that has only one character
    df = abnormal_str2nan(converted_date)                       

    # Remove special character in DataFrame
    cleaned_df = remove_spl_char(df)
    
    # Drop the column that has missing values more than the percentage threshold
    mis_columns = cleaned_df.columns[cleaned_df.isnull().mean()>nan_threshold]
    cleaned_df.drop(mis_columns, axis=1, inplace=True)
    
    #Remove missing values in label/target column
    standard_cleaned_df = cleaned_df.dropna(subset=[target_name],axis=0).copy()
    
    #Reset index
    standard_cleaned_df.reset_index(drop=True, inplace=True)
    
    print('\n Dropped the columns that contains missing value > ', (1-nan_threshold)*100,'%: ')
    print(list(mis_columns))
    
    return standard_cleaned_df

# Filtering outier funcitons

In [107]:
def z_outlier_filter(df):
    """ 
    Filter outlier by statistic z score.
    
    Args:
    df: dataframe. The pandas dataframe that must contain only numeric values in each column.
    
    Returns:
    filtered_df: The removed outlier dataframe by using z score.
    
    """
    data = df.copy()
    
    #Calculate z-scores
    z_scores = stats.zscore(data)
    abs_z_scores = np.abs(z_scores)
    
    #set NaN to 0
    abs_z_scores[np.isnan(abs_z_scores)]= 0

    #STD method
    filtered_entries = (abs_z_scores < 3).all(axis=1)
    filtered_df = data[filtered_entries]
    return filtered_df

def ML_outlier_filter(df):
    """ 
    Filter by modeling(ML:isolationForest) instead of statistic z method.
    
    Args:
    df: dataframe. The pandas dataframe that must contain only numeric values in each column.
    
    Returns:
    anomaly_solved: The removed outlier dataframe by using machine learning (isolationForest).
    
    """
    data = df.copy()
    # Define classifier
    clf = IsolationForest(max_samples=100, random_state = 42, contamination= 'auto')
    data['anomaly'] = clf.fit_predict(data)
    
    # Select the datapoint that not abnormal
    anomaly_solved = data[data['anomaly'] == 1]
    anomaly_solved = anomaly_solved.drop('anomaly',axis=1)
    return anomaly_solved

def remove_outlier(df):
    """ 
    Remove the outliers

    Args:
    df: dataframe. The pandas dataframe that needs to remove outlier.
    
    Returns:
    removed_outlier_df: The removed outlier dataframe
    
    """
    features = df.copy()
    
    # Separate numeric and non-numeric columns/features
    # To make it simpler when dealing with outlier
    numeric_feats = features.select_dtypes(include='number')
    non_num_feats = features.select_dtypes(exclude='number')
    
    # Detect NaN to determine whether data is propriate with machine learning outlier removing functions or not
    if len(cleaned_df.columns[cleaned_df.isnull().any()]):
        # Remove outlier by statistic z score method
        anomaly_solved = z_outlier_filter(numeric_feats)
    else:
        # Remove outlier by statistic z score method
        z_anomaly_solved = z_outlier_filter(numeric_feats)
        
        # Remove outlier by ML_outlier_filter method (Manchine Learning)
        anomaly_solved = ML_outlier_filter(z_anomaly_solved)
        
    # Combine numeric features and catergorical features
    non_num_feats_df = pd.DataFrame(non_num_feats,index = anomaly_solved.index)
    removed_outlier_df = pd.concat([non_num_feats_df, anomaly_solved], axis=1)

    # Reset index after drop columns
    removed_outlier_df.reset_index(drop=True, inplace=True)
    
    return removed_outlier_df

# 1st question : What factors affect price fluctuations?

# Loading data

In [145]:
# Load Data
boston_data, seattle_data = load_data()

# Cleaning data

In [146]:
# Drop unused columns
boston_cols = ['name','summary','space','description','neighborhood_overview','notes','transit','access','interaction',
               'house_rules','host_about','latitude','longitude']
boston_data.drop(boston_cols, axis=1, inplace=True)

In [147]:
# Cleaning by standard data cleansing 
cleaned_boston = standard_data_cleansing(boston_data, 'price')
cleaned_boston.head()

Dropped url and id columns: 
['id' 'listing_url' 'scrape_id' 'thumbnail_url' 'medium_url' 'picture_url'
 'xl_picture_url' 'host_id' 'host_url' 'host_thumbnail_url'
 'host_picture_url' 'host_identity_verified']
Dropped unbalanced ratio columns:
['last_scraped', 'experiences_offered', 'state', 'country_code', 'country', 'calendar_last_scraped', 'requires_license']

Converted boolean string to boolean type:
host_is_superhost
host_has_profile_pic
is_location_exact
instant_bookable
require_guest_profile_picture
require_guest_phone_verification

Coverting date string to date type...
host_since
first_review
last_review

Removing special character...
host_response_rate
host_acceptance_rate
price
weekly_price
monthly_price
security_deposit
cleaning_fee
extra_people

 Dropped the columns that contains missing value >  25.0 %: 
['weekly_price', 'monthly_price', 'neighbourhood_group_cleansed', 'square_feet', 'has_availability', 'license', 'jurisdiction_names']


Unnamed: 0,host_name,host_since,host_location,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_neighbourhood,host_verifications,host_has_profile_pic,street,neighbourhood,neighbourhood_cleansed,city,zipcode,market,smart_location,is_location_exact,property_type,room_type,bed_type,amenities,price,security_deposit,cleaning_fee,extra_people,calendar_updated,first_review,last_review,instant_bookable,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,host_listings_count,host_total_listings_count,accommodates,bathrooms,bedrooms,beds,guests_included,minimum_nights,maximum_nights,availability_30,availability_60,availability_90,availability_365,number_of_reviews,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,calculated_host_listings_count,reviews_per_month
0,Virginia,2015-04-15,"Boston, Massachusetts, United States",,,,False,Roslindale,"'email', 'phone', 'facebook', 'reviews'",True,"Birch Street, Boston, MA 02131, United States",Roslindale,Roslindale,Boston,2131.0,Boston,"Boston, MA",True,House,Entire home/apt,Real Bed,"TV,Wireless Internet,Kitchen,Free Parking on P...",250.0,,35.0,0.0,2 weeks ago,NaT,NaT,False,moderate,False,False,1,1,4,1.5,2.0,3.0,1,2,1125,0,0,0,0,0,,,,,,,,1,
1,Andrea,2012-06-07,"Boston, Massachusetts, United States",within an hour,100.0,100.0,False,Roslindale,"'email', 'phone', 'facebook', 'linkedin', 'ame...",True,"Pinehurst Street, Boston, MA 02131, United States",Roslindale,Roslindale,Boston,2131.0,Boston,"Boston, MA",True,Apartment,Private room,Real Bed,"TV,Internet,Wireless Internet,Air Conditioning...",65.0,95.0,10.0,0.0,a week ago,2014-06-01,2016-08-13,True,moderate,False,False,1,1,2,1.0,1.0,1.0,0,2,15,26,54,84,359,36,94.0,10.0,9.0,10.0,10.0,9.0,9.0,1,1.3
2,Phil,2009-05-11,"Boston, Massachusetts, United States",within a few hours,100.0,88.0,True,Roslindale,"'email', 'phone', 'reviews', 'jumio'",True,"Ardale St., Boston, MA 02131, United States",Roslindale,Roslindale,Boston,2131.0,Boston,"Boston, MA",True,Apartment,Private room,Real Bed,"TV,Cable TV,Wireless Internet,Air Conditioning...",65.0,,,20.0,5 days ago,2009-07-19,2016-08-05,False,moderate,True,False,1,1,2,1.0,1.0,1.0,1,3,45,19,46,61,319,41,98.0,10.0,9.0,10.0,10.0,9.0,10.0,1,0.47
3,Meghna,2013-04-21,"Boston, Massachusetts, United States",within a few hours,100.0,50.0,False,,"'email', 'phone', 'reviews'",True,"Boston, MA, United States",,Roslindale,Boston,,Boston,"Boston, MA",False,House,Private room,Real Bed,"TV,Internet,Wireless Internet,Air Conditioning...",75.0,100.0,50.0,25.0,a week ago,2016-08-28,2016-08-28,False,moderate,False,False,1,1,4,1.0,1.0,2.0,2,1,1125,6,16,26,98,1,100.0,10.0,10.0,10.0,10.0,10.0,10.0,1,1.0
4,Linda,2014-05-11,"Boston, Massachusetts, United States",within an hour,100.0,100.0,True,Roslindale,"'email', 'phone', 'reviews', 'kba'",True,"Durnell Avenue, Boston, MA 02131, United States",Roslindale,Roslindale,Boston,2131.0,Boston,"Boston, MA",True,House,Private room,Real Bed,"Internet,Wireless Internet,Air Conditioning,Ki...",79.0,,15.0,0.0,2 weeks ago,2015-08-18,2016-09-01,False,flexible,False,False,1,1,2,1.5,1.0,2.0,1,2,31,13,34,59,334,29,99.0,10.0,10.0,10.0,10.0,9.0,10.0,1,2.25


## Remove outlier

In [148]:
removed_outlier = remove_outlier(cleaned_boston)
removed_outlier.head()

Unnamed: 0,host_name,host_since,host_location,host_response_time,host_is_superhost,host_neighbourhood,host_verifications,host_has_profile_pic,street,neighbourhood,neighbourhood_cleansed,city,zipcode,market,smart_location,is_location_exact,property_type,room_type,bed_type,amenities,calendar_updated,first_review,last_review,instant_bookable,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,host_response_rate,host_acceptance_rate,price,security_deposit,cleaning_fee,extra_people,host_listings_count,host_total_listings_count,accommodates,bathrooms,bedrooms,beds,guests_included,minimum_nights,maximum_nights,availability_30,availability_60,availability_90,availability_365,number_of_reviews,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,calculated_host_listings_count,reviews_per_month
0,Virginia,2015-04-15,"Boston, Massachusetts, United States",,False,Roslindale,"'email', 'phone', 'facebook', 'reviews'",True,"Birch Street, Boston, MA 02131, United States",Roslindale,Roslindale,Boston,2131.0,Boston,"Boston, MA",True,House,Entire home/apt,Real Bed,"TV,Wireless Internet,Kitchen,Free Parking on P...",2 weeks ago,NaT,NaT,False,moderate,False,False,,,250.0,,35.0,0.0,1,1,4,1.5,2.0,3.0,1,2,1125,0,0,0,0,0,,,,,,,,1,
1,Andrea,2012-06-07,"Boston, Massachusetts, United States",within an hour,False,Roslindale,"'email', 'phone', 'facebook', 'linkedin', 'ame...",True,"Pinehurst Street, Boston, MA 02131, United States",Roslindale,Roslindale,Boston,2131.0,Boston,"Boston, MA",True,Apartment,Private room,Real Bed,"TV,Internet,Wireless Internet,Air Conditioning...",a week ago,2014-06-01,2016-08-13,True,moderate,False,False,100.0,100.0,65.0,95.0,10.0,0.0,1,1,2,1.0,1.0,1.0,0,2,15,26,54,84,359,36,94.0,10.0,9.0,10.0,10.0,9.0,9.0,1,1.3
2,Phil,2009-05-11,"Boston, Massachusetts, United States",within a few hours,True,Roslindale,"'email', 'phone', 'reviews', 'jumio'",True,"Ardale St., Boston, MA 02131, United States",Roslindale,Roslindale,Boston,2131.0,Boston,"Boston, MA",True,Apartment,Private room,Real Bed,"TV,Cable TV,Wireless Internet,Air Conditioning...",5 days ago,2009-07-19,2016-08-05,False,moderate,True,False,100.0,88.0,65.0,,,20.0,1,1,2,1.0,1.0,1.0,1,3,45,19,46,61,319,41,98.0,10.0,9.0,10.0,10.0,9.0,10.0,1,0.47
3,Meghna,2013-04-21,"Boston, Massachusetts, United States",within a few hours,False,,"'email', 'phone', 'reviews'",True,"Boston, MA, United States",,Roslindale,Boston,,Boston,"Boston, MA",False,House,Private room,Real Bed,"TV,Internet,Wireless Internet,Air Conditioning...",a week ago,2016-08-28,2016-08-28,False,moderate,False,False,100.0,50.0,75.0,100.0,50.0,25.0,1,1,4,1.0,1.0,2.0,2,1,1125,6,16,26,98,1,100.0,10.0,10.0,10.0,10.0,10.0,10.0,1,1.0
4,Linda,2014-05-11,"Boston, Massachusetts, United States",within an hour,True,Roslindale,"'email', 'phone', 'reviews', 'kba'",True,"Durnell Avenue, Boston, MA 02131, United States",Roslindale,Roslindale,Boston,2131.0,Boston,"Boston, MA",True,House,Private room,Real Bed,"Internet,Wireless Internet,Air Conditioning,Ki...",2 weeks ago,2015-08-18,2016-09-01,False,flexible,False,False,100.0,100.0,79.0,,15.0,0.0,1,1,2,1.5,1.0,2.0,1,2,31,13,34,59,334,29,99.0,10.0,10.0,10.0,10.0,9.0,10.0,1,2.25
