In [12]:
import os

import pandas as pd

reviews = pd.read_csv('new_reviews.csv')
reviews.head()

Unnamed: 0.1,Unnamed: 0,listing_id,0
0,0,44077,We enjoyed our stay very much. The room was co...
1,1,44077,We have been here 4 nights. Stay in a home is ...
2,2,44077,Teresa and Hughie were great hosts. They were ...
3,3,44077,No surprises was as described. Very gracious ...
4,4,44077,Teresa was a lovely hostess and we had a delig...


In [13]:
reviews = reviews.drop(['Unnamed: 0'], axis=1)
reviews['comments'] = reviews['0']
reviews = reviews.drop(['0'], axis=1)
reviews = reviews.dropna(subset=['comments'])
reviews

Unnamed: 0,listing_id,comments
0,44077,We enjoyed our stay very much. The room was co...
1,44077,We have been here 4 nights. Stay in a home is ...
2,44077,Teresa and Hughie were great hosts. They were ...
3,44077,No surprises was as described. Very gracious ...
4,44077,Teresa was a lovely hostess and we had a delig...
...,...,...
243178,706148275480196839,Excellent location kindness and courtesy!
243179,706287276585342998,Jenny was able to get us in last minute and ex...
243180,706495821581154410,Very spacious; owners communicative. Only issu...
243181,707685389742134998,What a great host couple and great spot. Super...


In [14]:
from sklearn.feature_extraction.text import CountVectorizer
from nltk.stem import PorterStemmer
import nltk

nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/eytins/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [15]:
stemmer = PorterStemmer()


def list_to_str_include_stem(l):
    res = ''
    for s in l:
        res = res + ' ' + stemmer.stem(s)
    return res

In [16]:
# Remove punctuation and stemmerize
tokenizer = CountVectorizer().build_tokenizer()
reviews['comments'] = reviews['comments'].apply(lambda x: list_to_str_include_stem(tokenizer(x)), 1)
reviews

Unnamed: 0,listing_id,comments
0,44077,we enjoy our stay veri much the room wa comfo...
1,44077,we have been here night stay in home is the b...
2,44077,teresa and hughi were great host they were ve...
3,44077,no surpris wa as describ veri graciou host ni...
4,44077,teresa wa love hostess and we had delight sta...
...,...,...
243178,706148275480196839,excel locat kind and courtesi
243179,706287276585342998,jenni wa abl to get us in last minut and exte...
243180,706495821581154410,veri spaciou owner commun onli issu had wa no...
243181,707685389742134998,what great host coupl and great spot super cl...


In [17]:
# Vectorize
stop_words = nltk.corpus.stopwords.words('english')
vectorizer = CountVectorizer(stop_words=stop_words, max_df=0.5, min_df=0.05)
X = vectorizer.fit_transform(reviews['comments'])
words_list = vectorizer.get_feature_names()
print(words_list)
words_array = X.toarray()
print(words_array)
print(len(words_array[0]))

['accommod', 'airport', 'also', 'amaz', 'apart', 'area', 'around', 'arriv', 'away', 'back', 'bar', 'bathroom', 'beauti', 'bed', 'breakfast', 'bu', 'center', 'centr', 'check', 'citi', 'clean', 'close', 'comfort', 'commun', 'could', 'day', 'definit', 'distanc', 'dublin', 'easi', 'enjoy', 'even', 'everyth', 'excel', 'friendli', 'get', 'go', 'good', 'great', 'ha', 'help', 'highli', 'home', 'host', 'hous', 'kind', 'kitchen', 'like', 'littl', 'locat', 'lot', 'love', 'made', 'make', 'minut', 'need', 'nice', 'night', 'one', 'onli', 'perfect', 'place', 'quiet', 'realli', 'recommend', 'restaur', 'right', 'room', 'space', 'stay', 'super', 'thank', 'thi', 'time', 'us', 'veri', 'visit', 'wa', 'walk', 'welcom', 'well', 'wonder', 'would']
[[0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 1 ... 0 0 0]
 [0 0 1 ... 0 0 1]
 [0 0 0 ... 0 0 0]]
83




In [18]:
from pandas import DataFrame

words_df = DataFrame(words_array)
words_df.columns = words_list
words_df

Unnamed: 0,accommod,airport,also,amaz,apart,area,around,arriv,away,back,...,time,us,veri,visit,wa,walk,welcom,well,wonder,would
0,0,0,0,0,0,0,0,0,0,0,...,0,1,3,0,3,0,0,0,0,1
1,0,0,0,0,0,0,0,0,0,0,...,1,1,2,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,1,0,...,0,1,3,0,2,0,1,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0,...,0,2,0,0,2,0,0,0,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
243156,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
243157,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,0,0,0,0,0
243158,0,0,1,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0
243159,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [19]:
result = pd.concat([reviews['listing_id'], words_df], axis=1)
result.dropna()

Unnamed: 0,listing_id,accommod,airport,also,amaz,apart,area,around,arriv,away,...,time,us,veri,visit,wa,walk,welcom,well,wonder,would
0,4.407700e+04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,3.0,0.0,3.0,0.0,0.0,0.0,0.0,1.0
1,4.407700e+04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4.407700e+04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,1.0,3.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0
3,4.407700e+04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
4,4.407700e+04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,2.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
243156,7.094515e+17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
243157,7.100541e+17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
243158,7.036042e+17,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
243159,7.036042e+17,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [20]:
result = result.groupby(by='listing_id')
result = result.agg('mean')
result

Unnamed: 0_level_0,accommod,airport,also,amaz,apart,area,around,arriv,away,back,...,time,us,veri,visit,wa,walk,welcom,well,wonder,would
listing_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4.407700e+04,0.077519,0.027132,0.143411,0.054264,0.011628,0.135659,0.127907,0.093023,0.073643,0.108527,...,0.201550,0.585271,0.930233,0.112403,1.054264,0.131783,0.313953,0.158915,0.197674,0.286822
8.515600e+04,0.123810,0.047619,0.219048,0.076190,0.014286,0.119048,0.095238,0.095238,0.090476,0.104762,...,0.166667,0.457143,1.052381,0.152381,0.919048,0.176190,0.252381,0.128571,0.176190,0.228571
1.598890e+05,0.073239,0.295775,0.087324,0.047887,0.002817,0.078873,0.042254,0.036620,0.064789,0.090141,...,0.157746,0.019718,0.822535,0.078873,0.864789,0.104225,0.185915,0.092958,0.092958,0.171831
1.628090e+05,0.113497,0.030675,0.110429,0.079755,0.055215,0.113497,0.061350,0.098160,0.049080,0.064417,...,0.177914,0.527607,0.941718,0.104294,1.098160,0.052147,0.236196,0.147239,0.073620,0.162577
1.658280e+05,0.078125,0.125000,0.234375,0.046875,0.937500,0.203125,0.093750,0.078125,0.140625,0.078125,...,0.218750,0.218750,1.187500,0.078125,1.062500,0.218750,0.046875,0.406250,0.156250,0.328125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7.076854e+17,,,,,,,,,,,...,,,,,,,,,,
7.078251e+17,,,,,,,,,,,...,,,,,,,,,,
7.086799e+17,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
7.094515e+17,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [21]:
result = result.reset_index()
result

Unnamed: 0,listing_id,accommod,airport,also,amaz,apart,area,around,arriv,away,...,time,us,veri,visit,wa,walk,welcom,well,wonder,would
0,4.407700e+04,0.077519,0.027132,0.143411,0.054264,0.011628,0.135659,0.127907,0.093023,0.073643,...,0.201550,0.585271,0.930233,0.112403,1.054264,0.131783,0.313953,0.158915,0.197674,0.286822
1,8.515600e+04,0.123810,0.047619,0.219048,0.076190,0.014286,0.119048,0.095238,0.095238,0.090476,...,0.166667,0.457143,1.052381,0.152381,0.919048,0.176190,0.252381,0.128571,0.176190,0.228571
2,1.598890e+05,0.073239,0.295775,0.087324,0.047887,0.002817,0.078873,0.042254,0.036620,0.064789,...,0.157746,0.019718,0.822535,0.078873,0.864789,0.104225,0.185915,0.092958,0.092958,0.171831
3,1.628090e+05,0.113497,0.030675,0.110429,0.079755,0.055215,0.113497,0.061350,0.098160,0.049080,...,0.177914,0.527607,0.941718,0.104294,1.098160,0.052147,0.236196,0.147239,0.073620,0.162577
4,1.658280e+05,0.078125,0.125000,0.234375,0.046875,0.937500,0.203125,0.093750,0.078125,0.140625,...,0.218750,0.218750,1.187500,0.078125,1.062500,0.218750,0.046875,0.406250,0.156250,0.328125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6204,7.076854e+17,,,,,,,,,,...,,,,,,,,,,
6205,7.078251e+17,,,,,,,,,,...,,,,,,,,,,
6206,7.086799e+17,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
6207,7.094515e+17,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [22]:
result.fillna(0)

Unnamed: 0,listing_id,accommod,airport,also,amaz,apart,area,around,arriv,away,...,time,us,veri,visit,wa,walk,welcom,well,wonder,would
0,4.407700e+04,0.077519,0.027132,0.143411,0.054264,0.011628,0.135659,0.127907,0.093023,0.073643,...,0.201550,0.585271,0.930233,0.112403,1.054264,0.131783,0.313953,0.158915,0.197674,0.286822
1,8.515600e+04,0.123810,0.047619,0.219048,0.076190,0.014286,0.119048,0.095238,0.095238,0.090476,...,0.166667,0.457143,1.052381,0.152381,0.919048,0.176190,0.252381,0.128571,0.176190,0.228571
2,1.598890e+05,0.073239,0.295775,0.087324,0.047887,0.002817,0.078873,0.042254,0.036620,0.064789,...,0.157746,0.019718,0.822535,0.078873,0.864789,0.104225,0.185915,0.092958,0.092958,0.171831
3,1.628090e+05,0.113497,0.030675,0.110429,0.079755,0.055215,0.113497,0.061350,0.098160,0.049080,...,0.177914,0.527607,0.941718,0.104294,1.098160,0.052147,0.236196,0.147239,0.073620,0.162577
4,1.658280e+05,0.078125,0.125000,0.234375,0.046875,0.937500,0.203125,0.093750,0.078125,0.140625,...,0.218750,0.218750,1.187500,0.078125,1.062500,0.218750,0.046875,0.406250,0.156250,0.328125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6204,7.076854e+17,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
6205,7.078251e+17,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
6206,7.086799e+17,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
6207,7.094515e+17,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [23]:
result.rename(columns={'listing_id': 'id'}, inplace=True)
result

Unnamed: 0,id,accommod,airport,also,amaz,apart,area,around,arriv,away,...,time,us,veri,visit,wa,walk,welcom,well,wonder,would
0,4.407700e+04,0.077519,0.027132,0.143411,0.054264,0.011628,0.135659,0.127907,0.093023,0.073643,...,0.201550,0.585271,0.930233,0.112403,1.054264,0.131783,0.313953,0.158915,0.197674,0.286822
1,8.515600e+04,0.123810,0.047619,0.219048,0.076190,0.014286,0.119048,0.095238,0.095238,0.090476,...,0.166667,0.457143,1.052381,0.152381,0.919048,0.176190,0.252381,0.128571,0.176190,0.228571
2,1.598890e+05,0.073239,0.295775,0.087324,0.047887,0.002817,0.078873,0.042254,0.036620,0.064789,...,0.157746,0.019718,0.822535,0.078873,0.864789,0.104225,0.185915,0.092958,0.092958,0.171831
3,1.628090e+05,0.113497,0.030675,0.110429,0.079755,0.055215,0.113497,0.061350,0.098160,0.049080,...,0.177914,0.527607,0.941718,0.104294,1.098160,0.052147,0.236196,0.147239,0.073620,0.162577
4,1.658280e+05,0.078125,0.125000,0.234375,0.046875,0.937500,0.203125,0.093750,0.078125,0.140625,...,0.218750,0.218750,1.187500,0.078125,1.062500,0.218750,0.046875,0.406250,0.156250,0.328125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6204,7.076854e+17,,,,,,,,,,...,,,,,,,,,,
6205,7.078251e+17,,,,,,,,,,...,,,,,,,,,,
6206,7.086799e+17,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
6207,7.094515e+17,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [24]:
listings = pd.read_csv('new_listings1.csv')
listings.__len__()

6209

In [25]:
listings['id'] = listings['id'].astype('float64')
final = pd.merge(listings, result, on=['id'])
final

Unnamed: 0.1,Unnamed: 0,id,price,availability_30,number_of_reviews,first_review,reviews_per_month,review_scores_rating,review_scores_value,review_scores_checkin,...,time,us,veri,visit,wa,walk,welcom,well,wonder,would
0,0,4.407700e+04,70.0,2,258,4255,1.85,4.78,4.82,4.93,...,0.201550,0.585271,0.930233,0.112403,1.054264,0.131783,0.313953,0.158915,0.197674,0.286822
1,1,8.515600e+04,67.0,3,210,4190,1.53,4.79,4.78,4.90,...,0.166667,0.457143,1.052381,0.152381,0.919048,0.176190,0.252381,0.128571,0.176190,0.228571
2,2,1.598890e+05,45.0,1,355,3889,2.78,4.74,4.74,4.86,...,0.157746,0.019718,0.822535,0.078873,0.864789,0.104225,0.185915,0.092958,0.092958,0.171831
3,3,1.628090e+05,80.0,2,326,2721,3.68,4.84,4.85,4.95,...,0.177914,0.527607,0.941718,0.104294,1.098160,0.052147,0.236196,0.147239,0.073620,0.162577
4,4,1.658280e+05,251.0,1,64,4036,0.48,4.63,4.55,4.81,...,0.218750,0.218750,1.187500,0.078125,1.062500,0.218750,0.046875,0.406250,0.156250,0.328125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6204,6204,7.076854e+17,100.0,0,1,63,1.00,5.00,4.00,5.00,...,,,,,,,,,,
6205,6205,7.078251e+17,37.0,9,1,66,1.00,5.00,5.00,5.00,...,,,,,,,,,,
6206,6206,7.086799e+17,800.0,29,1,62,1.00,5.00,5.00,5.00,...,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
6207,6207,7.094515e+17,200.0,4,1,62,1.00,5.00,5.00,5.00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [26]:
final.columns.values.tolist()

['Unnamed: 0',
 'id',
 'price',
 'availability_30',
 'number_of_reviews',
 'first_review',
 'reviews_per_month',
 'review_scores_rating',
 'review_scores_value',
 'review_scores_checkin',
 'review_scores_accuracy',
 'review_scores_location',
 'review_scores_cleanliness',
 'review_scores_communication',
 'host_response_time_a few days or more',
 'host_response_time_unknown',
 'host_response_time_within a day',
 'host_response_time_within a few hours',
 'host_response_time_within an hour',
 'host_response_rate_0%-69%',
 'host_response_rate_70%-79%',
 'host_response_rate_80%-89%',
 'host_response_rate_90%-98%',
 'host_response_rate_99%-100%',
 'host_response_rate_unknown',
 'host_acceptance_rate_0%-69%',
 'host_acceptance_rate_70%-79%',
 'host_acceptance_rate_80%-89%',
 'host_acceptance_rate_90%-96%',
 'host_acceptance_rate_97%-98%',
 'host_acceptance_rate_99%-100%',
 'host_acceptance_rate_unknown',
 'host_is_superhost_f',
 'host_is_superhost_t',
 'calculated_host_listings_count_1',
 'cal

In [27]:
final = final.drop(['Unnamed: 0'], axis=1)
final

Unnamed: 0,id,price,availability_30,number_of_reviews,first_review,reviews_per_month,review_scores_rating,review_scores_value,review_scores_checkin,review_scores_accuracy,...,time,us,veri,visit,wa,walk,welcom,well,wonder,would
0,4.407700e+04,70.0,2,258,4255,1.85,4.78,4.82,4.93,4.83,...,0.201550,0.585271,0.930233,0.112403,1.054264,0.131783,0.313953,0.158915,0.197674,0.286822
1,8.515600e+04,67.0,3,210,4190,1.53,4.79,4.78,4.90,4.86,...,0.166667,0.457143,1.052381,0.152381,0.919048,0.176190,0.252381,0.128571,0.176190,0.228571
2,1.598890e+05,45.0,1,355,3889,2.78,4.74,4.74,4.86,4.77,...,0.157746,0.019718,0.822535,0.078873,0.864789,0.104225,0.185915,0.092958,0.092958,0.171831
3,1.628090e+05,80.0,2,326,2721,3.68,4.84,4.85,4.95,4.88,...,0.177914,0.527607,0.941718,0.104294,1.098160,0.052147,0.236196,0.147239,0.073620,0.162577
4,1.658280e+05,251.0,1,64,4036,0.48,4.63,4.55,4.81,4.89,...,0.218750,0.218750,1.187500,0.078125,1.062500,0.218750,0.046875,0.406250,0.156250,0.328125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6204,7.076854e+17,100.0,0,1,63,1.00,5.00,4.00,5.00,5.00,...,,,,,,,,,,
6205,7.078251e+17,37.0,9,1,66,1.00,5.00,5.00,5.00,5.00,...,,,,,,,,,,
6206,7.086799e+17,800.0,29,1,62,1.00,5.00,5.00,5.00,5.00,...,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
6207,7.094515e+17,200.0,4,1,62,1.00,5.00,5.00,5.00,5.00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [28]:
order = ['review_scores_rating',
 'review_scores_value',
 'review_scores_checkin',
 'review_scores_accuracy',
 'review_scores_location',
 'review_scores_cleanliness',
 'review_scores_communication',
 'id',
 'price',
 'availability_30',
 'number_of_reviews',
 'first_review',
 'reviews_per_month',
 'host_response_time_a few days or more',
 'host_response_time_unknown',
 'host_response_time_within a day',
 'host_response_time_within a few hours',
 'host_response_time_within an hour',
 'host_response_rate_0%-69%',
 'host_response_rate_70%-79%',
 'host_response_rate_80%-89%',
 'host_response_rate_90%-98%',
 'host_response_rate_99%-100%',
 'host_response_rate_unknown',
 'host_acceptance_rate_0%-69%',
 'host_acceptance_rate_70%-79%',
 'host_acceptance_rate_80%-89%',
 'host_acceptance_rate_90%-96%',
 'host_acceptance_rate_97%-98%',
 'host_acceptance_rate_99%-100%',
 'host_acceptance_rate_unknown',
 'host_is_superhost_f',
 'host_is_superhost_t',
 'calculated_host_listings_count_1',
 'calculated_host_listings_count_10-29',
 'calculated_host_listings_count_2-4',
 'calculated_host_listings_count_5-9',
 'calculated_host_listings_count_>29',
 'host_identity_verified_f',
 'host_identity_verified_t',
 'neighbourhood_cleansed_Dn Laoghaire-Rathdown',
 'neighbourhood_cleansed_Dublin City',
 'neighbourhood_cleansed_Fingal',
 'neighbourhood_cleansed_South Dublin',
 'property_type_Entire condo',
 'property_type_Entire cottage',
 'property_type_Entire guest suite',
 'property_type_Entire guesthouse',
 'property_type_Entire home',
 'property_type_Entire rental unit',
 'property_type_Entire serviced apartment',
 'property_type_Entire townhouse',
 'property_type_Private room in bed and breakfast',
 'property_type_Private room in condo',
 'property_type_Private room in home',
 'property_type_Private room in rental unit',
 'property_type_Private room in townhouse',
 'property_type_Shared room in home',
 'property_type_Shared room in rental unit',
 'property_type_others',
 'room_type_Entire home/apt',
 'room_type_Hotel room',
 'room_type_Private room',
 'room_type_Shared room',
 'accommodates_1',
 'accommodates_2',
 'accommodates_3',
 'accommodates_4',
 'accommodates_5',
 'accommodates_>5',
 'bathrooms_text_1 bath',
 'bathrooms_text_1 private bath',
 'bathrooms_text_1 shared bath',
 'bathrooms_text_1.5 baths',
 'bathrooms_text_1.5 shared baths',
 'bathrooms_text_2 baths',
 'bathrooms_text_2 shared baths',
 'bathrooms_text_2.5 baths',
 'bathrooms_text_3 baths',
 'bathrooms_text_others',
 'bedrooms_1',
 'bedrooms_2',
 'bedrooms_3',
 'bedrooms_4',
 'bedrooms_5',
 'bedrooms_>5',
 'bedrooms_unknown',
 'beds_1',
 'beds_2',
 'beds_3',
 'beds_4',
 'beds_5',
 'beds_6',
 'beds_>6',
 'beds_unknown',
 'minimum_nights_1',
 'minimum_nights_2',
 'minimum_nights_3',
 'minimum_nights_4',
 'minimum_nights_5',
 'minimum_nights_6',
 'minimum_nights_7-33',
 'minimum_nights_>33',
 'instant_bookable_f',
 'instant_bookable_t',
 'amenities_Hot water kettle',
 'amenities_Outdoor furniture',
 'amenities_Dining table',
 'amenities_Indoor fireplace',
 'amenities_Breakfast',
 'amenities_Central heating',
 'amenities_Cleaning products',
 'amenities_Shower gel',
 'amenities_Lock on bedroom door',
 'amenities_Dishwasher',
 'amenities_Freezer',
 'amenities_Free street parking',
 'amenities_Bathtub',
 'amenities_Coffee maker',
 'amenities_Conditioner',
 'amenities_Body soap',
 'amenities_Toaster',
 'amenities_Lockbox',
 'amenities_Room-darkening shades',
 'amenities_Outdoor dining area',
 'amenities_Wine glasses',
 'amenities_Extra pillows and blankets',
 'amenities_Luggage dropoff allowed',
 'amenities_TV with standard cable',
 'amenities_Cable TV',
 'amenities_Private patio or balcony',
 'amenities_Stove',
 'amenities_Laundromat nearby',
 'amenities_Drying rack for clothing',
 'amenities_Backyard',
 'amenities_Host greets you',
 'amenities_Paid parking off premises',
 'amenities_Security cameras on property',
 'amenities_Private entrance',
 'amenities_Dedicated workspace',
 'amenities_Patio or balcony',
 'amenities_Elevator'] + words_list
final = final[order]
final = final.drop(['id'], axis=1)
final.columns.values.tolist()

['review_scores_rating',
 'review_scores_value',
 'review_scores_checkin',
 'review_scores_accuracy',
 'review_scores_location',
 'review_scores_cleanliness',
 'review_scores_communication',
 'price',
 'availability_30',
 'number_of_reviews',
 'first_review',
 'reviews_per_month',
 'host_response_time_a few days or more',
 'host_response_time_unknown',
 'host_response_time_within a day',
 'host_response_time_within a few hours',
 'host_response_time_within an hour',
 'host_response_rate_0%-69%',
 'host_response_rate_70%-79%',
 'host_response_rate_80%-89%',
 'host_response_rate_90%-98%',
 'host_response_rate_99%-100%',
 'host_response_rate_unknown',
 'host_acceptance_rate_0%-69%',
 'host_acceptance_rate_70%-79%',
 'host_acceptance_rate_80%-89%',
 'host_acceptance_rate_90%-96%',
 'host_acceptance_rate_97%-98%',
 'host_acceptance_rate_99%-100%',
 'host_acceptance_rate_unknown',
 'host_is_superhost_f',
 'host_is_superhost_t',
 'calculated_host_listings_count_1',
 'calculated_host_listings_

In [29]:
os.remove('final_features.csv')
final.to_csv('final_features.csv')