In [32]:
%matplotlib inline
import json
import re

import numpy as np
import pandas as pd

In [33]:
# Load the complete listings export and show which columns it provides.
listing = pd.read_csv(filepath_or_buffer="listings.csv")
listing.columns

Index(['id', 'name', 'host_id', 'host_name', 'neighbourhood_group',
       'neighbourhood', 'latitude', 'longitude', 'room_type', 'price',
       'minimum_nights', 'number_of_reviews', 'last_review',
       'reviews_per_month', 'calculated_host_listings_count',
       'availability_365'],
      dtype='object')

In [34]:
# Columns kept for the listings JSON export; the file is re-read with only
# these columns.
listing_col = [
    'id', 'latitude', 'longitude',
    'host_id', 'host_name', 'calculated_host_listings_count',
    'price', 'name', 'neighbourhood', 'minimum_nights', 'room_type',
    'number_of_reviews', 'last_review', 'reviews_per_month',
    'availability_365',
]

listing = pd.read_csv("listings.csv", usecols=listing_col)

In [35]:
# Replace every ' and " in the listing names with a space. Each value goes
# through str() first, so a missing name becomes the text 'nan'.
quote_chars = re.compile('[\\\'"]')
listing['name'] = listing['name'].map(lambda raw_name: quote_chars.sub(' ', str(raw_name)))

In [36]:
# Count how many reviews each listing received during calendar year 2018.
# The date column is compared as text against the two bounds
# (assumes ISO 'YYYY-MM-DD' strings, as in the rows displayed below).
reviews = pd.read_csv("reviews.csv")
review_date = reviews['date']
in_2018 = review_date.ge('2018-01-01') & review_date.lt('2019-01-01')
reviews_2018 = reviews.loc[in_2018]
reviews_per_listing = reviews_2018['listing_id'].value_counts()

In [80]:
# Spot-check: every review row recorded for listing 31080.
reviews.loc[reviews['listing_id'] == 31080]

Unnamed: 0,listing_id,date
1727,31080,2011-08-06
1728,31080,2011-09-03
1729,31080,2012-08-01
1730,31080,2012-08-22
1731,31080,2013-05-14
1732,31080,2014-03-30
1733,31080,2014-04-23
1734,31080,2014-05-21
1735,31080,2014-06-07
1736,31080,2014-08-10


In [90]:
# Parameters of the occupancy model used by applyOccupancyModel().
# NOTE(review): the meanings below are inferred from the names - confirm.
reviewRate = 0.9      # presumably the share of stays that leave a review
occupancyRate = 0.7   # scaling factor applied to the estimated nights
avgNights = 3.9       # presumably the average length of a stay, in nights

def applyOccupancyModel(id, review_counts=None):
    """Estimate a listing's value under the occupancy model.

    The estimate is ``reviews / reviewRate * avgNights * occupancyRate``,
    rounded to the nearest integer.

    Parameters
    ----------
    id
        Listing id to look up.
    review_counts
        Optional mapping (anything with ``.get(key, default)``, e.g. a
        pandas Series or a dict) from listing id to number of reviews.
        Defaults to the notebook-level ``reviews_per_listing``.

    Returns
    -------
    int
        The rounded estimate; 0 when the listing has no entry in
        ``review_counts``.
    """
    if review_counts is None:
        review_counts = reviews_per_listing

    # Direct label lookup with a default replaces the old boolean-mask scan
    # wrapped in a bare ``except``: a listing without reviews still yields 0,
    # but genuine errors (e.g. the counts were never computed) now surface.
    noReviews = review_counts.get(id, 0)
    estimation = ((noReviews / reviewRate) * avgNights) * occupancyRate

    return int(round(estimation))

In [91]:
# Run the occupancy model once per listing id and store the estimate.
listing['est_bookings_2018'] = listing['id'].map(applyOccupancyModel)

In [92]:
# Estimate monthly income: the 2018 estimate times the price, spread over
# 12 months and rounded to a whole number.
yearly_income = listing['est_bookings_2018'] * listing['price']
listing['est_monthly_income_2018'] = (yearly_income / 12).round().astype('int64')

In [93]:
# Preview the first 50 listings together with the two estimate columns.
listing.head(n=50)

Unnamed: 0,id,name,host_id,host_name,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,est_bookings_2018,est_monthly_income_2018
0,2818,Quiet Garden View Room & Super Fast WiFi,3159,Daniel,Oostelijk Havengebied - Indische Buurt,52.365755,4.941419,Private room,59,3,248,2018-11-28,2.1,1,44,91,447
1,3209,"Quiet apt near center, great view",3806,Maartje,Westerpark,52.390225,4.873924,Entire home/apt,160,4,42,2018-08-29,1.03,1,47,24,320
2,20168,100%Centre-Studio 1 Private Floor/Bathroom,59484,Alex,Centrum-Oost,52.365087,4.893541,Entire home/apt,80,1,233,2018-11-30,2.18,2,198,130,867
3,25428,Lovely apt in City Centre (Jordaan),56142,Joan,Centrum-West,52.373114,4.883668,Entire home/apt,125,14,1,2018-01-21,0.09,2,141,3,31
4,27886,"Romantic, stylish B&B houseboat in canal district",97647,Flip,Centrum-West,52.386727,4.892078,Private room,150,2,171,2018-11-25,2.03,1,199,143,1788
5,28658,Cosy guest room near city centre -1,123414,Michele,Bos en Lommer,52.375342,4.857289,Private room,65,3,434,2018-11-19,4.16,2,295,176,953
6,28871,Comfortable double room,124245,Edwin,Centrum-West,52.367187,4.890918,Private room,75,2,215,2018-12-03,2.13,3,137,285,1781
7,29051,Comfortable single room,124245,Edwin,Centrum-West,52.367725,4.891512,Private room,55,2,383,2018-12-05,4.07,3,188,234,1072
8,31080,2-story apartment + rooftop terrace,133488,Nienke,Zuid,52.351321,4.848383,Entire home/apt,219,3,32,2017-10-16,0.36,1,336,0,0
9,41125,Amsterdam Center Entire Apartment,178515,Fatih,Centrum-West,52.378915,4.883205,Entire home/apt,180,3,76,2018-10-07,0.78,1,11,21,315


In [94]:
# Serialise the listings as JSON keyed by listing id:
# {id: {column: value, ...}, ...}
listing_by_id = listing.set_index('id')
listingJson = listing_by_id.T.to_json()

In [95]:
# Write a JavaScript file that defines `listingsFromCsv` as a string holding
# the listings JSON text.
# json.dumps() turns the JSON text into a correctly escaped string literal,
# so apostrophes, double quotes and backslashes anywhere in the data can no
# longer terminate or corrupt the JavaScript string.
# The result goes into a new name (listingJson is left untouched) so that
# re-running this cell does not wrap the text a second time.
listings_js = "var listingsFromCsv = " + json.dumps(listingJson)
with open('listingsFromCsv.js', 'w') as js_file:
    # The context manager closes (and flushes) the file handle.
    print(listings_js, file=js_file)

In [3]:
#listingsJSON
account_col = ['id',
               'host_id',
              'host_name']

accounts = pd.read_csv("listings.csv", usecols=account_col)
#accounts.groupby('host_id')['id'].apply(list)
accounts = accounts.groupby('host_id').agg({'id' : lambda x: list(x), 'host_name': lambda x: list(x)})
accounts['host_name'] = accounts['host_name'].apply(lambda x: x[0])
accounts.to_json('AccountsFromCsv.js')

In [4]:
# Display the per-host table (listing ids and host name, indexed by host_id).
accounts

Unnamed: 0_level_0,id,host_name
host_id,Unnamed: 1_level_1,Unnamed: 2_level_1
3159,[2818],Daniel
3806,[3209],Maartje
5988,[2774924],Ramona
12085,[18872744],Marieke
20405,[11253322],Tiemen & Gaudi
30390,[3401602],Federica
34080,[1838958],Jesse
36701,[2323819],Leonie And Frank
47517,[3047061],Geert
49851,[21744543],Michelle
