In [236]:
import os
import matplotlib.pyplot as plt
import matplotlib as mpl
import contextily as ctx
import descartes as dsc
import pandas as pd
import geopandas as gp
from shapely.geometry import Point
from bokeh.models import ColumnDataSource, HoverTool, BoxZoomTool, ResetTool, SingleIntervalTicker,\
    Slider, Button, Label, CategoricalColorMapper, Legend, Circle, CheckboxButtonGroup, Select, NumeralTickFormatter

# Import Data

In [237]:
# Load the neighbourhood boundary polygons from a GeoJSON file that sits in
# the current working directory
in_f = 'neighbourhoods.geojson'
dr = os.getcwd()
target = os.path.join(dr, in_f)
df = gp.read_file(target)

In [238]:
# Load the Airbnb listings (point geometries) from the working directory
fdr = ''  # optional sub-folder; the empty string adds nothing to the path
in_f = 'listings.geojson'
dr = os.getcwd()
target = os.path.join(dr, fdr, in_f)
df_airbnb = gp.read_file(target)

In [239]:
# Load the hotels (point geometries) from the working directory
fdr = ''  # optional sub-folder; the empty string adds nothing to the path
in_f = 'sorted_hotels.geojson'
dr = os.getcwd()
target = os.path.join(dr, fdr, in_f)
df_hotels = gp.read_file(target)

# Reproject to the Web Mercator Coordinate Reference System

In [240]:
# Reproject all three layers to Web Mercator (EPSG:3857) so that they share
# one coordinate system
df, df_airbnb, df_hotels = (
    layer.to_crs(epsg=3857) for layer in (df, df_airbnb, df_hotels)
)

In [241]:
# Preview the first rows of the Airbnb listings after reprojection
df_airbnb.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,geometry
0,3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,Elisabeth,Manhattan,Harlem,Private room,150,3,0,,,1,365,POINT (-8231174.656 4984212.838)
1,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,Brooklyn,Clinton Hill,Entire home/apt,89,1,279,2019-08-29,4.62,1,192,POINT (-8233162.822 4966010.228)
2,5022,Entire Apt: Spacious Studio/Loft by central park,7192,Laura,Manhattan,East Harlem,Entire home/apt,80,10,9,2018-11-19,0.1,1,0,POINT (-8231407.314 4982667.208)
3,5099,Large Cozy 1 BR Apartment In Midtown East,7322,Chris,Manhattan,Murray Hill,Entire home/apt,200,3,75,2019-07-21,0.59,1,13,POINT (-8234859.331 4975193.988)
4,5121,BlissArtsSpace!,7356,Garon,Brooklyn,Bedford-Stuyvesant,Private room,60,45,49,2017-10-05,0.39,1,0,POINT (-8232739.808 4966265.665)


# Filtering and Merging Data

In [242]:
# Keep only the neighbourhoods that belong to Manhattan
is_manhattan = df['neighbourhood_group'] == 'Manhattan'
df = df[is_manhattan]
df.head()

Unnamed: 0,neighbourhood,neighbourhood_group,geometry
18,Battery Park City,Manhattan,"MULTIPOLYGON (((-8239173.407 4970202.287, -823..."
43,Chinatown,Manhattan,"MULTIPOLYGON (((-8237641.317 4970831.912, -823..."
52,Chelsea,Manhattan,"MULTIPOLYGON (((-8236914.735 4975858.173, -823..."
53,Civic Center,Manhattan,"MULTIPOLYGON (((-8237419.012 4970060.124, -823..."
80,East Harlem,Manhattan,"MULTIPOLYGON (((-8230276.308 4985757.389, -823..."


In [258]:
# Narrow the Airbnb listings step by step:
#   entire - whole homes / apartments only
#   man    - of those, the ones located in Manhattan
#   man1   - of those, the ones available more than 100 days a year
entire = df_airbnb.loc[df_airbnb['room_type'] == 'Entire home/apt']
man = entire.loc[entire['neighbourhood_group'] == 'Manhattan']
man1 = man.loc[man['availability_365'] > 100]
man1

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,geometry
7,5222,Best Hideaway,7516,Marilyn,Manhattan,East Village,Entire home/apt,116,30,94,2016-06-15,0.73,1,347,POINT (-8235359.156 4972251.250)
18,7750,Huge 2 BR Upper East Cental Park,17985,Sing,Manhattan,East Harlem,Entire home/apt,190,7,0,,,2,302,POINT (-8231933.855 4982423.106)
25,9357,Midtown Pied-a-terre,30193,Tommi,Manhattan,Hell's Kitchen,Entire home/apt,150,10,58,2017-08-13,0.48,1,142,POINT (-8236009.262 4978056.773)
41,12343,BEST BET IN HARLEM,47727,Earl,Manhattan,Harlem,Entire home/apt,150,7,100,2019-07-31,0.85,1,317,POINT (-8231495.256 4984614.359)
48,14287,Cozy 1BD on Central Park West in New York City,56094,Joya,Manhattan,Upper West Side,Entire home/apt,151,3,80,2019-09-09,0.68,1,109,POINT (-8234311.640 4980879.230)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48349,38551538,Modern Studio APT in the heart of K-TOWN,137669844,Yeonjoo,Manhattan,Midtown,Entire home/apt,170,3,0,,,1,112,POINT (-8236196.279 4975074.968)
48354,38553206,Entire Studio in Downtown Manhattan,275978809,Longfeng,Manhattan,Chinatown,Entire home/apt,160,1,0,,,1,356,POINT (-8237178.116 4970244.877)
48357,38553463,(3A) Bright 2 bed in luxury amenity building,278925199,Saul,Manhattan,Upper East Side,Entire home/apt,220,30,0,,,10,263,POINT (-8233354.292 4977144.059)
48366,38557280,"convenient location in nyc entire apt,near subway",112452087,Hannah,Manhattan,East Harlem,Entire home/apt,400,1,0,,,3,327,POINT (-8230677.058 4982574.566)


In [244]:
# Mean listing price per neighbourhood (index: neighbourhood, column: price).
# The built-in groupby mean is used instead of np.mean: numpy is not imported
# in the visible cells of this notebook, so the np.mean call only worked when
# `np` happened to be left over in the kernel and fails on Restart & Run All.
nh = man1.groupby('neighbourhood')['price'].mean().to_frame()
nh.head()

Unnamed: 0_level_0,price
neighbourhood,Unnamed: 1_level_1
Battery Park City,850.055556
Chelsea,333.712838
Chinatown,308.45
Civic Center,200.4
East Harlem,204.91866


In [245]:
# Attach each neighbourhood's mean Airbnb price to its boundary polygon,
# matching on the neighbourhood name
DF = pd.merge(left=df, right=nh, on='neighbourhood')
DF.head()

Unnamed: 0,neighbourhood,neighbourhood_group,geometry,price
0,Battery Park City,Manhattan,"MULTIPOLYGON (((-8239173.407 4970202.287, -823...",850.055556
1,Chinatown,Manhattan,"MULTIPOLYGON (((-8237641.317 4970831.912, -823...",308.45
2,Chelsea,Manhattan,"MULTIPOLYGON (((-8236914.735 4975858.173, -823...",333.712838
3,Civic Center,Manhattan,"MULTIPOLYGON (((-8237419.012 4970060.124, -823...",200.4
4,East Harlem,Manhattan,"MULTIPOLYGON (((-8230276.308 4985757.389, -823...",204.91866


# Finding First Review per Airbnb

In [246]:
# Load the review log; the preview below shows one row per review with the
# listing id and the review date
df_reviews = pd.read_csv("reviews.csv")
df_reviews.head()

Unnamed: 0,listing_id,date
0,3831,2014-09-30
1,3831,2014-11-02
2,3831,2014-11-09
3,3831,2014-11-19
4,3831,2014-12-06


In [247]:
# Reduce the review log to the earliest review date of every listing.
# The result is indexed by listing 'id' and has a single 'date' column.
df_reviews = (
    df_reviews
    .rename(columns={'listing_id': 'id'})
    .assign(date=lambda reviews: pd.to_datetime(reviews['date']))
    .groupby('id')['date']
    .min()  # first review of each listing
    .to_frame()
)

In [248]:
# Join the filtered Manhattan listings with their first-review date and
# derive the calendar year of that first review
airbnb = pd.merge(left=man1, right=df_reviews, on='id')
airbnb['year'] = airbnb.date.dt.year
airbnb.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,geometry,date,year
0,5222,Best Hideaway,7516,Marilyn,Manhattan,East Village,Entire home/apt,116,30,94,2016-06-15,0.73,1,347,POINT (-8235359.156 4972251.250),2009-02-23,2009
1,9357,Midtown Pied-a-terre,30193,Tommi,Manhattan,Hell's Kitchen,Entire home/apt,150,10,58,2017-08-13,0.48,1,142,POINT (-8236009.262 4978056.773),2009-10-04,2009
2,12343,BEST BET IN HARLEM,47727,Earl,Manhattan,Harlem,Entire home/apt,150,7,100,2019-07-31,0.85,1,317,POINT (-8231495.256 4984614.359),2010-01-02,2010
3,14287,Cozy 1BD on Central Park West in New York City,56094,Joya,Manhattan,Upper West Side,Entire home/apt,151,3,80,2019-09-09,0.68,1,109,POINT (-8234311.640 4980879.230),2010-01-01,2010
4,14322,Beautiful Apartment in Manhattan!!!,56284,Francesca,Manhattan,Kips Bay,Entire home/apt,200,7,19,2019-03-25,0.21,1,316,POINT (-8235498.305 4974009.734),2012-04-15,2012


In [249]:
# Drop the listing columns that are not needed downstream.
# The columns are removed by name rather than by position (iloc[:, 8:14]) and
# the result is re-bound instead of mutated in place. The positional in-place
# version was not idempotent: running the cell a second time removed
# 'geometry', 'date' and 'year' as well.
airbnb = airbnb.drop(
    columns=[
        'minimum_nights',
        'number_of_reviews',
        'last_review',
        'reviews_per_month',
        'calculated_host_listings_count',
        'availability_365',
    ],
    errors='ignore',  # columns already dropped by an earlier run are skipped
)
airbnb.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,room_type,price,geometry,date,year
0,5222,Best Hideaway,7516,Marilyn,Manhattan,East Village,Entire home/apt,116,POINT (-8235359.156 4972251.250),2009-02-23,2009
1,9357,Midtown Pied-a-terre,30193,Tommi,Manhattan,Hell's Kitchen,Entire home/apt,150,POINT (-8236009.262 4978056.773),2009-10-04,2009
2,12343,BEST BET IN HARLEM,47727,Earl,Manhattan,Harlem,Entire home/apt,150,POINT (-8231495.256 4984614.359),2010-01-02,2010
3,14287,Cozy 1BD on Central Park West in New York City,56094,Joya,Manhattan,Upper West Side,Entire home/apt,151,POINT (-8234311.640 4980879.230),2010-01-01,2010
4,14322,Beautiful Apartment in Manhattan!!!,56284,Francesca,Manhattan,Kips Bay,Entire home/apt,200,POINT (-8235498.305 4974009.734),2012-04-15,2012


# Get X and Y coordinates of Airbnbs

In [250]:
# Helper that extracts the x or y coordinate of a Point geometry (used for both Airbnbs and hotels)

def getPointCoords(row, geom, coord_type):
    """Return the 'x' or 'y' coordinate of the Point geometry stored in a row.

    Parameters
    ----------
    row : mapping-like
        Object indexable by ``geom``, e.g. the row handed over by
        ``DataFrame.apply(..., axis=1)``.
    geom : hashable
        Key (column name) under which the point geometry is stored.
    coord_type : str
        Which coordinate to return: ``'x'`` or ``'y'``.

    Returns
    -------
    The ``.x`` or ``.y`` attribute of ``row[geom]``.

    Raises
    ------
    ValueError
        If ``coord_type`` is neither ``'x'`` nor ``'y'``. Falling through and
        returning None would silently fill a whole column with None when the
        function is used with ``apply``.
    """
    if coord_type not in ('x', 'y'):
        raise ValueError(
            "coord_type must be 'x' or 'y', got {!r}".format(coord_type)
        )
    return getattr(row[geom], coord_type)

In [251]:
# Add 'x' and 'y' columns holding the coordinates of each listing's Point
for coord_name in ('x', 'y'):
    airbnb[coord_name] = airbnb.apply(
        getPointCoords, axis=1, geom='geometry', coord_type=coord_name
    )
airbnb.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,room_type,price,geometry,date,year,x,y
0,5222,Best Hideaway,7516,Marilyn,Manhattan,East Village,Entire home/apt,116,POINT (-8235359.156 4972251.250),2009-02-23,2009,-8235359.0,4972251.0
1,9357,Midtown Pied-a-terre,30193,Tommi,Manhattan,Hell's Kitchen,Entire home/apt,150,POINT (-8236009.262 4978056.773),2009-10-04,2009,-8236009.0,4978057.0
2,12343,BEST BET IN HARLEM,47727,Earl,Manhattan,Harlem,Entire home/apt,150,POINT (-8231495.256 4984614.359),2010-01-02,2010,-8231495.0,4984614.0
3,14287,Cozy 1BD on Central Park West in New York City,56094,Joya,Manhattan,Upper West Side,Entire home/apt,151,POINT (-8234311.640 4980879.230),2010-01-01,2010,-8234312.0,4980879.0
4,14322,Beautiful Apartment in Manhattan!!!,56284,Francesca,Manhattan,Kips Bay,Entire home/apt,200,POINT (-8235498.305 4974009.734),2012-04-15,2012,-8235498.0,4974010.0


In [252]:
# Independent copy of the Airbnb table without the geometry column
a_df = airbnb.drop(columns=['geometry']).copy()

# Get x and y coordinates of Hotels

In [253]:
# Preview the first rows of the hotels table
df_hotels.head()

Unnamed: 0,FIELD1,doc_id,street,hotel_url,zip,geometry
0,usa_new york city_inn_new_york_city,inn new york city,new york city,266 west 71st st.,10023,POINT (-8235925.271 4979765.026)
1,usa_new york city_casablanca_hotel,casablanca hotel,new york city,147 west 43rd street,10036,POINT (-8236021.696 4976490.058)
2,usa_new york city_library_hotel,library hotel,new york city,299 madison avenue,10017,POINT (-8235363.854 4975855.293)
3,usa_new york city_new_york_palace_hotel,new york palace hotel,new york city,455 madison ave,10022,POINT (-8234858.274 4976739.200)
4,usa_new york city_the_french_quarters_guest_ap...,the french quarters guest apartments,new york city,346 w. 46th street,10036,POINT (-8236504.077 4977053.409)


In [254]:
# Add 'x' and 'y' columns holding the coordinates of each hotel's Point
for coord_name in ('x', 'y'):
    df_hotels[coord_name] = df_hotels.apply(
        getPointCoords, axis=1, geom='geometry', coord_type=coord_name
    )

In [255]:
# Independent copy of the hotels table without the geometry column
h_df = df_hotels.drop(columns=['geometry']).copy()

# Create Bokeh Interactive Chart

In [261]:
import json
from bokeh.io import show
from bokeh.models import (CDSView, ColorBar, ColumnDataSource,
                          CustomJS, CustomJSFilter, 
                          GeoJSONDataSource, HoverTool,
                          LinearColorMapper, Slider)
from bokeh.layouts import column, row, widgetbox
from bokeh.palettes import brewer
from bokeh.plotting import figure

# Wrap the three layers as Bokeh data sources: the Manhattan neighbourhood
# polygons (as GeoJSON), the Airbnb listings and the hotels
geosource = GeoJSONDataSource(geojson=DF.to_json())
asource, hsource = ColumnDataSource(data=a_df), ColumnDataSource(data=h_df)

# Plot data with Airbnbs

In [262]:
# 8-class 'BuGn' palette, reversed so that higher values get darker colours
palette = brewer['BuGn'][8][::-1]

# Linear mapping of values between 100 and 900 onto the palette
color_mapper = LinearColorMapper(palette = palette, low = 100, high = 900)

# Custom tick labels for the colour bar: one label per 100, from 100 to 900
tick_labels = {str(price): str(price) for price in range(100, 1000, 100)}

# Horizontal colour bar driven by the colour mapper
color_bar = ColorBar(
    color_mapper = color_mapper,
    major_label_overrides = tick_labels,
    orientation = 'horizontal',
    location = (0,0),
    width = 500, height = 20,
    label_standoff = 8,
    border_line_color = None,
)

# Figure with the toolbar underneath and both grids hidden
p = figure(
    title = 'Airbnb Neighbourhood Prices',
    plot_width = 950,
    plot_height = 600,
    tools = 'pan, wheel_zoom, box_zoom, reset',
    toolbar_location = 'below',
)
p.xgrid.grid_line_color = p.ygrid.grid_line_color = None

# Neighbourhood polygons, filled according to their 'price' field through
# the colour mapper
man = p.patches(
    'xs', 'ys',
    source = geosource,
    fill_color = dict(field = 'price', transform = color_mapper),
    fill_alpha = 1,
    line_color = 'gray',
    line_width = 0.25,
)


# Slider that selects which year of first reviews is shown.
# The initial value has to lie inside [start, end]. The previous initial value
# of 1 was outside the slider range and equal to no listing year, so the
# filter below selected nothing until the slider was moved.
slider = Slider(start = 2009, end = 2019,
                value = 2009, step = 1, title = 'year')

# Emit the data source's change signal whenever the slider value changes, so
# that the filtered view is re-evaluated
callback = CustomJS(args = dict(source=asource),
                    code = """source.change.emit();""")

slider.js_on_change('value', callback)

# JavaScript filter: returns one boolean per row of the source, true where
# the row's 'year' equals the current slider value
custom_filter = CustomJSFilter(args = dict(slider = slider,
                                           source = asource),
                               code = '''
var indices = [];
for (var i = 0; i < source.get_length(); i++){
 if (source.data['year'][i] == slider.value){
 indices.push(true);
 } else {
 indices.push(false);
 }
}
return indices;
''')

# View over the Airbnb source restricted by custom_filter; it decides which
# listings are visible
view = CDSView(source = asource, filters = [custom_filter])

# Airbnb listings as blue circles, limited to the rows selected by the view
p.circle('x', 'y', source = asource, color = 'blue',
                  size = 5, alpha = 0.3, view = view)

# Hotels as semi-transparent red squares
hotels = p.square('x', 'y', source = hsource,
                  color = 'red', size = 5, alpha = 0.3)

# Hover tool for the hotel markers: shows the hotel's doc_id field
hotel_hover = HoverTool(renderers = [hotels],
                        tooltips = [('hotel','@doc_id')])
p.add_tools(hotel_hover)

# Hover tool for the neighbourhood polygons: shows the name and the price
neighbourhood_hover = HoverTool(renderers = [man],
                                tooltips = [('neighbourhood','@neighbourhood'),
                                            ('price','@price')])
p.add_tools(neighbourhood_hover)

# Place the colour bar underneath the plot
p.add_layout(color_bar, 'below')

# Stack the plot above the year slider and display the combined layout
layout = column(p, widgetbox(slider))
show(layout)