In [1]:
# Import libraries
import os
import numpy as np
import pandas as pd
import math

import geopandas as gp
import json

# import holoviews as hv
# import holoviews.plotting.bokeh

from bokeh.io import output_notebook, output_file, push_notebook, show
from bokeh.plotting import figure, show
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar, NumeralTickFormatter, Div
from bokeh.palettes import brewer

from bokeh.io.doc import curdoc
from bokeh.models import Slider, HoverTool, Select, BoxSelectTool
from bokeh.layouts import widgetbox, row, column
from bokeh.client import push_session, pull_session
from bokeh.application import Application
from bokeh.application.handlers import FunctionHandler

#output_notebook()

# Import Data

In [7]:
# Load the neighbourhood boundary polygons (GeoJSON), resolved against the
# current working directory.
dr, fdr = os.getcwd(), ''
in_f = 'data/neighbourhoods.geojson'
target = os.path.join(dr, in_f)
df_neighbourhoods = gp.read_file(target)

In [8]:
# Load the Airbnb listings (GeoJSON with point geometries), resolved against
# the current working directory.
dr, fdr = os.getcwd(), ''
in_f = 'data/listings.geojson'
target = os.path.join(dr, fdr, in_f)
df_airbnb = gp.read_file(target)

In [9]:
# Load the hotels data (GeoJSON with point geometries), resolved against the
# current working directory.
dr, fdr = os.getcwd(), ''
in_f = 'data/sorted_hotels.geojson'
target = os.path.join(dr, fdr, in_f)
df_hotels = gp.read_file(target)

# Check Coordinate Ref system

In [10]:
# Reproject all three layers to EPSG:3857 (Web Mercator) so they share one
# coordinate system for plotting.
web_mercator_epsg = 3857
df_neighbourhoods = df_neighbourhoods.to_crs(epsg=web_mercator_epsg)
df_airbnb = df_airbnb.to_crs(epsg=web_mercator_epsg)
df_hotels = df_hotels.to_crs(epsg=web_mercator_epsg)

# Filtering and merging datasets

In [11]:
# Keep only the neighbourhoods whose group is Manhattan
is_manhattan = df_neighbourhoods['neighbourhood_group'] == 'Manhattan'
df_neighbourhoods = df_neighbourhoods[is_manhattan]

In [12]:
# Preview the first five Manhattan neighbourhoods
df_neighbourhoods.head(5)

Unnamed: 0,neighbourhood,neighbourhood_group,geometry
18,Battery Park City,Manhattan,"MULTIPOLYGON (((-8239173.407 4970202.287, -823..."
43,Chinatown,Manhattan,"MULTIPOLYGON (((-8237641.317 4970831.912, -823..."
52,Chelsea,Manhattan,"MULTIPOLYGON (((-8236914.735 4975858.173, -823..."
53,Civic Center,Manhattan,"MULTIPOLYGON (((-8237419.012 4970060.124, -823..."
80,East Harlem,Manhattan,"MULTIPOLYGON (((-8230276.308 4985757.389, -823..."


In [13]:
#df_neighbourhoods.to_csv("/Users/alexander.roald/Desktop/df_neighbourhoods.csv")

In [15]:
# Restrict the Airbnb listings to Manhattan
in_manhattan = df_airbnb['neighbourhood_group'] == 'Manhattan'
df_airbnb = df_airbnb[in_manhattan]

# Load the reviews, rename listing_id so it lines up with the listings' id
# column, and keep only the earliest review date of each listing
df_reviews = (
    pd.read_csv("data/reviews.csv")
    .rename(columns={"listing_id": "id"})
    .groupby('id')['date']
    .min()
    .to_frame()
)

# Inner join on id: listings that never received a review are dropped here
df_airbnb = pd.merge(df_airbnb, df_reviews, on='id')

# Parse the first-review date and split out its year and month
df_airbnb['date'] = pd.to_datetime(df_airbnb['date'])
first_review = df_airbnb['date'].dt
df_airbnb['year'] = first_review.year
df_airbnb['month'] = first_review.month

# The point geometry is not needed for the per-neighbourhood counts
df_airbnb = df_airbnb.drop(columns='geometry')

# Constant column that the pivot tables count over later
df_airbnb['counter'] = 1

# Create initial filter columns from airbnb dataframe

In [16]:
# Boolean masks, one per filter criterion
is_entire_home = df_airbnb['room_type'] == 'Entire home/apt'
is_private_room = df_airbnb['room_type'] == 'Private room'
is_rarely_available = df_airbnb['availability_365'] < 100
is_often_available = df_airbnb['availability_365'] >= 100
is_short_stay = df_airbnb['minimum_nights'] < 30

# Listing type
entire = df_airbnb[is_entire_home]
room = df_airbnb[is_private_room]

# Availability below / at least 100 days per year
a_less = df_airbnb[is_rarely_available]
a_more = df_airbnb[is_often_available]

# Minimum stay below 30 nights
stay_less = df_airbnb[is_short_stay]

# Most competitive listings: entire homes, available at least 100 days a year,
# with a minimum stay below 30 nights
most_comp = df_airbnb[is_entire_home & is_often_available & is_short_stay]

# Create simplified dataframes for the different filters: year, neighbourhood count 

In [17]:
# Only these three columns are needed to count listings per neighbourhood and year
summary_columns = ['neighbourhood', 'year', 'counter']

entire, room = entire[summary_columns], room[summary_columns]
a_less, a_more = a_less[summary_columns], a_more[summary_columns]
stay_less, most_comp = stay_less[summary_columns], most_comp[summary_columns]

In [18]:
def _count_by_year_and_neighbourhood(listings, count_name):
    """Count listings per (year, neighbourhood) and return them in long form.

    Builds a neighbourhood x year pivot table that counts the ``counter``
    column (empty cells filled with 0), unstacks it into one row per
    (year, neighbourhood) pair and returns a frame with the columns
    ``year``, ``neighbourhood`` and ``count_name``.
    """
    counts_wide = listings.pivot_table(
        index='neighbourhood',
        columns='year',
        values='counter',
        fill_value=0,
        aggfunc='count',
    )
    counts_long = counts_wide.unstack().to_frame().rename(columns={0: count_name})
    return counts_long.reset_index()


entire = _count_by_year_and_neighbourhood(entire, 'entire')
room = _count_by_year_and_neighbourhood(room, 'room')
a_less = _count_by_year_and_neighbourhood(a_less, 'a_less')
a_more = _count_by_year_and_neighbourhood(a_more, 'a_more')
stay_less = _count_by_year_and_neighbourhood(stay_less, 'stay_less')
most_comp = _count_by_year_and_neighbourhood(most_comp, 'most_comp')

In [19]:
# Combine the six per-filter count tables into one summary table.
# Each input frame has the columns: year, neighbourhood, <count column>.
# The frames are merged on the (year, neighbourhood) keys rather than on the
# positional index: every filter is pivoted from a different subset of
# listings, so a subset that lacks a year or neighbourhood would otherwise
# shift its rows and attach counts to the wrong neighbourhood.
merge_keys = ['year', 'neighbourhood']
count_columns = ['most_comp', 'stay_less', 'a_more', 'a_less', 'room', 'entire']

neighbourhood_data = most_comp
for filter_counts in (stay_less, a_more, a_less, room, entire):
    neighbourhood_data = pd.merge(neighbourhood_data, filter_counts, on=merge_keys, how='outer')

# A (year, neighbourhood) pair missing from a subset means zero listings there
neighbourhood_data = (
    neighbourhood_data
    .fillna({name: 0 for name in count_columns})
    .astype({name: int for name in count_columns})
    .sort_values(merge_keys)
    .reset_index(drop=True)
)
neighbourhood_data

Unnamed: 0,year,neighbourhood,most_comp,stay_less,a_more,a_less,room,entire
0,2009,Battery Park City,0,0,0,0,0,0
1,2009,Chelsea,0,1,0,1,1,0
2,2009,Chinatown,0,1,0,1,0,1
3,2009,Civic Center,0,0,0,0,0,0
4,2009,East Harlem,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...
347,2019,Two Bridges,2,17,6,12,11,7
348,2019,Upper East Side,65,228,142,140,68,201
349,2019,Upper West Side,31,189,89,147,83,144
350,2019,Washington Heights,19,165,62,106,113,54


In [20]:
# neighbourhood_data.to_csv("/Users/alexander.roald/Desktop/neighbourhood_data.csv")

# Create lookup table for colorbar formatting

In [21]:
# Lookup table describing how each data field is presented in the plots:
# the colour-bar range, the tick number format and the human-readable label.
format_columns = ['field', 'min_range', 'max_range', 'format', 'verbage']

# One tuple per field, in the same order as format_columns
format_data = [
    ('most_comp', 0, 250, '0', 'Most Competitive Airbnbs'),
    ('stay_less', 0, 450, '0', 'Minimum stay < 30 days'),
    ('a_more', 0, 250, '0', 'Availability > 100 days per year'),
    ('a_less', 0, 300, '0', 'Availability < 100 days per year'),
    ('room', 0, 300, '0', 'Private Rooms'),
    ('entire', 0, 300, '0', 'Entire Homes'),
]

# Build the DataFrame used for lookups by field name or by label
format_df = pd.DataFrame(format_data, columns=format_columns)
format_df.head(10)

Unnamed: 0,field,min_range,max_range,format,verbage
0,most_comp,0,250,0,Most Competitive Airbnbs
1,stay_less,0,450,0,Minimum stay < 30 days
2,a_more,0,250,0,Availability > 100 days per year
3,a_less,0,300,0,Availability < 100 days per year
4,room,0,300,0,Private Rooms
5,entire,0,300,0,Entire Homes


# Create Interactive Plot

In [23]:
#output_file('test')

# Create a function the returns json_data for the year selected by the user
def json_data(selectedYear):
    """Return the GeoJSON string of the Manhattan neighbourhoods for one year.

    Parameters
    ----------
    selectedYear : int
        Year to look up in the ``year`` column of ``neighbourhood_data``.

    Returns
    -------
    str
        GeoJSON text with one feature per row of ``df_neighbourhoods``,
        carrying that neighbourhood's count columns for the selected year.
    """
    # Pull the selected year from the neighbourhood summary data
    year_counts = neighbourhood_data[neighbourhood_data['year'] == selectedYear]

    # Left merge keeps every neighbourhood polygon, even those without a
    # summary row for this year
    merged = pd.merge(df_neighbourhoods, year_counts, on='neighbourhood', how='left')

    # Neighbourhoods without a summary row get the selected year and zero counts
    fill_values = {'year': selectedYear, 'most_comp': 0, 'stay_less': 0, 'a_more': 0,
                   'a_less': 0, 'room': 0, 'entire': 0}
    merged = merged.fillna(value=fill_values)

    # to_json() already produces the GeoJSON string Bokeh expects, so no
    # json.loads / json.dumps round trip is needed
    return merged.to_json()

# Define the callback function: update_plot
def update_plot(attr, old, new):
    """Bokeh ``on_change`` callback: redraw the map for the current widget state.

    The ``attr``, ``old`` and ``new`` arguments are not used; the current
    values are read directly from the ``slider`` and ``select`` widgets.
    """
    # Serialise the data for the year currently set on the slider
    selected_year = slider.value
    refreshed_geojson = json_data(selected_year)

    # Translate the label chosen in the select box into its data field
    chosen_label = select.value
    label_matches = format_df['verbage'] == chosen_label
    chosen_field = format_df.loc[label_matches, 'field'].iloc[0]

    # Build a fresh figure for the chosen field
    new_plot = make_plot(chosen_field)

    # Swap the document contents for a layout holding the new figure
    new_layout = column(new_plot, widgetbox(select), widgetbox(slider))
    document = curdoc()
    document.clear()
    document.add_root(new_layout)

    # Finally push the selected year's data into the shared source
    geosource.geojson = refreshed_geojson

# Create a plotting function
def make_plot(field_name):
    """Build the choropleth figure for one data field.

    Parameters
    ----------
    field_name : str
        Value of the ``field`` column in ``format_df``; also the name of the
        ``geosource`` column that drives the fill colour.

    Returns
    -------
    The figure, with neighbourhood patches, a colour bar on the right and the
    shared ``hover`` tool attached.
    """
    field_matches = format_df['field'] == field_name

    def lookup(column_name):
        # First format_df entry of this column for the requested field
        return format_df.loc[field_matches, column_name].iloc[0]

    # Linear mapping from the field's value range onto the palette colours
    colour_scale = LinearColorMapper(palette=palette,
                                     low=lookup('min_range'),
                                     high=lookup('max_range'))

    # Colour bar whose ticks use the field's number format
    tick_formatter = NumeralTickFormatter(format=lookup('format'))
    legend_bar = ColorBar(color_mapper=colour_scale,
                          label_standoff=18,
                          formatter=tick_formatter,
                          border_line_color=None,
                          location=(0, 0))

    # Figure titled with the field's label; grid lines and axes are hidden
    plot_title = lookup('verbage') + ' by neighbourhood in Manhattan - 2009 to 2019'
    fig = figure(title=plot_title,
                 plot_height=650,
                 plot_width=850,
                 tools=("pan,wheel_zoom,box_zoom,reset"),
                 toolbar_location="below")
    fig.xgrid.grid_line_color = None
    fig.ygrid.grid_line_color = None
    fig.axis.visible = False

    # Neighbourhood polygons, filled according to the selected field
    fill_spec = {'field': field_name, 'transform': colour_scale}
    fig.patches('xs', 'ys', source=geosource, fill_color=fill_spec,
                line_color='black', line_width=0.25, fill_alpha=1)

    # Colour bar goes to the right of the map
    fig.add_layout(legend_bar, 'right')

    # Attach the shared hover tool
    fig.add_tools(hover)
    return fig

# Input geojson source that contains features for plotting for:
# initial year 2018 and initial criteria most_comp
# GeoJSON source holding the features to plot.
# Initial year is 2009 and the initial criterion is most_comp.
geosource = GeoJSONDataSource(geojson = json_data(2009))
input_field = 'most_comp'

# Define a sequential multi-hue color palette.
palette = brewer['Blues'][8]

# Reverse the color order so that dark blue marks the highest counts.
palette = palette[::-1]

# Hover tool. '@name' reads a column of the data source, so each reference
# must match a property name in the GeoJSON exactly ('neighbourhood', not
# 'neighborhood'); a '$' prefix is reserved for Bokeh's special variables.
hover = HoverTool(tooltips = [ ('Neighborhood','@neighbourhood'),
                               ('Most Competitive', '@most_comp'),
                               ('Min stay < 30', '@stay_less'),
                               ('Avail > 100', '@a_more'),
                               ('Avail < 100', '@a_less'),
                               ('Private room', '@room'),
                               ('Entire home', '@entire')])

# Call the plotting function
p = make_plot(input_field)

# Make a slider object: slider
slider = Slider(title = 'Year',start = 2009, end = 2019, step = 1, value = 2009)
slider.on_change('value', update_plot)

# Make a selection object: select
# The options are taken from format_df so that every choice has a matching
# 'verbage' entry; update_plot looks the field up by that label and would
# raise an IndexError for a label that is not in the table.
select = Select(title='Select Criteria:', value='Most Competitive Airbnbs',
                options=format_df['verbage'].tolist())
select.on_change('value', update_plot)

# Make a column layout of the plot and the two widgets, and add it to the
# current document.
layout = column(p, widgetbox(select), widgetbox(slider))
curdoc().add_root(layout)

# NOTE: show() produces standalone HTML/JS output, where the Python callbacks
# registered with on_change do not run (Bokeh prints a warning about this).
# For a working slider and select box, run this code as a Bokeh server
# application instead.
show(layout)

You are generating standalone HTML/JS output, but trying to use real Python
callbacks (i.e. with on_change or on_event). This combination cannot work.

Only JavaScript callbacks may be used with standalone output. For more
information on JavaScript callbacks with Bokeh, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/interaction/callbacks.html

Alternatively, to use real Python callbacks, a Bokeh server application may
be used. For more information on building and running Bokeh applications, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/server.html

