<a href="https://colab.research.google.com/github/JimKing100/Jestimate/blob/master/Final_SF_Map_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## The San Francisco Real Estate Project

In [0]:
# Install fiona - need to comment out for transfer to live site.
# Turn on for running in a notebook
#%%capture
#!pip install fiona

In [0]:
# Install geopandas - need to comment out for tranfer to live site.
# Turn on for running in a notebook
#%%capture
#!pip install geopandas

In [0]:
# Import libraries
import pandas as pd
import numpy as np
import math

import geopandas
import json

from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar, NumeralTickFormatter
from bokeh.palettes import brewer

from bokeh.io.doc import curdoc
from bokeh.models import Slider, HoverTool, Select, TapTool, CustomJS, ColumnDataSource, TableColumn, DataTable, CDSView, GroupFilter
from bokeh.layouts import widgetbox, row, column, gridplot
from bokeh.models.widgets import TextInput

### Load and Clean the Data

In [99]:
# Load SF real estate data - 10 years (2009-2018) of single family home sales in San Francisco downloaded from the SF MLS
# Longitude and latitude were added to the csv file prior to loading using geocoding.geo.census.gov 
# Load the results of the price prediction
initial_data = pd.read_csv('https://raw.githubusercontent.com/JimKing100/Jestimate/master/data/SF-SFR-Sales-Final2d.csv')
results_data = pd.read_csv('https://raw.githubusercontent.com/JimKing100/Jestimate/master/data/display_data4.csv')

# Create subset with outliers removed - 1.6% of the data, and zero square foot homes removed - 16.2% of the data
mask = (
  (initial_data['baths'] < 6) &
  (initial_data['beds'] < 7) &
  (initial_data['beds'] > 0) &
  (initial_data['lot_sf'] < 10000) &
  (initial_data['rooms'] < 13) &
  (initial_data['sale_price'] < 10000000) &
  (initial_data['sf'] < 10000) &
  (initial_data['sf'] > 100)
)
initial_data = initial_data[mask]

# Clean up full_address field dropping city, state and zip, clean up the neighborhood field to remove the number
results_data['full_address'] = results_data.apply(lambda x: x['full_address'].split(',')[0], axis=1)
results_data['neighborhood'] = results_data.apply(lambda x: x['neighborhood'].split('- ')[1], axis=1)

# Rename nid to subdist_no
results_data = results_data.rename(columns={'nid': 'subdist_no'})

# Rename subdistr_desc to neighborhood
temp_data = initial_data.rename(columns={'subdist_desc': 'neighborhood'})

# Check the data
print(temp_data.shape)
temp_data.head(5)

(19497, 39)


Unnamed: 0,longitude,latitude,elevation,full_address,city,state,street_no,street_name,street_suffix,zip,area,district_no,district_desc,subdist_no,neighborhood,on_market_date,cdom,orig_list_price,sale_date,sale_price,rooms,baths,beds,sf_source,sf_source_decs,sf,lot_acres,lot_sf,year_built,zoning,lot_desc,drive_side,parking,park_leased,num_parking,shopping,transportation,type,views
0,-122.50965,37.78028,200.83,"2645 El Camino Del Mar, San Francisco, CA 94121",San Francisco,CA,2645,El Camino Del Mar,,94121,1050,1,SF District 1,1050,1 - Outer Richmond,3/14/13,4,1095000,3/22/13,1260000,8,3.5,4,T,Per Tax Records,2691,,0,1969,RH2,"RGLR,FNCD","PVDW,PVSW","ATCH,GARG",0,2,4BLK,1BLK,3STR,"PNRM,OCEN,PARK,GRDN"
1,-122.50929,37.762608,23.21,"1278 La Playa St, San Francisco, CA 94122",San Francisco,CA,1278,La Playa,St,94122,2030,2,SF District 2,2030,2 - Outer Sunset,12/3/15,144,1250000,4/25/16,1075000,9,3.0,4,T,Per Tax Records,2437,0.0689,3000,1947,RM1,RGLR,0,"ATCH,GARG,ATDR,INAC",ONST,1,2BLK,1BLK,0,0
2,-122.50929,37.762608,23.21,"1278 La Playa St, San Francisco, CA 94122",San Francisco,CA,1278,La Playa,St,94122,2030,2,SF District 2,2030,2 - Outer Sunset,5/18/17,36,1395000,8/17/17,1525000,9,5.0,5,D,Per Architect,2597,0.0689,3000,1947,RM1,RGLR,0,"ATCH,GARG,ATDR,INAC",ONST,1,2BLK,1BLK,0,0
3,-122.50924,37.77733,189.11,"590 48th Ave, San Francisco, CA 94121",San Francisco,CA,590,48th,Ave,94121,1050,1,SF District 1,1050,1 - Outer Richmond,6/27/10,42,725000,8/20/10,715000,5,1.0,2,T,Per Tax Records,1312,,0,1939,RH1,RGLR,"PVDW,PVSW",GARG,0,2,3BLK,1BLK,"ATAC,2STR,FIXR","WATR,OCEN,PARK"
4,-122.50919,37.776695,175.89,"618 48th Ave, San Francisco, CA 94121",San Francisco,CA,618,48th,Ave,94121,1050,1,SF District 1,1050,1 - Outer Richmond,6/7/12,24,1595000,7/13/12,1595000,8,4.0,4,T,Per Tax Records,3307,,0,1951,,RGLR,PVDW,"ATCH,GARG,ATDR,INAC",0,3,4BLK,2BLK,3STR,"PNRM,CTYL,OCEN,PARK"


In [100]:
# Map MLS neighborhood code (e.g. 1010) to GeoDataFrame neighborhood id (e.g. '1a') to create the sf_data for all 10 years
neighborhood_dict = {1010: '1a', 1020: '1b', 1030: '1c', 1040: '1d', 1050: '1e', 1060: '1f', 1070: '1g',
                     2010: '2a', 2020: '2b', 2030: '2c', 2040: '2d', 2050: '2e', 2060: '2f', 2070: '2g',
                     3010: '3a', 3020: '3b', 3030: '3c', 3040: '3d', 3050: '3e', 3060: '3f', 3070: '3g', 3080: '3h', 3090: '3j',
                     4010: '4a', 4020: '4b', 4030: '4c', 4040: '4d', 4050: '4e', 4060: '4f', 4070: '4g', 4080: '4h', 4090: '4j', 4100: '4k', 4110: '4m', 4120: '4n', 4130: '4p', 4140: '4r', 4150: '4s', 4160: '4t',
                     5010: '5a', 5020: '5b', 5030: '5c', 5040: '5d', 5050: '5e', 5060: '5f', 5070: '5g', 5080: '5h', 5090: '5j', 5100: '5k', 5110: '5m',
                     6010: '6a', 6020: '6b', 6030: '6c', 6040: '6d', 6050: '6e', 6060: '6f',
                     7010: '7a', 7020: '7b', 7030: '7c', 7040: '7d', 
                     8010: '8a', 8020: '8b', 8030: '8c', 8040: '8d', 8050: '8e', 8060: '8f', 8070: '8g', 8080: '8h', 8090: '8j',
                     9010: '9a', 9020: '9c', 9030: '9d', 9040: '9e', 9050: '9f', 9060: '9h', 9070: '9j', 9080: '9g', 
                     10010: '10a', 10020: '10b', 10030: '10c', 10040: '10d', 10050: '10e', 10060: '10f', 10070: '10g', 10080: '10h', 10090: '10j', 10100: '10k', 10110: '10m', 10120: '10n'
                    }

# Map subdist_no for sf_data and results_data
sf_data = temp_data.replace({'subdist_no': neighborhood_dict})
results_data = results_data.replace({'subdist_no': neighborhood_dict})
results_data.head()

Unnamed: 0.1,Unnamed: 0,full_address,neighborhood,subdist_no,sale_price,prediction,difference,pred_percent,shap
0,0,618 48th Ave,Outer Richmond,1e,3310000,3306051,-3948,-0.001194,Pros - 1: comp_price = 3605916 2: year_sold = ...
1,1,1516 Great Highway,Outer Sunset,2c,1830000,1614575,-215424,-0.133425,Pros - 1: year_sold = 2018 2: baths = 3.0 Co...
2,2,1492 La Playa St,Outer Sunset,2c,1300000,1090913,-209086,-0.191661,Pros - 1: year_sold = 2018 2: park_leased = ON...
3,3,784 48th Ave,Outer Richmond,1e,1300000,1493652,193652,0.12965,Pros - 1: year_sold = 2018 2: latitude = 37.77...
4,4,525 47th Ave,Outer Richmond,1e,1865000,1732993,-132006,-0.076172,Pros - 1: year_sold = 2018 2: latitude = 37.77...


In [101]:
# Create a year column based on the sale date
sf_data['year'] = '20' + sf_data['sale_date'].str[-2:]
sf_data['year'] = sf_data['year'].astype(int)
sf_data.head(5)

# Create a price_sf column
sf_data['price_sf'] = sf_data['sale_price'] / sf_data['sf']
sf_data.head(5)

Unnamed: 0,longitude,latitude,elevation,full_address,city,state,street_no,street_name,street_suffix,zip,area,district_no,district_desc,subdist_no,neighborhood,on_market_date,cdom,orig_list_price,sale_date,sale_price,rooms,baths,beds,sf_source,sf_source_decs,sf,lot_acres,lot_sf,year_built,zoning,lot_desc,drive_side,parking,park_leased,num_parking,shopping,transportation,type,views,year,price_sf
0,-122.50965,37.78028,200.83,"2645 El Camino Del Mar, San Francisco, CA 94121",San Francisco,CA,2645,El Camino Del Mar,,94121,1050,1,SF District 1,1e,1 - Outer Richmond,3/14/13,4,1095000,3/22/13,1260000,8,3.5,4,T,Per Tax Records,2691,,0,1969,RH2,"RGLR,FNCD","PVDW,PVSW","ATCH,GARG",0,2,4BLK,1BLK,3STR,"PNRM,OCEN,PARK,GRDN",2013,468.227425
1,-122.50929,37.762608,23.21,"1278 La Playa St, San Francisco, CA 94122",San Francisco,CA,1278,La Playa,St,94122,2030,2,SF District 2,2c,2 - Outer Sunset,12/3/15,144,1250000,4/25/16,1075000,9,3.0,4,T,Per Tax Records,2437,0.0689,3000,1947,RM1,RGLR,0,"ATCH,GARG,ATDR,INAC",ONST,1,2BLK,1BLK,0,0,2016,441.116126
2,-122.50929,37.762608,23.21,"1278 La Playa St, San Francisco, CA 94122",San Francisco,CA,1278,La Playa,St,94122,2030,2,SF District 2,2c,2 - Outer Sunset,5/18/17,36,1395000,8/17/17,1525000,9,5.0,5,D,Per Architect,2597,0.0689,3000,1947,RM1,RGLR,0,"ATCH,GARG,ATDR,INAC",ONST,1,2BLK,1BLK,0,0,2017,587.216018
3,-122.50924,37.77733,189.11,"590 48th Ave, San Francisco, CA 94121",San Francisco,CA,590,48th,Ave,94121,1050,1,SF District 1,1e,1 - Outer Richmond,6/27/10,42,725000,8/20/10,715000,5,1.0,2,T,Per Tax Records,1312,,0,1939,RH1,RGLR,"PVDW,PVSW",GARG,0,2,3BLK,1BLK,"ATAC,2STR,FIXR","WATR,OCEN,PARK",2010,544.969512
4,-122.50919,37.776695,175.89,"618 48th Ave, San Francisco, CA 94121",San Francisco,CA,618,48th,Ave,94121,1050,1,SF District 1,1e,1 - Outer Richmond,6/7/12,24,1595000,7/13/12,1595000,8,4.0,4,T,Per Tax Records,3307,,0,1951,,RGLR,PVDW,"ATCH,GARG,ATDR,INAC",0,3,4BLK,2BLK,3STR,"PNRM,CTYL,OCEN,PARK",2012,482.310251


In [102]:
high_cutoff = 2018
sf_data  = sf_data[sf_data['year'] >= high_cutoff]
print(sf_data.shape)
sf_data.head()

(1879, 41)


Unnamed: 0,longitude,latitude,elevation,full_address,city,state,street_no,street_name,street_suffix,zip,area,district_no,district_desc,subdist_no,neighborhood,on_market_date,cdom,orig_list_price,sale_date,sale_price,rooms,baths,beds,sf_source,sf_source_decs,sf,lot_acres,lot_sf,year_built,zoning,lot_desc,drive_side,parking,park_leased,num_parking,shopping,transportation,type,views,year,price_sf
5,-122.50919,37.776695,175.89,"618 48th Ave, San Francisco, CA 94121-2410",San Francisco,CA,618,48th,Ave,94121,1050,1,SF District 1,1e,1 - Outer Richmond,5/17/18,13,2749000,6/21/18,3310000,8,4.0,4,S,Per Graphic Artist,3735,0.0909,3959,1951,,0,0,"ATCH,GARG,ATDR,INAC",ONST,3,3BLK,1BLK,2STR,"PNRM,CTYL,WATR,SFRN,OCEN,PARK,GRDN,TWNP",2018,886.211513
9,-122.509056,37.75801,23.01,"1516 Great Highway, San Francisco, CA 94122",San Francisco,CA,1516,Great Highway,,94122,2030,2,SF District 2,2c,2 - Outer Sunset,4/30/18,15,1298000,5/23/18,1830000,7,3.0,4,T,Per Tax Records,2120,0.0705,3072,1948,RM1,"RGLR,OCNF",0,GARG,ONST,1,2BLK,2BLK,FULL,GRDN,2018,863.207547
17,-122.50902,37.758636,22.7,"1492 La Playa St, San Francisco, CA 94122",San Francisco,CA,1492,La Playa,St,94122,2030,2,SF District 2,2c,2 - Outer Sunset,6/14/18,19,895000,7/18/18,1300000,0,1.0,2,A,Per Appraiser,1087,0.0286,1245,1944,RM1,0,"PVDW,PVSW",ATCH,ONST,1,1BLK,1BLK,0,0,2018,1195.952162
23,-122.50898,37.77368,40.08,"784 48th Ave, San Francisco, CA 94121",San Francisco,CA,784,48th,Ave,94121,1050,1,SF District 1,1e,1 - Outer Richmond,3/5/18,3,1100000,4/9/18,1300000,6,1.5,2,T,Per Tax Records,1326,0.0619,2696,1942,RH2,"RGLR,LEVL,FNCD","PVDW,PVSW",GARG,ONST,2,1BLK,1BLK,"JR,2STR",0,2018,980.392157
57,-122.50827,37.778465,225.66,"525 47th Ave, San Francisco, CA 94121",San Francisco,CA,525,47th,Ave,94121,1050,1,SF District 1,1e,1 - Outer Richmond,4/26/18,5,1588000,5/22/18,1865000,6,2.0,3,T,Per Tax Records,1800,0.0689,3000,1931,RH1,RGLR,"PVDW,PVSW","ATCH,GARG,ATDR,INAC",ONST,2,0,1BLK,"FULL,2STR",PRTL,2018,1036.111111


### Create the Summary Data by Neighborhood

In [103]:
# Function to calculate the monthly housing payment (PITI)
def minimum_income(median_price):
    int_rate = .04
    term = 30
    down_pmt = .20
    principal_pmt = median_price * (1 - down_pmt)
    
    # Calculate insurance and taxes
    tax_pmt = (median_price * .01) / 12
    insurance_pmt = (median_price * .0038) / 12
    
    # monthly rate from annual percentage rate
    interest_rate = int_rate/(100 * 12)
    
    # total number of payments
    payment_num = term * 12
    
    # calculate monthly mortgage payment
    mortgage_pmt = principal_pmt * (interest_rate * (math.pow((1 + interest_rate), (payment_num))) / 
                               (math.pow((1 + interest_rate), (payment_num)) - 1))
    payment = mortgage_pmt + tax_pmt + insurance_pmt
    min_income = (payment / .30) * 12
    
    return min_income

neighborhood_data = sf_data.groupby(
    ['year', 'subdist_no']
).agg(
    {
    'sale_price': ['count', 'mean', 'median'],
    'sf': ['mean'],
    'price_sf': ['mean']
    }
)

#Reset the index to 1 level to fill in year
neighborhood_data = neighborhood_data.set_axis(neighborhood_data.columns.map('_'.join), axis=1, inplace=False)
neighborhood_data = neighborhood_data.reset_index(level=[0,1])

# Change data types to integer for price_sf and year
neighborhood_data = neighborhood_data.astype({'sale_price_mean': 'int'})
neighborhood_data = neighborhood_data.astype({'sale_price_median': 'int'})
neighborhood_data = neighborhood_data.astype({'sf_mean': 'int'})
neighborhood_data = neighborhood_data.astype({'price_sf_mean': 'int'})
neighborhood_data = neighborhood_data.astype({'year': 'int'})

# Add the monthly income required to buy the median house in the neighborhood
neighborhood_data['min_income'] = neighborhood_data.apply(lambda x: minimum_income(x['sale_price_median']), axis=1)
neighborhood_data = neighborhood_data.astype({'min_income': 'int'})

neighborhood_data.head()

Unnamed: 0,year,subdist_no,sale_price_count,sale_price_mean,sale_price_median,sf_mean,price_sf_mean,min_income
0,2018,10a,65,950138,880000,1438,684,119173
1,2018,10b,38,1151873,1147500,1557,783,155399
2,2018,10c,81,1115025,1080000,1369,864,146258
3,2018,10d,30,1004051,981000,1437,724,132851
4,2018,10e,59,911875,900000,1310,755,121882


### Prepare the mapping data and GeoDataFrame

In [104]:
# Read the geojson map file for Realtor Neighborhoods into a GeoDataframe object
sf = geopandas.read_file('https://raw.githubusercontent.com/JimKing100/SF_Real_Estate_Live/master/data/Realtor%20Neighborhoods.geojson')

# Set the Coordinate Referance System (crs) for projections
# ESPG code 4326 is also referred to as WGS84 lat-long projection
sf.crs = {'init': 'epsg:4326'}

# Rename columns in geojson map file
sf = sf.rename(columns={'geometry': 'geometry','nbrhood':'neighborhood_name', 'nid': 'subdist_no'}).set_geometry('geometry')

# Change neighborhood id (subdist_no) for correct code for Mount Davidson Manor and for parks
sf.loc[sf['neighborhood_name'] == 'Mount Davidson Manor', 'subdist_no'] = '4n'
sf.loc[sf['neighborhood_name'] == 'Golden Gate Park', 'subdist_no'] = '12a'
sf.loc[sf['neighborhood_name'] == 'Presidio', 'subdist_no'] = '12b'
sf.loc[sf['neighborhood_name'] == 'Lincoln Park', 'subdist_no'] = '12c'

sf.sort_values(by=['subdist_no'])
sf.head()

Unnamed: 0,neighborhood_name,subdist_no,sfar_distr,geometry
0,Alamo Square,6e,District 6 - Central North,"MULTIPOLYGON (((-122.42948 37.77510, -122.4310..."
1,Anza Vista,6a,District 6 - Central North,"MULTIPOLYGON (((-122.44746 37.77986, -122.4473..."
2,Balboa Terrace,4a,District 4 - Twin Peaks West,"MULTIPOLYGON (((-122.46451 37.73221, -122.4650..."
3,Bayview,10a,District 10 - Southeast,"MULTIPOLYGON (((-122.38759 37.75026, -122.3874..."
4,Bernal Heights,9a,District 9 - Central East,"MULTIPOLYGON (((-122.40375 37.74919, -122.4046..."


### Create colorbar formatting lookup table

In [105]:
# This dictionary contains the formatting for the data in the plots
format_data = [('sale_price_count', 0, 100,'0,0', 'Number of Sales'),
               ('sale_price_mean', 500000, 4000000,'$0,0', 'Average Sales Price'),
               ('sale_price_median', 500000, 4000000, '$0,0', 'Median Sales Price'),
               ('sf_mean', 500, 5000,'0,0', 'Average Square Footage'),
               ('price_sf_mean', 0, 2000,'$0,0', 'Average Price Per Square Foot'),
               ('min_income', 50000, 600000,'$0,0', 'Minimum Income Required')
              ]
 
#Create a DataFrame object from the dictionary 
format_df = pd.DataFrame(format_data, columns = ['field' , 'min_range', 'max_range' , 'format', 'verbage'])
format_df.head(10)

Unnamed: 0,field,min_range,max_range,format,verbage
0,sale_price_count,0,100,00,Number of Sales
1,sale_price_mean,500000,4000000,"$0,0",Average Sales Price
2,sale_price_median,500000,4000000,"$0,0",Median Sales Price
3,sf_mean,500,5000,00,Average Square Footage
4,price_sf_mean,0,2000,"$0,0",Average Price Per Square Foot
5,min_income,50000,600000,"$0,0",Minimum Income Required


### Create the Interactive Plot

In [0]:
# Create a function the returns json_data for the year selected by the user
def json_data(selectedYear):
    yr = selectedYear
    
    # Pull selected year from neighborhood summary data
    df_yr = neighborhood_data[neighborhood_data['year'] == yr]
    
    # Merge the GeoDataframe object (sf) with the neighborhood summary data (neighborhood)
    merged = pd.merge(sf, df_yr, on='subdist_no', how='left')
    
    # Fill the null values
    values = {'year': yr, 'sale_price_count': 0, 'sale_price_mean': 0, 'sale_price_median': 0,
              'sf_mean': 0, 'price_sf_mean': 0, 'min_income': 0}
    merged = merged.fillna(value=values)
    
    # Bokeh uses geojson formatting, representing geographical features, with json
    # Convert to json
    merged_json = json.loads(merged.to_json())
    
    # Convert to json preferred string-like object 
    json_data = json.dumps(merged_json)
    return json_data
    
# Create a plotting function
def make_plot(field_name):    
  # Set the format of the colorbar
  min_range = format_df.loc[format_df['field'] == field_name, 'min_range'].iloc[0]
  max_range = format_df.loc[format_df['field'] == field_name, 'max_range'].iloc[0]
  field_format = format_df.loc[format_df['field'] == field_name, 'format'].iloc[0]

  # Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors.
  color_mapper = LinearColorMapper(palette = palette, low = min_range, high = max_range)

  # Create color bar.
  format_tick = NumeralTickFormatter(format=field_format)
  color_bar = ColorBar(color_mapper=color_mapper, label_standoff=18, formatter=format_tick,
  border_line_color=None, location = (0, 0))

  # Create figure object.
  verbage = format_df.loc[format_df['field'] == field_name, 'verbage'].iloc[0]

  p = figure(title = verbage + ' by Neighborhood for Homes in SF 2018 - Click Neighborhood on Map, Click Property on Table', 
             plot_height = 650, plot_width = 850,
             toolbar_location = None)
  p.xgrid.grid_line_color = None
  p.ygrid.grid_line_color = None
  p.axis.visible = False

  # Add patch renderer to figure. 
  p.patches('xs','ys', source = geosource, fill_color = {'field' : field_name, 'transform' : color_mapper},
          line_color = 'black', line_width = 0.25, fill_alpha = 1)
  
  # Specify color bar layout.
  p.add_layout(color_bar, 'right')

  # Add the hover tool to the graph
  p.add_tools(tap, hover)
  
  return p

# On change of source (datatable selection by mouse-click) fill the line items with values by property address
def function_source(attr, old, new):
    try:
        selected_index = source.selected.indices[0]
        table_address.value = str(source.data['full_address'][selected_index])
        table_actual.value = '${:,}'.format((source.data['sale_price'][selected_index]))
        table_predicted.value = '${:,}'.format((source.data['prediction'][selected_index]))
        table_diff.value = '${:,}'.format(source.data['difference'][selected_index])
        table_percent.value = '{0:.0%}'.format((source.data['pred_percent'][selected_index]))
        table_shap.value = source.data['shap'][selected_index]
        
    except IndexError:
        pass

# On change of geosource (neighborhood selection by mouse-click) fill the datatable with nieghborhood sales     
def function_geosource(attr, old, new):
    try:
        selected_index = geosource.selected.indices[0]
        tap_neighborhood.value = sf.iloc[selected_index]['neighborhood_name'] 
        subdist = sf.iloc[selected_index]['subdist_no']
        hood = tap_neighborhood.value
        
        view1 = CDSView(source=source, filters=[GroupFilter(column_name='subdist_no', group=subdist)])
        columns = [TableColumn(field = 'full_address', title = 'Address')]
  
        data_table = DataTable(source = source, view = view1, columns = columns, width = 280, height = 280, editable = False)
        table_address.value = ''
        table_actual.value = ''
        table_predicted.value = ''
        table_diff.value = ''
        table_percent.value = ''
        table_shap.value = ''

        # Replace the updated datatable in the layout
        layout.children[1] = column(tap_neighborhood, data_table, table_address, table_actual, table_predicted,
                                    table_diff, table_percent)
       
    except IndexError:
        pass

### Start of Main Program
    
# Input geojson source that contains features for plotting for:
# initial year 2018 and initial criteria sale_price_median
geosource = GeoJSONDataSource(geojson = json_data(2018))
original_geosource = geosource
input_field = 'sale_price_mean'

# Initialize the datatable - set datatable source, set intial neighborhood, set initial view by neighborhhood, set columns
source = ColumnDataSource(results_data)
hood = 'Bernal Heights'
subdist = '9a'
view1 = CDSView(source=source, filters=[GroupFilter(column_name='subdist_no', group=subdist)])
columns = [TableColumn(field = 'full_address', title = 'Address')]

# Define a sequential multi-hue color palette.
palette = brewer['Blues'][8]

# Reverse color order so that dark blue is highest obesity.
palette = palette[::-1]

#Add hover tool to view neighborhood stats
hover = HoverTool(tooltips = [ ('Neighborhood','@neighborhood_name'),
                               ('# Sales', '@sale_price_count'),
                               ('Average Price', '$@sale_price_mean{,}'),
                               ('Median Price', '$@sale_price_median{,}'),
                               ('Average SF', '@sf_mean{,}'),
                               ('Price/SF ', '$@price_sf_mean{,}'),
                               ('Income Needed', '$@min_income{,}')])

# Add tap tool to select neighborhood on map
tap = TapTool()

# Call the plotting function
p = make_plot(input_field)

# Load the datatable, neighborhood, address, actual price, predicted price and difference for display
data_table = DataTable(source = source, view = view1, columns = columns, width = 280, height = 280, editable = False)
tap_neighborhood = TextInput(value = hood, title = 'Neighborhood')
table_address = TextInput(value = '', title = 'Address')
table_actual = TextInput(value = '', title = 'Actual Sale Price')
table_predicted = TextInput(value = '', title = 'Predicted Sale Price')
table_diff = TextInput(value = '', title = 'Difference')
table_percent = TextInput(value = '', title = 'Error Percentage')
table_shap = TextInput(value = '', title = 'Impact Features (SHAP Values)')

# On change of source (datatable selection by mouse-click) fill the line items with values by property address
source.selected.on_change('indices', function_source)

# On change of geosource (neighborhood selection by mouse-click) fill the datatable with nieghborhood sales
geosource.selected.on_change('indices', function_geosource)

# Layout the components with the plot in row postion (0) and the other components in a column in row position (1)
layout = row(column(p, table_shap), column(tap_neighborhood, data_table, table_address,
              table_actual, table_predicted, table_diff, table_percent))

# Add the layout to the current document
curdoc().add_root(layout)

# Use the following code to test in a notebook
# Interactive features will not show in notebook
#output_notebook()
#show(p)