In [1]:
import os
import copy
import json
import vincent
import pandas as pd
import numpy as np
import feather
import folium
from folium import plugins
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default styling to matplotlib figures.
sns.set()
# NOTE(review): presumably loads the JS assets vincent needs to render charts
# inline in the notebook — confirm against the vincent documentation.
vincent.core.initialize_notebook()

import warnings
# Silence every warning for the rest of the session. Be aware this also hides
# deprecation notices raised by the libraries used below.
warnings.filterwarnings('ignore')
# Show up to 100 characters per cell before pandas truncates displayed text.
pd.set_option('display.max_colwidth', 100)

In [2]:
# Folder that holds every input file read by this notebook.
DATA_PATH = '../data/'

# Housing table stored in Feather format; later cells read its `location`,
# `price` and `supply` columns.
df = feather.read_dataframe(os.path.join(DATA_PATH, 'housing_with_location'))

# District outlines. The file is kept as raw text here and parsed with
# json.loads when the map is built.
boston_geojson = os.path.join(DATA_PATH, '12_brook_districts')
# GeoJSON is UTF-8 by specification; state it explicitly so the read does not
# depend on the platform's default locale encoding.
with open(boston_geojson, encoding='utf-8') as f:
    geo_json = f.read()

In [3]:
# Districts kept for the analysis; rows whose `location` is not listed here
# are dropped when the featured subset is built.
# NOTE(review): 'Medfold' looks like a typo for 'Medford' — verify against the
# values actually present in the `location` column before changing it.
featured_districts = [
    'Brookline',
    'Cambridge',
    'East Boston',
    'Fitchburg',
    'Fall River',
    'Jamaica Plain',
    'Lowell',
    'Lynn',
    'Malden',
    'Medfold',
    'New Bedford',
    'Quincy',
    'Revere',
    'Somerville',
    'Watertown',
]

In [4]:
# Restrict the full table to rows located in one of the featured districts.
in_featured_district = df['location'].isin(featured_districts)
featured_df = df.loc[in_featured_district]

In [5]:
# Mean and median price per district, with `location` as a regular column.
# String aggregation names are used instead of np.mean / np.median: recent
# pandas versions warn when numpy callables are passed to `agg`, and the
# resulting column names ('mean', 'median') are the same either way.
district_prices = (
    featured_df
    .groupby('location')['price']
    .agg(['mean', 'median'])
    .reset_index()
)

In [6]:
def popup_html(feature, data=None):
    """Build the HTML summary shown for one district.

    Parameters
    ----------
    feature : str
        District name, matched against the ``location`` column.
    data : pandas.DataFrame, optional
        Table with ``location``, ``supply`` and ``price`` columns. Defaults to
        the notebook-level ``featured_df``.

    Returns
    -------
    str
        HTML fragment with the district name, the number of rows where
        ``supply`` is True ("Supply") and False ("Demand"), their ratio when
        there is at least one supply row, and the mean and median price.
    """
    if data is None:
        data = featured_df

    # Filter once and reuse the subset for every statistic below.
    rows = data[data.location == feature]
    supp = int((rows.supply == True).sum())
    dema = int((rows.supply == False).sum())

    html = f"<h4>{feature}</h4>"
    html += f"<b>Supply - {supp}</b> "
    html += f"<b>Demand - {dema}</b><br>"
    # The ratio is undefined without supply rows, so the line is omitted.
    if supp:
        html += f"<b>Demand-Supply rate - {round(dema/supp,2)}</b><br>"
    html += f"<b>mean - {np.round(rows.price.mean())}$</b> "
    html += f"<b>median - {rows.price.median()}$</b><br>"
    return html

def make_bar_plot(feature, bins_count=8, data=None):
    """Return a vincent bar chart (as JSON) of one district's price histogram.

    Parameters
    ----------
    feature : str
        District name, matched against the ``location`` column.
    bins_count : int, optional
        Number of equal-width price bins (default 8).
    data : pandas.DataFrame, optional
        Table with ``location`` and ``price`` columns. Defaults to the
        notebook-level ``featured_df``.

    Returns
    -------
    str
        The chart specification produced by ``vincent.Bar(...).to_json()``.
    """
    if data is None:
        data = featured_df

    prices = data.loc[data.location == feature, 'price']
    # labels=False yields the integer bin id of every price; retbins=True also
    # returns the bins_count + 1 bin edges.
    bin_ids, bin_edges = pd.cut(prices, bins=bins_count, labels=False, retbins=True)

    # Each bar is labelled with its lower edge rounded down to a multiple of 100.
    labels = [f'{(int(lower) // 100) * 100}' for lower in bin_edges[:-1]]

    # Count rows per bin and give empty bins an explicit zero. reindex replaces
    # the row-by-row DataFrame.append of the past (removed in pandas 2.0) and
    # keeps the bins in ascending order.
    counts = bin_ids.value_counts().reindex(range(bins_count), fill_value=0)
    counts.index = pd.Index(labels, name='range')
    counts.name = 'price'

    bar = vincent.Bar(counts, width=250, height=200)
    bar.axis_titles(x='price range', y='adverts')
    return bar.to_json()

In [7]:
# Districts with fewer rows than this are left off the map.
MIN_ADVERTS = 3

m = folium.Map(location=[42.34878, -71.10445], zoom_start=10)

# Row count per district, computed once instead of re-filtering in the loop.
adverts_per_district = featured_df.location.value_counts()

for dist in json.loads(geo_json)['features']:
    dist_name = dist['properties']['name']
    if adverts_per_district.get(dist_name, 0) < MIN_ADVERTS:
        continue

    # The same summary is used for the hover tooltip and the click popup.
    # It must be built from the district *name*: passing the whole GeoJSON
    # feature dict would not match any value in the `location` column.
    summary_html = popup_html(dist_name)

    gj = folium.GeoJson(data=dist, control=False, smooth_factor=0, tooltip=summary_html)
    gj.add_to(m)

    popup = folium.Popup(summary_html, max_width=500)
    bar_plot_json = make_bar_plot(dist_name)
    folium.Vega(bar_plot_json, width=350, height=250).add_to(popup)
    popup.add_to(gj)

m