# Test data loading

In [13]:
from bokeh.io import output_notebook
# Configure Bokeh so that subsequent show() calls render inline in notebook cell output.
output_notebook()

## Bokeh Demo

In [3]:
from bokeh.palettes import HighContrast3
from bokeh.plotting import figure, show

# Categories along the x axis, and the yearly series that get stacked per category.
fruits = ["Apples", "Pears", "Nectarines", "Plums", "Grapes", "Strawberries"]
years = ["2015", "2016", "2017"]

# Column-oriented table: one "fruits" column plus one column of counts per year.
data = {
    "fruits": fruits,
    "2015": [2, 1, 4, 3, 2, 4],
    "2016": [5, 3, 4, 2, 4, 6],
    "2017": [3, 2, 4, 4, 5, 3],
}

# In the tooltip, $name resolves to the hovered stack's name (the year) and
# @$name looks up the column of that name, i.e. the count for that year.
p = figure(
    title="Fruit Counts by Year",
    x_range=fruits,
    height=250,
    tools="hover",
    tooltips="$name @fruits: @$name",
    toolbar_location=None,
)

# One stacked bar segment per year, coloured from the three-colour palette.
p.vbar_stack(
    years,
    x="fruits",
    width=0.9,
    color=HighContrast3,
    source=data,
    legend_label=years,
)

# Axis and range tweaks
p.x_range.range_padding = 0.1
p.y_range.start = 0
p.axis.minor_tick_line_color = None

# Strip decoration that adds nothing to a categorical bar chart
p.xgrid.grid_line_color = None
p.outline_line_color = None

# Legend placement
p.legend.orientation = "horizontal"
p.legend.location = "top_left"

show(p)

In [6]:
import json

# Parse the cleaned business records from disk.
with open('data/cleaned_business.json', mode='r', encoding='utf-8') as file:
    businesses = json.load(file)

# Sanity check: show the first record in full, then just its name.
print(businesses[0], businesses[0]['name'], sep='\n')

# Collect the postal code of every business, in file order.
postal_codes = [business['postal_code'] for business in businesses]

print(postal_codes[:10])

{'business_id': 'MTSW4McQd7CbVtyjqoe9mw', 'name': 'St Honore Pastries', 'address': '935 Race St', 'city': 'Philadelphia', 'state': 'PA', 'postal_code': '19107', 'latitude': 39.9555052, 'longitude': -75.1555641, 'stars': 4.0, 'review_count': 80, 'is_open': 1, 'attributes': {'RestaurantsDelivery': 'False', 'OutdoorSeating': 'False', 'BusinessAcceptsCreditCards': 'False', 'BusinessParking': "{'garage': False, 'street': True, 'validated': False, 'lot': False, 'valet': False}", 'BikeParking': 'True', 'RestaurantsPriceRange2': '1', 'RestaurantsTakeOut': 'True', 'ByAppointmentOnly': 'False', 'WiFi': "u'free'", 'Alcohol': "u'none'", 'Caters': 'True'}, 'categories': 'Restaurants, Food, Bubble Tea, Coffee & Tea, Bakeries', 'hours': {'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', 'Wednesday': '7:0-20:0', 'Thursday': '7:0-20:0', 'Friday': '7:0-21:0', 'Saturday': '7:0-21:0', 'Sunday': '7:0-21:0'}}
St Honore Pastries
['19107', '19106', '19147', '19147', '19127', '19123', '19146', '19104', '19152', '19

In [7]:
from collections import Counter
from bokeh.plotting import figure, show
from bokeh.transform import factor_cmap
from bokeh.palettes import Spectral6


# Count businesses per postal code.
# Sort a *copy*: sorting `postal_codes` in place would reorder the list built
# in the loading cell, so it would no longer line up index-for-index with
# `businesses` and the earlier `postal_codes[0:10]` output would go stale.
# Counter keeps insertion order, so the keys come out in ascending code order.
postal_code_counts = Counter(sorted(postal_codes))
codes = list(postal_code_counts.keys())
counts = list(postal_code_counts.values())
# Note: records without a postal code appear as the empty string '' and get
# their own (unlabelled) bar at the far left.
print(codes)

# Categorical x axis: one bar per distinct postal code.
p = figure(x_range=codes, height=400, title="Distribution of Postal Codes", toolbar_location=None, tools="")

p.vbar(x=codes, top=counts, width=0.9, color="#CAB2D6")
p.xgrid.grid_line_color = None
p.y_range.start = 0
p.xaxis.axis_label = "Postal Codes"
p.yaxis.axis_label = "Frequency"
# Angle in radians; tilts the tick labels so the many codes stay legible.
p.xaxis.major_label_orientation = 1.3

show(p)

['', '08102', '08340', '18976', '19003', '19004', '19006', '19010', '19012', '19014', '19019', '19020', '19023', '19027', '19046', '19072', '19087', '19090', '19092', '19093', '19096', '19099', '19100', '19101', '19102', '19103', '19104', '19106', '19107', '19108', '19109', '19110', '19111', '19112', '19113', '19114', '19115', '19116', '19117', '19118', '19119', '19120', '19121', '19122', '19123', '19124', '19125', '19126', '19127', '19128', '19129', '19130', '19131', '19132', '19133', '19134', '19135', '19136', '19137', '19138', '19139', '19140', '19141', '19142', '19143', '19144', '19145', '19146', '19147', '19148', '19149', '19150', '19151', '19152', '19153', '19154', '19155', '19160', '19176', '19192', '19195', '19341', '19401', '19406', '19428', '19444', '19446', '19454', '43215', '55101']


## Map

In [None]:
#import data





In [80]:
import pandas as pd
from bokeh.plotting import figure, show
from pyproj import Proj, Transformer, CRS
import xyzservices.providers as xyz
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, CustomJS
from bokeh.models.glyphs import Scatter
from bokeh.transform import linear_cmap
from bokeh.util.hex import hexbin

# Render Bokeh output inline in the notebook.
output_notebook()

# Coordinate reference systems: source is WGS84 (EPSG:4326), target is
# Web Mercator (EPSG:3857), which is what the map tiles use.
in_proj, out_proj = CRS.from_epsg(4326), CRS.from_epsg(3857)
# always_xy=True: the transformer takes and returns coordinates in (x, y)
# order, i.e. (longitude, latitude) on the input side.
transformer = Transformer.from_crs(in_proj, out_proj, always_xy=True)

# Read the business table and keep only the columns the map needs.
# (For quick experiments, add `.sample(100)` to work on a small subset.)
df = pd.read_csv("data/cleaned_businessV2.csv")[["name", "latitude", "longitude"]]
def latlon_to_web_mercator(lat, lon):
    """Project a latitude/longitude pair with the notebook-level `transformer`.

    The arguments arrive as (lat, lon) but are passed on as (lon, lat),
    because `transformer` is built with always_xy=True.

    Returns the projected coordinates as an (x, y) tuple.
    """
    easting, northing = transformer.transform(lon, lat)
    return (easting, northing)
# Project all rows in a single vectorised call rather than a Python-level
# per-row apply: pyproj transforms whole coordinate arrays at once, which is
# far faster on large tables and also copes with an empty frame.
# `transformer` was created with always_xy=True, so the order is (lon, lat).
df['x'], df['y'] = transformer.transform(
    df['longitude'].to_numpy(),
    df['latitude'].to_numpy(),
)

# Bounding box of the projected points; used as the initial map extent.
min_x, max_x = df['x'].min(), df['x'].max()
min_y, max_y = df['y'].min(), df['y'].max()

# create plot
source = ColumnDataSource(df)
p = figure(title="Philadelphia Restaurants",
           x_axis_type="mercator", y_axis_type="mercator",
           x_range=(min_x, max_x), y_range=(min_y, max_y),
           width=800, height=600,
           tools="wheel_zoom,pan,reset, box_select, lasso_select")
p.add_tile(xyz.CartoDB.Positron)

p.grid.visible = False

# Hexagon size, in the same units as the projected x/y coordinates. The
# binning and the tile glyph must agree on it, so it is defined once here.
HEX_SIZE = 500

# Aggregate the points into hexagonal bins and colour each tile by its count.
bins = hexbin(df['x'], df['y'], size=HEX_SIZE)
p.hex_tile(q="q", r="r", source=bins,
           size=HEX_SIZE, line_color=None, fill_alpha=0.5,
           fill_color=linear_cmap('counts', 'Viridis256', 0, bins['counts'].max()))

# Individual businesses as faint dots on top of the hex tiles; they share
# `source`, so the selection tools act on these points.
glyph = Scatter(x='x', y='y', size=3, fill_color='black', fill_alpha=0.1, line_color=None)
p.add_glyph(source, glyph)

# Browser-side callback: log how many points the current selection contains.
source.selected.js_on_change('indices', CustomJS(args=dict(source=source), code="""
    const indices = cb_obj.indices;
    console.log('Number of selected points:', indices.length);
    """))

show(p)


In [None]:
import pandas as pd
from bokeh.plotting import figure, show
from pyproj import Proj, Transformer, CRS
import xyzservices.providers as xyz
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, CustomJS
from bokeh.models.glyphs import Scatter
from bokeh.transform import linear_cmap
from bokeh.util.hex import hexbin
from scipy.stats import gaussian_kde
import numpy as np
from bokeh.palettes import Blues9

# Render Bokeh output inline in the notebook.
output_notebook()

# Coordinate reference systems: source is WGS84 (EPSG:4326), target is
# Web Mercator (EPSG:3857), which is what the map tiles use.
in_proj, out_proj = CRS.from_epsg(4326), CRS.from_epsg(3857)
# always_xy=True: the transformer takes and returns coordinates in (x, y)
# order, i.e. (longitude, latitude) on the input side.
transformer = Transformer.from_crs(in_proj, out_proj, always_xy=True)

# Read the business table and keep only the columns the map needs.
# (For quick experiments, add `.sample(100)` to work on a small subset.)
df = pd.read_csv("data/cleaned_businessV2.csv")[["name", "latitude", "longitude"]]
def latlon_to_web_mercator(lat, lon):
    """Project a latitude/longitude pair with the notebook-level `transformer`.

    The arguments arrive as (lat, lon) but are passed on as (lon, lat),
    because `transformer` is built with always_xy=True.

    Returns the projected coordinates as an (x, y) tuple.
    """
    easting, northing = transformer.transform(lon, lat)
    return (easting, northing)
# Project all rows in a single vectorised call rather than a Python-level
# per-row apply: pyproj transforms whole coordinate arrays at once, which is
# far faster on large tables and also copes with an empty frame.
# `transformer` was created with always_xy=True, so the order is (lon, lat).
df['x'], df['y'] = transformer.transform(
    df['longitude'].to_numpy(),
    df['latitude'].to_numpy(),
)

# Bounding box of the projected points; used as the initial map extent.
min_x, max_x = df['x'].min(), df['x'].max()
min_y, max_y = df['y'].min(), df['y'].max()

# create plot
source = ColumnDataSource(df)
p = figure(title="Philadelphia Restaurants",
           x_axis_type="mercator", y_axis_type="mercator",
           x_range=(min_x, max_x), y_range=(min_y, max_y),
           width=800, height=600,
           tools="wheel_zoom,pan,reset, box_select, lasso_select")
p.add_tile(xyz.CartoDB.Positron)
p.grid.visible = False



# add kernel density estimation

def kde(x, y, N, bounds=None):
    """Evaluate a 2-D Gaussian kernel density estimate on a regular N x N grid.

    Parameters
    ----------
    x, y : array-like
        Sample coordinates; both must be one-dimensional and of equal length.
    N : int
        Number of grid points along each axis.
    bounds : tuple (x_min, x_max, y_min, y_max), optional
        Extent of the evaluation grid. Defaults to the extent of the samples,
        so the function does not depend on notebook-level variables.

    Returns
    -------
    X, Y, Z : numpy.ndarray, each of shape (N, N)
        Grid x coordinates, grid y coordinates and the estimated density at
        each grid node.

    Raises
    ------
    ValueError
        If x and y are empty or differ in shape.
    """
    xs = np.asarray(x, dtype=float)
    ys = np.asarray(y, dtype=float)
    if xs.size == 0 or xs.shape != ys.shape:
        raise ValueError("kde requires non-empty x and y of identical shape")

    if bounds is None:
        bounds = (xs.min(), xs.max(), ys.min(), ys.max())
    x_lo, x_hi, y_lo, y_hi = bounds

    # A complex step in mgrid means "this many points, end point included".
    X, Y = np.mgrid[x_lo:x_hi:N*1j, y_lo:y_hi:N*1j]
    positions = np.vstack([X.ravel(), Y.ravel()])

    # gaussian_kde expects the samples as a (n_dims, n_samples) array.
    kernel = gaussian_kde(np.vstack([xs, ys]))
    Z = np.reshape(kernel(positions), X.shape)
    return X, Y, Z

# Density surface over the map extent on a 100 x 100 grid.
density_x, density_y, density = kde(df['x'], df['y'], 100)

# Reverse the palette so it runs from its last colour to its first.
palette = Blues9[::-1]
# Filled contours colour the bands *between* consecutive levels, so
# len(palette) colours need len(palette) + 1 level boundaries.
levels = np.linspace(np.min(density), np.max(density), len(palette) + 1)

p.contour(density_x, density_y, density, levels=levels, fill_color=palette, line_color=None)

# Hexagon size, in the same units as the projected x/y coordinates. The
# binning and the tile glyph must agree on it, so it is defined once here.
HEX_SIZE = 500

# Aggregate the points into hexagonal bins and colour each tile by its count.
bins = hexbin(df['x'], df['y'], size=HEX_SIZE)
p.hex_tile(q="q", r="r", source=bins,
           size=HEX_SIZE, line_color=None, fill_alpha=0.5,
           fill_color=linear_cmap('counts', 'Viridis256', 0, bins['counts'].max()))

# add points
# Individual businesses as faint dots on top; they share `source`, so the
# selection tools act on these points.
glyph = Scatter(x='x', y='y', size=3, fill_color='black', fill_alpha=0.1, line_color=None)
p.add_glyph(source, glyph)

# Browser-side callback: log how many points the current selection contains.
source.selected.js_on_change('indices', CustomJS(args=dict(source=source), code="""
    const indices = cb_obj.indices;
    console.log('Number of selected points:', indices.length);
    """))

show(p)


In [1]:
%pip install scipy

Collecting scipy
  Downloading scipy-1.14.1-cp312-cp312-win_amd64.whl.metadata (60 kB)
Downloading scipy-1.14.1-cp312-cp312-win_amd64.whl (44.5 MB)
   ---------------------------------------- 0.0/44.5 MB ? eta -:--:--
   --- ------------------------------------ 3.9/44.5 MB 19.6 MB/s eta 0:00:03
   ------- -------------------------------- 7.9/44.5 MB 20.3 MB/s eta 0:00:02
   ----------- ---------------------------- 12.6/44.5 MB 20.2 MB/s eta 0:00:02
   --------------- ------------------------ 16.8/44.5 MB 20.3 MB/s eta 0:00:02
   ------------------- -------------------- 21.2/44.5 MB 20.3 MB/s eta 0:00:02
   ---------------------- ----------------- 25.4/44.5 MB 20.4 MB/s eta 0:00:01
   -------------------------- ------------- 29.9/44.5 MB 20.8 MB/s eta 0:00:01
   ------------------------------ --------- 34.3/44.5 MB 20.8 MB/s eta 0:00:01
   ---------------------------------- ----- 38.8/44.5 MB 20.7 MB/s eta 0:00:01
   -------------------------------------- - 43.0/44.5 MB 20.9 MB/s eta 0:


[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip
