In [None]:
# Disabled scratch cell: everything below is wrapped in a string literal, so none of it executes.
# It sketches a catalogue of dataset ids; 'dyqx-cfn5' and 'rm92-h5tq' are the two ids
# actually loaded by later cells of this notebook.
'''
dataset_ids = [{'name':'Seating','dataset_id':'dyqx-cfn5', 'desc':'Café, restaurant, bistro seats'},
               {'name':'Dwellings','dataset_id':'rm92-h5tq', 'desc':'Residential dwellings'},
               {'name':'Employment','dataset_id':'qnju-it8g', 'desc':'Employment per CLUE industry for blocks'}]

for dataset_id in dataset_ids:
    print(dataset_id['dataset_id'])
'''

# MELBOURNE OPEN DATA PLAYGROUND
# New Business Location (Cafe or Restaurant)

## Package/Library Imports

In [1]:
import json
import os
import time
from datetime import datetime
from urllib.request import urlopen

import numpy as np
import pandas as pd
import plotly.express as px
from sodapy import Socrata

## Connect to Melbourne Open Data Portal using SodaPy

In [2]:
# Host name of the open data portal that every later query goes to.
domain = "data.melbourne.vic.gov.au"

# The app token comes from the environment; os.environ.get yields None when
# SODAPY_APPTOKEN is unset, in which case the client is created without a token.
apptoken = os.environ.get("SODAPY_APPTOKEN")

# Shared Socrata client reused by the data-loading cells below.
client = Socrata(domain, app_token=apptoken)



## Review Venue Seating Data

In [None]:
# Pull every record of the venue seating dataset and load it into a DataFrame.
dataresource = client.get_all('dyqx-cfn5')
dataset = pd.DataFrame.from_dict(dataresource)

# Report the frame's dimensions, then announce the preview rendered below.
print(
    f'The shape of dataset is {dataset.shape}.',
    'Below are the first 3 rows of this dataset:',
    sep='\n',
)

# Transposed so each record reads as a column, which suits a wide frame.
dataset.head(3).transpose()

## Calculate Venue Seating by Location

In [None]:
#######################################################
# Cast datatypes to enable aggregation
#
# pd.to_numeric is used instead of astype(int)/astype(float): it parses the
# same numeric strings but tolerates missing values (they become NA) rather
# than raising, while still raising on text that is not a number.
integer_columns = ['census_year', 'block_id', 'property_id', 'base_property_id', 'industry_anzsic4_code', 'number_of_seats']
fp_columns = ['x_coordinate', 'y_coordinate']
numeric_columns = integer_columns + fp_columns

dataset[numeric_columns] = dataset[numeric_columns].apply(pd.to_numeric)
# convert_dtypes picks nullable dtypes, so whole-number columns end up as
# integers even when they contain NA; the remaining columns become strings.
dataset = dataset.convert_dtypes()

#######################################################
# Aggregate data: total seats per (area, block, coordinate) combination.
# Rows whose grouping keys are NA are dropped by groupby (pandas default).
groupbyfields = ['clue_small_area','block_id','y_coordinate','x_coordinate']
# Named aggregation (output column -> (input column, function)) yields flat
# column names directly, so no MultiIndex flattening/renaming is needed.
aggregatebyfields = {'number_of_seats': ('number_of_seats', 'sum')}

seatsByLocn = (
    dataset.groupby(groupbyfields, as_index=False)
    .agg(**aggregatebyfields)
    .rename(columns={'clue_small_area': 'clue_area'})  # rename to match GeoJSON extract
    .astype({'number_of_seats': int})                  # plain int64 for plotting
)
seatsByLocn.head(5)

## Show Venue Seating as a Scatter Map

In [None]:
# Plot venue seating as a bubble map: one marker per row of seatsByLocn,
# positioned by y_coordinate (lat) / x_coordinate (lon) and sized by seat count.
fig = px.scatter_mapbox(
    seatsByLocn,
    lat="y_coordinate",
    lon="x_coordinate",
    size="number_of_seats",
    # The "stamen-*" basemaps are now served through Stadia Maps and need an
    # account/token, so their tiles may not render; "carto-positron" is a
    # token-free basemap.
    mapbox_style="carto-positron",
    zoom=12.5,
    center={"lat": -37.813, "lon": 144.945},
    opacity=0.70,
    hover_name="clue_area",
    # block_id is hidden from the hover box; seats and coordinates are shown.
    hover_data={"block_id": False, "number_of_seats": True, "x_coordinate": True, "y_coordinate": True},
    color_discrete_sequence=['red'],
    labels={'number_of_seats': 'Number of Seats', 'y_coordinate': 'Lat', 'x_coordinate': 'Long'},
    width=950,
    height=800,
)
fig.show()

## Get Block Polygon data in GeoJSON format for Choropleth Mapping

Load the CLUE Blocks in GeoJSON format and verify the location keys.

In [None]:
# Download the CLUE block polygons as GeoJSON from the portal's geospatial
# export endpoint and parse them into `block` for use as choropleth geometry.
geoJSON_Id = 'aia8-ryiq' # Melbourne CLUE Block polygons in GeoJSON format

# Without a timeout urlopen can block indefinitely on a stalled connection.
GEOJSON_TIMEOUT_S = 60

GeoJSONURL = f'https://{domain}/api/geospatial/{geoJSON_Id}?method=export&format=GeoJSON'
with urlopen(GeoJSONURL, timeout=GEOJSON_TIMEOUT_S) as response:
    block = json.load(response)

# Show the property keys of the first feature so the key used to join the
# polygons to the tabular data can be verified.
block["features"][0]['properties'].keys()

## Combined Choropleth & Scatter

In [None]:
#######################################################
# Get residential dwelling density
dataresource2 = client.get_all('rm92-h5tq') # Melbourne CLUE Residential Dwellings
dataset2 = pd.DataFrame.from_dict(dataresource2)

# Cast datatypes. pd.to_numeric parses the same numeric strings as
# astype(int)/astype(float) but tolerates missing values (they become NA)
# instead of raising. block_id is deliberately left uncast here
# (presumably so it matches the GeoJSON block_id property -- confirm).
numeric_columns = ['census_year', 'dwelling_number', 'x_coordinate', 'y_coordinate']
dataset2[numeric_columns] = dataset2[numeric_columns].apply(pd.to_numeric)
dataset2 = dataset2.convert_dtypes() # infer nullable dtypes; remaining columns become strings

#######################################################
# Create aggregate dataset: total dwellings per block.
groupbyfields = ['block_id','clue_small_area']
# Named aggregation (output column -> (input column, function)) yields the
# final flat column name directly.
aggregatebyfields = {'dwelling_count': ('dwelling_number', 'sum')}

dwellingsByBlock = (
    dataset2.groupby(groupbyfields, as_index=False)
    .agg(**aggregatebyfields)
    .rename(columns={'clue_small_area': 'clue_area'})  # rename to match GeoJSON extract
)

#######################################################
# Plot residential density using Choropleth Map
range_max = dwellingsByBlock['dwelling_count'].max()

fig = px.choropleth_mapbox(
    dwellingsByBlock,
    geojson=block,
    locations='block_id',
    color='dwelling_count',
    color_continuous_scale=["#FFFF88", "yellow", "orange", "orange",
                            "orange", "darkorange", "red", "darkred"],
    range_color=(0, range_max),
    # Join key: block_id in the frame <-> properties.block_id in each feature.
    featureidkey="properties.block_id",
    # The "stamen-*" basemaps are now served through Stadia Maps and need an
    # account/token, so their tiles may not render; "carto-positron" is a
    # token-free basemap.
    mapbox_style="carto-positron",
    zoom=12.5,
    center={"lat": -37.813, "lon": 144.945},
    opacity=0.5,
    hover_name='clue_area',
    hover_data={'block_id': True, 'dwelling_count': True},
    labels={'dwelling_count': 'Number of Dwellings', 'block_id': 'CLUE Block Id'},
    title='New Business Location',
    width=950,
    height=800,
)

#######################################################
# Overlay venue seating as a scatter trace.
# Only the trace of this helper figure is reused, so layout-only arguments
# (map style, zoom, center, width/height) are omitted -- the choropleth
# figure's layout governs the combined map.
fig2 = px.scatter_mapbox(
    seatsByLocn,
    lat="y_coordinate",
    lon="x_coordinate",
    size="number_of_seats",
    opacity=0.70,
    hover_name="clue_area",
    hover_data={"block_id": False, "number_of_seats": True, "x_coordinate": True, "y_coordinate": True},
    color_discrete_sequence=['black'],
    labels={'number_of_seats': 'Number of Seats', 'y_coordinate': 'Lat', 'x_coordinate': 'Long'},
)
fig.add_trace(fig2.data[0])

# Note: no update_geos() call here -- it only affects `geo` subplots and is a
# no-op on a mapbox figure; the view is set by zoom/center above.
fig.show()