In [1]:
%env SODAPY_APPTOKEN = HA1B7dCJquyL8zRLXZ2YlWvNQ
!pip install sodapy
!pip install -U plotly

env: SODAPY_APPTOKEN=HA1B7dCJquyL8zRLXZ2YlWvNQ
Collecting sodapy
  Downloading sodapy-2.1.0-py2.py3-none-any.whl (14 kB)
Installing collected packages: sodapy
Successfully installed sodapy-2.1.0
Collecting plotly
  Downloading plotly-5.4.0-py2.py3-none-any.whl (25.3 MB)
[K     |████████████████████████████████| 25.3 MB 1.5 MB/s 
Collecting tenacity>=6.2.0
  Downloading tenacity-8.0.1-py3-none-any.whl (24 kB)
Installing collected packages: tenacity, plotly
  Attempting uninstall: plotly
    Found existing installation: plotly 4.4.1
    Uninstalling plotly-4.4.1:
      Successfully uninstalled plotly-4.4.1
Successfully installed plotly-5.4.0 tenacity-8.0.1


In [2]:
import os
import pandas as pd
from sodapy import Socrata
import plotly.express as px
from urllib.request import urlopen
import json

##Loading data

In [24]:
#Fetching dataset
dataset_id = "dyqx-cfn5"
smallAreaLayer_id = "gei8-3w86"

apptoken = os.environ.get("SODAPY_APPTOKEN") # Anonymous app token
domain = "data.melbourne.vic.gov.au"
client = Socrata(domain, apptoken) # Open Dataset connection

dataresource = client.get_all(dataset_id)

In [25]:
dataset = pd.DataFrame(dataresource)
print(f'The shape of dataset is {dataset.shape}.')
print('Below are the first 3 rows of this dataset:')
dataset.head(3)

The shape of dataset is (3236, 13).
Below are the first 3 rows of this dataset:


Unnamed: 0,census_year,block_id,property_id,base_property_id,street_address,clue_small_area,trading_name,industry_anzsic4_code,industry_anzsic4_description,seating_type,number_of_seats,x_coordinate,y_coordinate
0,2020,1,611394,611394,545-557 Flinders Street MELBOURNE VIC 3000,Melbourne (CBD),551 Flinders Street MELBOURNE VIC 3000,4511,Cafes and Restaurants,Seats - Indoor,60,144.9565145,-37.82097941
1,2020,1,611394,611394,545-557 Flinders Street MELBOURNE VIC 3000,Melbourne (CBD),551 Flinders Street MELBOURNE VIC 3000,4511,Cafes and Restaurants,Seats - Outdoor,6,144.9565145,-37.82097941
2,2020,1,611394,611394,545-557 Flinders Street MELBOURNE VIC 3000,Melbourne (CBD),553 Flinders Street MELBOURNE VIC 3000,4512,Takeaway Food Services,Seats - Indoor,12,144.9565145,-37.82097941


##Preprocessing data

Check data types:

In [26]:
dataset.dtypes

census_year                     object
block_id                        object
property_id                     object
base_property_id                object
street_address                  object
clue_small_area                 object
trading_name                    object
industry_anzsic4_code           object
industry_anzsic4_description    object
seating_type                    object
number_of_seats                 object
x_coordinate                    object
y_coordinate                    object
dtype: object

In [27]:
dataset[['census_year','industry_anzsic4_code','block_id','number_of_seats']] = dataset[['census_year','industry_anzsic4_code','block_id','number_of_seats']].astype(int)
dataset[['x_coordinate', 'y_coordinate']] = dataset[['x_coordinate', 'y_coordinate']].astype(float)
dataset = dataset.convert_dtypes() # convert remaining to string
dataset.dtypes

census_year                       Int64
block_id                          Int64
property_id                      string
base_property_id                 string
street_address                   string
clue_small_area                  string
trading_name                     string
industry_anzsic4_code             Int64
industry_anzsic4_description     string
seating_type                     string
number_of_seats                   Int64
x_coordinate                    float64
y_coordinate                    float64
dtype: object

Check for null values:

In [28]:
print(dataset.isnull().sum())

census_year                     0
block_id                        0
property_id                     0
base_property_id                0
street_address                  0
clue_small_area                 0
trading_name                    0
industry_anzsic4_code           0
industry_anzsic4_description    0
seating_type                    0
number_of_seats                 0
x_coordinate                    0
y_coordinate                    0
dtype: int64


In [29]:
dataset[dataset['x_coordinate'].isnull()]

Unnamed: 0,census_year,block_id,property_id,base_property_id,street_address,clue_small_area,trading_name,industry_anzsic4_code,industry_anzsic4_description,seating_type,number_of_seats,x_coordinate,y_coordinate


In [30]:
dataset = dataset.dropna(axis=0)
print(dataset.isnull().sum())

census_year                     0
block_id                        0
property_id                     0
base_property_id                0
street_address                  0
clue_small_area                 0
trading_name                    0
industry_anzsic4_code           0
industry_anzsic4_description    0
seating_type                    0
number_of_seats                 0
x_coordinate                    0
y_coordinate                    0
dtype: int64


## Analysis and visualisation

In [31]:
groupbyfields = ['clue_small_area','block_id','y_coordinate','x_coordinate']
aggregatebyfields = {'number_of_seats': ["sum"]}

sumSeatsByLocn = pd.DataFrame(dataset.groupby(groupbyfields, as_index=False).agg(aggregatebyfields))
sumSeatsByLocn.columns = sumSeatsByLocn.columns.map(''.join) # flatten column header
sumSeatsByLocn.rename(columns={
    'clue_small_area': 'clue_area',
    'number_of_seatssum': 'number_of_seats'
    },
    inplace=True) #rename to match GeoJSON extract
sumSeatsByLocn['number_of_seats'] = sumSeatsByLocn['number_of_seats'].astype(int)
sumSeatsByLocn.head(10)

Unnamed: 0,clue_area,block_id,y_coordinate,x_coordinate,number_of_seats
0,Carlton,203,-37.796707,144.965534,51
1,Carlton,203,-37.79668,144.9649,42
2,Carlton,204,-37.797833,144.965174,50
3,Carlton,204,-37.797255,144.965754,120
4,Carlton,205,-37.79947,144.964893,96
5,Carlton,205,-37.799001,144.964765,80
6,Carlton,205,-37.798721,144.965257,41
7,Carlton,206,-37.800457,144.966558,51
8,Carlton,206,-37.800191,144.966716,140
9,Carlton,206,-37.800046,144.966741,115


Total Cafe and Restaurant seating capacity on Map:

In [32]:
fig = px.scatter_mapbox(sumSeatsByLocn, lat="y_coordinate", lon="x_coordinate", 
                        size="number_of_seats",
                        color="number_of_seats",
                        mapbox_style="stamen-toner", #"carto-positron",
                        zoom=12.5,
                        center = {"lat": -37.813, "lon": 144.945},
                        opacity=0.75,
                        hover_name="clue_area",
                        hover_data=["number_of_seats", "block_id"],
                        # color_discrete_sequence=['red'],
                        color_continuous_scale=px.colors.cyclical.IceFire,
                        labels={'number_of_seats':'Number of Seats','block_id':'CLUE Block Id'},
                        title='Venue Seats by Location for 2020',
                        width=950, height=800)
fig.show()

Total seating capacity by Small Area:

In [33]:
groupbyfields = ['clue_small_area']
aggregatebyfields = {'number_of_seats': ["sum"]}

totalSeatBySArea = pd.DataFrame(dataset.groupby(groupbyfields, as_index=False).agg(aggregatebyfields))
totalSeatBySArea.columns = totalSeatBySArea.columns.map(''.join) # flatten column header
totalSeatBySArea.rename(columns={
    'clue_small_area': 'clue_area',
    'number_of_seatssum': 'number_of_seats'
    },
    inplace=True) #rename to match GeoJSON extract
totalSeatBySArea['number_of_seats'] = totalSeatBySArea['number_of_seats'].astype(int)
totalSeatBySArea.head(10)

Unnamed: 0,clue_area,number_of_seats
0,Carlton,15177
1,Docklands,21585
2,East Melbourne,7181
3,Kensington,5709
4,Melbourne (CBD),88974
5,Melbourne (Remainder),8767
6,North Melbourne,4499
7,Parkville,3695
8,Port Melbourne,1251
9,South Yarra,810


In [34]:
GeoJSONURL = 'https://'+domain+'/api/geospatial/'+smallAreaLayer_id+'?method=export&format=GeoJSON'
with urlopen(GeoJSONURL) as response:
    smallAreas = json.load(response)

In [37]:
smallAreas

{'features': [{'geometry': {'coordinates': [[[[144.936867787351,
        -37.788837515833784],
       [144.93666537074685, -37.78951702007336],
       [144.93660651554083, -37.78951079872648],
       [144.93655140685004, -37.789616435261564],
       [144.93663321947957, -37.78962508334325],
       [144.93643929186538, -37.79027604924282],
       [144.93638656745185, -37.79045307530191],
       [144.93622349395974, -37.79043588954605],
       [144.93618620399278, -37.7905826749945],
       [144.9359684320301, -37.79174271357113],
       [144.9360537714767, -37.79175078721042],
       [144.9360210797648, -37.791931209979474],
       [144.93593380262564, -37.79192309604553],
       [144.9358339891002, -37.79245547178095],
       [144.93581057539407, -37.79257858972242],
       [144.93590225105388, -37.79258711259232],
       [144.93578100297566, -37.7932396786842],
       [144.93568269621238, -37.7932306661658],
       [144.9354132704244, -37.79473283389455],
       [144.9353508547406, -3

In [41]:
range_max = totalSeatBySArea['number_of_seats'].max()

fig = px.choropleth_mapbox(totalSeatBySArea, geojson=smallAreas, locations='clue_area', color='number_of_seats',
                           color_continuous_scale= "Viridis", 
                          #  range_color=(0, range_max),
                           featureidkey="properties.featurenam",
                           mapbox_style="stamen-toner", #"carto-positron",
                           zoom=12.5,
                           center = {"lat": -37.813, "lon": 144.945},
                           opacity=0.5,
                           hover_name='clue_area',
                           hover_data={'number_of_seats':True},
                           labels={'number_of_seats':'Number of Seats','block_id':'CLUE Block Id'},
                           title='Seating Density by CLUE Block Id for 2020',
                           width=950, height=800
                          )
fig.show()