In [3]:
%env SODAPY_APPTOKEN = HA1B7dCJquyL8zRLXZ2YlWvNQ
!pip install sodapy
!pip install -U plotly



In [1]:
import os
import pandas as pd
from sodapy import Socrata
import plotly.express as px
from urllib.request import urlopen
import json

## Loading data

In [4]:
# Fetch every record of the dataset from the data.melbourne.vic.gov.au Socrata portal.
dataset_id = "vesm-c7r2"

apptoken = os.environ.get("SODAPY_APPTOKEN")  # app token from the environment (None if unset)
domain = "data.melbourne.vic.gov.au"
client = Socrata(domain, apptoken)  # Open Dataset connection

# get_all() hands back a one-shot generator that pages through the API.
# Materialise it into a list so the records survive being read more than once;
# otherwise re-running the DataFrame cell would silently build an empty frame
# from the exhausted generator.
dataresource = list(client.get_all(dataset_id))

In [5]:
# Turn the fetched records into a DataFrame, report its size and preview it.
dataset = pd.DataFrame(dataresource)
print(
    f'The shape of dataset is {dataset.shape}.',
    'Below are the first 3 rows of this dataset:',
    sep='\n',
)
dataset.head(3)

The shape of dataset is (20036, 11).
Below are the first 3 rows of this dataset:


Unnamed: 0,clue_small_area,location,y_coordinate,census_year,anzsic4_code,x_coordinate,block_id,anzsic4_description,property_id,bps_base_id,trading_name
0,Melbourne (CBD),"{'latitude': '-37.82121122', 'needs_recoding':...",-37.82121122,2020,0,144.9568736,1,Vacant Space,108843,108843,62 Rebecca Walk MELBOURNE VIC 3000
1,Melbourne (CBD),"{'latitude': '-37.82121122', 'needs_recoding':...",-37.82121122,2020,9511,144.9568736,1,Hairdressing and Beauty Services,108843,108843,14 Rebecca Walk MELBOURNE VIC 3000
2,Melbourne (CBD),"{'latitude': '-37.82121122', 'needs_recoding':...",-37.82121122,2020,0,144.9568736,1,Vacant Space,108843,108843,86 Rebecca Walk MELBOURNE VIC 3000


## Preprocessing data

Check data types:

In [6]:
# Display the dtype currently held by each column of the frame.
dataset.dtypes

clue_small_area        object
location               object
y_coordinate           object
census_year            object
anzsic4_code           object
x_coordinate           object
block_id               object
anzsic4_description    object
property_id            object
bps_base_id            object
trading_name           object
dtype: object

In [6]:
# Cast the numeric columns explicitly, then let pandas pick the best dtype for
# whatever is left.
column_types = {
    'census_year': int,
    'anzsic4_code': int,
    'block_id': int,
    'x_coordinate': float,
    'y_coordinate': float,
}
dataset = dataset.astype(column_types).convert_dtypes()  # convert remaining to string
dataset.dtypes

clue_small_area         string
location                object
y_coordinate           float64
census_year              Int64
anzsic4_code             Int64
x_coordinate           float64
block_id                 Int64
anzsic4_description     string
property_id             string
bps_base_id             string
trading_name            string
dtype: object

Check for null values:

In [7]:
# Number of missing values in each column.
print(dataset.isna().sum())

clue_small_area         0
location               29
y_coordinate           29
census_year             0
anzsic4_code            0
x_coordinate           29
block_id                0
anzsic4_description     0
property_id             0
bps_base_id             0
trading_name            1
dtype: int64


In [8]:
# Show the rows whose x coordinate is missing.
dataset.loc[dataset['x_coordinate'].isna()]

Unnamed: 0,clue_small_area,location,y_coordinate,census_year,anzsic4_code,x_coordinate,block_id,anzsic4_description,property_id,bps_base_id,trading_name
92,Melbourne (CBD),,,2020,4512,,5,Takeaway Food Services,101345,101345,"Kiosk 12, Campbell Arcade MELBOURNE VIC 3000"
93,Melbourne (CBD),,,2020,0,,5,Vacant Space,101345,101345,"Shop 3-4, Campbell Arcade MELBOURNE VIC 3000"
94,Melbourne (CBD),,,2020,0,,5,Vacant Space,101345,101345,"Shop 8A, Campbell Arcade MELBOURNE VIC 3000"
95,Melbourne (CBD),,,2020,0,,5,Vacant Space,101345,101345,"Shop 5, Campbell Arcade MELBOURNE VIC 3000"
96,Melbourne (CBD),,,2020,0,,5,Vacant Space,101345,101345,"Shop 9, Campbell Arcade MELBOURNE VIC 3000"
97,Melbourne (CBD),,,2020,0,,5,Vacant Space,101345,101345,"Shop 8, Campbell Arcade MELBOURNE VIC 3000"
98,Melbourne (CBD),,,2020,0,,5,Vacant Space,101345,101345,"Shop 11, Campbell Arcade MELBOURNE VIC 3000"
99,Melbourne (CBD),,,2020,5910,,5,Internet Service Providers and Web Search Portals,101345,101345,"Shop 6-7, Campbell Arcade MELBOURNE VIC 3000"
100,Melbourne (CBD),,,2020,4244,,5,Newspaper and Book Retailing,101345,101345,"Shop 10, Campbell Arcade MELBOURNE VIC 3000"
101,Melbourne (CBD),,,2020,4242,,5,Entertainment Media Retailing,101345,101345,"Shop 1, Campbell Arcade MELBOURNE VIC 3000"


In [7]:
# Discard every row that has a missing value in any column, then confirm that
# no nulls remain.
dataset = dataset.dropna()
print(dataset.isna().sum())

clue_small_area        0
location               0
y_coordinate           0
census_year            0
anzsic4_code           0
x_coordinate           0
block_id               0
anzsic4_description    0
property_id            0
bps_base_id            0
trading_name           0
dtype: int64


## Analysis and visualisation

In [17]:
# Count the records found at each distinct location (area, block, coordinates).
# The grouping keys and the aggregation are defined here so this cell runs on a
# fresh kernel; previously it relied on names that were only assigned in a
# later cell and failed with NameError under Restart & Run All.
groupbyfields = ['clue_small_area', 'block_id', 'y_coordinate', 'x_coordinate']
aggregatebyfields = {'anzsic4_code': ["count"]}

dataset.groupby(groupbyfields, as_index=False).agg(aggregatebyfields)

Unnamed: 0_level_0,clue_small_area,block_id,y_coordinate,x_coordinate,anzsic4_code
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,count
0,Carlton,201,-37.79436683,144.9662277,1
1,Carlton,201,-37.79467733,144.965947,1
2,Carlton,202,-37.79457313,144.9652988,1
3,Carlton,203,-37.7958311,144.9659805,1
4,Carlton,203,-37.79594582,144.9652129,1
...,...,...,...,...,...
4472,West Melbourne (Residential),91,-37.81258618,144.9520754,2
4473,West Melbourne (Residential),91,-37.81277865,144.9519446,1
4474,West Melbourne (Residential),91,-37.81282881,144.9517686,1
4475,West Melbourne (Residential),91,-37.81288005,144.9515889,3


In [31]:
# Build one row per distinct location (area, block, coordinates) holding the
# number of records at that location. The list-valued aggregation spec gives
# the result a two-level column header.
groupbyfields = ['clue_small_area', 'block_id', 'y_coordinate', 'x_coordinate']
aggregatebyfields = {'anzsic4_code': ["count"]}

grouped_by_location = dataset.groupby(groupbyfields, as_index=False)
businessesByLocn = pd.DataFrame(grouped_by_location.agg(aggregatebyfields))
businessesByLocn.head(10)

Unnamed: 0_level_0,clue_small_area,block_id,y_coordinate,x_coordinate,anzsic4_code
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,count
0,Carlton,201,-37.79436683,144.9662277,1
1,Carlton,201,-37.79467733,144.965947,1
2,Carlton,202,-37.79457313,144.9652988,1
3,Carlton,203,-37.7958311,144.9659805,1
4,Carlton,203,-37.79594582,144.9652129,1
5,Carlton,203,-37.79601015,144.9657585,1
6,Carlton,203,-37.79606949,144.9651384,1
7,Carlton,203,-37.79614741,144.9653038,1
8,Carlton,203,-37.79630735,144.9652805,1
9,Carlton,203,-37.79668018,144.9649,2


Business Establishment Locations on Map:

In [37]:
# Plot every distinct business location as a point on an OpenStreetMap base
# layer; hovering a point shows its small area and block id.
fig = px.scatter_mapbox(
    businessesByLocn,
    lat="y_coordinate",
    lon="x_coordinate",
    hover_name="clue_small_area",
    hover_data=["clue_small_area", "block_id"],
    title='Business Establishments by Location for 2020',
    mapbox_style="open-street-map",
    zoom=12.5,
    center={"lat": -37.813, "lon": 144.945},
    width=950,
    height=800,
)
fig.show()

Business Establishments by Small Area:

In [38]:
# Count records per (block, small area, industry description).
groupbyfields = ['block_id', 'clue_small_area', 'anzsic4_description']
aggregatebyfields = {'anzsic4_code': ["count"]}

businessesByBlock = pd.DataFrame(
    dataset.groupby(groupbyfields, as_index=False).agg(aggregatebyfields)
)
# Flatten the two-level column header by concatenating the level names,
# e.g. ('anzsic4_code', 'count') -> 'anzsic4_codecount'.
businessesByBlock.columns = [''.join(levels) for levels in businessesByBlock.columns]
# Rename to match the GeoJSON extract and to give the count a readable name.
businessesByBlock = businessesByBlock.rename(
    columns={
        'clue_small_area': 'clue_area',
        'anzsic4_codecount': 'business_count',
    }
)
businessesByBlock.head(10)

Unnamed: 0,block_id,clue_area,anzsic4_description,business_count
0,1,Melbourne (CBD),Air and Space Transport,1
1,1,Melbourne (CBD),Architectural Services,1
2,1,Melbourne (CBD),Cafes and Restaurants,2
3,1,Melbourne (CBD),Computer System Design and Related Services,1
4,1,Melbourne (CBD),Convenience Store,1
5,1,Melbourne (CBD),Credit Reporting and Debt Collection Services,1
6,1,Melbourne (CBD),Electricity Distribution,1
7,1,Melbourne (CBD),General Insurance,1
8,1,Melbourne (CBD),Hairdressing and Beauty Services,1
9,1,Melbourne (CBD),Liquor Retailing,1


In [39]:
# Download the block boundaries as GeoJSON and parse them into a dict.
BLOCK_GEOJSON_URL = 'https://data.melbourne.vic.gov.au/api/geospatial/aia8-ryiq?method=export&format=GeoJSON'
with urlopen(BLOCK_GEOJSON_URL) as response:
    block = json.loads(response.read())

In [43]:
# A choropleth needs exactly one row per block. businessesByBlock holds one row
# per (block, industry), so plotting it directly draws each block several times
# with per-industry counts rather than the block's total. Collapse to block
# totals first.
businessesPerBlock = businessesByBlock.groupby('block_id', as_index=False).agg(
    clue_area=('clue_area', 'first'),
    business_count=('business_count', 'sum'),
)

# Upper end of the colour scale: the largest block total. (It was previously
# computed but never used; the scale was fixed at 0-75.)
# NOTE(review): if a few very dense blocks wash out the rest of the map, lower
# this cap by hand.
range_max = businessesPerBlock['business_count'].max()

fig = px.choropleth_mapbox(businessesPerBlock, geojson=block, locations='block_id', color='business_count',
                           color_continuous_scale=["white", "#4444FF", "blue", "darkblue", "#000044"],
                           range_color=(0, range_max),
                           featureidkey="properties.block_id",  # GeoJSON property matched against block_id
                           mapbox_style="open-street-map", #"carto-positron",
                           zoom=12.5,
                           center = {"lat": -37.813, "lon": 144.945},
                           opacity=0.5,
                           hover_name='clue_area',
                           hover_data={'block_id':True,'business_count':True},
                           labels={'business_count':'Number of Businesses','block_id':'CLUE Block Id'},
                           title='Business Establishments by CLUE Block Id for 2020',
                           width=950, height=800
                          )
fig.show()