### Reading in data and assessing what sort of information we can get from it

In [34]:
# Uncomment on first use if sodapy is not installed in the kernel's environment:
# %pip install sodapy

In [35]:
import warnings
warnings.filterwarnings('ignore')

### Imports

In [36]:
# For importing the data and using API
from sodapy import Socrata
import os
import zipfile as zf
import requests
from io import BytesIO

# Working with the data
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
#from sklearn.preprocessing import LabelEncoder
import json

# Visualisation
import matplotlib.pylab as plt
import folium
from folium import plugins
import seaborn as sns
import branca.colormap as cm

#### COM Population Dataset
https://data.melbourne.vic.gov.au/resource/sp4r-xphj.json

In [37]:
# Socrata host for the City of Melbourne open-data portal, and the resource id of the
# population dataset on it (the same id that appears in the URL under the heading above).
domain, data_file = "data.melbourne.vic.gov.au", 'sp4r-xphj'

In [38]:
# Read the app token from the environment rather than hard-coding it. When
# SODAPY_APPTOKEN is not set this is None and the client is created without a token
# (anonymous access).
apptoken = os.getenv("SODAPY_APPTOKEN")
client = Socrata(domain, app_token=apptoken)



##### View the Population dataset head sample

In [39]:
# Pull the records of the population dataset through the Socrata client and load them
# into a DataFrame, then report its size and preview the first rows.
population_records = client.get_all(data_file)
population_data = pd.DataFrame(population_records)
print(population_data.shape)
population_data.head()

(16989, 5)


Unnamed: 0,geography,year,gender,age,value
0,City of Melbourne,2020,Female,Age 0-4,2683
1,City of Melbourne,2021,Female,Age 0-4,2945
2,City of Melbourne,2022,Female,Age 0-4,3212
3,City of Melbourne,2023,Female,Age 0-4,3515
4,City of Melbourne,2024,Female,Age 0-4,3833


In [40]:
# Take the 2022 population data to test the mapping modules with.
#
# Observation: the data is very much limited to the inner suburbs - maybe best to
# focus on bike pathways only?

pop_data_summary = population_data[['geography', 'year', 'value']]

# 'year' is compared against the string '2022' and 'value' is cast to float before
# summing (presumably the API delivers every field as text - confirm).
# NOTE(review): the sum runs over every gender/age row of a geography; if the dataset
# also carries aggregate rows (e.g. totals), this double counts - verify.
pop_data_summary_22 = (
    pop_data_summary
    .loc[pop_data_summary['year'] == '2022']
    # .assign builds a new frame, so nothing is written into a filtered slice
    # (the original chained assignment triggered SettingWithCopyWarning).
    .assign(value=lambda rows: rows['value'].astype(float))
    # as_index=False keeps 'geography' as a regular column (replaces reset_index()).
    .groupby('geography', as_index=False)['value']
    .sum()
)
print(pop_data_summary_22.shape)
pop_data_summary_22

(14, 2)


Unnamed: 0,geography,value
0,Carlton,68646.64
1,City of Melbourne,570577.87
2,Docklands,49629.55
3,East Melbourne,19080.77
4,Kensington,34963.64
5,Melbourne (CBD),171567.26
6,Melbourne (Remainder),10905.49
7,North Melbourne,52874.8
8,Parkville,31820.68
9,Port Melbourne,144.74


In [48]:
# Load the Victorian suburb/locality boundaries as GeoJSON from the data.gov.au
# geoserver (WFS GetFeature request).
url = (
    'https://data.gov.au/geoserver/vic-suburb-locality-boundaries-psma-administrative-boundaries/'
    'wfs?request=GetFeature&typeName=ckan_af33dd8c_0534_4e18_9245_fc64440f742e&outputFormat=json'
)
vic_suburbs = gpd.GeoDataFrame(gpd.read_file(url))

# Keep only the `lc_ply_pid` identifier, the locality name (`vic_loca_2`) and the geometry.
suburb_columns = ['lc_ply_pid', 'vic_loca_2', 'geometry']
vic_suburbs_reduced = vic_suburbs[suburb_columns]
vic_suburbs_reduced.head()

Unnamed: 0,lc_ply_pid,vic_loca_2,geometry
0,6670,UNDERBOOL,"MULTIPOLYGON (((141.74552 -35.07229, 141.74552..."
1,6671,NURRAN,"MULTIPOLYGON (((148.66877 -37.39571, 148.66876..."
2,6672,WOORNDOO,"MULTIPOLYGON (((142.92288 -37.97886, 142.90449..."
3,6673,DEPTFORD,"MULTIPOLYGON (((147.82336 -37.66001, 147.82313..."
4,6674,YANAC,"MULTIPOLYGON (((141.27978 -35.99859, 141.27989..."


In [42]:
# Look for suburb matches between the datasets - we will need to group parts of inner
# Melbourne together.

target_locals = pd.DataFrame(pop_data_summary_22['geography'].str.upper())
locality_names = vic_suburbs_reduced['vic_loca_2']

# Start with every locality whose name mentions MELBOURNE (na=False treats missing
# names as non-matches instead of breaking the boolean mask).
matched_frames = [
    vic_suburbs_reduced.loc[locality_names.str.contains('MELBOURNE', na=False)]
]

for local in target_locals['geography']:
    # `.any()` has to be called: the bare attribute `.any` is a bound method, which is
    # always truthy, so the original condition never filtered anything.
    if locality_names.eq(local).any():
        # regex=False matches the name literally; names such as 'MELBOURNE (CBD)'
        # contain parentheses that would otherwise be read as a regex group.
        matched_frames.append(
            vic_suburbs_reduced.loc[locality_names.str.contains(local, regex=False, na=False)]
        )

# One concat at the end replaces DataFrame.append in the loop (deprecated, removed in
# pandas 2.0, and the source of the earlier warning messages). The result is kept as a
# plain DataFrame, as before.
mel_suburbs = pd.DataFrame(pd.concat(matched_frames))

# A locality can be selected by both the MELBOURNE filter and the loop
# (e.g. NORTH MELBOURNE); keep the first occurrence of each row only.
mel_suburbs = mel_suburbs.loc[~mel_suburbs.index.duplicated(keep='first')]

mel_suburbs.head(20)

Unnamed: 0,lc_ply_pid,vic_loca_2,geometry
503,6893,NORTH MELBOURNE,"MULTIPOLYGON (((144.95599 -37.80588, 144.95360..."
578,10519,MELBOURNE,"MULTIPOLYGON (((144.97797 -37.83867, 144.97803..."
990,7083,WEST MELBOURNE,"MULTIPOLYGON (((144.91648 -37.79826, 144.91754..."
1230,9297,EAST MELBOURNE,"MULTIPOLYGON (((144.97136 -37.80773, 144.97308..."
1525,10077,PORT MELBOURNE,"MULTIPOLYGON (((144.90749 -37.84326, 144.90652..."
2160,9010,SOUTH MELBOURNE,"MULTIPOLYGON (((144.96765 -37.83738, 144.96673..."
2857,10711,MELBOURNE AIRPORT,"MULTIPOLYGON (((144.86534 -37.67073, 144.86572..."
1860,10004,CARLTON NORTH,"MULTIPOLYGON (((144.96004 -37.79107, 144.95993..."
1883,10007,CARLTON,"MULTIPOLYGON (((144.97401 -37.80311, 144.97320..."
381,10421,DOCKLANDS,"MULTIPOLYGON (((144.95376 -37.82363, 144.95336..."


In [45]:
# Re-display the 2022 population summary table.
pop_data_summary_22

Unnamed: 0,geography,value
0,Carlton,68646.64
1,City of Melbourne,570577.87
2,Docklands,49629.55
3,East Melbourne,19080.77
4,Kensington,34963.64
5,Melbourne (CBD),171567.26
6,Melbourne (Remainder),10905.49
7,North Melbourne,52874.8
8,Parkville,31820.68
9,Port Melbourne,144.74


In [None]:
# Create base map with Folium: https://python-visualization.github.io/folium/quickstart.html#Getting-Started
# NOTE: the name `map` shadows Python's built-in `map`; it is kept because later cells
# refer to it.

map_centre = [-37.81368709240999, 144.95738102347036]
map = folium.Map(
    location=map_centre,
    zoom_start=13,
    min_zoom=10,
    # Optional settings left disabled:
    # width=500, height=300,
    # tiles='Stamen Toner',
)

map

In [None]:
# Choropleth of the 2022 population per locality.
#
# Changes from the first attempt (which had to be interrupted):
#   * only the localities that also appear in the population data are passed as
#     geo_data, instead of every locality in Victoria;
#   * population names are upper-cased to match the `vic_loca_2` spelling;
#   * key_on points at the locality-name property; 'feature.id' is the row index and
#     never matched the 'geography' values;
#   * the legend says "Population" because the values are totals, not densities.
# NOTE: re-running this cell adds another layer to the same `map` object.

population_by_locality = pop_data_summary_22.assign(
    geography=pop_data_summary_22['geography'].str.upper()
)

# Boundaries restricted to localities present in the population data.
choropleth_geo = vic_suburbs_reduced[
    vic_suburbs_reduced['vic_loca_2'].isin(population_by_locality['geography'])
]

# Drop population rows without a boundary (e.g. the council-wide total) so they do not
# stretch the colour scale.
choropleth_data = population_by_locality[
    population_by_locality['geography'].isin(choropleth_geo['vic_loca_2'])
]

folium.Choropleth(
    geo_data=choropleth_geo,
    name='choropleth',
    data=choropleth_data,
    columns=['geography', 'value'],
    key_on='feature.properties.vic_loca_2',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Population (2022)',
).add_to(map)

folium.LayerControl().add_to(map)
map

KeyboardInterrupt: 

#### VicRoads Traffic Dataset

In [None]:
# CSV export of the VicRoads traffic dataset; the query string carries the
# percent-encoded `outSR` parameter.
traffic_url = (
    'https://vicroadsopendata-vicroadsmaps.opendata.arcgis.com/datasets/'
    '5512df2ff41e4941bacf868053dbfba9_0.csv'
    '?outSR=%7B%22latestWkid%22%3A3111%2C%22wkid%22%3A102171%7D'
)

In [None]:
# Download the CSV at `traffic_url` and parse it into a DataFrame.
traffic_data = pd.read_csv(traffic_url)

In [None]:
# Preview the first rows of the traffic data.
traffic_data.head()

Unnamed: 0,OBJECTID_1,OBJECTID,TIS_ID,HMGNS_FLOW_ID,HMGNS_LNK_ID,HMGNS_LNK_DESC,LGA_SHORT_NM,RGN_LONG_NM,ROAD_NBR,DECLARED_ROAD_NM,...,TWO_WAY_AADT_TRUCKS,ALLVEH_AMPEAK_AADT,ALLVEH_PMPEAK_AADT,GROWTH_RATE,CI,AM_PEAK_SPEED,OFF_PEAK_SPEED,PM_PEAK_SPEED,YR,LABEL
0,1,743,14915,14915,2006,MARYSVILLE-WOODS POINT ROAD btwn LAKE MOUNTAI...,YARRA RANGES,METROPOLITAN SOUTH EAST REGION,4961,MARYSVILLE-WOODS POINT ROAD,...,0.0,,,0.013,0.005,,,,2020,24* (13% 3*) EAST BOUND
1,2,650,14140,14140,8786,STEELS CREEK ROAD btwn WILLOWBEND DRIVE & ELT...,YARRA RANGES,METROPOLITAN SOUTH EAST REGION,9999,Not Applicable,...,40.0,,,0.019,0.002,,,,2020,373* (6% 22*) NORTH BOUND
2,3,701,12113,12113,6035,LATROBE ROAD btwn TANJIL EAST ROAD & GORDON S...,LATROBE,EASTERN REGION,5911,MORWELL-YALLOURN NORTH ROAD,...,160.0,,,0.015,0.009,,,,2020,"1,100* (6% 61*) NORTH BOUND"
3,4,702,12897,12897,7079,CASTERTON ROAD btwn GLENELG HIGHWAY & COLERAI...,SOUTHERN GRAMPIANS,SOUTH WESTERN REGION,2670,GLENELG HIGHWAY,...,340.0,,,0.02,0.001,,,,2020,801* (21% 165*) WEST BOUND
4,5,703,9893,9893,3475,HUTTON ROAD btwn CHAPEL ROAD & GREENS ROAD,DANDENONG,METROPOLITAN SOUTH EAST REGION,5168,BRAESIDE-DANDENONG ROAD,...,1500.0,1000.0,1100.0,0.003,0.002,,,,2020,"12,000 (6% 744*) WEST BOUND"


#### VicRoads Transportation Accidents Dataset

In [None]:
# Location of the VicRoads road-safety archive: a zip file that bundles several CSV files.
crash_url = (
    'https://vicroadsopendatastorehouse.vicroads.vic.gov.au'
    '/opendata/Road_Safety/ACCIDENT.zip'
)

In [None]:
# Download the archive. The timeout stops the cell from hanging indefinitely on a
# stalled connection, and raise_for_status() reports an HTTP error here rather than
# letting an error page reach the zip reader further down.
crash_request = requests.get(crash_url, timeout=60)
crash_request.raise_for_status()

Read the zip file and view filename contents

In [None]:
# Wrap the downloaded bytes in an in-memory buffer, open it as a zip archive and list
# the member file names.
crash_archive_bytes = BytesIO(crash_request.content)
crash_files = zf.ZipFile(crash_archive_bytes)
print(crash_files.namelist())

['ACCIDENT.csv', 'ACCIDENT_CHAINAGE.csv', 'ACCIDENT_EVENT.csv', 'ACCIDENT_LOCATION.csv', 'ATMOSPHERIC_COND.csv', 'NODE.csv', 'NODE_ID_COMPLEX_INT_ID.csv', 'PERSON.csv', 'ROAD_SURFACE_COND.csv', 'Statistic Checks.csv', 'SUBDCA.csv', 'VEHICLE.csv']


Save the 'ACCIDENT.csv' file to a temp folder and load into a dataframe

In [None]:
# Extract ACCIDENT.csv into the 'temp' folder, then read the extracted file.
# The path gets its own name so `accident` only ever refers to the DataFrame.
accident_csv_path = crash_files.extract('ACCIDENT.csv', path='temp')
accident = pd.read_csv(accident_csv_path, low_memory=False)
accident.head()

Unnamed: 0,ACCIDENT_NO,ACCIDENTDATE,ACCIDENTTIME,ACCIDENT_TYPE,Accident Type Desc,DAY_OF_WEEK,Day Week Description,DCA_CODE,DCA Description,DIRECTORY,...,NO_PERSONS,NO_PERSONS_INJ_2,NO_PERSONS_INJ_3,NO_PERSONS_KILLED,NO_PERSONS_NOT_INJ,POLICE_ATTEND,ROAD_GEOMETRY,Road Geometry Desc,SEVERITY,SPEED_ZONE
0,T20060000010,13/01/2006,12:42:00,1,Collision with vehicle,6,Friday,113,RIGHT NEAR (INTERSECTIONS ONLY),MEL,...,6,0,1,0,5,1,1,Cross intersection,3,60
1,T20060000018,13/01/2006,19:10:00,1,Collision with vehicle,6,Friday,113,RIGHT NEAR (INTERSECTIONS ONLY),MEL,...,4,0,1,0,3,1,2,T intersection,3,70
2,T20060000022,14/01/2006,12:10:00,7,Fall from or in moving vehicle,7,Saturday,190,FELL IN/FROM VEHICLE,MEL,...,2,1,0,0,1,1,5,Not at intersection,2,100
3,T20060000023,14/01/2006,11:49:00,1,Collision with vehicle,7,Saturday,130,REAR END(VEHICLES IN SAME LANE),MEL,...,2,1,0,0,1,1,2,T intersection,2,80
4,T20060000026,14/01/2006,10:45:00,1,Collision with vehicle,7,Saturday,121,RIGHT THROUGH,MEL,...,3,0,3,0,0,1,5,Not at intersection,3,50
