### Reading in data and assessing what sort of information we can get from it

In [1]:
# sodapy is required by the cells below; uncomment to install it into this kernel's environment
# %pip install sodapy

### Imports

In [2]:
import numpy as np
import pandas as pd
from sodapy import Socrata
import os

import zipfile as zf
import requests
from io import BytesIO

#### City of Melbourne (CoM) Population Dataset
Source: https://data.melbourne.vic.gov.au/resource/sp4r-xphj.json

In [3]:
# Host name of the City of Melbourne open-data portal
domain = "data.melbourne.vic.gov.au"
# Resource identifier of the population dataset on that portal
data_file = "sp4r-xphj"

In [4]:
# The app token is read from the environment so it is never stored in the notebook.
# It is None when SODAPY_APPTOKEN is unset (presumably anonymous access — confirm against sodapy docs).
apptoken = os.getenv("SODAPY_APPTOKEN")
client = Socrata(domain, app_token=apptoken)



##### View the Population dataset head sample

In [5]:
# Fetch every record of the population resource and build a DataFrame from them
population_records = client.get_all(data_file)
population_data = pd.DataFrame.from_dict(population_records)
population_data.head()

Unnamed: 0,geography,year,gender,age,value
0,City of Melbourne,2020,Female,Age 0-4,2683
1,City of Melbourne,2021,Female,Age 0-4,2945
2,City of Melbourne,2022,Female,Age 0-4,3212
3,City of Melbourne,2023,Female,Age 0-4,3515
4,City of Melbourne,2024,Female,Age 0-4,3833


#### VicRoads Traffic Dataset

In [6]:
# CSV export of the VicRoads traffic dataset; the query string carries a URL-encoded outSR parameter
traffic_url = (
    'https://vicroadsopendata-vicroadsmaps.opendata.arcgis.com'
    '/datasets/5512df2ff41e4941bacf868053dbfba9_0.csv'
    '?outSR=%7B%22latestWkid%22%3A3111%2C%22wkid%22%3A102171%7D'
)

In [7]:
# pandas downloads and parses the CSV straight from the URL, so this cell needs network access
traffic_data = pd.read_csv(filepath_or_buffer=traffic_url)

In [8]:
# Preview the first five rows of the traffic dataset
traffic_data.head(n=5)

Unnamed: 0,OBJECTID_1,OBJECTID,TIS_ID,HMGNS_FLOW_ID,HMGNS_LNK_ID,HMGNS_LNK_DESC,LGA_SHORT_NM,RGN_LONG_NM,ROAD_NBR,DECLARED_ROAD_NM,...,TWO_WAY_AADT_TRUCKS,ALLVEH_AMPEAK_AADT,ALLVEH_PMPEAK_AADT,GROWTH_RATE,CI,AM_PEAK_SPEED,OFF_PEAK_SPEED,PM_PEAK_SPEED,YR,LABEL
0,1,743,14915,14915,2006,MARYSVILLE-WOODS POINT ROAD btwn LAKE MOUNTAI...,YARRA RANGES,METROPOLITAN SOUTH EAST REGION,4961,MARYSVILLE-WOODS POINT ROAD,...,0.0,,,0.013,0.005,,,,2020,24* (13% 3*) EAST BOUND
1,2,650,14140,14140,8786,STEELS CREEK ROAD btwn WILLOWBEND DRIVE & ELT...,YARRA RANGES,METROPOLITAN SOUTH EAST REGION,9999,Not Applicable,...,40.0,,,0.019,0.002,,,,2020,373* (6% 22*) NORTH BOUND
2,3,701,12113,12113,6035,LATROBE ROAD btwn TANJIL EAST ROAD & GORDON S...,LATROBE,EASTERN REGION,5911,MORWELL-YALLOURN NORTH ROAD,...,160.0,,,0.015,0.009,,,,2020,"1,100* (6% 61*) NORTH BOUND"
3,4,702,12897,12897,7079,CASTERTON ROAD btwn GLENELG HIGHWAY & COLERAI...,SOUTHERN GRAMPIANS,SOUTH WESTERN REGION,2670,GLENELG HIGHWAY,...,340.0,,,0.02,0.001,,,,2020,801* (21% 165*) WEST BOUND
4,5,703,9893,9893,3475,HUTTON ROAD btwn CHAPEL ROAD & GREENS ROAD,DANDENONG,METROPOLITAN SOUTH EAST REGION,5168,BRAESIDE-DANDENONG ROAD,...,1500.0,1000.0,1100.0,0.003,0.002,,,,2020,"12,000 (6% 744*) WEST BOUND"


#### VicRoads Transportation Accidents Dataset

In [9]:
# Location of a zip archive that bundles several CSV files
crash_url = (
    'https://vicroadsopendatastorehouse.vicroads.vic.gov.au'
    '/opendata/Road_Safety/ACCIDENT.zip'
)

In [10]:
# Download the zip archive into memory.
# Without a timeout, requests waits indefinitely on a stalled connection,
# which would hang the notebook; 60 is in seconds.
crash_request = requests.get(crash_url, timeout=60)
# Fail here on an HTTP error (4xx/5xx) instead of handing an error page to
# the zip reader, where it would surface as a confusing BadZipFile.
crash_request.raise_for_status()

Open the downloaded zip archive and list the names of the files it contains

In [11]:
# Wrap the downloaded bytes in an in-memory buffer so the archive is opened without touching disk
crash_files = zf.ZipFile(file=BytesIO(crash_request.content), mode='r')
print(crash_files.namelist())

['ACCIDENT.csv', 'ACCIDENT_CHAINAGE.csv', 'ACCIDENT_EVENT.csv', 'ACCIDENT_LOCATION.csv', 'ATMOSPHERIC_COND.csv', 'NODE.csv', 'NODE_ID_COMPLEX_INT_ID.csv', 'PERSON.csv', 'ROAD_SURFACE_COND.csv', 'Statistic Checks.csv', 'SUBDCA.csv', 'VEHICLE.csv']


Extract 'ACCIDENT.csv' from the archive into a temp folder and load it into a DataFrame

In [12]:
# extract() writes ACCIDENT.csv under ./temp and returns the path it created.
# The path gets its own name so `accident` only ever refers to the DataFrame.
accident_csv_path = crash_files.extract('ACCIDENT.csv', 'temp')
# low_memory=False parses the file in one pass rather than in chunks, which
# avoids mixed-type inference within a column.
accident = pd.read_csv(accident_csv_path, low_memory=False)
accident.head()

Unnamed: 0,ACCIDENT_NO,ACCIDENTDATE,ACCIDENTTIME,ACCIDENT_TYPE,Accident Type Desc,DAY_OF_WEEK,Day Week Description,DCA_CODE,DCA Description,DIRECTORY,...,NO_PERSONS,NO_PERSONS_INJ_2,NO_PERSONS_INJ_3,NO_PERSONS_KILLED,NO_PERSONS_NOT_INJ,POLICE_ATTEND,ROAD_GEOMETRY,Road Geometry Desc,SEVERITY,SPEED_ZONE
0,T20060000010,13/01/2006,12:42:00,1,Collision with vehicle,6,Friday,113,RIGHT NEAR (INTERSECTIONS ONLY),MEL,...,6,0,1,0,5,1,1,Cross intersection,3,60
1,T20060000018,13/01/2006,19:10:00,1,Collision with vehicle,6,Friday,113,RIGHT NEAR (INTERSECTIONS ONLY),MEL,...,4,0,1,0,3,1,2,T intersection,3,70
2,T20060000022,14/01/2006,12:10:00,7,Fall from or in moving vehicle,7,Saturday,190,FELL IN/FROM VEHICLE,MEL,...,2,1,0,0,1,1,5,Not at intersection,2,100
3,T20060000023,14/01/2006,11:49:00,1,Collision with vehicle,7,Saturday,130,REAR END(VEHICLES IN SAME LANE),MEL,...,2,1,0,0,1,1,2,T intersection,2,80
4,T20060000026,14/01/2006,10:45:00,1,Collision with vehicle,7,Saturday,121,RIGHT THROUGH,MEL,...,3,0,3,0,0,1,5,Not at intersection,3,50
