# External Data Downloader

This notebook downloads all of the external data sets that are needed.

In [2]:
import os
import urllib
from urllib.request import urlretrieve
import zipfile
import requests
import tempfile

In [3]:
# Directory that every download in this notebook is written to
output_dir = "../data/raw/"

# Create the directory up front: urlretrieve does not create missing parent
# folders, so a fresh checkout would otherwise fail on the first download.
os.makedirs(output_dir, exist_ok=True)

### 1 - School Location

In [4]:
# Common URL prefix shared by the school-location downloads
URL_school = "http://www.education.vic.gov.au/Documents/about/research/datavic/"

# File stem (no extension) of the 2018-2019 school data
URL_school_18_19 = URL_school + "dv279-schoollocations2019"

# Destination path of the downloaded CSV
output = "{}school_location_18_19.csv".format(output_dir)

# Fetch the CSV; the returned (path, headers) tuple is displayed as the cell output
urlretrieve(URL_school_18_19 + ".csv", filename=output)

('../data/raw/school_location_18_19.csv',
 <http.client.HTTPMessage at 0x2c2499c7550>)

In [5]:
# File stem (no extension) of the 2020 school data
URL_school_20 = URL_school + "dv296-schoollocations2020"

# Destination path of the downloaded CSV
output = "{}school_location_20.csv".format(output_dir)

# Fetch the CSV; the returned (path, headers) tuple is displayed as the cell output
urlretrieve(URL_school_20 + ".csv", filename=output)

('../data/raw/school_location_20.csv',
 <http.client.HTTPMessage at 0x2c2499c7820>)

In [6]:
# File stem (no extension) of the 2021 school data
URL_school_21 = URL_school + "dv309_schoollocations2021"

# Destination path of the downloaded CSV
output = "{}school_location_21.csv".format(output_dir)

# Fetch the CSV; the returned (path, headers) tuple is displayed as the cell output
urlretrieve(URL_school_21 + ".csv", filename=output)

('../data/raw/school_location_21.csv',
 <http.client.HTTPMessage at 0x2c2499c7460>)

In [7]:
# File stem (no extension) of the 2022 school data
URL_school_22 = URL_school + "dv331_schoollocations2022"

# Destination path of the downloaded CSV
output = "{}school_location_22.csv".format(output_dir)

# Fetch the CSV; the returned (path, headers) tuple is displayed as the cell output
urlretrieve(URL_school_22 + ".csv", filename=output)

('../data/raw/school_location_22.csv',
 <http.client.HTTPMessage at 0x2c2499c4160>)

### 2 - Population data

In [8]:
# Population workbook URL, without the file extension
URL_population = "https://www.abs.gov.au/statistics/people/population/regional-population/2021/32180DS0001_2001-21"

# Destination path of the downloaded workbook
output = "{}population.xlsx".format(output_dir)

# Fetch the workbook; the returned (path, headers) tuple is displayed as the cell output
urlretrieve(URL_population + ".xlsx", filename=output)

('../data/raw/population.xlsx', <http.client.HTTPMessage at 0x2c2499c77f0>)

### 3 - Train station

In [9]:
# Base URL shared by the train-station patronage downloads
URL_train = "https://vicroadsopendatastorehouse.vicroads.vic.gov.au/opendata/Public_Transport/Patronage/"

# 2018-2019 train data (file stem, without the extension)
URL_train_18_19 = f"{URL_train}Annual%20regional%20train%20station%20entries%202018-19"

# Generate output location and filename
output = f"{output_dir}train_station_18to19.xlsx"

# Send a browser-like User-Agent with the request. The value is assembled from
# adjacent string literals: a backslash continuation inside a single literal
# embeds the next line's indentation in the header value.
opener = urllib.request.build_opener()
opener.addheaders = [(
    'User-Agent',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36',
)]
# install_opener makes this opener the global default that urlretrieve uses
urllib.request.install_opener(opener)

# Urlretrieve the file
urllib.request.urlretrieve(f'{URL_train_18_19}.xlsx', output)

('../data/raw/train_station_18to19.xlsx',
 <http.client.HTTPMessage at 0x2c24a161540>)

In [10]:
# 2019-2020 train data (file stem, without the extension)
URL_train_19_20 = f"{URL_train}Annual%20regional%20train%20station%20entries%202019-20"

# Generate output location and filename
output = f"{output_dir}train_station_19to20.xlsx"

# Send a browser-like User-Agent with the request. The value is assembled from
# adjacent string literals: a backslash continuation inside a single literal
# embeds the next line's indentation in the header value.
opener = urllib.request.build_opener()
opener.addheaders = [(
    'User-Agent',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36',
)]
# install_opener makes this opener the global default that urlretrieve uses
urllib.request.install_opener(opener)

# Urlretrieve the file
urllib.request.urlretrieve(f'{URL_train_19_20}.xlsx', output)


('../data/raw/train_station_19to20.xlsx',
 <http.client.HTTPMessage at 0x2c249a4c280>)

In [11]:
# 2020-2021 train data (file stem, without the extension)
URL_train_20_21 = f"{URL_train}Annual%20regional%20train%20station%20entries%202020-21"

# Generate output location and filename
output = f"{output_dir}train_station_20to21.xlsx"

# Send a browser-like User-Agent with the request. The value is assembled from
# adjacent string literals: a backslash continuation inside a single literal
# embeds the next line's indentation in the header value.
opener = urllib.request.build_opener()
opener.addheaders = [(
    'User-Agent',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36',
)]
# install_opener makes this opener the global default that urlretrieve uses
urllib.request.install_opener(opener)

# Urlretrieve the file
urllib.request.urlretrieve(f'{URL_train_20_21}.xlsx', output)

('../data/raw/train_station_20to21.xlsx',
 <http.client.HTTPMessage at 0x2c249a4c550>)

### 4 - Census Data

In [12]:
# Location of the 2021 Census zip archive for Victoria
URL_census_2021 = 'https://www.abs.gov.au/census/find-census-data/datapacks/download/2021_GCP_all_for_VIC_short-header.zip'

# Destination path of the downloaded archive
filename = "{}census_2021.zip".format(output_dir)

# Fetch the archive; the returned (path, headers) tuple is displayed as the cell output
urlretrieve(URL_census_2021, filename=filename)

('../data/raw/census_2021.zip', <http.client.HTTPMessage at 0x2c2499c7d60>)

In [13]:
# Extract only the archive members that live under the POA/ folder.
# The context manager closes the archive handle once extraction finishes
# (or fails), instead of leaving the file open for the rest of the session.
with zipfile.ZipFile(f'{output_dir}census_2021.zip') as census2021:
    for file in census2021.namelist():
        if file.startswith("2021 Census GCP All Geographies for VIC/POA/"):
            census2021.extract(file, f'{output_dir}/POA')

### 5 - Suburb Past Rental data (2000-2022)

In [14]:
# Workbook of moving annual rent by suburb (June quarter 2022, per the file name)
URL_rental = "https://www.dffh.vic.gov.au/sites/default/files/documents/202209/Moving%20annual%20rent%20by%20suburb%20-%20June%20quarter%202022.xlsx"

# Destination path of the downloaded workbook
output = "{}past_rental00_22.xlsx".format(output_dir)

# Fetch the workbook; the returned (path, headers) tuple is displayed as the cell output
urlretrieve(URL_rental, filename=output)

('../data/raw/past_rental00_22.xlsx',
 <http.client.HTTPMessage at 0x2c2499c76a0>)

### 6 - Parks

In [15]:
def get_data():
    """Download the parks zip archive.

    Returns:
        tuple: ``(url, content)``, where ``url`` is the address that was
        requested and ``content`` is the response body as bytes.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    url = "https://s3-ap-southeast-2.amazonaws.com/cl-isd-prd-datashare-s3-delivery/Order_AEHQI8.zip"
    response = requests.get(url)
    # Fail here on an HTTP error rather than passing an error page on to zipfile
    response.raise_for_status()
    return url, response.content


if __name__ == '__main__':
    url, data = get_data()

    # The context managers close the temporary file and the archive even if
    # extraction raises part-way through.
    with tempfile.TemporaryFile() as _tmp_file:
        print(_tmp_file)

        _tmp_file.write(data)

        # No explicit seek is needed: ZipFile repositions the file itself when reading
        with zipfile.ZipFile(_tmp_file, mode='r') as zf:
            for names in zf.namelist():
                # extract() returns the path of the file it created
                f = zf.extract(names, '../data/raw')
                print(f)

<tempfile._TemporaryFileWrapper object at 0x000002C2499C7CA0>
..\data\raw\ANZVI0803004883.html
..\data\raw\Creative Commons Licence.html
..\data\raw\ll_gda2020\esrishape\whole_of_dataset\victoria\CROWNLAND\PARKRES.cpg
..\data\raw\ll_gda2020\esrishape\whole_of_dataset\victoria\CROWNLAND\PARKRES.dbf
..\data\raw\ll_gda2020\esrishape\whole_of_dataset\victoria\CROWNLAND\PARKRES.prj
..\data\raw\ll_gda2020\esrishape\whole_of_dataset\victoria\CROWNLAND\PARKRES.shp
..\data\raw\ll_gda2020\esrishape\whole_of_dataset\victoria\CROWNLAND\PARKRES.shx
..\data\raw\ll_gda2020\esrishape\whole_of_dataset\victoria\CROWNLAND\PARKRES_column_names.txt


### 7 - Yearly Income

In [16]:
# Personal income workbook (the URL path covers 2014-15 to 2018-19)
URL_yearly_income = 'https://www.abs.gov.au/statistics/labour/earnings-and-working-conditions/personal-income-australia/2014-15-2018-19/6524055002_DO001.xlsx'

# Destination path of the downloaded workbook
output = "{}Total_income.xlsx".format(output_dir)

# Fetch the workbook; the returned (path, headers) tuple is displayed as the cell output
urlretrieve(URL_yearly_income, filename=output)

('../data/raw/Total_income.xlsx', <http.client.HTTPMessage at 0x2c249351270>)

### 8 - LGA Past Rental data (2000-2021)

In [17]:
# Quarterly median rents by local government area (March quarter 2021, per the URL)
URL_rent = 'https://www.dhhs.vic.gov.au/quarterly-median-rents-local-government-area-march-quarter-2021-excel'

# Destination path of the downloaded workbook
output = "{}external_rent.xlsx".format(output_dir)

# Fetch the workbook; the returned (path, headers) tuple is displayed as the cell output
urlretrieve(URL_rent, filename=output)

('../data/raw/external_rent.xlsx', <http.client.HTTPMessage at 0x2c24a161360>)

### 9 - VIC map

If you have problems downloading these zip files, try running `chmod 700 filename` on the affected file.

In [18]:
# Suburb map: download the zipped shapefile, then unpack it next to the archive
import zipfile
import urllib.request

filename = '../data/raw/suburb_map.zip'
urllib.request.urlretrieve(
    "https://www.abs.gov.au/statistics/standards/australian-statistical-geography-standard-asgs-edition-3/jul2021-jun2026/access-and-downloads/digital-boundary-files/SAL_2021_AUST_GDA2020_SHP.zip",
    filename,
)

# Unpack every member of the archive into the raw-data folder
with zipfile.ZipFile(filename) as archive:
    archive.extractall(path="../data/raw/")

In [19]:
# Postcode map: download the zipped shapefile, then unpack it next to the archive
import zipfile
import urllib.request

filename = '../data/raw/population_map.zip'
urllib.request.urlretrieve(
    "https://www.abs.gov.au/statistics/standards/australian-statistical-geography-standard-asgs-edition-3/jul2021-jun2026/access-and-downloads/digital-boundary-files/POA_2021_AUST_GDA2020_SHP.zip",
    filename,
)

# Unpack every member of the archive into the raw-data folder
with zipfile.ZipFile(filename) as archive:
    archive.extractall(path="../data/raw/")

In [20]:
# SA2 map: download the zipped shapefile, then unpack it next to the archive
import zipfile
import urllib.request

filename = '../data/raw/SA2_map.zip'
urllib.request.urlretrieve(
    "https://www.abs.gov.au/statistics/standards/australian-statistical-geography-standard-asgs-edition-3/jul2021-jun2026/access-and-downloads/digital-boundary-files/SA2_2021_AUST_SHP_GDA2020.zip",
    filename,
)

# Unpack every member of the archive into the raw-data folder
with zipfile.ZipFile(filename) as archive:
    archive.extractall(path="../data/raw/")