This notebook downloads all external datasets used for this project:

1. VIC school location data 2020 - 2023
2. VIC park & reserve location data (latest)
3. VIC train station location data 2023 (latest)
4. VIC population data 2001 - 2023 (latest)
5. VIC annual income by region data 2016 - 2020 (latest)
6. Australian SA2 boundary shapefile 2021 (ASGS Edition 3)

In [1]:
from pathlib import Path
import requests
import zipfile
import os

In [2]:
# Create the directory tree that holds every external dataset.
data_folder = Path.cwd().parent / 'data'
external_root = data_folder / 'raw' / 'external'

# One sub-directory per dataset family, all under data/raw/external.
school_dir = external_root / 'school'
park_dir = external_root / 'park'
train_station_dir = external_root / 'train_station'
population_dir = external_root / 'population'
income_dir = external_root / 'income'

# exist_ok=True keeps this cell safe to re-run; parents=True builds data/raw/external as needed.
for dataset_dir in (school_dir, park_dir, train_station_dir, population_dir, income_dir):
    dataset_dir.mkdir(parents=True, exist_ok=True)




1. Download VIC school location data 2020 - 2023

In [3]:
# Download the VIC school location CSVs for 2020-2023.
# Each entry is (source URL, file name shown in the log line, destination path).
files = [
    ("https://www.education.vic.gov.au/Documents/about/research/datavic/dv296-schoollocations2020.csv", "school_location_2020.csv", school_dir / "school_location_2020.csv"),
    ("https://www.education.vic.gov.au/Documents/about/research/datavic/dv309_schoollocations2021.csv", "school_location_2021.csv", school_dir / "school_location_2021.csv"),
    ("https://www.education.vic.gov.au/Documents/about/research/datavic/dv331_schoollocations2022.csv", "school_location_2022.csv", school_dir / "school_location_2022.csv"),
    ("https://www.education.vic.gov.au/Documents/about/research/datavic/dv346-schoollocations2023.csv", "school_location_2023.csv", school_dir / "school_location_2023.csv"),
]

for url, file_name, destination in files:
    # Without a timeout requests waits indefinitely on an unresponsive server,
    # which would stall "Run All"; 60 s without a response aborts the request.
    response = requests.get(url, timeout=60)
    # Raise on 4xx/5xx so an error page is never saved under a .csv name.
    response.raise_for_status()

    with open(destination, 'wb') as f:
        f.write(response.content)

    print(f"{file_name} downloaded and saved to {destination} ")


school_location_2020.csv downloaded and saved to c:\Users\29557\Documents\GitHub\project-2-group-real-estate-industry-project-7\data\raw\external\school\school_location_2020.csv 
school_location_2021.csv downloaded and saved to c:\Users\29557\Documents\GitHub\project-2-group-real-estate-industry-project-7\data\raw\external\school\school_location_2021.csv 
school_location_2022.csv downloaded and saved to c:\Users\29557\Documents\GitHub\project-2-group-real-estate-industry-project-7\data\raw\external\school\school_location_2022.csv 
school_location_2023.csv downloaded and saved to c:\Users\29557\Documents\GitHub\project-2-group-real-estate-industry-project-7\data\raw\external\school\school_location_2023.csv 


2. Download VIC park & reserve location data

In [4]:
# Download the VIC park & reserve archive and unpack it beside the zip file.
# requests, Path and zipfile come from the import cell at the top of the notebook.

# Where the archive is saved and where its contents are extracted.
zip_folder = park_dir / 'park_reserve'
park_file = 'park_reserve.zip'
park_file_path = park_dir / park_file

# URL of the park reserve zip file.
# NOTE(review): this looks like a per-order delivery link ("Order_3QOIU3") and may
# expire - confirm there is a stable source before relying on re-runs.
url = "https://s3.ap-southeast-2.amazonaws.com/cl-isd-prd-datashare-s3-delivery/Order_3QOIU3.zip"

# parents=True also creates park_dir itself if it is missing.
zip_folder.mkdir(parents=True, exist_ok=True)

# Without a timeout requests waits indefinitely on an unresponsive server;
# 60 s without a response aborts the request.
response = requests.get(url, timeout=60)
# Raise on 4xx/5xx so an error body is never written out as a zip archive.
response.raise_for_status()

with open(park_file_path, 'wb') as f:
    f.write(response.content)

print(f"{park_file} downloaded and saved to {park_file_path}")

# Unzip the archive into zip_folder.
with zipfile.ZipFile(park_file_path, 'r') as zip_ref:
    zip_ref.extractall(zip_folder)

print(f"File unzipped to {zip_folder}")




park_reserve.zip downloaded and saved to c:\Users\29557\Documents\GitHub\project-2-group-real-estate-industry-project-7\data\raw\external\park\park_reserve.zip
File unzipped to c:\Users\29557\Documents\GitHub\project-2-group-real-estate-industry-project-7\data\raw\external\park\park_reserve


3. Download VIC train station location data

In [5]:
# Download the 2023-24 annual train station entries CSVs (metropolitan and regional).
# Each entry is (source URL, file name shown in the log line, destination path).
files = [
    ("https://vicroadsopendatastorehouse.vicroads.vic.gov.au/opendata/Public_Transport/Patronage/Annual_metropolitan_train_station_entries/Annual_Metropolitan_Train_Station_Entries_2023-24.csv", "metropolitan_train_location_2023.csv", train_station_dir / "metropolitan_train_location_2023.csv"),
    ("https://vicroadsopendatastorehouse.vicroads.vic.gov.au/opendata/Public_Transport/Patronage/Annual%20regional%20train%20station%20entries/Annual_Regional_Train_Station_Entries_2023-24.csv", "regional_train_location_2023.csv", train_station_dir / "regional_train_location_2023.csv"),
]

# Earlier years are not downloaded by default. To fetch them, add the entries
# below to the `files` list above.
#    ("https://vicroadsopendatastorehouse.vicroads.vic.gov.au/opendata/Public_Transport/Patronage/Annual%20metropolitan%20train%20station%20entries/Annual%20metropolitan%20train%20station%20entries%202020-21.csv", "metropolitan_train_location_2020.csv", train_station_dir / "metropolitan_train_location_2020.csv"),
#    ("https://vicroadsopendatastorehouse.vicroads.vic.gov.au/opendata/Public_Transport/Patronage/Annual%20regional%20train%20station%20entries/Annual%20regional%20train%20station%20entries%202020-21.csv", "regional_train_location_2020.csv", train_station_dir / "regional_train_location_2020.csv"),
#
#    ("https://vicroadsopendatastorehouse.vicroads.vic.gov.au/opendata/Public_Transport/Patronage/Annual%20metropolitan%20train%20station%20entries/Annual%20metropolitan%20train%20station%20entries%202021-22.csv", "metropolitan_train_location_2021.csv", train_station_dir / "metropolitan_train_location_2021.csv"),
#    ("https://vicroadsopendatastorehouse.vicroads.vic.gov.au/opendata/Public_Transport/Patronage/Annual%20regional%20train%20station%20entries/Annual%20regional%20train%20station%20entries%202021-22.csv", "regional_train_location_2021.csv", train_station_dir / "regional_train_location_2021.csv"),
#
#    ("https://vicroadsopendatastorehouse.vicroads.vic.gov.au/opendata/Public_Transport/Patronage/Annual_metropolitan_train_station_entries/Annual_metropolitan_train_station_entries_2022_23.csv", "metropolitan_train_location_2022.csv", train_station_dir / "metropolitan_train_location_2022.csv"),
#    ("https://vicroadsopendatastorehouse.vicroads.vic.gov.au/opendata/Public_Transport/Patronage/Annual%20regional%20train%20station%20entries/Annual%20regional%20train%20station%20entries%202022-23.csv", "regional_train_location_2022.csv", train_station_dir / "regional_train_location_2022.csv"),

for url, file_name, destination in files:
    # Without a timeout requests waits indefinitely on an unresponsive server,
    # which would stall "Run All"; 60 s without a response aborts the request.
    response = requests.get(url, timeout=60)
    # Raise on 4xx/5xx so an error page is never saved under a .csv name.
    response.raise_for_status()

    with open(destination, 'wb') as f:
        f.write(response.content)

    print(f"{file_name} downloaded and saved to {destination} ")

metropolitan_train_location_2023.csv downloaded and saved to c:\Users\29557\Documents\GitHub\project-2-group-real-estate-industry-project-7\data\raw\external\train_station\metropolitan_train_location_2023.csv 
regional_train_location_2023.csv downloaded and saved to c:\Users\29557\Documents\GitHub\project-2-group-real-estate-industry-project-7\data\raw\external\train_station\regional_train_location_2023.csv 


4. Download VIC population data 2001-2023

In [6]:
# Download the ABS regional population workbook (2001-23).
url = 'https://www.abs.gov.au/statistics/people/population/regional-population/2022-23/32180DS0003_2001-23.xlsx'

# The filename to save the downloaded file under.
file_name = "population_2001-23.xlsx"
destination = population_dir / file_name

# Without a timeout requests waits indefinitely on an unresponsive server;
# 60 s without a response aborts the request.
response = requests.get(url, timeout=60)

# Raise on a failed request, matching the other download cells. Printing the
# status and carrying on would let "Run All" finish without the file on disk.
response.raise_for_status()

with open(destination, 'wb') as file:
    file.write(response.content)

print(f"Excel file downloaded and saved as {destination}")

Excel file downloaded and saved as c:\Users\29557\Documents\GitHub\project-2-group-real-estate-industry-project-7\data\raw\external\population\population_2001-23.xlsx


5. Download VIC annual income by region data 2016 - 2020 (latest)

In [9]:
# Download the ABS personal income workbook covering 2016-17 to 2020-21.
url = "https://www.abs.gov.au/statistics/labour/earnings-and-working-conditions/personal-income-australia/2020-21-financial-year/Table%201%20-%20Total%20income%2C%20earners%20and%20summary%20statistics%20by%20geography%2C%202016-17%20to%202020-21.xlsx"
file_name = "income_2016_2020.xlsx"
destination = income_dir / file_name

# Without a timeout requests waits indefinitely on an unresponsive server;
# 60 s without a response aborts the request.
response = requests.get(url, timeout=60)
# Raise on 4xx/5xx so an error page is never saved under an .xlsx name.
response.raise_for_status()

with open(destination, 'wb') as f:
    f.write(response.content)

print(f"{file_name} downloaded and saved to {destination}")

income_2016_2020.xlsx downloaded and saved to c:\Users\29557\Documents\GitHub\project-2-group-real-estate-industry-project-7\data\raw\external\income\income_2016_2020.xlsx


6. Download SA2 boundary shapefile 2021

In [8]:
# Download the ABS SA2 2021 boundary shapefile archive and extract it.

# URL of the zip file
url = "https://www.abs.gov.au/statistics/standards/australian-statistical-geography-standard-asgs-edition-3/jul2021-jun2026/access-and-downloads/digital-boundary-files/SA2_2021_AUST_SHP_GDA2020.zip"

# Paths are relative to the notebook's working directory.
zip_filename = "../data/raw/external/SA2_2021_AUST_SHP_GDA2020.zip"
extract_folder = "../data/raw/external/SA2_2021_ShapeFile"

# Create the extraction directory first. makedirs also builds the parent
# ../data/raw/external, so the zip below always has somewhere to be written.
# exist_ok=True replaces the separate exists() check.
os.makedirs(extract_folder, exist_ok=True)

# Without a timeout requests waits indefinitely on an unresponsive server;
# 60 s without a response aborts the request.
response = requests.get(url, timeout=60)
# Raise on 4xx/5xx. Otherwise an HTTP error body would be saved as the .zip
# and only surface later as a BadZipFile error during extraction.
response.raise_for_status()

with open(zip_filename, "wb") as file:
    file.write(response.content)

# Unzip the file
with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
    zip_ref.extractall(extract_folder)

print(f"File downloaded and extracted to {extract_folder}")

File downloaded and extracted to ../data/raw/external/SA2_2021_ShapeFile
