This notebook downloads all external datasets used in this project:

1. VIC school location data 2020 - 2023
2. VIC park & reserve location data (latest)
3. VIC train station location data 2023 (latest)
4. VIC population data 2021 (latest)
5. VIC annual income by region data 2016 - 2020 (latest)

In [2]:
from pathlib import Path
import requests
import zipfile

In [3]:
# Create the external data directories.
# All external datasets live under <repo>/data/raw/external/<dataset>, where
# <repo> is taken to be the parent of the current working directory.
data_folder = Path.cwd().parent / 'data'
external_dir = data_folder / 'raw' / 'external'

school_dir = external_dir / 'school'
park_dir = external_dir / 'park'
train_station_dir = external_dir / 'train_station'
population_dir = external_dir / 'population'
income_dir = external_dir / 'income'

# parents=True builds any missing intermediate folders; exist_ok=True makes
# re-running this cell a no-op when the folders are already there.
for directory in (school_dir, park_dir, train_station_dir, population_dir, income_dir):
    directory.mkdir(parents=True, exist_ok=True)




1. Download VIC school location data 2020 - 2023

In [4]:
# Download school location data 2020-2023

# Each entry is (source URL, file name shown in the log message, local destination path).
files = [
    ("https://www.education.vic.gov.au/Documents/about/research/datavic/dv296-schoollocations2020.csv", "school_location_2020.csv", school_dir / "school_location_2020.csv"),
    ("https://www.education.vic.gov.au/Documents/about/research/datavic/dv309_schoollocations2021.csv", "school_location_2021.csv", school_dir / "school_location_2021.csv"),
    ("https://www.education.vic.gov.au/Documents/about/research/datavic/dv331_schoollocations2022.csv", "school_location_2022.csv", school_dir / "school_location_2022.csv"),
    ("https://www.education.vic.gov.au/Documents/about/research/datavic/dv346-schoollocations2023.csv", "school_location_2023.csv", school_dir / "school_location_2023.csv"),
]


for url, file_name, destination in files:
    # (connect, read) timeouts in seconds. requests applies no timeout by
    # default, so a stalled server would otherwise block this cell forever.
    response = requests.get(url, timeout=(10, 120))
    # Raise on an HTTP error status instead of saving an error page as a CSV.
    response.raise_for_status()

    with open(destination, 'wb') as f:
        f.write(response.content)

    print(f"{file_name} downloaded and saved to {destination} ")


school_location_2020.csv downloaded and saved to c:\Users\29557\Documents\GitHub\project-2-group-real-estate-industry-project-7\data\raw\external\school\school_location_2020.csv 
school_location_2021.csv downloaded and saved to c:\Users\29557\Documents\GitHub\project-2-group-real-estate-industry-project-7\data\raw\external\school\school_location_2021.csv 
school_location_2022.csv downloaded and saved to c:\Users\29557\Documents\GitHub\project-2-group-real-estate-industry-project-7\data\raw\external\school\school_location_2022.csv 
school_location_2023.csv downloaded and saved to c:\Users\29557\Documents\GitHub\project-2-group-real-estate-industry-project-7\data\raw\external\school\school_location_2023.csv 


2. Download VIC park & reserve location data

In [5]:
import requests
from pathlib import Path
import zipfile

# Define the paths
zip_folder = park_dir / 'park_reserve'   # folder the archive is extracted into
park_file = 'park_reserve.zip'
park_file_path = park_dir / park_file    # where the downloaded archive is stored

# URL of the park reserve zip file
# NOTE(review): this looks like an order-specific delivery link, which may
# expire — confirm it is still valid before relying on this cell.
url = "https://s3.ap-southeast-2.amazonaws.com/cl-isd-prd-datashare-s3-delivery/Order_3QOIU3.zip"

# Ensure the directories exist
park_dir.mkdir(parents=True, exist_ok=True)
zip_folder.mkdir(parents=True, exist_ok=True)

# Download the file and save it to the park_dir.
# (connect, read) timeouts in seconds: requests applies no timeout by default,
# so an unresponsive server would hang this cell until it is interrupted by hand.
response = requests.get(url, timeout=(10, 300))
response.raise_for_status()  # Check for successful download

with open(park_file_path, 'wb') as f:
    f.write(response.content)

print(f"{park_file} downloaded and saved to {park_file_path}")

# Unzip the file into the zip_folder
with zipfile.ZipFile(park_file_path, 'r') as zip_ref:
    zip_ref.extractall(zip_folder)

print(f"File unzipped to {zip_folder}")




KeyboardInterrupt: 

3. Download VIC train station location data

In [8]:
# Each entry is (source URL, file name shown in the log message, local destination path).
files = [
    ("https://vicroadsopendatastorehouse.vicroads.vic.gov.au/opendata/Public_Transport/Patronage/Annual_metropolitan_train_station_entries/Annual_Metropolitan_Train_Station_Entries_2023-24.csv", "metropolitan_train_location_2023.csv", train_station_dir / "metropolitan_train_location_2023.csv"),
    ("https://vicroadsopendatastorehouse.vicroads.vic.gov.au/opendata/Public_Transport/Patronage/Annual%20regional%20train%20station%20entries/Annual_Regional_Train_Station_Entries_2023-24.csv", "regional_train_location_2023.csv", train_station_dir / "regional_train_location_2023.csv"),
]

# add to above files variable to download past years train station data
#    ("https://vicroadsopendatastorehouse.vicroads.vic.gov.au/opendata/Public_Transport/Patronage/Annual%20metropolitan%20train%20station%20entries/Annual%20metropolitan%20train%20station%20entries%202020-21.csv", "metropolitan_train_location_2020.csv", train_station_dir / "metropolitan_train_location_2020.csv"),
#    ("https://vicroadsopendatastorehouse.vicroads.vic.gov.au/opendata/Public_Transport/Patronage/Annual%20regional%20train%20station%20entries/Annual%20regional%20train%20station%20entries%202020-21.csv", "regional_train_location_2020.csv", train_station_dir / "regional_train_location_2020.csv"),
    
#    ("https://vicroadsopendatastorehouse.vicroads.vic.gov.au/opendata/Public_Transport/Patronage/Annual%20metropolitan%20train%20station%20entries/Annual%20metropolitan%20train%20station%20entries%202021-22.csv", "metropolitan_train_location_2021.csv", train_station_dir / "metropolitan_train_location_2021.csv"),
#    ("https://vicroadsopendatastorehouse.vicroads.vic.gov.au/opendata/Public_Transport/Patronage/Annual%20regional%20train%20station%20entries/Annual%20regional%20train%20station%20entries%202021-22.csv", "regional_train_location_2021.csv", train_station_dir / "regional_train_location_2021.csv"),
    
#    ("https://vicroadsopendatastorehouse.vicroads.vic.gov.au/opendata/Public_Transport/Patronage/Annual_metropolitan_train_station_entries/Annual_metropolitan_train_station_entries_2022_23.csv", "metropolitan_train_location_2022.csv", train_station_dir / "metropolitan_train_location_2022.csv"),
#    ("https://vicroadsopendatastorehouse.vicroads.vic.gov.au/opendata/Public_Transport/Patronage/Annual%20regional%20train%20station%20entries/Annual%20regional%20train%20station%20entries%202022-23.csv", "regional_train_location_2022.csv", train_station_dir / "regional_train_location_2022.csv"),


for url, file_name, destination in files:
    # (connect, read) timeouts in seconds. requests applies no timeout by
    # default, so a stalled server would otherwise block this cell forever.
    response = requests.get(url, timeout=(10, 120))
    # Raise on an HTTP error status instead of saving an error page as a CSV.
    response.raise_for_status()

    with open(destination, 'wb') as f:
        f.write(response.content)

    print(f"{file_name} downloaded and saved to {destination} ")

metropolitan_train_location_2023.csv downloaded and saved to c:\Users\29557\Documents\GitHub\project-2-group-real-estate-industry-project-7\data\raw\external\train_station\metropolitan_train_location_2023.csv 
regional_train_location_2023.csv downloaded and saved to c:\Users\29557\Documents\GitHub\project-2-group-real-estate-industry-project-7\data\raw\external\train_station\regional_train_location_2023.csv 


4. Download VIC population data 2021

In [9]:
# Download population data (2021 census datapack for VIC, zip archive)
url = "https://www.abs.gov.au/census/find-census-data/datapacks/download/2021_GCP_STE_for_VIC_short-header.zip"
file_name = "population_2021.zip"
destination = population_dir / file_name

# (connect, read) timeouts in seconds. requests applies no timeout by default,
# so an unresponsive server would otherwise block this cell forever.
response = requests.get(url, timeout=(10, 300))
# Raise on an HTTP error status instead of saving an error page as a zip.
response.raise_for_status()

with open(destination, 'wb') as f:
    f.write(response.content)

print(f"{file_name} downloaded and saved to {destination}")

# Unzip the file into its own sub-folder next to the archive
extraction_path = population_dir / "population_2021"
extraction_path.mkdir(parents=True, exist_ok=True)

with zipfile.ZipFile(destination, 'r') as zip_ref:
    zip_ref.extractall(extraction_path)

print(f"{file_name} unzipped and files extracted to {extraction_path}")

population_2021.zip downloaded and saved to c:\Users\29557\Documents\GitHub\project-2-group-real-estate-industry-project-7\data\raw\external\population\population_2021.zip
population_2021.zip unzipped and files extracted to c:\Users\29557\Documents\GitHub\project-2-group-real-estate-industry-project-7\data\raw\external\population\population_2021


5. Download VIC annual income by region data 2016 - 2020 (latest)

In [10]:
# Download annual income data 2016 - 2020
url = "https://www.abs.gov.au/statistics/labour/earnings-and-working-conditions/personal-income-australia/2020-21-financial-year/Table%201%20-%20Total%20income%2C%20earners%20and%20summary%20statistics%20by%20geography%2C%202016-17%20to%202020-21.xlsx"
file_name = "income_2016_2020.xlsx"
destination = income_dir / file_name

# (connect, read) timeouts in seconds. requests applies no timeout by default,
# so an unresponsive server would otherwise block this cell forever.
response = requests.get(url, timeout=(10, 120))
# Raise on an HTTP error status instead of saving an error page as a workbook.
response.raise_for_status()

with open(destination, 'wb') as f:
    f.write(response.content)

print(f"{file_name} downloaded and saved to {destination}")

income_2016_2020.xlsx downloaded and saved to c:\Users\29557\Documents\GitHub\project-2-group-real-estate-industry-project-7\data\raw\external\income\income_2016_2020.xlsx


     Stop_ID                                        Stop_name   latitude  \
207    19842                                Melbourne Central -37.809939   
209    19854                                  Flinders Street -37.818305   
206    19841                                        Flagstaff -37.811981   
208    19843                                       Parliament -37.811054   
315    64408  Southern Cross Railway Station (Melbourne City) -37.818245   
213    22180                                   Southern Cross -37.817936   
212    19979                                         Jolimont -37.816527   
211    19973                                  North Melbourne -37.806310   
137    19978                                    West Richmond -37.814949   
136    19977                                   North Richmond -37.810398   

      longitude  distance  
207  144.962593  0.408811  
209  144.966964  0.623303  
206  144.955654  0.679847  
208  144.972911  0.908948  
315  144.951691  1.1291