In [1]:
import os
import requests
import json
from urllib.parse import urlencode
import zipfile
from urllib.request import urlretrieve
import io
import shutil
import os

def fetch_data_from_api(ids, start_period, dimension_at_observation, output_file):
    base_url = "https://api.data.abs.gov.au/data/ABS,ABS_REGIONAL_ASGS2021,/.."
    params = {
        'startPeriod': start_period,
        'dimensionAtObservation': dimension_at_observation,
    }
    
    # Ensure the directory exists
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    for i in range(0, len(ids), 50):  # Split IDs into batches of 50
        batch_ids = ids[i:i + 50]
        url = base_url + '+'.join(map(str, batch_ids)) + '.A?' + urlencode(params)

        try:
            print(f"Fetching data from the API with batch: {batch_ids}...")
            response = requests.get(url, timeout=60)  # Set a longer timeout if necessary
            response.raise_for_status()  # Raise an error for bad responses

            # Parsing the response
            data = response.json()
            print("Data fetched successfully.")

            # Append or save the data (in this example, it appends to a file)
            with open(output_file, 'a') as file:
                json.dump(data, file, indent=4)  # indent=4 for pretty printing
            print(f"Data saved successfully to {output_file}.")

        except requests.exceptions.HTTPError as http_err:
            print(f"HTTP error occurred: {http_err}")
        except requests.exceptions.RequestException as req_err:
            print(f"Request error occurred: {req_err}")
        except Exception as err:
            print(f"Other error occurred: {err}")
            
data_dir = '../data/'
landing_dir = data_dir + 'landing/'
raw_dir = data_dir + 'raw/'




In [2]:
ids=["206041124"]

fetch_data_from_api(ids, '2020', 'AllDimensions', f'{raw_dir}raw_abs.json')

Fetching data from the API with batch: ['206041124']...
Request error occurred: Expecting value: line 1 column 1 (char 0)


In [3]:
# Define the API URL from the "Data query"
url = "https://api.data.abs.gov.au/data/ABS,ABS_REGIONAL_ASGS2021,/..206041124.A?startPeriod=2020&dimensionAtObservation=AllDimensions&format=csv"
# Define the output file path
output_file = f'{landing_dir}raw_abs.csv'

urlretrieve(url, output_file)

('../data/landing/raw_abs.csv', <http.client.HTTPMessage at 0x103d43d90>)

In [4]:
# Define the API URL from the "Data query"
url = "https://www.education.vic.gov.au/Documents/about/research/datavic/dv346-schoollocations2023.csv"
# Define the output file path
output_file = f'{landing_dir}school_locations.csv'

urlretrieve(url, output_file)

('../data/landing/school_locations.csv',
 <http.client.HTTPMessage at 0x103dc50d0>)

In [5]:
# Get points of interest shapefile
foi_url = "https://s3.ap-southeast-2.amazonaws.com/cl-isd-prd-datashare-s3-delivery/Order_MYWBSS.zip"
output_dir = f'{landing_dir}FOI'
zip_dir = f"{output_dir}.zip"

urlretrieve(foi_url, zip_dir) 

# Opens zip file
with zipfile.ZipFile(zip_dir, 'r') as zip_ref:
    zip_ref.extractall(output_dir)

# Deletes the zip file as the unzipped file has its own folder now
os.remove(zip_dir)

# Data is hidden in folders 
shapefile_dir = output_dir + '/ll_gda94/esrishape/whole_of_dataset/victoria/VMFEAT/'

# Move all files into the topmost folder for ease of coding
file_names = os.listdir(shapefile_dir)
for file_name in file_names:
    path = os.path.join(shapefile_dir, file_name)
    # Force move even if file exists
    if os.path.samefile(path, os.path.join(f"{output_dir}/", file_name)):
        shutil.move(path, f"{output_dir}/")
        continue

# Remove the empty folder within the folder within the folder etc
shutil.rmtree(f"{output_dir}/ll_gda94")


In [7]:
# Step 1: URL for the Parks and Reserves shapefile
url = "https://data.casey.vic.gov.au/api/v2/catalog/datasets/parks-and-reserves1/exports/shp"

# Step 2: Send a request to download the file
response = requests.get(url)

# Step 3: Check if the request was successful
if response.status_code == 200:
    print("Download successful, extracting the file...")
    
    # Step 4: Open the downloaded zip file in memory and extract its contents
    zip_file = zipfile.ZipFile(io.BytesIO(response.content))
    extract_path = f"{landing_dir}parks_and_reserves_shapefile"
    zip_file.extractall(extract_path)
    
    # Step 5: Verify the extracted files
    extracted_files = os.listdir(extract_path)
    print("Extracted files:", extracted_files)
else:
    print("Failed to download the shapefile. Status code:", response.status_code)

Download successful, extracting the file...
Extracted files: ['parks-and-reserves1.dbf', 'parks-and-reserves1.shx', 'parks-and-reserves1.cpg', 'parks-and-reserves1.shp', 'parks-and-reserves1.prj']
