Code for API v2.1 City of Melbourne data collection

In [None]:
# Import required modules
import requests
import pandas as pd

In [None]:
# Function to get data from website using API
def get_data(base: str, data_url: str, offset: int = 0,
             limit: int = 50, timeout: float = 30) -> list:
    """Fetch one page of records from a dataset through the API.

    Args:
        base: Base URL of the dataset catalog; ``data_url`` is appended
            directly to it, so it should end with a slash.
        data_url: Identifier of the dataset to read.
        offset: Index of the first row to retrieve.
        limit: Maximum number of rows requested in this call. Callers that
            page through a dataset should advance ``offset`` by this amount.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the caller indefinitely.

    Returns:
        A list ``[records, max_results, offset]`` where ``records`` is the
        list stored under the response's ``results`` key, ``max_results`` is
        the response's ``total_count`` and ``offset`` is the value passed in.
        When the status code is not 200 the error code is printed and
        ``records`` is ``[]`` and ``max_results`` is ``0``.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).
    """
    # Set the filters: limit is the page size, offset says where to start
    filters = f'records?limit={limit}&offset={offset}&timezone=UTC'
    # Make the url from the base, the dataset path and the filters
    url = f'{base}{data_url}/{filters}'
    # Use the requests function to get the data
    result = requests.get(url, timeout=timeout)
    # Check that the request works, status code 200 = successful
    if result.status_code == 200:
        # Decode the JSON body of the response
        result_json = result.json()
        # Total number of rows in the dataset, as reported by the API
        max_results = result_json['total_count']
        # The rows returned for this page
        records = result_json['results']
    else:
        # If data is not collected correctly report the status code
        print("ERROR GETTING DATA: ", result.status_code)
        max_results = 0
        records = []
    # Return the page of records, the dataset total and the offset used
    return [records, max_results, offset]



# Collect data from API
# Number of rows the offset advances by between requests
# (needs to match the page size requested by the get_data function)
OFFSET_INCREMENT = 50
# Base url (this should be the same for all datasets)
BASE_URL = 'https://data.melbourne.vic.gov.au/api/explore/v2.1/catalog/datasets/'
# Set specific url
# (change this variable for the dataset you are working with)
SPECIFIC_PATH = 'pre-colonial-plant-list'
# Fetch the first page; get_data returns [records, max_results, offset]
result = get_data(BASE_URL, SPECIFIC_PATH)
# Rows collected so far
records = result[0]
# Total number of rows in the dataset, as reported by the first response
max_results = result[1]
# Offset of the next page to request
offset = result[2] + OFFSET_INCREMENT
# Keep requesting pages while fewer rows than the reported total have been
# collected. Using "<" rather than "!=" guarantees the loop also ends if the
# collected count ever exceeds the total.
while len(records) < max_results:
    # Call the get data function again with the new offset value
    data = get_data(BASE_URL, SPECIFIC_PATH, offset)
    # An empty page means the request failed or no more rows are available;
    # stop here instead of requesting pages forever
    if not data[0]:
        print("STOPPED EARLY: collected", len(records), "of", max_results, "records")
        break
    # Add the data collected to the existing records list
    records += data[0]
    # Move on to the next page
    offset += OFFSET_INCREMENT
# Convert the records list of dictionaries into a pandas dataframe
df = pd.DataFrame(records)
# Display the dataframe
df

    

    

    