Code for API v2.1 City of Melbourne data collection

In [6]:
# Import required modules
import requests
import pandas as pd

In [9]:
# Function to get data from website using API
def get_data_single(base, SPECIFIC_PATH, apikey, offset=0, limit=100, timeout=30):
    """Fetch one page of records from a dataset's ``records`` endpoint.

    Args:
        base: Base URL of the dataset catalogue, ending with a slash.
        SPECIFIC_PATH: Dataset identifier appended to ``base``.
        apikey: API key, sent as the ``apikey`` query parameter.
        offset: Index of the first record to request.
        limit: Maximum number of records to request for this page.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error instead of blocking forever.

    Returns:
        A list ``[records, max_results, offset]``: ``records`` is the list
        stored under the response's ``results`` key, ``max_results`` is the
        response's ``total_count`` value, and ``offset`` is the offset that
        was requested. If the status code is not 200 the error code is
        printed and ``[[], 0, offset]`` is returned.
    """
    # Endpoint for the dataset's records; the filters go in the query string.
    url = f'{base}{SPECIFIC_PATH}/records'
    # Passing the filters through `params` lets requests URL-encode every
    # value (including the API key) instead of pasting them in verbatim.
    query = {
        'limit': limit,
        'offset': offset,
        'timezone': 'UTC',
        'apikey': apikey,
    }
    result = requests.get(url, params=query, timeout=timeout)
    # Status code 200 means the request succeeded; anything else is reported
    # and turned into an empty page so the caller can decide what to do.
    if result.status_code != 200:
        print("ERROR GETTING DATA: ", result.status_code)
        return [[], 0, offset]
    # Parse the JSON body once and pull out the page and the dataset total.
    result_json = result.json()
    return [result_json['results'], result_json['total_count'], offset]


def fetch_all_data(SPECIFIC_PATH):
    """Download all records of a dataset and return them as a DataFrame.

    Prompts for an API key, then calls ``get_data_single`` repeatedly,
    moving the offset forward until the number of collected records
    reaches the total reported by the first response.

    Args:
        SPECIFIC_PATH: Dataset identifier appended to the base URL.

    Returns:
        A ``pandas.DataFrame`` built from the collected list of record
        dictionaries. If a page comes back empty before the reported total
        is reached (for example because a request failed), a message is
        printed and the records collected so far are returned.
    """
    # Base url (this should be the same for all datasets)
    base_url = 'https://data.melbourne.vic.gov.au/api/explore/v2.1/catalog/datasets/'
    # Ask the user for the API key
    apikey = input("Please enter your api key")
    # First page: gives the initial records and the size of the whole dataset
    records, max_results, _ = get_data_single(base_url, SPECIFIC_PATH, apikey)
    # Keep requesting pages while there are still records missing
    while len(records) < max_results:
        # The next page starts right after the records already collected,
        # so the offset stays correct whatever page size the API returns.
        offset = len(records)
        page = get_data_single(base_url, SPECIFIC_PATH, apikey, offset)[0]
        if not page:
            # An empty page means an error or no more data; without this
            # guard the loop would request the same offset forever.
            print("STOPPED EARLY: fetched", len(records), "of", max_results, "records")
            break
        records.extend(page)
    # Convert the list of record dictionaries into a pandas dataframe
    return pd.DataFrame(records)


In [10]:
# Identifier of the dataset to download
# (edit this value to collect a different dataset)
SPECIFIC_PATH = 'laneways-with-greening-potential'
# Collect every record of that dataset into a dataframe and display it
new_df = fetch_all_data(SPECIFIC_PATH=SPECIFIC_PATH)
print(new_df)


                                          geo_point_2d  \
0    {'lon': 144.9623447558369, 'lat': -37.81980050...   
1    {'lon': 144.97170834375964, 'lat': -37.8109763...   
2    {'lon': 144.97244046374837, 'lat': -37.8111115...   
3    {'lon': 144.97077449834737, 'lat': -37.8116123...   
4    {'lon': 144.9713761243581, 'lat': -37.81247349...   
..                                                 ...   
241  {'lon': 144.9713271143983, 'lat': -37.80873201...   
242  {'lon': 144.96646689470336, 'lat': -37.8118915...   
243  {'lon': 144.96051751381623, 'lat': -37.8098084...   
244  {'lon': 144.96798312015957, 'lat': -37.8075281...   
245  {'lon': 144.97079029463796, 'lat': -37.8091174...   

                                             geo_shape  segid_1  mapbase_mc  \
0    {'type': 'Feature', 'geometry': {'coordinates'...    10714     10714.0   
1    {'type': 'Feature', 'geometry': {'coordinates'...    20229     20229.0   
2    {'type': 'Feature', 'geometry': {'coordinates'...    20230   