In [None]:
import requests
import pandas as pd

def get_data(base, data_url, offset=0, limit=50, timeout=30):
    """Fetch one page of records from a dataset's ``records`` endpoint.

    Args:
        base: Base URL of the dataset catalogue. The dataset path is appended
            directly, so it is expected to end with a ``/``.
        data_url: Dataset identifier appended to ``base``.
        offset: Index of the first record to request.
        limit: Maximum number of records to request in this call. Defaults to
            50, the page size this function previously hard-coded.
        timeout: Seconds to wait for the server before ``requests`` raises,
            so a stalled connection cannot hang the notebook forever.

    Returns:
        A list ``[records, max_results, offset]``: ``records`` is the value of
        the response's ``results`` key, ``max_results`` is the value of its
        ``total_count`` key, and ``offset`` is the offset that was requested.
        When the status code is not 200 the error code is printed and
        ``[[], 0, offset]`` is returned.
    """
    # Let requests build the query string instead of formatting it by hand
    url = f'{base}{data_url}/records'
    params = {'limit': limit, 'offset': offset, 'timezone': 'UTC'}
    result = requests.get(url, params=params, timeout=timeout)

    # Anything other than 200 is reported and yields an empty page
    if result.status_code != 200:
        print("ERROR GETTING DATA: ", result.status_code)
        return [[], 0, offset]

    result_json = result.json()
    return [result_json['results'], result_json['total_count'], offset]

# Collect data from API

# Set offset increment; this must match the page size get_data requests (50)
OFFSET_INCREMENT = 50

# Base url (this should be the same for all datasets)
BASE_URL = 'https://data.melbourne.vic.gov.au/api/explore/v2.1/catalog/datasets/'

# Set specific urls for the two datasets
SPECIFIC_PATH_1 = 'bus-stops'
SPECIFIC_PATH_2 = 'pedestrian-counting-system-past-hour-counts-per-minute'


def _fetch_all_records(base, data_url, increment):
    """Page through a dataset with get_data until every record is collected.

    Args:
        base: Base url passed straight through to get_data.
        data_url: Dataset path passed straight through to get_data.
        increment: Amount the offset advances after each request.

    Returns:
        A tuple ``(records, max_results)``: the accumulated list of records
        and the dataset size reported by the first request.
    """
    # The first call supplies the first page, the dataset size and the start offset
    records, max_results, offset = get_data(base, data_url)
    offset += increment

    # Use < rather than != so an overshoot can never keep the loop running
    while len(records) < max_results:
        page = get_data(base, data_url, offset)[0]
        if not page:
            # get_data returns an empty list when a request fails; without this
            # stop the loop would repeat the same failing request forever.
            # NOTE(review): the API may also reject large offsets - confirm
            # against the Explore API documentation.
            print("STOPPED EARLY: collected", len(records), "of", max_results, "records")
            break
        # Add the data collected to the existing records list
        records += page
        # Increase the offset by the offset increment
        offset += increment

    return records, max_results


# Collect every record and the reported dataset size for the first dataset
records_1, max_results_1 = _fetch_all_records(BASE_URL, SPECIFIC_PATH_1, OFFSET_INCREMENT)

# Collect every record and the reported dataset size for the second dataset
records_2, max_results_2 = _fetch_all_records(BASE_URL, SPECIFIC_PATH_2, OFFSET_INCREMENT)

# Convert the records list of dictionaries into pandas dataframes
df_1 = pd.DataFrame(records_1)

df_1  # Display DataFrame 1


In [20]:

# Build DataFrame 2 from the second dataset's list of record dictionaries
df_2 = pd.DataFrame(data=records_2)

# Leave the frame as the cell's final expression so the notebook displays it
df_2


Length of records_2: 0
Max results for df_2: 0
