In [1]:
import requests
import pandas as pd

## Import data API 2.0 from Open Data Melbourne

In [2]:
# Function that extracts the data from the JSON 
def get_data(base, data_url, offset = 0):
    
    filters = f'records?limit={100}&offset={offset}&timezone=UTC'
    url = f'{base}{data_url}/{filters}'
    result = requests.get(url)
    # if you need to extract more than 9900 records, enter the api key like below
    #result = requests.get(url + f'&apikey={API_KEY}')
    result_json = result.json()
    max_results = result_json['total_count']
    links = result_json['links']
    records = result_json['records']
    records_df = pd.json_normalize(records)
    
    # fix columns names
    records_df.drop(columns=['links'],inplace=True)
    column_names = records_df.columns.values.tolist()
    
    column_names = [i.split('.')[-1] for i in column_names]
    records_df.columns = column_names
    
    next_url = None
    
    # get next url
    if records_df.shape[0] != max_results:
        for l in links:
            if l['rel']=='next':
                next_url = l['href']
                
    return [records_df, next_url, column_names]

In [3]:
# Appends each set of data 
def add_to_dataframe(dataframe, new_data):
    df = pd.concat([dataframe, new_data])
    return df

In [4]:
# Extracts the data from the url to produce a pandas dataset
def extract_dataframe(dataset):
    df_new, url_next, column_names = get_data(base_url, dataset)
    df = pd.DataFrame(columns=column_names)
    while url_next != None:
        df_new, url_next, column_names = get_data(base_url, dataset, df.shape[0])
        df = add_to_dataframe(df, df_new)
        # This limits the dataset to only 9900 entries, should be removed if you want to import more data
        # To get more than 9900, enter the API key in the get data function
        if df.shape[0] == 9900:
            break
    return df

In [6]:
# Base url for v2 api
base_url = 'https://data.melbourne.vic.gov.au/api/explore/v2.0/catalog/datasets/'

# A filter at the end of the url
number_of_records = 100
filters = f'records?limit={number_of_records}&offset=0&timezone=UTC'

# The generated url
# dataset_url = f'{base_url}{dataset}/{filters}'

In [7]:
# Call the function and produce a dataframe
df = extract_dataframe("butterfly-biodiversity-survey-2017")

In [8]:
df

Unnamed: 0,id,timestamp,size,site,sloc,walk,date,time,vegwalktime,vegspecies,...,brow,csem,aand,jvil,paur,ogyr,gmac,datetime,lon,lat
0,afe0da3ab3424a01ead1e972c2f6bb4de47723c6,2022-11-13T10:02:25.181Z,281,Womens Peace Gardens,2,1,2017-02-26,0001-01-01T11:42:00+00:00,1.3128,Schinus molle,...,0,0,0,0,0,0,0,2017-02-26T11:42:00+00:00,144.9244,-37.7912
1,d4c7508ee7a01eb0fc4620ffa5fae11acacb1a56,2022-11-13T10:02:25.181Z,279,Argyle Square,1,1,2017-11-02,0001-01-01T10:30:00+00:00,0.3051,Rosmarinus officinalis,...,0,0,0,0,0,0,0,2017-02-11T10:30:00+00:00,144.9665,-37.8023
2,26abf5c9ec285ef6231504f5bc6253c840eac3b7,2022-11-13T10:02:25.181Z,273,Argyle Square,2,1,2017-12-01,0001-01-01T10:35:00+00:00,0.3620,Euphorbia sp.,...,0,0,0,0,0,0,0,2017-01-12T10:35:00+00:00,144.9665,-37.8026
3,007380812d63dd3fd3b6256314ea829b2305ef4f,2022-11-13T10:02:25.181Z,277,Westgate Park,4,1,2017-03-03,0001-01-01T11:44:00+00:00,3.1585,Melaleuca lanceolata,...,0,0,0,0,0,0,0,2017-03-03T11:44:00+00:00,144.9089,-37.8316
4,1fec14405d39a3e5d45686c997a2de65d3be6c20,2022-11-13T10:02:25.181Z,267,Argyle Square,1,3,2017-01-15,0001-01-01T12:33:00+00:00,0.4432,Mentha sp.,...,0,0,0,0,0,0,0,2017-01-15T12:33:00+00:00,144.9662,-37.8027
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51,4083e32695c6dd35343652c7074ac9acd3f2c268,2022-11-13T10:02:25.181Z,280,Fitzroy-Treasury Gardens,3,2,2017-06-02,0001-01-01T17:44:00+00:00,0.5132,Tagetes sp.,...,0,0,0,0,0,0,0,2017-02-06T17:44:00+00:00,144.9819,-37.8136
52,4a3fce451cafdd252e70c5c3792de2f155acf082,2022-11-13T10:02:25.181Z,284,Westgate Park,4,2,2017-02-02,0001-01-01T13:57:00+00:00,2.1947,Myoporum parvifolium,...,0,0,0,0,0,0,0,2017-02-02T13:57:00+00:00,144.9092,-37.8311
53,04c84bf565bf345efc4d7e14bd0e8076e29949de,2022-11-13T10:02:25.181Z,274,Westgate Park,5,3,2017-06-03,0001-01-01T15:43:00+00:00,4.2408,Cassinia arcuata,...,0,0,0,0,0,0,0,2017-03-06T15:43:00+00:00,144.9106,-37.8299
54,ee1fa2574a6615106088141d467ed4ebcbedaca6,2022-11-13T10:02:25.181Z,278,Westgate Park,4,1,2017-02-02,0001-01-01T11:05:00+00:00,1.5948,Xerochrysum viscosum,...,0,0,0,0,0,0,0,2017-02-02T11:05:00+00:00,144.9093,-37.8316
