In [1]:
import urllib.request, json
import sys
import os

#savePath="../data/xeno-canto-dataset-Australia-call/"
savePath="../data/xeno-canto-dataset-Australia-song/"

In [2]:
def save_json(searchTerms, birdName, country):
    numPages = 1
    page = 1
    # create a path to save json files and recordings
    path = savePath + birdName.replace(':', '') + "/" + country
    if not os.path.exists(path):
        print("Creating subdirectory " + path + " for downloaded files...")
        os.makedirs(path)
        # download a json file for every page found in a query
    while page < numPages + 1:
        print("Loading page " + str(page) + "...")
        url = 'https://www.xeno-canto.org/api/2/recordings?query={0}&page={1}'.format(searchTerms.replace(' ', '%20'),page)
        print(url)
        jsonPage = urllib.request.urlopen(url)
        jsondata = json.loads(jsonPage.read().decode('utf-8'))
        filename = path + "/jsondata_p" + str(page) + ".json"
        with open(filename, 'w') as outfile:
            json.dump(jsondata, outfile)
        # check number of pages
        numPages = jsondata['numPages']
        page = page + 1
    print("Found ", numPages, " pages in total.")
    # return number of files in json
    # each page contains 500 results, the last page can have less than 500 records
    print("Saved json for ", (numPages - 1) * 500 + len(jsondata['recordings']), " files")
    return path


# reads the json and return the list of values for selected json part
# i.e. "id" - ID number, "type": type of the bird sound such as call or song
# for all Xeno Canto files found with the given search terms.
def read_data(searchTerm, path):
    data = []
    numPages = 1
    page = 1
    # read all pages and save results in a list
    while page < numPages + 1:
        # read file
        with open(path + "/jsondata_p" + str(page) + ".json", 'r') as jsonfile:
            jsondata = jsonfile.read()
        jsondata = json.loads(jsondata)
        # check number of pages
        numPages = jsondata['numPages']
        # find "recordings" in a json and save a list with a search term
        for k in range(len(jsondata['recordings'])):
            data.append(jsondata["recordings"][k][searchTerm])
        page = page + 1
    return data


# downloads all sound files found with the search terms into xeno-canto directory
# into catalogue named after the search term (i.e. Apus apus)
# filename have two parts: the name of the bird in latin and ID number
def download(searchTerms, birdName, country):
    # create data/xeno-canto-dataset directory
    path = save_json(searchTerms, birdName, country)
    # get filenames: recording ID and bird name in latin from json
    filenamesID = read_data('id', path)
    filenamesCountry = read_data('cnt', path)
    # get website recording http download address from json
    fileaddress = read_data('file', path)
    numfiles = len(filenamesID)
    print("A total of ", numfiles, " files will be downloaded")
    for i in range(0, numfiles):
        print("Saving file ", i + 1, "/", numfiles,
              savePath + birdName.replace(':', '') + filenamesID[
                  i] + ".mp3")
        print(fileaddress[i], path + "/" + birdName + filenamesID[i] + ".mp3")
        
        urllib.request.urlretrieve(fileaddress[i], path + "/" + birdName + filenamesID[i] + ".mp3")

In [3]:
import ssl

ssl._create_default_https_context = ssl._create_unverified_context

#https://australian.museum/learn/animals/birds/birds-in-backyards-top-30-urban-birds/
AusBirds = [
            'Alisterus scapularis',
            'Gymnorhina tibicen',
            'Corvus coronoides',
            'Eudynamys orientalis',
            'Acridotheres tristis',
            'Ocyphaps lophotes',
            'Platycercus elegans',
            'Acanthorhynchus tenuirostris',
            'Eopsaltria australis',
            'Eolophus roseicapilla',
            'Cracticus torquatus',
            'Rhipidura albiscapa',
            'Passer domesticus',
            'Dacelo novaeguineae',
            'Anthochaera chrysoptera',
            'Grallina cyanoleuca',
            'Phylidonyris novaehollandiae',
            'Manorina melanocephala',
            'Strepera graculina',
            'Trichoglossus moluccanus','Trichoglossus ornatus','Trichoglossus flavoviridis','Trichoglossus chlorolepidotus', #*
            'Anthochaera carunculata',
            'Neochmia temporalis', 'Neochmia phaeton','Neochmia ruficauda', #**
            'Pycnonotus jocosus',
            'Zosterops lateralis',
            'Pardalotus punctatus', 'Pardalotus quadragintus', 'Pardalotus rubricatus', 'Pardalotus striatus', #**
            'Spilopelia chinensis',
            'Cacatua galerita',
            'Malurus cyaneus',
            'Rhipidura leucophrys',
            'Calyptorhynchus funereus'           
]
for bird in AusBirds:
    download(bird + ' type:call', bird.replace(' ', ''), 'countries')
    
for bird in AusBirds:
    download(bird + ' type:song', bird.replace(' ', ''), 'countries')


Creating subdirectory ../data/xeno-canto-dataset-Australia-song/Pardalotusquadragintus/countries for downloaded files...
Loading page 1...
https://www.xeno-canto.org/api/2/recordings?query=Pardalotus%20quadragintus%20type:song&page=1
Found  1  pages in total.
Saved json for  0  files
A total of  0  files will be downloaded
Creating subdirectory ../data/xeno-canto-dataset-Australia-song/Pardalotusrubricatus/countries for downloaded files...
Loading page 1...
https://www.xeno-canto.org/api/2/recordings?query=Pardalotus%20rubricatus%20type:song&page=1
Found  1  pages in total.
Saved json for  12  files
A total of  12  files will be downloaded
Saving file  1 / 12 ../data/xeno-canto-dataset-Australia-song/Pardalotusrubricatus641216.mp3
https://xeno-canto.org/641216/download ../data/xeno-canto-dataset-Australia-song/Pardalotusrubricatus/countries/Pardalotusrubricatus641216.mp3
Saving file  2 / 12 ../data/xeno-canto-dataset-Australia-song/Pardalotusrubricatus521834.mp3
https://xeno-canto.org/

Saving file  21 / 42 ../data/xeno-canto-dataset-Australia-song/Pardalotusstriatus197584.mp3
https://xeno-canto.org/197584/download ../data/xeno-canto-dataset-Australia-song/Pardalotusstriatus/countries/Pardalotusstriatus197584.mp3
Saving file  22 / 42 ../data/xeno-canto-dataset-Australia-song/Pardalotusstriatus171818.mp3
https://xeno-canto.org/171818/download ../data/xeno-canto-dataset-Australia-song/Pardalotusstriatus/countries/Pardalotusstriatus171818.mp3
Saving file  23 / 42 ../data/xeno-canto-dataset-Australia-song/Pardalotusstriatus86003.mp3
https://xeno-canto.org/86003/download ../data/xeno-canto-dataset-Australia-song/Pardalotusstriatus/countries/Pardalotusstriatus86003.mp3
Saving file  24 / 42 ../data/xeno-canto-dataset-Australia-song/Pardalotusstriatus706584.mp3
https://xeno-canto.org/706584/download ../data/xeno-canto-dataset-Australia-song/Pardalotusstriatus/countries/Pardalotusstriatus706584.mp3
Saving file  25 / 42 ../data/xeno-canto-dataset-Australia-song/Pardalotusstriat