# Download bird songs
A script to download bird sound files, together with their metadata, from the www.xeno-canto.org archive.
The program downloads all the recordings matching the search terms into the subdirectory `data/<birdName>/<country>`, alongside the corresponding JSON metadata files.


In [4]:
import urllib.request, json
import sys
import os

# Directory roots for the downloaded dataset.
# NOTE(review): only bPath is referenced by the functions visible in this
# notebook (save_json builds its output directory from it); savePath and
# basePath are not used by any visible code — confirm before removing.
savePath="./data/xeno-canto-dataset-full-all-Countries/"
bPath = "./data/"
basePath = "data/xeno-canto-dataset"


## Defining necessary functions
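* `save_json` creates the subdirectory `data/<birdName>/<country>` if necessary, downloads and saves one JSON file per result page of a query, and returns the directory path of the saved JSON files
* `read_data` collects one field (for example the recording id) from every recording in the saved JSON files
* `download` combines the two and saves the mp3 file of every recording found by the query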

In [5]:
def save_json(searchTerms, birdName, country):
    """Download the xeno-canto JSON result pages for a query and save them to disk.

    Every result page is written to
    ``<bPath><birdName>/<country>/jsondata_p<N>.json`` where ``bPath`` is the
    module-level data root and any ':' is removed from ``birdName``.

    Parameters
    ----------
    searchTerms : str
        xeno-canto query, e.g. ``"Parus major cnt:France type:song"``.
    birdName : str
        Name of the species subdirectory.
    country : str
        Name of the innermost subdirectory.

    Returns
    -------
    str
        Directory that holds the saved JSON pages.

    Raises
    ------
    urllib.error.URLError
        If a result page cannot be fetched.
    KeyError
        If a response lacks the ``numPages`` or ``recordings`` field.
    """
    # Local import so the block does not depend on a new top-of-file import.
    from urllib.parse import quote

    # create a path to save recordings
    path = bPath + birdName.replace(':', '') + "/" + country
    if not os.path.isdir(path):
        print("Creating subdirectory " + path + " for downloaded files...")
    # exist_ok avoids a crash if the directory appears between check and creation
    os.makedirs(path, exist_ok=True)

    # Percent-encode the whole query (not just spaces) so characters such as
    # '&', '#', '"' or non-ASCII letters cannot corrupt the URL; ':' is kept
    # readable because it separates xeno-canto search tags from their values.
    query = quote(searchTerms, safe=':')

    numPages = 1
    page = 1
    totalRecordings = 0
    # download a json file for every page found in a query
    while page <= numPages:
        print("Loading page " + str(page) + "...")
        url = 'https://www.xeno-canto.org/api/2/recordings?query={0}&page={1}'.format(query, page)
        print(url)
        # the context manager closes the HTTP response even if decoding fails
        with urllib.request.urlopen(url) as jsonPage:
            jsondata = json.loads(jsonPage.read().decode('utf-8'))
        filename = path + "/jsondata_p" + str(page) + ".json"
        with open(filename, 'w') as outfile:
            json.dump(jsondata, outfile)
        # the real page count is only known after the first response
        numPages = int(jsondata['numPages'])
        # count what was actually received instead of assuming 500 per page
        totalRecordings += len(jsondata['recordings'])
        page = page + 1
    print("Found ", numPages, " pages in total.")
    print("Saved json for ", totalRecordings, " files")
    return path

def read_data(searchTerm, path):
    """Collect one field from every recording stored in the saved JSON pages.

    Reads ``<path>/jsondata_p1.json``, ``jsondata_p2.json``, ... and returns the
    value stored under ``searchTerm`` for each entry of each page's
    ``"recordings"`` list, in page order. The number of pages to read is taken
    from the ``numPages`` field of the pages themselves.
    """
    values = []
    total_pages = 1
    current = 1
    while current < total_pages + 1:
        page_file = path + "/jsondata_p" + str(current) + ".json"
        with open(page_file, 'r') as handle:
            payload = json.load(handle)
        # each page repeats the total page count; refresh the loop bound from it
        total_pages = payload['numPages']
        # pull the requested field out of every recording on this page
        values.extend(record[searchTerm] for record in payload["recordings"])
        current += 1
    return values

def download(searchTerms, birdName, country):
    """Download every recording matching a xeno-canto query as an mp3 file.

    The JSON result pages are fetched first (see ``save_json``); each recording
    is then saved in the same directory as
    ``<birdName without ':'><recording id>.mp3``.

    Parameters
    ----------
    searchTerms : str
        xeno-canto query, e.g. ``"Parus major cnt:France type:song"``.
    birdName : str
        Species name used for the subdirectory and as the file-name prefix.
    country : str
        Name of the innermost subdirectory.
    """
    # fetch the metadata pages; path is the directory they were saved to
    path = save_json(searchTerms, birdName, country)
    # recording IDs (used in the file names) and download addresses from the json
    filenamesID = read_data('id', path)
    fileaddress = read_data('file', path)

    # Build the prefix once so the printed name and the saved name always agree
    # (previously only the printed name had ':' stripped from birdName).
    filePrefix = path + "/" + birdName.replace(':', '')

    numfiles = len(filenamesID)
    print("A total of ", numfiles, " files will be downloaded")
    for i, (recordingID, url) in enumerate(zip(filenamesID, fileaddress), start=1):
        target = filePrefix + str(recordingID) + ".mp3"
        if not url:
            # An empty address cannot be fetched (presumably a recording with
            # restricted access); skip it rather than abort the whole batch.
            print("Skipping file ", i, "/", numfiles, target, "(no download address)")
            continue
        if url.startswith("//"):
            # urlretrieve needs an explicit scheme; protocol-relative addresses
            # have reportedly been returned by the API — TODO confirm.
            url = "https:" + url
        print("Saving file ", i, "/", numfiles, target)
        urllib.request.urlretrieve(url, target)


## Download
Example: download all recordings of type "song" for each bird in the list, first restricted to the selected countries and then without a country filter.

*Example query*: query = "Dendrocopos major cnt:Poland type:song"
It can be downloaded with:

download(query, "Dendrocoposmajor", "Poland")

Other options can be specified according to xeno-canto query list
http://www.xeno-canto.org/explore?query=common+snipe

In [10]:
# Countries queried one by one in the first pass.
countries = ['France']

# Latin names of the target species. The trailing "#NNN" notes come from the
# original author (presumably recording counts — TODO confirm).
birds = [
    'Phoenicurus ochruros',     #268
    'Turdus merula',            #353
    'Phylloscopus collybita',   #288
    'Phoenicurus phoenicurus',  #163
    'Erithacus rubecula',       #537
    'Parus major',              #424
    'Alauda arvensis',          #195
    'Turdus philomelos',        #351
    'Troglodytes troglodytes',  #331
    'Fringilla coelebs',        #462
]

# Pass 1: songs per (country, species), saved under <species>/<country>.
for country in countries:
    for bird in birds:
        download('{0} cnt:{1} type:song'.format(bird, country),
                 bird.replace(' ', ''),
                 country)

# Pass 2: songs of each species with no country filter, saved under <species>/countries.
for bird in birds:
    download('{0} type:song'.format(bird),
             bird.replace(' ', ''),
             'countries')

Loading page 1...
https://www.xeno-canto.org/api/2/recordings?query=Emberiza%20citrinella%20cnt:France%20type:song&page=1
Found  1  pages in total.
Saved json for  93  files
A total of  93  files will be downloaded
Saving file  1 / 93 data/xeno-canto-datasetEmberizacitrinella895089.mp3
./data/Emberizacitrinella/France
https://xeno-canto.org/895089/download ./data/Emberizacitrinella/France/Emberizacitrinella895089.mp3
Saving file  2 / 93 data/xeno-canto-datasetEmberizacitrinella895088.mp3
./data/Emberizacitrinella/France
https://xeno-canto.org/895088/download ./data/Emberizacitrinella/France/Emberizacitrinella895088.mp3
Saving file  3 / 93 data/xeno-canto-datasetEmberizacitrinella890281.mp3
./data/Emberizacitrinella/France
https://xeno-canto.org/890281/download ./data/Emberizacitrinella/France/Emberizacitrinella890281.mp3
Saving file  4 / 93 data/xeno-canto-datasetEmberizacitrinella826623.mp3
./data/Emberizacitrinella/France
https://xeno-canto.org/826623/download ./data/Emberizacitrinel

KeyboardInterrupt: 