In [1]:
#import the necessary libraries
import os
import re
import requests
from tqdm import tqdm
import ipywidgets as wg
import urllib.request, json
from collections import Counter

In [2]:

def speciesOfInterest(bird):
    """Return every Xeno-canto recording entry that matches a bird name.

    The name is sent to the Xeno-canto API (v2) as a quoted search phrase and
    the entries of all result pages are collected into a single list.

    Args:
        bird: a string with the name of the bird to search for,
            e.g. 'tropical boubou'.

    Returns:
        A list of dictionaries, one per recording, taken unchanged from the
        'recordings' key of each API response page.

    Raises:
        urllib.error.URLError: if a request to the API fails.
    """
    from urllib.parse import quote  # local import keeps this cell self-contained

    # Percent-encode the whole quoted phrase so that spaces, quotes,
    # apostrophes and non-ASCII characters all yield a valid URL.
    base_link = ('https://www.xeno-canto.org/api/2/recordings?query='
                 + quote('"' + bird + '"'))

    def fetch_page(number):
        """Download and decode one page of search results."""
        with urllib.request.urlopen(base_link + '&page=' + str(number)) as url:
            return json.loads(url.read().decode())

    # The first page also carries the page count, so it is downloaded only
    # once and its recordings are kept instead of being requested again.
    first_page = fetch_page(1)
    recordings = list(first_page.get('recordings', []))

    for number in range(2, int(first_page['numPages']) + 1):
        recordings.extend(fetch_page(number)['recordings'])

    return recordings

In [3]:
def fileSelect(birds):
    """Map each bird to the download links of its 'call' recordings.

    Args:
        birds: an iterable of strings with the names of the birds to look up.

    Returns:
        A dictionary keyed by bird name. Each value is itself a dictionary
        whose keys are generated file names (bird name + running number +
        '.mp3') and whose values are the matching download links.
    """
    suffix = '.mp3'
    catalogue = {}

    for bird in birds:
        # Only recordings whose type is exactly 'call' are kept; the running
        # number in the file name counts those kept recordings from zero.
        call_recordings = (
            entry for entry in speciesOfInterest(bird) if entry['type'] == 'call'
        )
        catalogue[bird] = {
            bird + str(index) + suffix: 'https:' + entry['file']
            for index, entry in enumerate(call_recordings)
        }

    return catalogue

In [4]:
def fileDownload(birds, parent_dir='./xenocanto'):
    """Download the selected recordings of each bird into its own folder.

    For every bird a sub-folder named after it is created under
    ``parent_dir``. Files whose names are already present in that folder are
    skipped, so calling the function again only fetches what is missing.

    Args:
        birds: a list of strings with the names of the birds to download.
        parent_dir: folder under which the per-bird folders are created.
            Defaults to './xenocanto'.
    """
    request_timeout = 60  # seconds; stops a stalled connection from hanging the loop
    birds_dict = fileSelect(birds)

    for bird in birds:
        audio_dir = os.path.join(parent_dir, bird)
        os.makedirs(audio_dir, exist_ok=True)

        # Only fetch files that are not on disk yet; sorting gives a
        # deterministic download order.
        already_downloaded = set(os.listdir(audio_dir))
        pending = sorted(set(birds_dict[bird]) - already_downloaded)

        for file in tqdm(pending, desc="Downloading %s" % bird):
            response = requests.get(birds_dict[bird][file], timeout=request_timeout)

            # Never store an HTTP error page as an .mp3: it would be treated
            # as "already downloaded" on the next run and never retried.
            if not response.ok:
                tqdm.write("Skipping %s: HTTP %s" % (file, response.status_code))
                continue

            # The context manager guarantees the file handle is closed.
            with open(os.path.join(audio_dir, file), 'wb') as audio_file:
                audio_file.write(response.content)

In [5]:
# Birds whose recordings are downloaded by this notebook.
birds = [
    "grey-backed camaroptera",
    "tropical boubou",
    "hartlaub's turaco",
]
fileDownload(birds)

Downloading grey-backed camaroptera: 100%|███████████████████████████████████████████| 125/125 [04:57<00:00,  2.38s/it]
Downloading tropical boubou: 100%|█████████████████████████████████████████████████████| 32/32 [01:18<00:00,  2.44s/it]
Downloading hartlaub's turaco: 100%|███████████████████████████████████████████████████| 15/15 [00:33<00:00,  2.25s/it]
