In [20]:
import requests
import os
import urllib.parse 

In [21]:
# --- Download configuration ---

# Root folder for all downloads; created up front, no error if it already exists
main_directory = "Bird_Recordings"
os.makedirs(main_directory, exist_ok=True)

# Upper bound on the number of audio files saved per genus
max_files_per_genus = 500

# Endpoint that is queried for recording metadata
api_base_url = "https://xeno-canto.org/api/2/recordings"

# Genus names to query (ten per row).
# NOTE(review): a few entries (e.g. "Mystery", "Sonus") do not look like bird
# genera -- confirm they are meant to be included.
bird_genera = [
    "Carpodacus", "Luscinia", "Podiceps", "Bucephala", "Acanthis", "Glaucidium", "Pluvialis", "Hydroprogne", "Sylvia", "Calcarius",
    "Panurus", "Uria", "Perdix", "Eptesicus", "Ardea", "Spatula", "Pica", "Prunella", "Parus", "Oriolus",
    "Calandrella", "Stercorarius", "Rallus", "Sternula", "Branta", "Actitis", "Sitta", "Barbastella", "Picoides", "Chroicocephalus",
    "Surnia", "Iduna", "Anser", "Nucifraga", "Mergellus", "Lymnocryptes", "Sonus", "Dendrocopos", "Riparia", "Jynx",
    "Numenius", "Pinicola", "Garrulus", "Cecropis", "Porzana", "Picus", "Nycticorax", "Limosa", "Calidris", "Lanius",
    "Phylloscopus", "Oenanthe", "Lullula", "Nyctalus", "Lyrurus", "Phalacrocorax", "Fringilla", "Troglodytes", "Milvus", "Hirundo",
    "Aythya", "Locustella", "Ficedula", "Plecotus", "Alcedo", "Dryobates", "Aegithalos", "Poecile", "Cepphus", "Remiz",
    "Metrioptera", "Columba", "Meconema", "Ciconia", "Bucanetes", "Aix", "Fulica", "Recurvirostra", "Perisoreus", "Delichon",
    "Ichthyaetus", "Linaria", "Alauda", "Scolopax", "Chlidonias", "Alca", "Vespertilio", "Tetrastes", "Hydrocoloeus", "Coturnix",
    "Periparus", "Pernis", "Phoenicurus", "Charadrius", "Crex", "Aquila", "Certhia", "Motacilla", "Grus", "Mystery",
    "Phalaropus", "Coloeus", "Erithacus", "Thalasseus", "Arenaria", "Pipistrellus", "Vanellus", "Gryllus", "Plectrophenax", "Myotis",
    "Dryocopus", "Passer", "Muscicapa", "Zapornia", "Strix", "Eremophila", "Xenus", "Gallinago", "Clangula", "Serinus",
    "Caprimulgus", "Streptopelia", "Otus", "Chloris", "Asio", "Mergus", "Psophus", "Lagopus", "Larus", "Gavia",
]

In [22]:
# Download up to max_files_per_genus audio files for every genus in bird_genera.
#
# For each genus the recordings endpoint is queried once. Genera for which the
# response lists fewer than min_sounds_per_genus recordings are skipped; for the
# rest, each recording's "file" URL is saved as
# <main_directory>/<genus>/<gen>_<id>.mp3.
#
# The cell is safe to re-run after an interruption: files that already exist
# are kept and counted instead of being downloaded again, and every download is
# written to a temporary ".part" file first so an aborted transfer never leaves
# a truncated file under its final name.
#
# NOTE(review): only the recordings contained in the single response are used;
# if the API paginates its results, later pages are never requested -- confirm
# that one page is enough for max_files_per_genus.

# Minimum number of recordings a genus must have before anything is downloaded
min_sounds_per_genus = 200

# Seconds to wait on a single HTTP request. Without a timeout a stalled
# connection blocks the whole run until it is interrupted by hand.
request_timeout = 30


def _fetch_genus_recordings(session, genus):
    """Return the list of recordings reported for ``genus``, or None on failure.

    A failure (network error, non-200 status, unparsable body) is printed and
    reported as None so the caller can move on to the next genus.
    """
    try:
        # Let requests build and encode the query string
        response = session.get(
            api_base_url,
            params={"query": f"gen:{genus}"},
            timeout=request_timeout,
        )
    except requests.RequestException as e:
        print(f"API request for {genus} failed: {e}")
        return None

    if response.status_code != 200:
        print(f"API request for {genus} failed with status code {response.status_code}")
        return None

    try:
        data = response.json()
    except ValueError as e:
        print(f"API response for {genus} could not be parsed as JSON: {e}")
        return None

    # A response without a "recordings" key is treated as "no recordings"
    return data.get('recordings', [])


def _download_recording(session, audio_url, filename):
    """Download ``audio_url`` and store it at ``filename``.

    Raises requests.RequestException on network/HTTP errors and OSError on
    file-system errors. ``filename`` is only created once the complete body
    has been written.
    """
    partial_path = filename + ".part"
    try:
        response = session.get(audio_url, timeout=request_timeout)
        # Do not save an HTTP error page as if it were audio
        response.raise_for_status()
        with open(partial_path, 'wb') as audio_file:
            audio_file.write(response.content)
        os.replace(partial_path, filename)
    finally:
        # Still present only if something above failed or was interrupted
        if os.path.exists(partial_path):
            os.remove(partial_path)


# One session for the whole run so connections to the server are reused
with requests.Session() as session:
    for bird_genus in bird_genera:
        recordings = _fetch_genus_recordings(session, bird_genus)
        if recordings is None:
            continue

        # Proceed only if the genus has enough sounds
        if len(recordings) < min_sounds_per_genus:
            print(f"Skipping {bird_genus} because it has less than {min_sounds_per_genus} sounds.")
            continue

        # Create a folder for the genus within the main directory
        genus_folder = os.path.join(main_directory, bird_genus)
        os.makedirs(genus_folder, exist_ok=True)

        # Number of files available locally for this genus: files downloaded
        # now plus files kept from an earlier run
        files_downloaded = 0

        for recording in recordings:
            if files_downloaded >= max_files_per_genus:
                break

            # Ignore recordings without an audio URL in the "file" field
            audio_url = recording.get('file')
            if not audio_url:
                continue

            filename = os.path.join(genus_folder, f"{recording['gen']}_{recording['id']}.mp3")

            # Resume support: keep non-empty files from a previous run
            if os.path.isfile(filename) and os.path.getsize(filename) > 0:
                files_downloaded += 1
                continue

            try:
                _download_recording(session, audio_url, filename)
            except (requests.RequestException, OSError) as e:
                # Best effort: report the failure and continue with the next recording
                print(f"Error downloading {audio_url}: {e}")
            else:
                files_downloaded += 1

        print(f"{files_downloaded} audio files for {bird_genus} downloaded and organized.")

500 audio files for Carpodacus downloaded and organized.
500 audio files for Luscinia downloaded and organized.
500 audio files for Podiceps downloaded and organized.
310 audio files for Bucephala downloaded and organized.
500 audio files for Acanthis downloaded and organized.
500 audio files for Glaucidium downloaded and organized.
500 audio files for Pluvialis downloaded and organized.
365 audio files for Hydroprogne downloaded and organized.
500 audio files for Sylvia downloaded and organized.
500 audio files for Calcarius downloaded and organized.
365 audio files for Panurus downloaded and organized.
Skipping Uria because it has less than 200 sounds.
336 audio files for Perdix downloaded and organized.
Skipping Eptesicus because it has less than 200 sounds.
500 audio files for Ardea downloaded and organized.
500 audio files for Spatula downloaded and organized.
500 audio files for Pica downloaded and organized.
500 audio files for Prunella downloaded and organized.
500 audio files 

KeyboardInterrupt: 