In [6]:
import requests
import pandas as pd
import os


# Root directory for the xeno-canto data; the helpers below use it as their
# default base directory and keep one sub-folder per species inside it.
SAVE_PATH = "../data/xeno-canto/"

In [7]:
def _fetch_recording_bytes(recording_url, timeout):
    """Return the bytes served at ``recording_url``, or ``None`` if they cannot be fetched."""
    if not recording_url:
        # No usable download link in the API record.
        return None
    if recording_url.startswith("//"):
        # requests rejects scheme-less (protocol-relative) URLs, so add one.
        recording_url = "https:" + recording_url
    try:
        response = requests.get(recording_url, timeout=timeout)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        # Do not store an error page as if it were audio.
        return None
    return response.content


def download_bird_recordings(bird_species_df, save_directory=SAVE_PATH, resume=False, timeout=30):
    """Download xeno-canto recordings for every species listed in ``bird_species_df``.

    For each value in the ``'Latin name'`` column the xeno-canto API is queried
    page by page, and every recording whose quality field ``q`` is ``'A'``,
    ``'B'``, ``'C'`` or ``'no score'`` is saved as
    ``<save_directory>/<species name>/<recording id>.mp3``.

    Parameters
    ----------
    bird_species_df : pandas.DataFrame
        Frame with a ``'Latin name'`` column holding the species names to query.
    save_directory : str
        Base directory; the per-species folder is created if it is missing.
    resume : bool
        ``False`` (default): a species whose folder already contains files is
        skipped entirely. ``True``: the species is queried again and only
        recordings that are missing (or stored as empty files) are downloaded,
        which allows continuing after an interrupted run.
    timeout : float
        Timeout in seconds passed to every HTTP request.
    """
    api_url = "https://www.xeno-canto.org/api/2/recordings"
    accepted_qualities = ('A', 'B', 'C', 'no score')

    for species_name in bird_species_df['Latin name']:
        species_directory = os.path.join(save_directory, species_name)
        # Creating the folder here avoids a FileNotFoundError from os.listdir
        # when the folder-creation step was not run beforehand.
        os.makedirs(species_directory, exist_ok=True)
        if not resume and os.listdir(species_directory):
            print(f"Skipping downloading for species {species_name}...")
            continue

        downloaded = 0  # running count across all result pages of this species
        page = 1
        num_pages = 1  # replaced by the real page count after the first response
        while page <= num_pages:
            try:
                # `params` lets requests URL-encode the species name.
                response = requests.get(
                    api_url,
                    params={"query": species_name, "page": page},
                    timeout=timeout,
                )
            except requests.RequestException as error:
                print(f"Failed to retrieve data for {species_name}: {error}")
                break
            if response.status_code != 200:
                print(f"Failed to retrieve data for {species_name}")
                break

            data = response.json()
            num_pages = int(data['numPages'])
            if int(data['numRecordings']) <= 0:
                break

            for recording in data['recordings']:
                if recording['q'] not in accepted_qualities:
                    continue
                recording_id = recording['id']
                file_path = os.path.join(species_directory, f"{recording_id}.mp3")
                if resume and os.path.exists(file_path) and os.path.getsize(file_path) > 0:
                    # Already on disk from an earlier run.
                    continue
                # Fetch first and open the file afterwards, so an interrupted
                # or failed download does not leave an empty .mp3 behind.
                audio = _fetch_recording_bytes(recording['file'], timeout)
                if audio is None:
                    print(f"Failed to download recording {recording_id} for {species_name}")
                    continue
                with open(file_path, 'wb') as f:
                    f.write(audio)
                downloaded += 1
                print(f"Downloaded recording {downloaded} for {species_name}")
            page += 1

### Load names of species with `priority` = 1

In [8]:
# Read the comma-separated table of candidate species.
bird_species_df = pd.read_csv(
    '../data/selected_species.csv',
    sep=',',
)

In [9]:
# Keep only the rows whose 'Priority' column equals 1.
is_priority_one = bird_species_df['Priority'].eq(1)
bird_species_priority_df = bird_species_df.loc[is_priority_one]

In [10]:
# Single-column frame holding just the Latin names of the priority species.
species = bird_species_priority_df['Latin name'].to_frame()

In [11]:
# Display the selected species names.
species

Unnamed: 0,Latin name
1,Riparia riparia
8,Corvus frugilegus
10,Anser albifrons
11,Anser fabalis
13,Columba livia domestica
15,Columba palumbus
17,Apus apus
18,Coloeus monedula
19,Sitta europaea
20,Anas platyrhynchos


### Create the necessary folders where the data will be saved

In [12]:
def create_folders_for_bird_species(bird_species_df, base_directory=SAVE_PATH):
    """Ensure a sub-folder of ``base_directory`` exists for every species.

    One folder is made per value in the ``'Latin name'`` column of
    ``bird_species_df``; folders that already exist are left untouched.
    A confirmation line is printed for each species either way.
    """
    for latin_name in bird_species_df['Latin name']:
        target_dir = os.path.join(base_directory, latin_name)
        os.makedirs(target_dir, exist_ok=True)
        print(f"Created folder for species: {latin_name} at {target_dir}")

In [13]:
# Make sure every selected species has its own folder under SAVE_PATH.
create_folders_for_bird_species(species)

Created folder for species: Riparia riparia at ../data/xeno-canto/Riparia riparia
Created folder for species: Corvus frugilegus at ../data/xeno-canto/Corvus frugilegus
Created folder for species: Anser albifrons at ../data/xeno-canto/Anser albifrons
Created folder for species: Anser fabalis at ../data/xeno-canto/Anser fabalis
Created folder for species: Columba livia domestica at ../data/xeno-canto/Columba livia domestica
Created folder for species: Columba palumbus at ../data/xeno-canto/Columba palumbus
Created folder for species: Apus apus at ../data/xeno-canto/Apus apus
Created folder for species: Coloeus monedula at ../data/xeno-canto/Coloeus monedula
Created folder for species: Sitta europaea at ../data/xeno-canto/Sitta europaea
Created folder for species: Anas platyrhynchos at ../data/xeno-canto/Anas platyrhynchos
Created folder for species: Cuculus canorus at ../data/xeno-canto/Cuculus canorus
Created folder for species: Fulica atra at ../data/xeno-canto/Fulica atra
Created fold

### Download recordings

In [14]:
# Long-running: downloads recordings for each selected species into SAVE_PATH.
# Species whose folder already contains files are skipped.
download_bird_recordings(species)

Downloaded recording 1 for Riparia riparia
Downloaded recording 2 for Riparia riparia
Downloaded recording 3 for Riparia riparia
Downloaded recording 4 for Riparia riparia
Downloaded recording 5 for Riparia riparia
Downloaded recording 6 for Riparia riparia
Downloaded recording 7 for Riparia riparia
Downloaded recording 8 for Riparia riparia
Downloaded recording 9 for Riparia riparia
Downloaded recording 10 for Riparia riparia
Downloaded recording 11 for Riparia riparia
Downloaded recording 12 for Riparia riparia
Downloaded recording 13 for Riparia riparia
Downloaded recording 14 for Riparia riparia
Downloaded recording 15 for Riparia riparia
Downloaded recording 16 for Riparia riparia
Downloaded recording 17 for Riparia riparia
Downloaded recording 18 for Riparia riparia
Downloaded recording 19 for Riparia riparia
Downloaded recording 20 for Riparia riparia
Downloaded recording 21 for Riparia riparia
Downloaded recording 22 for Riparia riparia
Downloaded recording 23 for Riparia ripar

KeyboardInterrupt: 

### Split data into train/val/test

In [3]:
import splitfolders
import shutil

# Names of the split folders expected inside SAVE_PATH after splitting.
split_dir_names = ["train", "val", "test"]

# SAVE_PATH is both the input and the output of the split, and files are moved
# rather than copied. Re-running this cell after a split would therefore pass
# splitfolders an input directory that already contains train/val/test, so the
# split is only performed while none of them exists.
existing_split_dirs = [
    name for name in split_dir_names
    if os.path.isdir(os.path.join(SAVE_PATH, name))
]
if existing_split_dirs:
    print(f"Split folders {existing_split_dirs} already exist in {SAVE_PATH}, "
          "skipping the split (merge the data back first to re-split).")
else:
    splitfolders.ratio(SAVE_PATH, output=SAVE_PATH,
        seed=1337, ratio=(.8, .1, .1), group_prefix=None, move=True)

# Remove the per-species folders that the move left empty.
for folder_name in os.listdir(SAVE_PATH):
    folder_path = os.path.join(SAVE_PATH, folder_name)
    if folder_name not in split_dir_names and os.path.isdir(folder_path):
        # check if folder is empty
        if not os.listdir(folder_path):
            # os.rmdir refuses to delete a non-empty directory, so nothing can
            # be lost if a file appears between the check and the removal.
            os.rmdir(folder_path)
            print(f"Empty directory: {folder_path} has been deleted.")
        else:
            print(f"Directory: {folder_path} is not empty, deletion cancelled.")

Copying files: 0 files [00:00, ? files/s]

Copying files: 26239 files [00:51, 506.17 files/s]

Empty directory: ../data/xeno-canto/Alauda arvensis has been deleted.
Empty directory: ../data/xeno-canto/Anas platyrhynchos has been deleted.
Empty directory: ../data/xeno-canto/Anser albifrons has been deleted.
Empty directory: ../data/xeno-canto/Anser fabalis has been deleted.
Empty directory: ../data/xeno-canto/Apus apus has been deleted.
Empty directory: ../data/xeno-canto/Chroicocephalus ridibundus has been deleted.
Empty directory: ../data/xeno-canto/Columba livia domestica has been deleted.
Empty directory: ../data/xeno-canto/Columba palumbus has been deleted.
Empty directory: ../data/xeno-canto/Corvus frugilegus has been deleted.
Empty directory: ../data/xeno-canto/Corvus monedula has been deleted.
Empty directory: ../data/xeno-canto/Cuculus canorus has been deleted.
Empty directory: ../data/xeno-canto/Cyanistes caeruleus has been deleted.
Empty directory: ../data/xeno-canto/Delichon urbicum has been deleted.
Empty directory: ../data/xeno-canto/Fulica atra has been deleted.
Em




### Unsplit data

In [None]:
# Merge the split data back together in case something needs to be done on the whole dataset
import os
import shutil

# Recreate the per-species folders for the selected species.
create_folders_for_bird_species(species)

split_folders = ["train", "val", "test"]

for folder_name in split_folders:
    folder_path = os.path.join(SAVE_PATH, folder_name)
    if not os.path.isdir(folder_path):
        # This split folder does not exist, nothing to merge back.
        continue

    for subfolder_name in os.listdir(folder_path):
        subfolder_path = os.path.join(folder_path, subfolder_name)
        if not os.path.isdir(subfolder_path):
            print(f"{subfolder_name} is not a directory, skipping.")
            continue

        # The split folders can contain species that are not listed in
        # `species`; create the destination here so the move cannot fail
        # because of a missing folder.
        destination_dir = os.path.join(SAVE_PATH, subfolder_name)
        os.makedirs(destination_dir, exist_ok=True)

        for file_name in os.listdir(subfolder_path):
            file_path = os.path.join(subfolder_path, file_name)
            destination_path = os.path.join(destination_dir, file_name)
            shutil.move(file_path, destination_path)

        if not os.listdir(subfolder_path):
            # os.rmdir only removes empty directories, so it cannot delete data.
            os.rmdir(subfolder_path)
            print(f"Empty directory: {subfolder_path} has been deleted.")

    if not os.listdir(folder_path):
        os.rmdir(folder_path)
        print(f"Empty split folder: {folder_path} has been deleted.")

Empty directory: ../data/xeno-canto/train\Alauda arvensis has been deleted.
Empty directory: ../data/xeno-canto/train\Anas platyrhynchos has been deleted.
Empty directory: ../data/xeno-canto/train\Anser albifrons has been deleted.
Empty directory: ../data/xeno-canto/train\Anser fabalis has been deleted.
Empty directory: ../data/xeno-canto/train\Apus apus has been deleted.
Empty directory: ../data/xeno-canto/train\Chroicocephalus ridibundus has been deleted.
Empty directory: ../data/xeno-canto/train\Columba livia domestica has been deleted.
Empty directory: ../data/xeno-canto/train\Columba palumbus has been deleted.
Empty directory: ../data/xeno-canto/train\Corvus frugilegus has been deleted.
Empty directory: ../data/xeno-canto/train\Corvus monedula has been deleted.
Empty directory: ../data/xeno-canto/train\Cuculus canorus has been deleted.
Empty directory: ../data/xeno-canto/train\Cyanistes caeruleus has been deleted.
Empty directory: ../data/xeno-canto/train\Delichon urbicum has been