In [4]:
import pandas as pd
from pathlib import Path
import os
from maad import util

# Directory where to download the audio files from Xeno-canto
XC_ROOTDIR = './data/'
# Name of the dataset
XC_DIR = 'woodpecker_dataset'

# Define the woodpecker species data
data = [
    ['Eurasian Three-toed', 'Picoides tridactylus'],
    ['White-backed', 'Dendrocopos leucotos'],
    ['Lesser Spotted', 'Dryobates minor'],
    ['Great Spotted', 'Dendrocopos major'],
    ['Black', 'Dryocopus martius'],
    ['Grey-headed', 'Picus canus'],
    ['Syrian', 'Dendrocopos syriacus'],
    ['Wryneck', 'Jynx torquilla'],
    ['Green', 'Picus viridis'],
    ['Middle Spotted', 'Dendrocoptes medius']
]

# Create a DataFrame with the species information
df_species = pd.DataFrame(data, columns=['english name', 'scientific name'])

# Separate genus and species for Xeno-canto query
gen, sp = [], []
for name in df_species['scientific name']:
    gen.append(name.split()[0])
    sp.append(name.split()[1])

# Build the query dataframe
df_query = pd.DataFrame()
df_query['param1'] = gen
df_query['param2'] = sp
df_query['param3'] = 'type:drumming'    # Target drumming sound
df_query['param4'] = 'area:europe'      # Limit to Europe

# Query Xeno-canto for recordings
df_dataset = util.xc_multi_query(df_query, format_time=False, format_date=False, verbose=True)

# Select files with specific conditions
df_dataset = util.xc_selection(df_dataset, 
                               max_nb_files=20,   # Up to 20 recordings per species
                               max_length='01:00',
                               min_length='00:10',
                               min_quality='B',   # Minimum quality
                               verbose=True)

# Download audio files into a subdirectory for each species
util.xc_download(df_dataset, 
                 rootdir=XC_ROOTDIR, 
                 dataset_name=XC_DIR, 
                 overwrite=False, 
                 save_csv=True, 
                 verbose=True)

# Grab all downloaded audio filenames in the directory and create a dataframe
filelist = []
for root, dirs, files in os.walk(os.path.join(XC_ROOTDIR, XC_DIR)):
    for name in files:
        if name.endswith('.mp3'):
            filelist.append(os.path.join(root, name))

df_files = pd.DataFrame(filelist, columns=['fullfilename'])
df_files['filename'] = df_files['fullfilename'].apply(lambda x: Path(x).stem)
df_files['species'] = df_files['fullfilename'].apply(lambda x: Path(x).parts[-2])

print('=====================================================')
print(f'Number of files: {len(df_files)}')
print(f'Number of species: {len(df_files.species.unique())}')
print('=====================================================')


Loading page 1...
https://www.xeno-canto.org/api/2/recordings?query=Picoides%20tridactylus%20type:drumming%20area:europe&page=1
Found 1 pages in total.
Saved metadata for 236 files
Loading page 1...
https://www.xeno-canto.org/api/2/recordings?query=Dendrocopos%20leucotos%20type:drumming%20area:europe&page=1
Found 1 pages in total.
Saved metadata for 177 files
Loading page 1...
https://www.xeno-canto.org/api/2/recordings?query=Dryobates%20minor%20type:drumming%20area:europe&page=1
Loading page 2...
https://www.xeno-canto.org/api/2/recordings?query=Dryobates%20minor%20type:drumming%20area:europe&page=2
Found 2 pages in total.
Saved metadata for 513 files
Loading page 1...
https://www.xeno-canto.org/api/2/recordings?query=Dendrocopos%20major%20type:drumming%20area:europe&page=1
Loading page 2...
https://www.xeno-canto.org/api/2/recordings?query=Dendrocopos%20major%20type:drumming%20area:europe&page=2
Found 2 pages in total.
Saved metadata for 927 files
Loading page 1...
https://www.xeno-c

  pd.concat([pd.read_csv(filename_csv,sep=';',index_col='id'),


Saving file  7 / 152 : https://xeno-canto.org/707004/download
Saving file  8 / 152 : https://xeno-canto.org/317487/download
Saving file  9 / 152 : https://xeno-canto.org/110735/download
Saving file  10 / 152 : https://xeno-canto.org/172699/download
Saving file  11 / 152 : https://xeno-canto.org/815299/download
Saving file  12 / 152 : https://xeno-canto.org/235716/download
Saving file  13 / 152 : https://xeno-canto.org/707179/download
Saving file  14 / 152 : https://xeno-canto.org/710418/download
Saving file  15 / 152 : https://xeno-canto.org/910366/download
Saving file  16 / 152 : https://xeno-canto.org/655702/download
Saving file  17 / 152 : https://xeno-canto.org/106806/download
Saving file  18 / 152 : https://xeno-canto.org/174915/download
Saving file  19 / 152 : https://xeno-canto.org/655773/download
Saving file  20 / 152 : https://xeno-canto.org/555653/download
Creating subdirectory data/woodpecker_dataset/Dendrocopos leucotos_White-backed Woodpecker for downloaded files...
Saving

  pd.concat([pd.read_csv(filename_csv,sep=';',index_col='id'),


Saving file  23 / 152 : https://xeno-canto.org/635688/download
Saving file  24 / 152 : https://xeno-canto.org/747211/download
Saving file  25 / 152 : https://xeno-canto.org/293037/download
Saving file  26 / 152 : https://xeno-canto.org/466907/download
Saving file  27 / 152 : https://xeno-canto.org/539794/download
Saving file  28 / 152 : https://xeno-canto.org/362609/download
Saving file  29 / 152 : https://xeno-canto.org/690515/download
Saving file  30 / 152 : https://xeno-canto.org/402989/download
Saving file  31 / 152 : https://xeno-canto.org/409437/download
Saving file  32 / 152 : https://xeno-canto.org/626956/download


  pd.concat([pd.read_csv(filename_csv,sep=';',index_col='id'),


Saving file  33 / 152 : https://xeno-canto.org/28081/download
Saving file  34 / 152 : https://xeno-canto.org/632893/download
Saving file  35 / 152 : https://xeno-canto.org/28079/download
Saving file  36 / 152 : https://xeno-canto.org/793758/download
Saving file  37 / 152 : https://xeno-canto.org/99692/download
Saving file  38 / 152 : https://xeno-canto.org/28074/download
Saving file  39 / 152 : https://xeno-canto.org/688747/download
Saving file  40 / 152 : https://xeno-canto.org/375864/download
Creating subdirectory data/woodpecker_dataset/Dryobates minor_Lesser Spotted Woodpecker for downloaded files...
Saving file  41 / 152 : https://xeno-canto.org/488408/download
Saving file  42 / 152 : https://xeno-canto.org/557939/download
Saving file  43 / 152 : https://xeno-canto.org/528227/download
Saving file  44 / 152 : https://xeno-canto.org/173209/download
Saving file  45 / 152 : https://xeno-canto.org/630233/download
Saving file  46 / 152 : https://xeno-canto.org/401830/download
Saving fil

  pd.concat([pd.read_csv(filename_csv,sep=';',index_col='id'),


Saving file  109 / 152 : https://xeno-canto.org/361470/download
Saving file  110 / 152 : https://xeno-canto.org/712209/download
Saving file  111 / 152 : https://xeno-canto.org/409222/download
Saving file  112 / 152 : https://xeno-canto.org/628964/download
Saving file  113 / 152 : https://xeno-canto.org/713049/download
Saving file  114 / 152 : https://xeno-canto.org/530159/download
Saving file  115 / 152 : https://xeno-canto.org/530161/download
Saving file  116 / 152 : https://xeno-canto.org/639919/download
Saving file  117 / 152 : https://xeno-canto.org/720327/download
Saving file  118 / 152 : https://xeno-canto.org/343659/download
Saving file  119 / 152 : https://xeno-canto.org/175478/download
Saving file  120 / 152 : https://xeno-canto.org/897209/download
Creating subdirectory data/woodpecker_dataset/Dendrocopos syriacus_Syrian Woodpecker for downloaded files...


  pd.concat([pd.read_csv(filename_csv,sep=';',index_col='id'),


Saving file  121 / 152 : https://xeno-canto.org/463578/download
Saving file  122 / 152 : https://xeno-canto.org/910636/download
Saving file  123 / 152 : https://xeno-canto.org/357745/download
Saving file  124 / 152 : https://xeno-canto.org/465211/download
Saving file  125 / 152 : https://xeno-canto.org/314893/download
Saving file  126 / 152 : https://xeno-canto.org/465212/download
Saving file  127 / 152 : https://xeno-canto.org/480291/download
Saving file  128 / 152 : https://xeno-canto.org/314891/download
Saving file  129 / 152 : https://xeno-canto.org/904300/download
Saving file  130 / 152 : https://xeno-canto.org/910637/download


  pd.concat([pd.read_csv(filename_csv,sep=';',index_col='id'),


Saving file  131 / 152 : https://xeno-canto.org/910641/download
Saving file  132 / 152 : https://xeno-canto.org/611212/download
Saving file  133 / 152 : https://xeno-canto.org/412366/download
Saving file  134 / 152 : https://xeno-canto.org/467671/download
Saving file  135 / 152 : https://xeno-canto.org/937506/download
Creating subdirectory data/woodpecker_dataset/Picus viridis_European Green Woodpecker for downloaded files...
Saving file  136 / 152 : https://xeno-canto.org/496898/download
Saving file  137 / 152 : https://xeno-canto.org/541591/download
Saving file  138 / 152 : https://xeno-canto.org/324359/download
Saving file  139 / 152 : https://xeno-canto.org/809376/download
Saving file  140 / 152 : https://xeno-canto.org/496894/download
Saving file  141 / 152 : https://xeno-canto.org/496895/download
Saving file  142 / 152 : https://xeno-canto.org/496896/download
Creating subdirectory data/woodpecker_dataset/Dendrocoptes medius_Middle Spotted Woodpecker for downloaded files...
Saving

In [2]:
import pandas as pd
from maad import util
import os

# Define the path where the audio files will be saved relative to the current directory
XC_ROOTDIR = '~/Users/solomongreene/DIS./data/'
# Dataset directory name
XC_DIR = 'setophaga_americana_single'

# Ensure the directory exists
output_dir = os.path.join(XC_ROOTDIR, XC_DIR)
os.makedirs(output_dir, exist_ok=True)

# Define the species data for American Redstart
data = [['American Redstart', 'Setophaga americana']]

# Create a DataFrame with the species information
df_species = pd.DataFrame(data, columns=['english name', 'scientific name'])

# Split genus and species for query
gen = df_species['scientific name'].apply(lambda x: x.split()[0])
sp = df_species['scientific name'].apply(lambda x: x.split()[1])

# Create the query dataframe
df_query = pd.DataFrame()
df_query['param1'] = gen
df_query['param2'] = sp
df_query['param3'] = 'type:song'  # Adjust as needed for sound type

# Query Xeno-canto for the first available recording
df_dataset = util.xc_multi_query(df_query, format_time=False, format_date=False, verbose=True)

# Select only the first available file
df_dataset = util.xc_selection(df_dataset, max_nb_files=1, verbose=True)

# Print the paths and dataset info to debug
print(f"Saving files to: {output_dir}")
print(f"Dataset info: {df_dataset.head()}")

# Download the audio file into the specified directory
util.xc_download(df_dataset, rootdir=XC_ROOTDIR, dataset_name=XC_DIR, overwrite=False, save_csv=True, verbose=True)

# Check if the file exists in the output directory
file_path = os.path.join(output_dir, 'Setophaga_americana_1.mp3')
if os.path.exists(file_path):
    print(f"File successfully downloaded to {file_path}")
else:
    print("The file was not downloaded to the expected location.")


Loading page 1...
https://www.xeno-canto.org/api/2/recordings?query=Setophaga%20americana%20type:song&page=1
Found 1 pages in total.
Saved metadata for 282 files
Setophaga americana
    ... request  1 files of quality A
    --> found  1 files of quality A and 00:10<length<01:00
    total files :  1
-----------------------------------------
Saving files to: ~/Users/solomongreene/DIS./data/setophaga_americana_single
Dataset info:         id        gen         sp ssp  group               en          rec  \
64  357621  Setophaga  americana      birds  Northern Parula  Paul Marvin   

              cnt                                                loc     lat  \
64  United States  Little Big Econ SF - Brumley Rd. Trailhead, Se...  28.666   

    ...  rmk bird-seen animal-seen playback-used temp regnr auto dvc mic  \
64  ...            yes         yes            no              no           

      smp  
64  48000  

[1 rows x 38 columns]
The file was not downloaded to the expected location

In [6]:
import pandas as pd
import os
import requests

# Define the path where the audio files will be saved relative to the current directory
XC_ROOTDIR = './DIS/DIS Core Course/Bird Songs/'
# Dataset directory name
XC_DIR = 'setophaga_americana_single'

# Ensure the directory exists
output_dir = os.path.join(XC_ROOTDIR, XC_DIR)
os.makedirs(output_dir, exist_ok=True)

# Define the species data for American Redstart
data = [['American Redstart', 'Setophaga americana']]

# Create a DataFrame with the species information
df_species = pd.DataFrame(data, columns=['english name', 'scientific name'])

# Split genus and species for query
gen = df_species['scientific name'].apply(lambda x: x.split()[0])
sp = df_species['scientific name'].apply(lambda x: x.split()[1])

# Create the query dataframe
df_query = pd.DataFrame()
df_query['param1'] = gen
df_query['param2'] = sp
df_query['param3'] = 'type:song'  # Adjust as needed for sound type

# Query Xeno-canto for the first available recording
df_dataset = util.xc_multi_query(df_query, format_time=False, format_date=False, verbose=True)

# Select only the first available file
df_dataset = util.xc_selection(df_dataset, max_nb_files=1, verbose=True)

# Print the paths and dataset info to debug
print(f"Saving files to: {output_dir}")
print(f"Dataset info: {df_dataset.head()}")

# Check the URL for the first audio file
file_url = f"https://www.xeno-canto.org/{df_dataset.iloc[0]['rec']}/download"

# Print the URL of the audio file to check
print(f"Downloading from URL: {file_url}")

# Manually download the file
response = requests.get(file_url)

# Check if the download was successful
if response.status_code == 200:
    # Save the file with the correct name
    audio_file_path = os.path.join(output_dir, f"Setophaga_americana_1.mp3")
    with open(audio_file_path, 'wb') as f:
        f.write(response.content)
    print(f"File successfully downloaded to {audio_file_path}")
else:
    print("Failed to download the file.")


Loading page 1...
https://www.xeno-canto.org/api/2/recordings?query=Setophaga%20americana%20type:song&page=1
Found 1 pages in total.
Saved metadata for 282 files
Setophaga americana
    ... request  1 files of quality A
    --> found  1 files of quality A and 00:10<length<01:00
    total files :  1
-----------------------------------------
Saving files to: ./DIS/DIS Core Course/Bird Songs/setophaga_americana_single
Dataset info:         id        gen         sp ssp  group               en          rec  \
64  357621  Setophaga  americana      birds  Northern Parula  Paul Marvin   

              cnt                                                loc     lat  \
64  United States  Little Big Econ SF - Brumley Rd. Trailhead, Se...  28.666   

    ...  rmk bird-seen animal-seen playback-used temp regnr auto dvc mic  \
64  ...            yes         yes            no              no           

      smp  
64  48000  

[1 rows x 38 columns]
Downloading from URL: https://www.xeno-canto.org/Pa

In [7]:
import pandas as pd
import os
import requests

# Define the path where the audio files will be saved relative to the current directory
XC_ROOTDIR = './DIS/DIS Core Course/Bird Songs/'
# Dataset directory name
XC_DIR = 'setophaga_americana_single'

# Ensure the directory exists
output_dir = os.path.join(XC_ROOTDIR, XC_DIR)
os.makedirs(output_dir, exist_ok=True)

# Define the species data for American Redstart
data = [['American Redstart', 'Setophaga americana']]

# Create a DataFrame with the species information
df_species = pd.DataFrame(data, columns=['english name', 'scientific name'])

# Split genus and species for query
gen = df_species['scientific name'].apply(lambda x: x.split()[0])
sp = df_species['scientific name'].apply(lambda x: x.split()[1])

# Create the query dataframe
df_query = pd.DataFrame()
df_query['param1'] = gen
df_query['param2'] = sp
df_query['param3'] = 'type:song'  # Adjust as needed for sound type

# Query Xeno-canto for the first available recording
df_dataset = util.xc_multi_query(df_query, format_time=False, format_date=False, verbose=True)

# Select only the first available file
df_dataset = util.xc_selection(df_dataset, max_nb_files=1, verbose=True)

# Print the paths and dataset info to debug
print(f"Saving files to: {output_dir}")
print(f"Dataset info: {df_dataset.head()}")

# Correctly extract the recording ID and build the download URL
rec_id = df_dataset.iloc[0]['rec']
file_url = f"https://www.xeno-canto.org/{rec_id}/download"

# Print the URL of the audio file to check
print(f"Downloading from URL: {file_url}")

# Manually download the file
response = requests.get(file_url)

# Check if the download was successful
if response.status_code == 200:
    # Save the file with the correct name
    audio_file_path = os.path.join(output_dir, f"Setophaga_americana_{rec_id}.mp3")
    with open(audio_file_path, 'wb') as f:
        f.write(response.content)
    print(f"File successfully downloaded to {audio_file_path}")
else:
    print(f"Failed to download the file. Status code: {response.status_code}")


Loading page 1...
https://www.xeno-canto.org/api/2/recordings?query=Setophaga%20americana%20type:song&page=1
Found 1 pages in total.
Saved metadata for 282 files
Setophaga americana
    ... request  1 files of quality A
    --> found  1 files of quality A and 00:10<length<01:00
    total files :  1
-----------------------------------------
Saving files to: ./DIS/DIS Core Course/Bird Songs/setophaga_americana_single
Dataset info:         id        gen         sp ssp  group               en          rec  \
64  357621  Setophaga  americana      birds  Northern Parula  Paul Marvin   

              cnt                                                loc     lat  \
64  United States  Little Big Econ SF - Brumley Rd. Trailhead, Se...  28.666   

    ...  rmk bird-seen animal-seen playback-used temp regnr auto dvc mic  \
64  ...            yes         yes            no              no           

      smp  
64  48000  

[1 rows x 38 columns]
Downloading from URL: https://www.xeno-canto.org/Pa