# SCRIPT TO SEND QUERY TO XENOCANTO AND DOWNLOAD IT TO /DATASETS

## Walkthrough importing sound data from xenocanto to .wav
### Insert your own paths.

In [None]:
from xenopy import Query
import pandas as pd
import os
# Output locations -- replace these with paths on your own machine.
# Both end with '/' because the CSV export concatenates a file name onto
# datapath_table directly.
# NOTE(review): the two paths differ in case ('Maputo_dash' vs 'Maputo_Dash');
# on a case-sensitive filesystem they are different directories -- confirm
# which spelling is intended.
datapath_wav = (
    '/Users/Paul/Paul/Desktop/My_projects/Bioacoustics/'
    'Maputo_dash/datasets/wav/xenocanto2/'
)
datapath_table = (
    '/Users/Paul/Paul/Desktop/My_projects/Bioacoustics/'
    'Maputo_Dash/datasets/tables/'
)

# Metadata columns used for the reduced summary of the query results.
relevant_features = [
    'loc', 'cnt', 'rec', 'length',
    'gen', 'sp', 'q', 'type',
    'lat', 'lng', 'bird-seen',
    'time', 'date', 'uploaded',
]

In [6]:
# Query xeno-canto. The list of possible arguments is given below.
'''
    name: Species Name. Specify the name of bird you intend to retrieve data from. Both English names and Latin names are acceptable.
    gen: Genus. Genus is part of a species' latin name, so it is searched by default when performing a basic search (as mentioned above).
    ssp: subspecies
    rec: recordist. Search for all recordings from a particular recordist.
    cnt: country. Search for all recordings from a particular country.
    loc: location. Search for all recordings from a specific location.
    rmk: remarks. Many recordists leave remarks about the recording,and this field can be searched using the rmk tag. For example, rmk:playback will return a list of recordings for which the recordist left a comment about the use of playback. This field accepts a 'matches' operator.
    lat: latitude.
    lon: longitude.
    box: search for recordings that occur within a given rectangle. The general format of the box tag is as follows: box:LAT_MIN,LON_MIN,LAT_MAX,LON_MAX. Note that there must not be any spaces between the coordinates.
    also: To search for recordings that have a given species in the background.
    type: Search for recordings of a particular sound type, e.g., type='song'
    nr: number. To search for a known recording number, use the nr tag: for example nr:76967. You can also search for a range of numbers as nr:88888-88890.
    lc: license.
    q: quality ratings. 
    q_lt: quality ratings less than
    q_gt: quality ratings better than
        Usage Examples:
            Recordings are rated by quality. Quality ratings range from A (highest quality) to E (lowest quality). To search for recordings that match a certain quality rating, use the q, q_lt, and q_gt tags. For example:
                - q:A will return recordings with a quality rating of A.
                - q:0 search explicitly for unrated recordings
                - q_lt:C will return recordings with a quality rating of D or E.
                - q_gt:C will return recordings with a quality rating of B or A.
    len: recording length control parameter.
    len_lt: recording length less than
    len_gt: recording length greater than
        Usage Examples:
            len:10 will return recordings with a duration of 10 seconds (with a margin of 1%, so actually between 9.9 and 10.1 seconds)
            len:10-15 will return recordings lasting between 10 and 15 seconds.
            len_lt:30 will return recordings half a minute or shorter in length.
            len_gt:120 will return recordings longer than two minutes in length.
    area: continents. Valid values for this tag: africa, america, asia, australia, europe.
    since: 
        Usage Examples:
            - since=3, since the past three days
            - since=YYYY-MM-DD, since the particular date
    year: year
    month: month. year and month tags allow you to search for recordings that were recorded on a certain date. 
    '''

# Build the query from a raw xeno-canto search string with two filters:
#   cnt:mozambique         -> recordings from Mozambique only
#   box:-26,30,-25.55,35   -> LAT_MIN,LON_MIN,LAT_MAX,LON_MAX (no spaces)
q = Query('cnt:mozambique box:-26,30,-25.55,35')

query: cnt:mozambique box:-26,30,-25.55,35


## Inspect query results before downloading

In [7]:
# retrieve metadata, store in df_metafiles as pandas DataFrame
# Fetch the metadata for every recording matched by the query
# (verbose=True prints the request URL), then tabulate the per-recording
# entries as a pandas DataFrame for inspection.
metafiles = q.retrieve_meta(verbose=True)
df_metafiles = pd.DataFrame(data=metafiles['recordings'])


... retrieving metadata ...
https://www.xeno-canto.org/api/2/recordings?query=cnt:mozambique%20box:-26,30,-25.55,35&page=1


In [8]:
# Summary statistics (count / unique / top / freq) for every metadata column.
df_metafiles.describe()

Unnamed: 0,id,gen,sp,ssp,group,en,rec,cnt,loc,lat,...,rmk,bird-seen,animal-seen,playback-used,temp,regnr,auto,dvc,mic,smp
count,73,73,73,73.0,73,73,73,73,73,73.0,...,73,73,73,73,73.0,73.0,73,73.0,73.0,73
unique,73,31,36,6.0,1,36,8,1,12,18.0,...,62,3,3,3,1.0,1.0,1,1.0,1.0,2
top,457592,Mystery,mystery,,birds,Identity unknown,Gary Allport,Mozambique,"Dona Alice - Costa do Sol, Maputo",-25.9061,...,"Editing: High-pass filter, cutoff frequency 1k...",yes,yes,no,,,no,,,44100
freq,1,10,10,58.0,73,10,30,73,19,19.0,...,5,40,40,59,73.0,73.0,73,73.0,73.0,52


In [45]:
# List the metadata fields available for each recording.
df_metafiles.columns

Index(['id', 'gen', 'sp', 'ssp', 'group', 'en', 'rec', 'cnt', 'loc', 'lat',
       'lng', 'alt', 'type', 'sex', 'stage', 'method', 'url', 'file',
       'file-name', 'sono', 'osci', 'lic', 'q', 'length', 'time', 'date',
       'uploaded', 'also', 'rmk', 'bird-seen', 'animal-seen', 'playback-used',
       'temp', 'regnr', 'auto', 'dvc', 'mic', 'smp'],
      dtype='object')

In [46]:
# Distinct recording locations returned by the query.
# The 'loc' column has to be selected by label: the attribute
# `df_metafiles.loc` is the DataFrame's indexer, not this column.
df_metafiles['loc'].unique()

array(['Inhaca island', 'Lopes, Matutuíne District, Maputo Province',
       'Marracuene, Maputo', 'Dona Alice - Costa do Sol, Maputo',
       'Golf Course, Maputo, Maputo',
       'First Wetland Zone 4, Macaneta, Incomati Delta, Marracuene District, Maputo Province',
       'Maputo, Marracuene, Cidade de Maputo', 'Maputo, Cidade de Maputo',
       'Maputo', 'Marracuene District, Marracuene, Maputo',
       'Matutuíne District, Maputo',
       'Matutuíne District, Maputo Province',
       'Gala Gala Eco Resort, Ponta do Ouro', 'Maputo, Maputo',
       'Reserva Maputo, Maputo', 'Hotel Cardoso, Maputo',
       'Ponta do Ouro, Matutuíne', 'Matutuíne, Maputo',
       'Tunduru Botanical Gardens (near  Maputo), Maputo',
       'Marracuene District, Marracuene District, Maputo Province'],
      dtype=object)

In [47]:
# Same summary as above, restricted to the columns named in relevant_features.
df_metafiles[relevant_features].describe()


Unnamed: 0,loc,cnt,rec,length,gen,sp,q,type,lat,lng,bird-seen,time,date,uploaded
count,127,127,127,127,127,127,127,127,127.0,127.0,127,127,127,127
unique,20,1,11,67,61,69,5,12,34.0,35.0,3,44,53,53
top,Inhaca island,Mozambique,Antonio Xeira,0:06,Mystery,mystery,B,song,-26.023,32.907,yes,06:00,2012-06-17,2010-08-15
freq,31,127,33,6,11,11,81,65,21.0,31.0,78,18,12,15


## Create .csv in datasets/table

In [51]:
# Write the complete metadata table to <datapath_table>/metafiles_xenocanto.csv.
# Create the target directory on first use; to_csv does not create it.
os.makedirs(datapath_table, exist_ok=True)
# os.path.join inserts the separator only when it is missing, so the file
# name is correct whether or not datapath_table ends with '/'.
df_metafiles.to_csv(os.path.join(datapath_table, 'metafiles_xenocanto.csv'))

Download the data and store it in datasets/wav/xenocanto2/ (the `datapath_wav` directory).
The data is sorted by genus-species into folders containing the .wav(s).

In [52]:
# retrieve recordings
# Download every recording matched by the query into datapath_wav.
# Make sure the target directory exists first: os.chdir raises
# FileNotFoundError on a first run otherwise.
os.makedirs(datapath_wav, exist_ok=True)
# NOTE(review): outdir below is already an absolute path, so this chdir looks
# redundant; it is kept so the working directory stays the same for any
# later cells that rely on it.
os.chdir(datapath_wav)
q.retrieve_recordings(
    multiprocess=True,    # download in parallel ...
    nproc=10,             # ... using 10 worker processes
    attempts=10,          # presumably the retry limit per file -- confirm in xenopy docs
    outdir=datapath_wav,
)

... retrieving metadata ...


process 8437: 100%|██████████| 13/13 [00:43<00:00,  3.33s/it]
process 8434: 100%|██████████| 13/13 [00:56<00:00,  4.37s/it]
process 8444: 100%|██████████| 13/13 [00:59<00:00,  4.57s/it]
process 8441: 100%|██████████| 13/13 [01:06<00:00,  5.09s/it]
process 8450: 100%|██████████| 12/12 [01:11<00:00,  5.94s/it]
process 8446: 100%|██████████| 12/12 [01:14<00:00,  6.20s/it]
process 8435: 100%|██████████| 13/13 [01:17<00:00,  5.97s/it]
process 8445: 100%|██████████| 13/13 [01:27<00:00,  6.73s/it]
process 8451: 100%|██████████| 12/12 [01:37<00:00,  8.13s/it]
process 8436: 100%|██████████| 13/13 [01:45<00:00,  8.09s/it]


... finished ...


