# SCRIPT TO SEND QUERY TO XENOCANTO AND DOWNLOAD IT TO /DATASETS

## Walkthrough importing sound data from xenocanto to .wav
### Insert your own paths.

In [1]:
from xenopy import Query
import pandas as pd
import os
datapath_wav = '/Users/Paul/Paul/Desktop/My_projects/Bioacoustics/Maputo_dash/datasets/wav/xenocanto2/'
datapath_table = '/Users/Paul/Paul/Desktop/My_projects/Bioacoustics/Maputo_Dash/datasets/tables/'
relevant_features = ['loc','cnt','rec','length','gen','sp','q','type','lat','lng','bird-seen','time','date','uploaded']

In [29]:
#Query Xenocanto. List possible of arguments above ^
'''
    name: Species Name. Specify the name of bird you intend to retrieve data from. Both English names and Latin names are acceptable.
    gen: Genus. Genus is part of a species' latin name, so it is searched by default when performing a basic search (as mentioned above).
    ssp: subspecies
    rec: recordist. Search for all recordings from a particular recordist.
    cnt: country. Search for all recordings from a particular country.
    loc: location. Search for all recordings from a specific location.
    rmk: remarks. Many recordists leave remarks about the recording,and this field can be searched using the rmk tag. For example, rmk:playback will return a list of recordings for which the recordist left a comment about the use of playback. This field accepts a 'matches' operator.
    lat: latitude.
    lon: longtitude
    box: search for recordings that occur within a given rectangle. The general format of the box tag is as follows: box:LAT_MIN,LON_MIN,LAT_MAX,LON_MAX. Note that there must not be any spaces between the coordinates.
    also: To search for recordings that have a given species in the background.
    type: Search for recordings of a particular sound type, e.g., type='song'
    nr: number. To search for a known recording number, use the nr tag: for example nr:76967. You can also search for a range of numbers as nr:88888-88890.
    lc: license.
    q: quality ratings. 
    q_lt: quality ratings less than
    q_gt: quality ratings better than
        Usage Examples:
            Recordings are rated by quality. Quality ratings range from A (highest quality) to E (lowest quality). To search for recordings that match a certain quality rating, use the q, q_lt, and q_gt tags. For example:
                - q:A will return recordings with a quality rating of A.
                - q:0 search explicitly for unrated recordings
                - q_lt:C will return recordings with a quality rating of D or E.
                - q_gt:C will return recordings with a quality rating of B or A.
    len: recording length control parameter.
    len_lt: recording length less than
    len_gt: recording length greater than
        Usage Examples:
            len:10 will return recordings with a duration of 10 seconds (with a margin of 1%, so actually between 9.9 and 10.1 seconds)
            len:10-15 will return recordings lasting between 10 and 15 seconds.
            len_lt:30 will return recordings half a minute or shorter in length.
            len_gt:120 will return recordings longer than two minutes in length.
    area: continents. Valid values for this tag: africa, america, asia, australia, europe.
    since: 
        Usage Examples:
            - since=3, since the past three days
            - since=YYYY-MM-DD, since the particular date
    year: year
    month: month. year and month tags allow you to search for recordings that were recorded on a certain date. 
    '''
#short dataset
# q = Query('''cnt:mozambique box:-26,30,-25.95,33''')
#shit query
q = Query('''gfs''')
#pmaputo special reserve
# q = Query('''cnt:mozambique box:-26,30,-25.55,35''')

query: gfs


## Inspect query results before downloading

In [30]:
# retrieve metadata, store in df_metafiles as pandas DataFrame
metafiles = q.retrieve_meta(verbose=True, recordings_only=True)        
all_recordings = [rec for rec in metafiles if rec['file']]


... retrieving metadata ...
https://www.xeno-canto.org/api/2/recordings?query=gfs&page=1


In [31]:
metafiles

[]

In [27]:
df_metafiles = pd.DataFrame(columns=[
                'id', 'rec', 'loc', 'gen', 'sp', 'lat', 'lng', 'alt', 'type', 'q', 'length', 'bird-seen', 'file', 'en'])

In [32]:
df_metafiles.to_dict('records')

[]

In [26]:
df_metafiles = pd.DataFrame(metafiles['recordings'])


TypeError: list indices must be integers or slices, not str

In [68]:
df_metafiles

Unnamed: 0,id,gen,sp,ssp,group,en,rec,cnt,loc,lat,...,rmk,bird-seen,animal-seen,playback-used,temp,regnr,auto,dvc,mic,smp
0,394493,Vanellus,lugubris,,birds,Senegal Lapwing,Gary Allport,Mozambique,Maputo,-25.962,...,Nocturnal flight call analysis from open micro...,no,no,no,,,no,,,44100
1,103956,Poicephalus,cryptoxanthus,,birds,Brown-headed Parrot,Bram Piot,Mozambique,Maputo,-25.962,...,Recorded while feeding in a small park in a re...,unknown,unknown,unknown,,,no,,,44100
2,704993,Prinia,subflava,,birds,Tawny-flanked Prinia,Antonio Xeira,Mozambique,"Hotel Cardoso, Maputo",-25.9782,...,"Editing: High-pass filter, some amplification....",no,no,no,,,no,,,44100
3,704997,Turdus,libonyana,,birds,Kurrichane Thrush,Antonio Xeira,Mozambique,"Hotel Cardoso, Maputo",-25.9782,...,"Editing: High-pass filter, some amplification,...",no,no,no,,,no,,,44100
4,444789,Turdus,libonyana,,birds,Kurrichane Thrush,Antonio Xeira,Mozambique,"Hotel Cardoso, Maputo",-25.9782,...,"Editing: High-pass filter, cutoff frequency 1k...",yes,yes,no,,,no,,,44100
5,370805,Cossypha,natalensis,,birds,Red-capped Robin-Chat,Gary Allport,Mozambique,Maputo,-25.962,...,This bird was not seen and is assumed to be a ...,no,no,no,,,no,,,44100
6,705003,Hedydipna,collaris,,birds,Collared Sunbird,Antonio Xeira,Mozambique,"Tunduru Botanical Gardens (near Maputo), Maputo",-25.9711,...,"Editing: High-pass filter, some amplification....",yes,yes,no,,,no,,,44100
7,704996,Mystery,mystery,,birds,Identity unknown,Antonio Xeira,Mozambique,"Hotel Cardoso, Maputo",-25.9782,...,"Editing: High-pass filter, some amplification,...",no,no,no,,,no,,,44100


In [56]:
all_recordings[0]

{'id': '394493',
 'gen': 'Vanellus',
 'sp': 'lugubris',
 'ssp': '',
 'group': 'birds',
 'en': 'Senegal Lapwing',
 'rec': 'Gary Allport',
 'cnt': 'Mozambique',
 'loc': 'Maputo',
 'lat': '-25.962',
 'lng': '32.599',
 'alt': '50',
 'type': 'flight call',
 'sex': '',
 'stage': '',
 'method': 'field recording',
 'url': '//xeno-canto.org/394493',
 'file': 'https://xeno-canto.org/394493/download',
 'file-name': 'XC394493-28 29 Nov 4 18 25 Lesser Black winged Plovers longer version.mp3',
 'sono': {'small': '//xeno-canto.org/sounds/uploaded/XMZZQLHWHS/ffts/XC394493-small.png',
  'med': '//xeno-canto.org/sounds/uploaded/XMZZQLHWHS/ffts/XC394493-med.png',
  'large': '//xeno-canto.org/sounds/uploaded/XMZZQLHWHS/ffts/XC394493-large.png',
  'full': '//xeno-canto.org/sounds/uploaded/XMZZQLHWHS/ffts/XC394493-full.png'},
 'osci': {'small': '//xeno-canto.org/sounds/uploaded/XMZZQLHWHS/wave/XC394493-small.png',
  'med': '//xeno-canto.org/sounds/uploaded/XMZZQLHWHS/wave/XC394493-med.png',
  'large': '//xe

In [45]:
df_metafiles.describe()

Unnamed: 0,id,gen,sp,ssp,group,en,rec,cnt,loc,lat,...,rmk,bird-seen,animal-seen,playback-used,temp,regnr,auto,dvc,mic,smp
count,8,8,8,8.0,8,8,8,8,8,8.0,...,8,8,8,8,8.0,8.0,8,8.0,8.0,8
unique,8,7,7,1.0,1,7,3,1,3,3.0,...,7,3,3,2,1.0,1.0,1,1.0,1.0,1
top,394493,Turdus,libonyana,,birds,Kurrichane Thrush,Antonio Xeira,Mozambique,"Hotel Cardoso, Maputo",-25.9782,...,"Editing: High-pass filter, some amplification,...",no,no,no,,,no,,,44100
freq,1,2,2,8.0,8,2,5,8,4,4.0,...,2,5,5,7,8.0,8.0,8,8.0,8.0,8


In [4]:
df_metafiles.loc[:,'loc'].unique()

array(['Marracuene District, Marracuene District, Maputo Province',
       'Marracuene, Maputo', 'Dona Alice - Costa do Sol, Maputo',
       'Golf Course, Maputo, Maputo',
       'First Wetland Zone 4, Macaneta, Incomati Delta, Marracuene District, Maputo Province',
       'Maputo, Marracuene, Cidade de Maputo', 'Maputo, Cidade de Maputo',
       'Maputo', 'Marracuene District, Marracuene, Maputo',
       'Maputo, Maputo', 'Hotel Cardoso, Maputo',
       'Tunduru Botanical Gardens (near  Maputo), Maputo'], dtype=object)

In [5]:
df_metafiles.loc[:,relevant_features].describe()


Unnamed: 0,loc,cnt,rec,length,gen,sp,q,type,lat,lng,bird-seen,time,date,uploaded
count,73,73,73,73,73,73,73,73,73.0,73.0,73,73,73,73
unique,12,1,8,48,31,36,5,11,18.0,19.0,3,21,38,41
top,"Dona Alice - Costa do Sol, Maputo",Mozambique,Gary Allport,0:06,Acrocephalus,mystery,B,song,-25.9061,32.6335,yes,06:00,2018-11-25,2019-01-21
freq,19,73,30,5,10,8,40,41,19.0,19.0,40,13,8,6


## Create .csv in datasets/table

In [10]:
df_metafiles.to_csv(datapath_table+'metafiles_xenocanto.csv')

DOWNLOAD DATA, STORE IN xenocanto/wav/
data sorted by gen-species into folders containing .wav(s)

In [47]:
os.getcwd()

'/Users/Paul/Paul/Desktop/My_projects/Bioacoustics/Maputo_Dash/notebooks'

In [52]:
# retrieve recordings
os.chdir(datapath_wav)
q.retrieve_recordings(multiprocess=True, nproc=10, attempts=10, outdir=datapath_wav)

... retrieving metadata ...


process 8437: 100%|██████████| 13/13 [00:43<00:00,  3.33s/it]
process 8434: 100%|██████████| 13/13 [00:56<00:00,  4.37s/it]
process 8444: 100%|██████████| 13/13 [00:59<00:00,  4.57s/it]
process 8441: 100%|██████████| 13/13 [01:06<00:00,  5.09s/it]
process 8450: 100%|██████████| 12/12 [01:11<00:00,  5.94s/it]
process 8446: 100%|██████████| 12/12 [01:14<00:00,  6.20s/it]
process 8435: 100%|██████████| 13/13 [01:17<00:00,  5.97s/it]
process 8445: 100%|██████████| 13/13 [01:27<00:00,  6.73s/it]
process 8451: 100%|██████████| 12/12 [01:37<00:00,  8.13s/it]
process 8436: 100%|██████████| 13/13 [01:45<00:00,  8.09s/it]


... finished ...


