In [243]:
import os
import requests
import json
import time
import random

### Download Argentinian Passeriformes dataset from Xeno-Canto

#### Query the Xeno-Canto API and save each page of the response as a JSON file.

In [175]:
# Root folder of the dataset, relative to the notebook.
# Built with os.path.join so the path is valid on every OS (on Windows it is
# identical to the previous hard-coded value). The trailing separator is kept
# on purpose: later cells append file names by plain string concatenation.
dataset_location = os.path.join('..', 'datasets', 'xeno-canto_argentina', '')

# Query variables: each one fills the matching tag of the Xeno-Canto query string.
country = 'argentina'   # cnt: tag
group = 'birds'         # grp: tag
length = '10-120'       # len: tag
since = '2000-01-01'    # since: tag

In [188]:
# Base endpoint and query string; both are reused when fetching individual pages.
url = "https://xeno-canto.org/api/2/recordings?query="
params = f"cnt:{country}+grp:{group}+len:{length}+since:{since}"

# requests has no default timeout, so set one to avoid hanging the kernel
# forever if the server does not answer.
response = requests.get(url + params, timeout=60)

print(f'• Query result: status-code {response.status_code}')

if response.status_code == 200:
  data = response.json()

  # Totals reported by the API; `pages` drives the page loop and `n_rec`
  # bounds the number of downloads.
  n_rec = data['numRecordings']
  pages = data['numPages']
  print(f'• Found {n_rec} recordings in {pages} pages.')
else:
  # Fail here, with a clear message, instead of leaving `n_rec`/`pages`
  # undefined and getting an unrelated NameError in a later cell.
  raise RuntimeError(
    f'Xeno-Canto query failed with status-code {response.status_code}'
  )

• Query result: status-code 200
• Found 10047 recordings in 21 pages.


In [192]:
# Write one JSON file per result page (query_1.json ... query_<pages>.json).

# Make sure the target folder exists before writing into it.
os.makedirs(dataset_location, exist_ok=True)

for p in range(1, pages + 1):
  response = requests.get(url + params + f'&page={p}', timeout=60)
  # Stop on an HTTP error instead of saving an error payload as a page file.
  response.raise_for_status()
  data = response.json()

  filename = f"query_{p}.json"
  with open(dataset_location + filename, "w") as file:
    json.dump(data, file, sort_keys=True, indent=4)
  # Report only once the file has been closed, i.e. completely written.
  print(f'• Saved page {p} as {filename}')

• Saved page 1 as query_1.json
• Saved page 2 as query_2.json
• Saved page 3 as query_3.json
• Saved page 4 as query_4.json
• Saved page 5 as query_5.json
• Saved page 6 as query_6.json
• Saved page 7 as query_7.json
• Saved page 8 as query_8.json
• Saved page 9 as query_9.json
• Saved page 10 as query_10.json
• Saved page 11 as query_11.json
• Saved page 12 as query_12.json
• Saved page 13 as query_13.json
• Saved page 14 as query_14.json
• Saved page 15 as query_15.json
• Saved page 16 as query_16.json
• Saved page 17 as query_17.json
• Saved page 18 as query_18.json
• Saved page 19 as query_19.json
• Saved page 20 as query_20.json
• Saved page 21 as query_21.json


#### Download the audio files to the dataset's audio folder.

In [193]:
# Create the audio folder inside the dataset folder.
# The trailing separator is kept because file names are appended to this
# path by plain string concatenation.
audio_location = os.path.join(dataset_location, 'audio', '')
try:
  # makedirs also creates the dataset folder itself if it is missing.
  os.makedirs(audio_location)
  print(f'Created {audio_location}')
except FileExistsError:
  # Only "already exists" is expected; any other error (permissions,
  # invalid path, ...) is allowed to surface instead of being hidden.
  print('Folder already existed.')

Folder already existed.


In [242]:
# Download the audio file of the recordings listed in every saved JSON page.

# Upper bound on the total number of files to download.
# int() because the API may report the count as a string - TODO confirm.
MAX_FILES = int(n_rec)
# Per-page cap (previously the hard-coded `i > 4` check). Raise it to fetch
# more recordings from each page.
MAX_FILES_PER_PAGE = 5

downloaded = 0
for page_file in sorted(os.listdir(dataset_location)):
  # Only the saved query pages are of interest (skips e.g. the audio folder).
  if not page_file.endswith('.json'):
    continue
  if downloaded >= MAX_FILES:
    break

  with open(dataset_location + page_file) as f:
    data = json.load(f)
  recordings = data['recordings']

  for r in recordings[:MAX_FILES_PER_PAGE]:
    if downloaded >= MAX_FILES:
      break

    # Get metadata from json
    rec_id = r['id']
    bird = r['en']
    download = r['file']
    ext = '.' + r['file-name'].split('.')[-1]

    # NOTE(review): guards against protocol-relative links ("//host/...");
    # confirm whether the API still returns them.
    if download.startswith('//'):
      download = 'https:' + download

    # Fetch first and open the output file only on success, so a failed
    # request does not leave an empty or bogus audio file behind.
    audio = requests.get(download, timeout=120)
    if audio.status_code == 200:
      with open(audio_location + bird + rec_id + ext, 'wb') as out_file:
        out_file.write(audio.content)
      downloaded += 1
    else:
      print(f'• Skipped recording {rec_id}: status-code {audio.status_code}')

    # Wait required time between recordings (randomized).
    # uniform() is needed here: randrange() only accepts integers.
    time.sleep(random.uniform(1.01, 1.5))

  # Wait required time between json pages (randomized)
  time.sleep(random.randrange(1, 5))