In [None]:
# Make the project folder the working directory for the rest of the notebook.
# NOTE(review): the path looks like a Colab Google Drive mount — it must already be mounted.
%cd /content/drive/MyDrive/BirdCallClassification

/content/drive/MyDrive/BirdCallClassification


In [None]:
import urllib.request, json
import sys
import os

# Root folder for downloaded data.
# NOTE(review): nothing visible in this notebook reads savePath (the functions below
# repeat the same literal) — confirm before changing or removing it.
savePath="/content/drive/MyDrive/BirdCallClassification/data/"

In [None]:
def save_json(searchTerms, birdName, country,
              baseDir="/content/drive/MyDrive/BirdCallClassification/data/"):
    """Query the xeno-canto API and store every result page as a JSON file.

    The pages are written to ``<baseDir>/<birdName without ':'>/<country>/``
    as ``jsondata_p<page>.json``; the directory is created when missing.

    Parameters
    ----------
    searchTerms : str
        Raw xeno-canto query, e.g. ``'Apus apus type:song'``.
    birdName : str
        Name used for the species sub-directory (any ':' is removed).
    country : str
        Name of the sub-directory below the species directory.
    baseDir : str, optional
        Root directory for downloaded data. Defaults to the project's data
        folder, so existing three-argument calls behave as before.

    Returns
    -------
    str
        The directory the JSON pages were written to.

    Raises
    ------
    urllib.error.URLError
        If a page cannot be fetched.
    KeyError
        If a response lacks ``'numPages'`` or ``'recordings'``.
    """
    # Local import keeps the function self-contained (the notebook only imports urllib.request).
    from urllib.parse import quote

    path = os.path.join(baseDir, birdName.replace(':', ''), country)
    if not os.path.exists(path):
        print("Creating subdirectory " + path + " for downloaded files...")
        # exist_ok guards against the directory appearing between the check and the call.
        os.makedirs(path, exist_ok=True)

    # Percent-encode the whole query (spaces, '&', '#', quotes, non-ASCII ...);
    # ':' stays literal because the API uses it for tags such as 'type:song'.
    query = quote(searchTerms, safe=':')

    numPages = 1
    page = 1
    numRecordings = 0
    # The real page count is only known after the first response, so it is
    # refreshed on every iteration.
    while page < numPages + 1:
        print("Loading page " + str(page) + "...")
        url = 'https://www.xeno-canto.org/api/2/recordings?query={0}&page={1}'.format(query, page)
        print(url)
        # Close the HTTP response deterministically instead of leaking it.
        with urllib.request.urlopen(url) as jsonPage:
            jsondata = json.loads(jsonPage.read().decode('utf-8'))
        filename = os.path.join(path, "jsondata_p" + str(page) + ".json")
        with open(filename, 'w') as outfile:
            json.dump(jsondata, outfile)
        numPages = jsondata['numPages']
        # Count what was actually received rather than assuming a fixed page size.
        numRecordings += len(jsondata['recordings'])
        page = page + 1

    print("Found ", numPages, " pages in total.")
    print("Saved json for ", numRecordings, " files")
    return path


# Reads the JSON pages saved under `path` and returns the list of values of the
# selected field (e.g. "id" - recording ID number, "type" - kind of bird sound
# such as call or song) for every recording found in those pages.
def read_data(searchTerm, path):
    """Collect one field from every recording stored in the saved JSON pages.

    Pages are read from ``<path>/jsondata_p<N>.json`` starting at N = 1; the
    number of pages to read is taken from the ``numPages`` entry of each page.

    Parameters
    ----------
    searchTerm : str
        Key to extract from each entry of ``recordings`` (e.g. ``'id'``).
    path : str
        Directory that holds the ``jsondata_p<N>.json`` files.

    Returns
    -------
    list
        The extracted values, in page order and then in recording order.
    """
    values = []
    totalPages = 1
    current = 1
    while current < totalPages + 1:
        pageFile = path + "/jsondata_p{0}.json".format(current)
        with open(pageFile, 'r') as handle:
            payload = json.load(handle)
        # every page carries the overall page count
        totalPages = payload['numPages']
        values.extend(record[searchTerm] for record in payload["recordings"])
        current += 1
    return values


# Downloads every sound file found with the search terms into the directory
# returned by save_json (data/<birdName>/<country>).
# Each file name has two parts: the bird name and the recording ID number.
def download(searchTerms, birdName, country):
    """Download every recording matching ``searchTerms`` as an mp3 file.

    The JSON result pages are saved first (``save_json``), then the recording
    IDs and download addresses are read back (``read_data``) and each file is
    stored next to the JSON pages as ``<birdName without ':'><id>.mp3``.

    Parameters
    ----------
    searchTerms : str
        Raw xeno-canto query, e.g. ``'Apus apus type:song'``.
    birdName : str
        Name used for the species directory and as the file-name prefix.
    country : str
        Name of the sub-directory below the species directory.

    Returns
    -------
    None
    """
    # Save the JSON pages; `path` is the directory they (and the mp3s) live in.
    path = save_json(searchTerms, birdName, country)
    # Recording IDs and download addresses, in matching order.
    filenamesID = read_data('id', path)
    fileaddress = read_data('file', path)
    numfiles = len(filenamesID)
    print("A total of ", numfiles, " files will be downloaded")
    # Strip ':' so the file name matches the directory name built by save_json.
    prefix = birdName.replace(':', '')
    for i, (recordingID, address) in enumerate(zip(filenamesID, fileaddress), start=1):
        target = os.path.join(path, prefix + recordingID + ".mp3")
        # Report the real destination, not a path the file is never written to.
        print("Saving file ", i, "/", numfiles, target)
        # Tolerate protocol-relative addresses ("//host/...") by defaulting to https.
        if address.startswith('//'):
            address = 'https:' + address
        urllib.request.urlretrieve(address, target)

In [None]:
# Species to download, as Latin binomials ("Genus species"). The space matters:
# the name is used verbatim in the search query, and only the directory/file
# prefix has the space removed by the download loop.
birds = [
    'Dendrocopos major',
    'Coccothraustes coccothraustes',
    'Delichon urbicum',
    'Apus apus',
    'Turdus pilaris',
    'Passer montanus',
    'Garrulus glandarius',
    'Podiceps gallardoi',
    'Antilophia bokermanni',
]

In [None]:
# Fetch the 'type:song' recordings of every species in `birds`; the space-free
# species name is passed as birdName and 'countries' as the country argument.
for bird in birds:
    download(bird + ' type:song', bird.replace(' ', ''), 'countries')

Creating subdirectory /content/drive/MyDrive/BirdCallClassification/data/Garrulusglandarius/countries for downloaded files...
Loading page 1...
https://www.xeno-canto.org/api/2/recordings?query=Garrulus%20glandarius%20type:song&page=1
Found  1  pages in total.
Saved json for  222  files
A total of  222  files will be downloaded
Saving file  1 / 222 /content/drive/MyDrive/BirdCallClassification/data/Garrulusglandarius728323.mp3
Saving file  2 / 222 /content/drive/MyDrive/BirdCallClassification/data/Garrulusglandarius728320.mp3
Saving file  3 / 222 /content/drive/MyDrive/BirdCallClassification/data/Garrulusglandarius728319.mp3
Saving file  4 / 222 /content/drive/MyDrive/BirdCallClassification/data/Garrulusglandarius728318.mp3
Saving file  5 / 222 /content/drive/MyDrive/BirdCallClassification/data/Garrulusglandarius728317.mp3
Saving file  6 / 222 /content/drive/MyDrive/BirdCallClassification/data/Garrulusglandarius721175.mp3
Saving file  7 / 222 /content/drive/MyDrive/BirdCallClassificati