In [2]:
import pandas as pd
import requests
import json

## Gets a list of all the occurrence datasets published in GBIF

By using the registry and the occurrence API, this scripst get a list of all the occurrence datasets published on GBIF and get the number of occurrences they contain.
Running the script might take a while.

Alternatively, if you would like to get all the datasets with occurrences (which also include sampling-events and some checklists), you can simply use the folowing API call:

```
https://api.gbif.org/v1/occurrence/search?facet=datasetKey&limit=0&facetLimit=50000
```
(this is much faster)

In [3]:
GBIF_API = "http://api.gbif.org/v1/"
root_URL = "https://www.gbif.org/dataset/"
step = 500

In [4]:
datasets_with_count = pd.DataFrame()

endOfRecords = False
offset = 0
while not endOfRecords:
    param = {
        "offset": offset,
        "limit": step,
        "type": "OCCURRENCE"
    }
    # Get dataset
    response = requests.get(GBIF_API + "dataset", param)
    if response.ok:
        dataset_list = response.json()
        for dataset in dataset_list["results"]:
            datasets_with_count.at[dataset["key"], "URL"] = root_URL + dataset["key"]
            datasets_with_count.at[dataset["key"], "title"] = dataset["title"]
            if "description" in dataset:
                datasets_with_count.at[dataset["key"], "description"] = dataset["description"]
            
            # Get occurrence count
            response_occ_count = requests.get(GBIF_API + "occurrence/count", {"datasetKey": dataset["key"]})
            if response_occ_count.ok:
                datasets_with_count.at[dataset["key"], "count_occ"] = response_occ_count.json()
            else:
                print(response_occ_count)
                print(dataset["key"])
        offset += step
        endOfRecords = dataset_list["endOfRecords"]
    else:
        print(response)
        endOfRecords = True

In [5]:
datasets_with_count.to_csv('occurrence_datasets_with_count.txt', sep = '\t', index=False)

In [6]:
datasets_with_count

Unnamed: 0,URL,title,description,count_occ
0d7c6a1a-0aab-47dc-8256-f23fefac69cd,https://www.gbif.org/dataset/0d7c6a1a-0aab-47d...,Wild bees of Belgium,Multidiciplinary assessment of BELgian wild BE...,236585.0
a0a4d131-f53f-43b2-a1ba-254473b8a006,https://www.gbif.org/dataset/a0a4d131-f53f-43b...,Marine Invertebrate voucher specimens at the F...,This dataset is generated from the records for...,99609.0
73416d1c-e675-43b3-bbe4-672d5a9f9e8b,https://www.gbif.org/dataset/73416d1c-e675-43b...,Monitoreo de Flora y Fauna de los Acuerdos de ...,Estos datos dan cuenta de los inventarios de b...,2413.0
36cd5465-6625-4bf5-929d-e51a57583c9c,https://www.gbif.org/dataset/36cd5465-6625-4bf...,Caracterización de la avifauna asociada a la c...,"El Atrato, es considerado como uno de los río...",313.0
4bef41c6-daf5-48b0-9868-0548976dead0,https://www.gbif.org/dataset/4bef41c6-daf5-48b...,Caracterización de la fauna reptiliana asociad...,"El Atrato, es considerado como uno de los río...",60.0
...,...,...,...,...
85727f1e-f762-11e1-a439-00145eb45e9a,https://www.gbif.org/dataset/85727f1e-f762-11e...,"Herbarium Willing at Herbarium Berolinense, Be...",The “Herbarium Willing” is a private Phaneroga...,111899.0
85739778-f762-11e1-a439-00145eb45e9a,https://www.gbif.org/dataset/85739778-f762-11e...,"Lichens at Herbarium Berolinense, Berlin (B)",This database contains label information for a...,129129.0
8575f23e-f762-11e1-a439-00145eb45e9a,https://www.gbif.org/dataset/8575f23e-f762-11e...,PonTaurus collection,Plant specimens gathered in the Toroslar mount...,1534.0
85771146-f762-11e1-a439-00145eb45e9a,https://www.gbif.org/dataset/85771146-f762-11e...,"Staatliches Museum für Naturkunde Stuttgart, H...",,27351.0
