In [1]:
import requests
import json
from tqdm import tqdm
import os  # Import os module for directory management
import zipfile
import csv
import sys

In [26]:
# Define the API endpoint and initial parameters
api_url = "https://api.gbif.org/v1/literature/search"
params = {
    "contentType": "literature",
    "literatureType": ["JOURNAL", "WORKING_PAPER"],
    "relevance": "GBIF_CITED",
    "peerReview": "true",
    "limit": 10,
    "offset": 0  # Start from the beginning
}

In [27]:
# Function to get data from the API
def fetch_data(params):
    response = requests.get(api_url, params=params)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Failed to fetch data: {response.status_code}")
        return None

In [28]:
# Function to extract all entries and filter those with content in gbifDownloadKey
def extract_filtered_entries():
    all_entries = []
    params['offset'] = 0  # Ensure offset starts at 0
    
    # Fetch initial data to determine total number of results
    initial_data = fetch_data(params)
    if not initial_data or 'count' not in initial_data:
        print("Failed to fetch initial data or count not available.")
        return []
    
    total_results = initial_data['count']
    print(f"Total results to fetch: {total_results}")
    
    with tqdm(total=total_results, desc="Fetching entries") as pbar:
        while True:
            data = fetch_data(params)
            if data and 'results' in data:
                # Filter entries that have content in gbifDownloadKey
                filtered_entries = [entry for entry in data['results'] if entry.get('gbifDownloadKey')]
                all_entries.extend(filtered_entries)
                pbar.update(len(data['results']))
                if len(data['results']) < params['limit']:
                    # No more data to fetch
                    break
                else:
                    # Move to the next page
                    params['offset'] += params['limit']
            else:
                break
            
    return all_entries

In [30]:
# Extract and filter entries
filtered_entries = extract_filtered_entries()

# Optionally, save the data to a file
with open('filtered_gbif_entries.json', 'w') as f:
    json.dump(filtered_entries, f, indent=2)

# Print the number of filtered entries fetched
print(f"Total filtered entries fetched: {len(filtered_entries)}")

Total results to fetch: 6071


Fetching entries: 100%|██████████| 6071/6071 [04:43<00:00, 21.43it/s]

Total filtered entries fetched: 454





### Summary:
- **Increase Field Size Limit**: The script sets the field size limit for CSV processing to 1,000,000 characters to handle large fields.
- **Load and Save Processed DOIs**: Functions to load and save DOIs to track which entries have been processed.
- **Download and Process Data**: The main function to download, unzip, process, and filter data, ensuring only preserved specimens are kept, and appending results to an output file on the D drive.
- **Directory Checks**: Ensures necessary directories exist before writing files.

In [31]:
# Increase the CSV field size limit to the maximum value
max_int = sys.maxsize
while True:
    # Decrease the max size until the csv.field_size_limit() function works
    try:
        csv.field_size_limit(max_int)
        break
    except OverflowError:
        max_int = int(max_int / 10)

# Function to load processed DOIs from skip file
def load_processed_dois(skip_file):
    print(f"Loading processed DOIs from {skip_file}")
    if os.path.exists(skip_file):
        with open(skip_file, 'r', encoding='utf-8') as file:
            return set(line.strip() for line in file)
    return set()

# Function to save a DOI to the skip file
def save_processed_doi(skip_file, doi):
    print(f"Saving DOI {doi} to {skip_file}")
    with open(skip_file, 'a', encoding='utf-8') as file:
        file.write(doi + '\n')

# Function to load downloaded keys from a file
def load_downloaded_keys(downloaded_keys_file):
    print(f"Loading downloaded keys from {downloaded_keys_file}")
    if os.path.exists(downloaded_keys_file):
        with open(downloaded_keys_file, 'r', encoding='utf-8') as file:
            return set(line.strip() for line in file)
    return set()

# Function to save a downloaded key to a file
def save_downloaded_key(downloaded_keys_file, key):
    print(f"Saving downloaded key {key} to {downloaded_keys_file}")
    with open(downloaded_keys_file, 'a', encoding='utf-8') as file:
        file.write(key + '\n')

# Function to download, unzip, process data using gbifDownloadKey, and delete zip files and extracted contents
def download_and_process_gbif_data(filtered_entries, skip_file, downloaded_keys_file):
    base_url = "https://api.gbif.org/v1/occurrence/download/request/"
    download_dir = "D:/gbif_downloads"  # Change to D drive
    error_log = "D:/gbif_errors/error_log.txt"  # Change to D drive and use a subdirectory
    output_file = "D:/gbif_outputs/output_data.csv"  # Change to D drive and use a subdirectory
    
    # Ensure the directories exist
    if not os.path.exists(download_dir):
        os.makedirs(download_dir)
    if not os.path.exists(os.path.dirname(error_log)):
        os.makedirs(os.path.dirname(error_log))
    if not os.path.exists(os.path.dirname(output_file)):
        os.makedirs(os.path.dirname(output_file))
    if not os.path.exists(os.path.dirname(skip_file)):
        os.makedirs(os.path.dirname(skip_file))
    if not os.path.exists(os.path.dirname(downloaded_keys_file)):
        os.makedirs(os.path.dirname(downloaded_keys_file))
    
    # Load processed DOIs
    processed_dois = load_processed_dois(skip_file)
    print(f"Loaded {len(processed_dois)} processed DOIs")

    # Load downloaded keys
    downloaded_keys = load_downloaded_keys(downloaded_keys_file)
    print(f"Loaded {len(downloaded_keys)} downloaded keys")
    
    # Determine if we need to write the header
    write_header = not os.path.exists(output_file)
    
    # Open the output CSV file in append mode
    with open(output_file, 'a', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['gbifID', 'year', 'countryCode', 'gbifDownloadKey', 'doi']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        
        # Write the header if the file did not exist before
        if write_header:
            writer.writeheader()
            print(f"Wrote header to {output_file}")
        
        with open(error_log, 'w') as error_file:
            for entry in tqdm(filtered_entries, desc="Downloading and processing GBIF data"):
                try:
                    identifiers = entry.get('identifiers', {})
                    doi = identifiers.get('doi', '')
                    if doi in processed_dois:
                        print(f"Skipping already processed DOI: {doi}")
                        continue
                    
                    key = entry.get('gbifDownloadKey', [])[0]
                    if key in downloaded_keys:
                        print(f"Skipping already downloaded key: {key}")
                        continue
                    
                    file_path = os.path.join(download_dir, f"{key}.zip")
                    
                    # Check if file already exists
                    if os.path.exists(file_path):
                        print(f"File {file_path} already exists. Skipping download.")
                        continue
                    
                    # Download the zip file
                    download_url = f"{base_url}{key}.zip"
                    print(f"Downloading {download_url}")
                    response = requests.get(download_url, stream=True)
                    if response.status_code == 200:
                        with open(file_path, 'wb') as file:
                            for chunk in response.iter_content(chunk_size=1024):
                                file.write(chunk)
                        print(f"Downloaded {file_path}")
                        # Save the downloaded key
                        save_downloaded_key(downloaded_keys_file, key)
                        downloaded_keys.add(key)
                    elif response.status_code == 404:
                        error_message = f"Failed to download data for key {key}: 404 Not Found"
                        error_file.write(error_message + '\n')
                        print(error_message)
                        continue
                    else:
                        error_message = f"Failed to download data for key {key}: {response.status_code}"
                        error_file.write(error_message + '\n')
                        print(error_message)
                        continue
                
                    # Unzip the downloaded file and extract required information
                    try:
                        print(f"Unzipping {file_path}")
                        with zipfile.ZipFile(file_path, 'r') as zip_ref:
                            zip_ref.extractall(download_dir)
                            extracted_files = zip_ref.namelist()
                            
                            # Check for occurrence.txt (Darwin Core archive) or single CSV file
                            occurrence_file_path = None
                            if 'occurrence.txt' in extracted_files:
                                occurrence_file_path = os.path.join(download_dir, 'occurrence.txt')
                            else:
                                csv_file_name = f"{key}.csv"
                                if csv_file_name in extracted_files:
                                    occurrence_file_path = os.path.join(download_dir, csv_file_name)
                            
                            if occurrence_file_path:
                                print(f"Processing {occurrence_file_path}")
                                with open(occurrence_file_path, newline='', encoding='utf-8') as occurrence_file:
                                    reader = csv.DictReader(occurrence_file, delimiter='\t')
                                    # Normalize column names to lower case
                                    reader.fieldnames = [field.lower() for field in reader.fieldnames]
                                    # Print the column names for debugging
                                    print(f"Column names: {reader.fieldnames}")
                                    for row in reader:
                                        # Check for the presence of necessary columns
                                        if 'gbifid' not in row or 'year' not in row or 'countrycode' not in row:
                                            raise KeyError("One or more expected columns are missing.")
                                        # Check for both 'basisofrecord' in a case-insensitive manner
                                        basis_of_record = row.get('basisofrecord', '').lower()
                                        if basis_of_record == 'preserved_specimen'.lower():
                                            writer.writerow({
                                                'gbifID': row['gbifid'],
                                                'year': row['year'],
                                                'countryCode': row['countrycode'],
                                                'gbifDownloadKey': key,
                                                'doi': doi
                                            })
                                print(f"Processed {occurrence_file_path}")
                                # Ensure the file is closed before deleting it
                                del reader
                                os.remove(occurrence_file_path)
                                print(f"Deleted extracted file {occurrence_file_path}")
                            
                            # Delete all other extracted files
                            for extracted_file in extracted_files:
                                extracted_file_path = os.path.join(download_dir, extracted_file)
                                if os.path.exists(extracted_file_path):
                                    os.remove(extracted_file_path)
                                    print(f"Deleted file {extracted_file_path}")
                        
                        # Ensure the zip file is closed before deleting it
                        del zip_ref
                        os.remove(file_path)
                        print(f"Deleted {file_path}")
                        
                        # Save the DOI to the skip file
                        save_processed_doi(skip_file, doi)
                        print(f"Saved DOI {doi} to skip file")
                    except zipfile.BadZipFile:
                        error_message = f"Bad zip file {file_path}"
                        error_file.write(error_message + '\n')
                        print(error_message)
                    except KeyError as e:
                        error_message = f"Missing expected column in file {file_path}: {str(e)}"
                        error_file.write(error_message + '\n')
                        print(error_message)
                    except Exception as e:
                        error_message = f"Failed to process file {file_path}: {str(e)}"
                        error_file.write(error_message + '\n')
                        print(error_message)
                except requests.exceptions.RequestException as e:
                    error_message = f"Request error for key {key}: {str(e)}"
                    error_file.write(error_message + '\n')
                    print(error_message)
                except Exception as e:
                    error_message = f"Unexpected error for key {key}: {str(e)}"
                    error_file.write(error_message + '\n')
                    print(error_message)

In [33]:
# Call the function with the filtered entries and specify the skip file and downloaded keys file
skip_file = "D:/gbif_skip_files/processed_dois.txt"  # Change to D drive and use a subdirectory
downloaded_keys_file = "D:/gbif_skip_files/downloaded_keys.txt"  # Change to D drive and use a subdirectory
download_and_process_gbif_data(filtered_entries, skip_file, downloaded_keys_file)

Loading processed DOIs from D:/gbif_skip_files/processed_dois.txt
Loaded 379 processed DOIs
Loading downloaded keys from D:/gbif_skip_files/downloaded_keys.txt
Loaded 403 downloaded keys


Downloading and processing GBIF data:   0%|          | 0/454 [00:00<?, ?it/s]

Skipping already processed DOI: 10.1007/s00606-023-01884-w
Skipping already processed DOI: 10.15560/20.1.144
Skipping already processed DOI: 
Skipping already processed DOI: 10.22201/fc.25942158e.2023.4.793
Skipping already processed DOI: 10.1007/s41348-024-00907-z
Skipping already processed DOI: 10.46471/gigabyte.117
Skipping already processed DOI: 10.35535/pfsyst-2023-0012
Skipping already processed DOI: 10.31111/nsnr/2024.58.1.f37
Skipping already processed DOI: 10.1649/0010-065x-77.4.542
Skipping already processed DOI: 10.5281/zenodo.10673563
Skipping already processed DOI: 10.1038/s41598-024-56930-5
Skipping already processed DOI: 
Skipping already processed DOI: 10.15560/20.2.268
Skipping already processed DOI: 10.21068/2539200x.1149
Skipping already processed DOI: 
Skipping already processed DOI: 10.15407/ukrbotj81.01.036
Skipping already processed DOI: 10.1016/j.tree.2024.01.006
Skipping already processed DOI: 10.1115/1.4064753
Skipping already processed DOI: 10.59893/abud.23(2

Downloading and processing GBIF data:  24%|██▎       | 107/454 [00:00<00:02, 139.29it/s]

Failed to download data for key 0397396-210914110416597: 404 Not Found
Skipping already processed DOI: 10.26897/1997-6011-2022-3-115-121
Skipping already processed DOI: 10.22201/fc.25942158e.2023.01.582
Skipping already processed DOI: 10.1007/s10531-023-02551-9
Skipping already processed DOI: 10.1016/j.cropro.2023.106202
Skipping already processed DOI: 10.15381/rpb.v29i4.23969
Skipping already processed DOI: 10.13057/biodiv/d240154
Skipping already processed DOI: 10.1002/ajb2.16137
Skipping already downloaded key: 0044554-210914110416597
Skipping already processed DOI: 10.1007/s11356-023-25455-1
Skipping already processed DOI: 10.17581/bp.2022.11114
Skipping already processed DOI: 10.3897/neobiota.81.95849
Skipping already processed DOI: 10.3390/biology12010141
Skipping already processed DOI: 10.3390/land12010190
Skipping already downloaded key: 0137431-220831081235567
Skipping already processed DOI: 10.15407/zoo2022.06.435
Skipping already processed DOI: 10.11606/issn.2316-9079.v21i2p

Downloading and processing GBIF data:  43%|████▎     | 196/454 [00:01<00:01, 130.20it/s]

Failed to download data for key 0082730-200613084148143: 404 Not Found
Skipping already processed DOI: 
Skipping already processed DOI: 10.11646/zootaxa.5124.1.5
Skipping already processed DOI: 10.1111/plb.13416
Skipping already processed DOI: 10.1086/720154
Skipping already processed DOI: 10.1002/ecy.3698
Skipping already processed DOI: 10.31055/1851.2372.V57.N1.33926
Skipping already processed DOI: 10.1016/j.pld.2022.03.004
Skipping already processed DOI: 10.7202/1075815ar
Skipping already processed DOI: 10.15407/ukrbotj78.06.373
Skipping already processed DOI: 
Skipping already processed DOI: 
Skipping already processed DOI: 
Skipping already processed DOI: 10.32800/abc.2022.45.0107
Skipping already processed DOI: 10.31687/saremmn.21.28.2.0.05.e0584
Skipping already processed DOI: 10.48156/1388.2022.1917157
Skipping already processed DOI: 10.1007/s11557-021-01757-x
Skipping already processed DOI: 10.5252/zoosystema2022v44a2
Skipping already processed DOI: 10.3161/15081109acc2021.23.

Downloading and processing GBIF data:  83%|████████▎ | 375/454 [00:02<00:00, 181.74it/s]

Failed to download data for key 0006355-180508205500799: 404 Not Found
Skipping already processed DOI: 10.11646/zootaxa.4656.3.13
Skipping already processed DOI: 10.1080/03721426.2019.1655935
Skipping already processed DOI: 10.31111/nsnr/2018.52.2.407
Skipping already processed DOI: 10.11646/zootaxa.4590.1.9
Skipping already processed DOI: 10.1007/s00606-019-01604-3
Skipping already processed DOI: 10.7717/peerj.10088
Skipping already processed DOI: 10.1590/1982-0224-20170046
Skipping already processed DOI: 10.31111/nsnr/2019.53.1.89
Skipping already processed DOI: 10.1371/journal.pone.0236042
Skipping already processed DOI: 10.15560/16.1.169
Skipping already processed DOI: 10.3389/fpls.2018.01664
Skipping already processed DOI: 10.1007/s12225-020-9867-5
Skipping already processed DOI: 10.3897/bdj.7.e34211
Skipping already processed DOI: 10.3391/mbi.2020.11.3.02
Skipping already processed DOI: 10.3800/pbr.14.22
Skipping already processed DOI: 10.3956/2017-93.4.234
Skipping already downl

Downloading and processing GBIF data:  83%|████████▎ | 375/454 [00:17<00:00, 181.74it/s]

Downloaded D:/gbif_downloads\0044342-200613084148143.zip
Saving downloaded key 0044342-200613084148143 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0044342-200613084148143.zip
Processing D:/gbif_downloads\0044342-200613084148143.csv
Column names: ['gbifid', 'datasetkey', 'occurrenceid', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'infraspecificepithet', 'taxonrank', 'scientificname', 'verbatimscientificname', 'verbatimscientificnameauthorship', 'countrycode', 'locality', 'stateprovince', 'occurrencestatus', 'individualcount', 'publishingorgkey', 'decimallatitude', 'decimallongitude', 'coordinateuncertaintyinmeters', 'coordinateprecision', 'elevation', 'elevationaccuracy', 'depth', 'depthaccuracy', 'eventdate', 'day', 'month', 'year', 'taxonkey', 'specieskey', 'basisofrecord', 'institutioncode', 'collectioncode', 'catalognumber', 'recordnumber', 'identifiedby', 'dateidentified', 'license', 'rightsholder', 'recordedby', 'typestatus', 'est

Downloading and processing GBIF data:  91%|█████████ | 413/454 [02:32<00:25,  1.61it/s] 

Processed D:/gbif_downloads\0044342-200613084148143.csv
Deleted extracted file D:/gbif_downloads\0044342-200613084148143.csv
Deleted D:/gbif_downloads\0044342-200613084148143.zip
Saving DOI 10.18475/cjos.v50i2.a6 to D:/gbif_skip_files/processed_dois.txt
Saved DOI 10.18475/cjos.v50i2.a6 to skip file
Skipping already processed DOI: 
Downloading https://api.gbif.org/v1/occurrence/download/request/0027179-200221144449610.zip


Downloading and processing GBIF data:  91%|█████████▏| 415/454 [02:32<00:24,  1.62it/s]

Downloaded D:/gbif_downloads\0027179-200221144449610.zip
Saving downloaded key 0027179-200221144449610 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0027179-200221144449610.zip
Processing D:/gbif_downloads\0027179-200221144449610.csv
Column names: ['gbifid', 'datasetkey', 'occurrenceid', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'infraspecificepithet', 'taxonrank', 'scientificname', 'verbatimscientificname', 'verbatimscientificnameauthorship', 'countrycode', 'locality', 'stateprovince', 'occurrencestatus', 'individualcount', 'publishingorgkey', 'decimallatitude', 'decimallongitude', 'coordinateuncertaintyinmeters', 'coordinateprecision', 'elevation', 'elevationaccuracy', 'depth', 'depthaccuracy', 'eventdate', 'day', 'month', 'year', 'taxonkey', 'specieskey', 'basisofrecord', 'institutioncode', 'collectioncode', 'catalognumber', 'recordnumber', 'identifiedby', 'dateidentified', 'license', 'rightsholder', 'recordedby', 'typestatus', 'est

Downloaded D:/gbif_downloads\0054247-200613084148143.zip
Saving downloaded key 0054247-200613084148143 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0054247-200613084148143.zip
Processing D:/gbif_downloads\occurrence.txt
Column names: ['gbifid', 'abstract', 'accessrights', 'accrualmethod', 'accrualperiodicity', 'accrualpolicy', 'alternative', 'audience', 'available', 'bibliographiccitation', 'conformsto', 'contributor', 'coverage', 'created', 'creator', 'date', 'dateaccepted', 'datecopyrighted', 'datesubmitted', 'description', 'educationlevel', 'extent', 'format', 'hasformat', 'haspart', 'hasversion', 'identifier', 'instructionalmethod', 'isformatof', 'ispartof', 'isreferencedby', 'isreplacedby', 'isrequiredby', 'isversionof', 'issued', 'language', 'license', 'mediator', 'medium', 'modified', 'provenance', 'publisher', 'references', 'relation', 'replaces', 'requires', 'rights', 'rightsholder', 'source', 'spatial', 'subject', 'tableofcontents', 'temporal', 'title

Downloaded D:/gbif_downloads\0013343-190621201848488.zip
Saving downloaded key 0013343-190621201848488 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0013343-190621201848488.zip
Processing D:/gbif_downloads\0013343-190621201848488.csv
Column names: ['gbifid', 'datasetkey', 'occurrenceid', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'infraspecificepithet', 'taxonrank', 'scientificname', 'countrycode', 'locality', 'publishingorgkey', 'decimallatitude', 'decimallongitude', 'coordinateuncertaintyinmeters', 'coordinateprecision', 'elevation', 'elevationaccuracy', 'depth', 'depthaccuracy', 'eventdate', 'day', 'month', 'year', 'taxonkey', 'specieskey', 'basisofrecord', 'institutioncode', 'collectioncode', 'catalognumber', 'recordnumber', 'identifiedby', 'dateidentified', 'license', 'rightsholder', 'recordedby', 'typestatus', 'establishmentmeans', 'lastinterpreted', 'mediatype', 'issue']
Processed D:/gbif_downloads\0013343-190621201848488.csv
Del

Downloading and processing GBIF data:  94%|█████████▍| 427/454 [02:39<00:16,  1.63it/s]

Downloaded D:/gbif_downloads\0008874-190813142620410.zip
Saving downloaded key 0008874-190813142620410 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0008874-190813142620410.zip
Processing D:/gbif_downloads\0008874-190813142620410.csv
Column names: ['gbifid', 'datasetkey', 'occurrenceid', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'infraspecificepithet', 'taxonrank', 'scientificname', 'verbatimscientificname', 'verbatimscientificnameauthorship', 'countrycode', 'locality', 'stateprovince', 'occurrencestatus', 'individualcount', 'publishingorgkey', 'decimallatitude', 'decimallongitude', 'coordinateuncertaintyinmeters', 'coordinateprecision', 'elevation', 'elevationaccuracy', 'depth', 'depthaccuracy', 'eventdate', 'day', 'month', 'year', 'taxonkey', 'specieskey', 'basisofrecord', 'institutioncode', 'collectioncode', 'catalognumber', 'recordnumber', 'identifiedby', 'dateidentified', 'license', 'rightsholder', 'recordedby', 'typestatus', 'est

Processed D:/gbif_downloads\0030482-181108115102211.csv
Deleted extracted file D:/gbif_downloads\0030482-181108115102211.csv
Deleted D:/gbif_downloads\0030482-181108115102211.zip
Saving DOI 10.1038/s41477-019-0535-4 to D:/gbif_skip_files/processed_dois.txt
Saved DOI 10.1038/s41477-019-0535-4 to skip file
Downloading https://api.gbif.org/v1/occurrence/download/request/0004128-191105090559680.zip
Downloaded D:/gbif_downloads\0004128-191105090559680.zip
Saving downloaded key 0004128-191105090559680 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0004128-191105090559680.zip
Processing D:/gbif_downloads\0004128-191105090559680.csv
Column names: ['gbifid', 'datasetkey', 'occurrenceid', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'infraspecificepithet', 'taxonrank', 'scientificname', 'verbatimscientificname', 'verbatimscientificnameauthorship', 'countrycode', 'locality', 'stateprovince', 'occurrencestatus', 'individualcount', 'publishingorgkey', 

Downloaded D:/gbif_downloads\0020432-190415153152247.zip
Saving downloaded key 0020432-190415153152247 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0020432-190415153152247.zip
Processing D:/gbif_downloads\0020432-190415153152247.csv
Column names: ['gbifid', 'datasetkey', 'occurrenceid', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'infraspecificepithet', 'taxonrank', 'scientificname', 'countrycode', 'locality', 'publishingorgkey', 'decimallatitude', 'decimallongitude', 'coordinateuncertaintyinmeters', 'coordinateprecision', 'elevation', 'elevationaccuracy', 'depth', 'depthaccuracy', 'eventdate', 'day', 'month', 'year', 'taxonkey', 'specieskey', 'basisofrecord', 'institutioncode', 'collectioncode', 'catalognumber', 'recordnumber', 'identifiedby', 'dateidentified', 'license', 'rightsholder', 'recordedby', 'typestatus', 'establishmentmeans', 'lastinterpreted', 'mediatype', 'issue']
Processed D:/gbif_downloads\0020432-190415153152247.csv
Del

Downloading and processing GBIF data:  96%|█████████▌| 435/454 [02:48<00:12,  1.54it/s]

Downloaded D:/gbif_downloads\0010627-191105090559680.zip
Saving downloaded key 0010627-191105090559680 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0010627-191105090559680.zip
Processing D:/gbif_downloads\occurrence.txt
Column names: ['gbifid', 'abstract', 'accessrights', 'accrualmethod', 'accrualperiodicity', 'accrualpolicy', 'alternative', 'audience', 'available', 'bibliographiccitation', 'conformsto', 'contributor', 'coverage', 'created', 'creator', 'date', 'dateaccepted', 'datecopyrighted', 'datesubmitted', 'description', 'educationlevel', 'extent', 'format', 'hasformat', 'haspart', 'hasversion', 'identifier', 'instructionalmethod', 'isformatof', 'ispartof', 'isreferencedby', 'isreplacedby', 'isrequiredby', 'isversionof', 'issued', 'language', 'license', 'mediator', 'medium', 'modified', 'provenance', 'publisher', 'references', 'relation', 'replaces', 'requires', 'rights', 'rightsholder', 'source', 'spatial', 'subject', 'tableofcontents', 'temporal', 'title

Downloaded D:/gbif_downloads\0053080-200221144449610.zip
Saving downloaded key 0053080-200221144449610 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0053080-200221144449610.zip
Processing D:/gbif_downloads\occurrence.txt
Column names: ['gbifid', 'abstract', 'accessrights', 'accrualmethod', 'accrualperiodicity', 'accrualpolicy', 'alternative', 'audience', 'available', 'bibliographiccitation', 'conformsto', 'contributor', 'coverage', 'created', 'creator', 'date', 'dateaccepted', 'datecopyrighted', 'datesubmitted', 'description', 'educationlevel', 'extent', 'format', 'hasformat', 'haspart', 'hasversion', 'identifier', 'instructionalmethod', 'isformatof', 'ispartof', 'isreferencedby', 'isreplacedby', 'isrequiredby', 'isversionof', 'issued', 'language', 'license', 'mediator', 'medium', 'modified', 'provenance', 'publisher', 'references', 'relation', 'replaces', 'requires', 'rights', 'rightsholder', 'source', 'spatial', 'subject', 'tableofcontents', 'temporal', 'title

Downloaded D:/gbif_downloads\0021352-191105090559680.zip
Saving downloaded key 0021352-191105090559680 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0021352-191105090559680.zip
Processing D:/gbif_downloads\occurrence.txt
Column names: ['gbifid', 'abstract', 'accessrights', 'accrualmethod', 'accrualperiodicity', 'accrualpolicy', 'alternative', 'audience', 'available', 'bibliographiccitation', 'conformsto', 'contributor', 'coverage', 'created', 'creator', 'date', 'dateaccepted', 'datecopyrighted', 'datesubmitted', 'description', 'educationlevel', 'extent', 'format', 'hasformat', 'haspart', 'hasversion', 'identifier', 'instructionalmethod', 'isformatof', 'ispartof', 'isreferencedby', 'isreplacedby', 'isrequiredby', 'isversionof', 'issued', 'language', 'license', 'mediator', 'medium', 'modified', 'provenance', 'publisher', 'references', 'relation', 'replaces', 'requires', 'rights', 'rightsholder', 'source', 'spatial', 'subject', 'tableofcontents', 'temporal', 'title

Downloading and processing GBIF data:  97%|█████████▋| 440/454 [02:52<00:09,  1.50it/s]

Downloaded D:/gbif_downloads\0051643-200221144449610.zip
Saving downloaded key 0051643-200221144449610 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0051643-200221144449610.zip
Processing D:/gbif_downloads\occurrence.txt
Column names: ['gbifid', 'abstract', 'accessrights', 'accrualmethod', 'accrualperiodicity', 'accrualpolicy', 'alternative', 'audience', 'available', 'bibliographiccitation', 'conformsto', 'contributor', 'coverage', 'created', 'creator', 'date', 'dateaccepted', 'datecopyrighted', 'datesubmitted', 'description', 'educationlevel', 'extent', 'format', 'hasformat', 'haspart', 'hasversion', 'identifier', 'instructionalmethod', 'isformatof', 'ispartof', 'isreferencedby', 'isreplacedby', 'isrequiredby', 'isversionof', 'issued', 'language', 'license', 'mediator', 'medium', 'modified', 'provenance', 'publisher', 'references', 'relation', 'replaces', 'requires', 'rights', 'rightsholder', 'source', 'spatial', 'subject', 'tableofcontents', 'temporal', 'title

Downloading and processing GBIF data:  98%|█████████▊| 444/454 [02:55<00:06,  1.52it/s]

Downloaded D:/gbif_downloads\0036119-200613084148143.zip
Saving downloaded key 0036119-200613084148143 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0036119-200613084148143.zip
Processing D:/gbif_downloads\occurrence.txt
Column names: ['gbifid', 'abstract', 'accessrights', 'accrualmethod', 'accrualperiodicity', 'accrualpolicy', 'alternative', 'audience', 'available', 'bibliographiccitation', 'conformsto', 'contributor', 'coverage', 'created', 'creator', 'date', 'dateaccepted', 'datecopyrighted', 'datesubmitted', 'description', 'educationlevel', 'extent', 'format', 'hasformat', 'haspart', 'hasversion', 'identifier', 'instructionalmethod', 'isformatof', 'ispartof', 'isreferencedby', 'isreplacedby', 'isrequiredby', 'isversionof', 'issued', 'language', 'license', 'mediator', 'medium', 'modified', 'provenance', 'publisher', 'references', 'relation', 'replaces', 'requires', 'rights', 'rightsholder', 'source', 'spatial', 'subject', 'tableofcontents', 'temporal', 'title

Downloading and processing GBIF data:  98%|█████████▊| 447/454 [02:56<00:04,  1.54it/s]

Downloaded D:/gbif_downloads\0008957-190415153152247.zip
Saving downloaded key 0008957-190415153152247 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0008957-190415153152247.zip
Processing D:/gbif_downloads\occurrence.txt
Column names: ['gbifid', 'abstract', 'accessrights', 'accrualmethod', 'accrualperiodicity', 'accrualpolicy', 'alternative', 'audience', 'available', 'bibliographiccitation', 'conformsto', 'contributor', 'coverage', 'created', 'creator', 'date', 'dateaccepted', 'datecopyrighted', 'datesubmitted', 'description', 'educationlevel', 'extent', 'format', 'hasformat', 'haspart', 'hasversion', 'identifier', 'instructionalmethod', 'isformatof', 'ispartof', 'isreferencedby', 'isreplacedby', 'isrequiredby', 'isversionof', 'issued', 'language', 'license', 'mediator', 'medium', 'modified', 'provenance', 'publisher', 'references', 'relation', 'replaces', 'requires', 'rights', 'rightsholder', 'source', 'spatial', 'subject', 'tableofcontents', 'temporal', 'title

Downloading and processing GBIF data:  99%|█████████▉| 449/454 [02:58<00:03,  1.53it/s]

Downloaded D:/gbif_downloads\0009319-190813142620410.zip
Saving downloaded key 0009319-190813142620410 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0009319-190813142620410.zip
Processing D:/gbif_downloads\0009319-190813142620410.csv
Column names: ['gbifid', 'datasetkey', 'occurrenceid', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'infraspecificepithet', 'taxonrank', 'scientificname', 'verbatimscientificname', 'verbatimscientificnameauthorship', 'countrycode', 'locality', 'stateprovince', 'occurrencestatus', 'individualcount', 'publishingorgkey', 'decimallatitude', 'decimallongitude', 'coordinateuncertaintyinmeters', 'coordinateprecision', 'elevation', 'elevationaccuracy', 'depth', 'depthaccuracy', 'eventdate', 'day', 'month', 'year', 'taxonkey', 'specieskey', 'basisofrecord', 'institutioncode', 'collectioncode', 'catalognumber', 'recordnumber', 'identifiedby', 'dateidentified', 'license', 'rightsholder', 'recordedby', 'typestatus', 'est

Downloading and processing GBIF data:  99%|█████████▉| 451/454 [03:00<00:02,  1.47it/s]

Processed D:/gbif_downloads\0006532-191105090559680.csv
Deleted extracted file D:/gbif_downloads\0006532-191105090559680.csv
Deleted D:/gbif_downloads\0006532-191105090559680.zip
Saving DOI 10.1111/jen.12767 to D:/gbif_skip_files/processed_dois.txt
Saved DOI 10.1111/jen.12767 to skip file
Downloading https://api.gbif.org/v1/occurrence/download/request/0009932-191105090559680.zip


Downloading and processing GBIF data: 100%|█████████▉| 452/454 [03:00<00:01,  1.45it/s]

Downloaded D:/gbif_downloads\0009932-191105090559680.zip
Saving downloaded key 0009932-191105090559680 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0009932-191105090559680.zip
Processing D:/gbif_downloads\occurrence.txt
Column names: ['gbifid', 'abstract', 'accessrights', 'accrualmethod', 'accrualperiodicity', 'accrualpolicy', 'alternative', 'audience', 'available', 'bibliographiccitation', 'conformsto', 'contributor', 'coverage', 'created', 'creator', 'date', 'dateaccepted', 'datecopyrighted', 'datesubmitted', 'description', 'educationlevel', 'extent', 'format', 'hasformat', 'haspart', 'hasversion', 'identifier', 'instructionalmethod', 'isformatof', 'ispartof', 'isreferencedby', 'isreplacedby', 'isrequiredby', 'isversionof', 'issued', 'language', 'license', 'mediator', 'medium', 'modified', 'provenance', 'publisher', 'references', 'relation', 'replaces', 'requires', 'rights', 'rightsholder', 'source', 'spatial', 'subject', 'tableofcontents', 'temporal', 'title

Downloading and processing GBIF data: 100%|█████████▉| 453/454 [03:03<00:00,  1.26it/s]

Failed to process file D:/gbif_downloads\0005654-190307172214381.zip: 'NoneType' object has no attribute 'lower'
Downloading https://api.gbif.org/v1/occurrence/download/request/0012294-190918142434337.zip


Downloading and processing GBIF data: 100%|██████████| 454/454 [03:03<00:00,  2.47it/s]

Downloaded D:/gbif_downloads\0012294-190918142434337.zip
Saving downloaded key 0012294-190918142434337 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0012294-190918142434337.zip
Processing D:/gbif_downloads\occurrence.txt
Column names: ['gbifid', 'abstract', 'accessrights', 'accrualmethod', 'accrualperiodicity', 'accrualpolicy', 'alternative', 'audience', 'available', 'bibliographiccitation', 'conformsto', 'contributor', 'coverage', 'created', 'creator', 'date', 'dateaccepted', 'datecopyrighted', 'datesubmitted', 'description', 'educationlevel', 'extent', 'format', 'hasformat', 'haspart', 'hasversion', 'identifier', 'instructionalmethod', 'isformatof', 'ispartof', 'isreferencedby', 'isreplacedby', 'isrequiredby', 'isversionof', 'issued', 'language', 'license', 'mediator', 'medium', 'modified', 'provenance', 'publisher', 'references', 'relation', 'replaces', 'requires', 'rights', 'rightsholder', 'source', 'spatial', 'subject', 'tableofcontents', 'temporal', 'title


