In [4]:
import requests
import json
from tqdm import tqdm
import os  # Import os module for directory management
import zipfile
import csv
import sys

In [5]:
# GBIF literature-search endpoint queried by this notebook.
api_url = "https://api.gbif.org/v1/literature/search"

# Query-string filters plus paging state sent with every request.
# `limit` is the page size; `offset` is the index of the first record of
# the page and starts at zero.
params = dict(
    contentType="literature",
    literatureType=["journal", "working_paper"],
    relevance="GBIF_USED",
    peerReview="true",
    limit=10,
    offset=0,
)

In [6]:
# Function to get data from the API
def fetch_data(params, timeout=30):
    """Fetch one page of results from the literature search endpoint.

    Args:
        params: Mapping of query-string parameters passed to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The decoded JSON body when the server answers with HTTP 200,
        otherwise ``None`` (non-200 status, network failure, or a body that
        is not valid JSON). The reason is printed in every failure case.
    """
    try:
        response = requests.get(api_url, params=params, timeout=timeout)
        if response.status_code == 200:
            return response.json()
    except (requests.exceptions.RequestException, ValueError) as exc:
        # Connection errors, timeouts and undecodable bodies are reported the
        # same way as a bad status so callers only have to handle ``None``.
        print(f"Failed to fetch data: {exc}")
        return None

    print(f"Failed to fetch data: {response.status_code}")
    return None

In [7]:
# Function to extract all entries and filter those with content in gbifDownloadKey
def extract_filtered_entries(query_params=None):
    """Page through the search results and keep entries with a download key.

    Args:
        query_params: Optional mapping of search parameters. When omitted the
            module-level ``params`` dict is used. The mapping is copied, so
            the caller's dict (including its ``offset``) is never modified.

    Returns:
        A list of result entries whose ``gbifDownloadKey`` field is truthy.
        An empty list is returned when the first page cannot be fetched or
        carries no ``count``. If a later page fails, the entries collected so
        far are returned and a warning is printed.
    """
    # Work on a private copy so paging state does not leak into notebook globals.
    page_params = dict(params if query_params is None else query_params)
    page_params['offset'] = 0  # Ensure offset starts at 0
    page_size = page_params['limit']

    # The first page also tells us the total number of results.
    data = fetch_data(page_params)
    if not data or 'count' not in data:
        print("Failed to fetch initial data or count not available.")
        return []

    total_results = data['count']
    print(f"Total results to fetch: {total_results}")

    all_entries = []
    with tqdm(total=total_results, desc="Fetching entries") as pbar:
        while True:
            if not data or 'results' not in data:
                # A page failed part-way through: make the truncation visible
                # instead of silently returning a short list.
                print(
                    f"Stopped at offset {page_params['offset']} because a page "
                    "could not be fetched; the returned entries are incomplete."
                )
                break

            results = data['results']
            # Filter entries that have content in gbifDownloadKey
            all_entries.extend(entry for entry in results if entry.get('gbifDownloadKey'))
            pbar.update(len(results))

            if len(results) < page_size:
                # A short page means there is no more data to fetch.
                break

            # Move to the next page (the first page is reused, not re-fetched).
            page_params['offset'] += page_size
            data = fetch_data(page_params)

    return all_entries

In [8]:
# Run the paginated fetch; only entries carrying a gbifDownloadKey are returned.
filtered_entries = extract_filtered_entries()

# Keep a JSON copy of the filtered entries on disk.
with open('filtered_gbif_entries.json', 'w') as entries_json:
    entries_json.write(json.dumps(filtered_entries, indent=2))

# Report how many entries survived the filter.
print(f"Total filtered entries fetched: {len(filtered_entries)}")

Total results to fetch: 10585


Fetching entries: 100%|██████████| 10585/10585 [04:38<00:00, 38.05it/s]


Total filtered entries fetched: 3572


### Summary:
- **Increase Field Size Limit**: The script sets the field size limit for CSV processing to 1,000,000 characters to handle large fields.
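- **Load and Save Processed DOIs and Download Keys**: Helper functions read and append to two plain-text tracking files — one listing the DOIs that have already been processed and one listing the download keys that have already been fetched — so that an interrupted run can resume without repeating work.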
- **Download and Process Data**: The main function to download, unzip, process, and filter data, ensuring only preserved specimens are kept, and appending results to an output file on the D drive.
- **Directory Checks**: Ensures necessary directories exist before writing files.

In [34]:
# Raise the csv module's per-field size cap to one million characters so
# very long fields do not abort parsing.
csv.field_size_limit(1_000_000)

# Read the set of already-processed DOIs from the skip file
def load_processed_dois(skip_file):
    """Return the stripped lines of ``skip_file`` as a set.

    A missing file yields an empty set. Every line is whitespace-stripped;
    blank lines therefore contribute the empty string.
    """
    print(f"Loading processed DOIs from {skip_file}")
    if not os.path.exists(skip_file):
        return set()
    with open(skip_file, 'r', encoding='utf-8') as handle:
        return {raw_line.strip() for raw_line in handle}

# Append one DOI to the skip file
def save_processed_doi(skip_file, doi):
    """Append ``doi`` followed by a newline to ``skip_file`` (UTF-8)."""
    print(f"Saving DOI {doi} to {skip_file}")
    with open(skip_file, mode='a', encoding='utf-8') as handle:
        record = doi + '\n'
        handle.write(record)

# Read the set of already-downloaded keys from the tracking file
def load_downloaded_keys(downloaded_keys_file):
    """Return the stripped lines of ``downloaded_keys_file`` as a set.

    A missing file yields an empty set. Every line is whitespace-stripped;
    blank lines therefore contribute the empty string.
    """
    print(f"Loading downloaded keys from {downloaded_keys_file}")
    if not os.path.exists(downloaded_keys_file):
        return set()
    with open(downloaded_keys_file, 'r', encoding='utf-8') as handle:
        return {raw_line.strip() for raw_line in handle}

# Append one download key to the tracking file
def save_downloaded_key(downloaded_keys_file, key):
    """Append ``key`` followed by a newline to ``downloaded_keys_file`` (UTF-8)."""
    print(f"Saving downloaded key {key} to {downloaded_keys_file}")
    with open(downloaded_keys_file, mode='a', encoding='utf-8') as handle:
        record = key + '\n'
        handle.write(record)

def _ensure_parent_dir(path):
    """Create the directory that will hold ``path`` if it does not exist yet."""
    parent = os.path.dirname(path)
    # dirname is '' for a bare file name, and os.makedirs('') raises.
    if parent:
        os.makedirs(parent, exist_ok=True)


def _log_error(error_file, message):
    """Write ``message`` to the error log (flushed immediately) and echo it."""
    error_file.write(message + '\n')
    error_file.flush()  # keep the log useful even if the run is interrupted
    print(message)


def _download_zip(download_url, file_path, timeout):
    """Stream ``download_url`` into ``file_path`` and return the HTTP status.

    The body is written to ``file_path + '.part'`` and only renamed to
    ``file_path`` once it has been received completely, so an interrupted
    transfer never leaves a truncated archive under the final name. Nothing
    is written when the status is not 200.
    """
    part_path = file_path + '.part'
    with requests.get(download_url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            return response.status_code
        try:
            with open(part_path, 'wb') as part_file:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    part_file.write(chunk)
        except BaseException:
            # Drop the incomplete file, then let the caller see the error.
            if os.path.exists(part_path):
                os.remove(part_path)
            raise
    os.replace(part_path, file_path)
    return 200


def _write_preserved_specimens(file_path, download_dir, key, doi, writer):
    """Copy the PRESERVED_SPECIMEN rows of one archive to ``writer``.

    Only the occurrence table (``occurrence.txt`` or ``<key>.csv``) is
    extracted from the zip, and it is deleted again afterwards even when
    parsing fails.

    Returns:
        ``True`` when an occurrence table was found and read, ``False`` when
        the archive contains neither expected member.

    Raises:
        zipfile.BadZipFile: if ``file_path`` is not a valid zip archive.
    """
    with zipfile.ZipFile(file_path, 'r') as zip_ref:
        members = zip_ref.namelist()
        print(f"Archive contains {len(members)} file(s)")

        # Check for occurrence.txt (Darwin Core archive) or single CSV file
        csv_file_name = f"{key}.csv"
        if 'occurrence.txt' in members:
            member = 'occurrence.txt'
        elif csv_file_name in members:
            member = csv_file_name
        else:
            return False
        occurrence_file_path = zip_ref.extract(member, download_dir)

    try:
        print(f"Processing {occurrence_file_path}")
        with open(occurrence_file_path, newline='', encoding='utf-8') as occurrence_file:
            # GBIF's tab-delimited exports are documented as unquoted; with the
            # default quote handling a stray '"' in a field swallows the rows
            # that follow it.
            reader = csv.DictReader(occurrence_file, delimiter='\t', quoting=csv.QUOTE_NONE)
            for row in reader:
                if row.get('basisOfRecord') == 'PRESERVED_SPECIMEN':
                    writer.writerow({
                        'gbifID': row.get('gbifID'),
                        'year': row.get('year'),
                        'countryCode': row.get('countryCode'),
                        'gbifDownloadKey': key,
                        'doi': doi
                    })
        print(f"Processed {occurrence_file_path}")
    finally:
        os.remove(occurrence_file_path)
        print(f"Deleted extracted file {occurrence_file_path}")
    return True


# Function to download, unzip, process data using gbifDownloadKey, and delete zip files and extracted contents
def download_and_process_gbif_data(filtered_entries, skip_file, downloaded_keys_file,
                                   download_dir="D:/gbif_downloads",
                                   error_log="D:/gbif_errors/error_log.txt",
                                   output_file="D:/gbif_outputs/output_data.csv",
                                   request_timeout=60):
    """Download each entry's GBIF archive and append its preserved specimens to a CSV.

    For every entry the first value of ``gbifDownloadKey`` is downloaded as a
    zip, the occurrence table inside is filtered to rows whose
    ``basisOfRecord`` is ``PRESERVED_SPECIMEN``, and ``gbifID``, ``year`` and
    ``countryCode`` are appended to ``output_file`` together with the download
    key and the entry's DOI. The zip is deleted after successful processing.

    Args:
        filtered_entries: Iterable of dict entries; ``identifiers['doi']`` and
            ``gbifDownloadKey`` are read from each one.
        skip_file: Text file of processed DOIs (one per line). Entries whose
            DOI is listed are skipped; DOIs are appended as they complete.
        downloaded_keys_file: Text file of download keys already fetched.
            Entries whose key is listed are skipped.
        download_dir: Working directory for zips and extracted tables.
        error_log: File that failures are appended to.
        output_file: CSV that matching rows are appended to.
        request_timeout: Seconds passed as ``timeout`` to ``requests.get``.

    Returns:
        None. Failures for individual entries are logged and the loop moves on.
    """
    base_url = "https://api.gbif.org/v1/occurrence/download/request/"

    # Ensure the directories exist
    os.makedirs(download_dir, exist_ok=True)
    for tracked_path in (error_log, output_file, skip_file, downloaded_keys_file):
        _ensure_parent_dir(tracked_path)

    # Load processed DOIs
    processed_dois = load_processed_dois(skip_file)
    print(f"Loaded {len(processed_dois)} processed DOIs")

    # Load downloaded keys
    downloaded_keys = load_downloaded_keys(downloaded_keys_file)
    print(f"Loaded {len(downloaded_keys)} downloaded keys")

    # A header is needed for a new file and for one that exists but is empty.
    write_header = not os.path.exists(output_file) or os.path.getsize(output_file) == 0

    # Both files are opened in append mode so a resumed run keeps earlier
    # rows and earlier error messages.
    with open(output_file, 'a', newline='', encoding='utf-8') as csvfile, \
            open(error_log, 'a', encoding='utf-8') as error_file:
        fieldnames = ['gbifID', 'year', 'countryCode', 'gbifDownloadKey', 'doi']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        if write_header:
            writer.writeheader()
            print(f"Wrote header to {output_file}")

        for entry in tqdm(filtered_entries, desc="Downloading and processing GBIF data"):
            # Bind these before the try block so the except handlers below can
            # always format their message, even if the failure happens early.
            key = None
            doi = ''
            try:
                identifiers = entry.get('identifiers') or {}
                doi = identifiers.get('doi') or ''
                # An empty DOI is never treated as "already processed".
                if doi and doi in processed_dois:
                    print(f"Skipping already processed DOI: {doi}")
                    continue

                download_keys = entry.get('gbifDownloadKey') or []
                if not download_keys:
                    _log_error(error_file, f"No gbifDownloadKey for DOI {doi!r}; entry skipped")
                    continue
                # Only the first download key of an entry is used.
                key = download_keys[0]
                # NOTE(review): a key is recorded as soon as its download
                # finishes, so a key whose processing later failed is also
                # skipped here on the next run — confirm this is intended.
                if key in downloaded_keys:
                    print(f"Skipping already downloaded key: {key}")
                    continue

                file_path = os.path.join(download_dir, f"{key}.zip")

                if os.path.exists(file_path) and zipfile.is_zipfile(file_path):
                    # A complete archive is already on disk: reuse it.
                    print(f"File {file_path} already exists. Skipping download.")
                else:
                    # Either nothing is on disk or a truncated leftover from an
                    # interrupted run is; download (again) and overwrite it.
                    download_url = f"{base_url}{key}.zip"
                    print(f"Downloading {download_url}")
                    status = _download_zip(download_url, file_path, request_timeout)
                    if status == 404:
                        _log_error(error_file, f"Failed to download data for key {key}: 404 Not Found")
                        continue
                    if status != 200:
                        _log_error(error_file, f"Failed to download data for key {key}: {status}")
                        continue
                    print(f"Downloaded {file_path}")

                # Save the downloaded key
                save_downloaded_key(downloaded_keys_file, key)
                downloaded_keys.add(key)

                # Unzip the downloaded file and extract required information
                try:
                    print(f"Unzipping {file_path}")
                    found = _write_preserved_specimens(file_path, download_dir, key, doi, writer)
                    if not found:
                        _log_error(error_file, f"No occurrence data found in {file_path}")

                    os.remove(file_path)
                    print(f"Deleted {file_path}")

                    # Save the DOI to the skip file
                    if doi:
                        save_processed_doi(skip_file, doi)
                        processed_dois.add(doi)
                        print(f"Saved DOI {doi} to skip file")
                except zipfile.BadZipFile:
                    _log_error(error_file, f"Bad zip file {file_path}")
                except Exception as e:
                    _log_error(error_file, f"Failed to process file {file_path}: {str(e)}")
            except requests.exceptions.RequestException as e:
                _log_error(error_file, f"Request error for key {key}: {str(e)}")
            except Exception as e:
                _log_error(error_file, f"Unexpected error for key {key}: {str(e)}")

In [None]:
# Tracking files that let an interrupted run resume: one lists processed DOIs,
# the other lists download keys that were already fetched.
skip_file = "D:/gbif_skip_files/processed_dois.txt"
downloaded_keys_file = "D:/gbif_skip_files/downloaded_keys.txt"

# Process every filtered entry, skipping whatever the tracking files list.
download_and_process_gbif_data(
    filtered_entries,
    skip_file=skip_file,
    downloaded_keys_file=downloaded_keys_file,
)

Loading processed DOIs from D:/gbif_skip_files/processed_dois.txt
Loaded 50 processed DOIs
Loading downloaded keys from D:/gbif_skip_files/downloaded_keys.txt
Loaded 1 downloaded keys


Downloading and processing GBIF data:   0%|          | 0/3572 [00:00<?, ?it/s]

Skipping already processed DOI: 10.15666/aeer/2202_18851902
Downloading https://api.gbif.org/v1/occurrence/download/request/0013673-171219132708484.zip
Downloaded D:/gbif_downloads\0013673-171219132708484.zip
Saving downloaded key 0013673-171219132708484 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0013673-171219132708484.zip
Extracted files: ['dataset/80d44654-f762-11e1-a439-00145eb45e9a.xml', 'dataset/2273e40d-26c2-4959-aea2-b6abcce16d09.xml', 'dataset/41c4b3e1-0a28-45f4-9f6c-09a7f77c184a.xml', 'dataset/5b605e9d-4e61-424d-950b-68ffbd010d20.xml', 'dataset/df582950-3b58-11dc-8c19-b8a03c50a862.xml', 'dataset/7fb0cd38-f762-11e1-a439-00145eb45e9a.xml', 'dataset/6bc3865d-62b3-48b8-91be-dd885437df9e.xml', 'dataset/f9f727fb-6356-4282-b39a-f6754424a618.xml', 'dataset/7741a517-ab3d-4b85-ae58-56322e44c8ce.xml', 'dataset/bdb3951b-1b8a-446d-8820-22ce0a2ea380.xml', 'dataset/4ce8e3f9-2546-4af1-b28d-e2eadf05dfd4.xml', 'dataset/3941e0f3-a88c-42af-b340-60056c84dc41.xml', 'data

Processed D:/gbif_downloads\occurrence.txt
Deleted extracted file D:/gbif_downloads\occurrence.txt


Downloading and processing GBIF data:   0%|          | 2/3572 [17:28<519:38:20, 524.01s/it]

Deleted D:/gbif_downloads\0013673-171219132708484.zip
Saving DOI 10.1002/ece3.11230 to D:/gbif_skip_files/processed_dois.txt
Saved DOI 10.1002/ece3.11230 to skip file
Skipping already processed DOI: 10.1038/s41598-024-59947-y
Skipping already downloaded key: 0019000-220831081235567
Skipping already processed DOI: 10.1016/j.tfp.2024.100559
Downloading https://api.gbif.org/v1/occurrence/download/request/0009997-230828120925497.zip


Downloading and processing GBIF data:   0%|          | 6/3572 [17:28<134:38:39, 135.93s/it]

Downloaded D:/gbif_downloads\0009997-230828120925497.zip
Saving downloaded key 0009997-230828120925497 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0009997-230828120925497.zip
Extracted files: ['0009997-230828120925497.csv']
Processing D:/gbif_downloads\0009997-230828120925497.csv
Processed D:/gbif_downloads\0009997-230828120925497.csv
Deleted extracted file D:/gbif_downloads\0009997-230828120925497.csv
Deleted D:/gbif_downloads\0009997-230828120925497.zip
Saving DOI 10.1007/s10530-024-03313-6 to D:/gbif_skip_files/processed_dois.txt
Saved DOI 10.1007/s10530-024-03313-6 to skip file
Downloading https://api.gbif.org/v1/occurrence/download/request/0162339-230224095556074.zip
Downloaded D:/gbif_downloads\0162339-230224095556074.zip
Saving downloaded key 0162339-230224095556074 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0162339-230224095556074.zip
Extracted files: ['0162339-230224095556074.csv']
Processing D:/gbif_downloads\0162339-230224

Downloading and processing GBIF data:   0%|          | 7/3572 [18:02<113:31:09, 114.63s/it]

Processed D:/gbif_downloads\0162339-230224095556074.csv
Deleted extracted file D:/gbif_downloads\0162339-230224095556074.csv
Deleted D:/gbif_downloads\0162339-230224095556074.zip
Saving DOI 10.32383/appdr/185727 to D:/gbif_skip_files/processed_dois.txt
Saved DOI 10.32383/appdr/185727 to skip file
Skipping already processed DOI: 10.3390/fishes9040148
Downloading https://api.gbif.org/v1/occurrence/download/request/0227785-230224095556074.zip


Downloading and processing GBIF data:   0%|          | 9/3572 [18:02<71:04:08, 71.81s/it]  

Downloaded D:/gbif_downloads\0227785-230224095556074.zip
Saving downloaded key 0227785-230224095556074 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0227785-230224095556074.zip
Extracted files: ['0227785-230224095556074.csv']
Processing D:/gbif_downloads\0227785-230224095556074.csv
Processed D:/gbif_downloads\0227785-230224095556074.csv
Deleted extracted file D:/gbif_downloads\0227785-230224095556074.csv
Deleted D:/gbif_downloads\0227785-230224095556074.zip
Saving DOI 10.1007/s10681-024-03317-2 to D:/gbif_skip_files/processed_dois.txt
Saved DOI 10.1007/s10681-024-03317-2 to skip file
Downloading https://api.gbif.org/v1/occurrence/download/request/0215706-220831081235567.zip
Downloaded D:/gbif_downloads\0215706-220831081235567.zip
Saving downloaded key 0215706-220831081235567 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0215706-220831081235567.zip
Extracted files: ['0215706-220831081235567.csv']
Processing D:/gbif_downloads\0215706-220831

Downloading and processing GBIF data:   0%|          | 10/3572 [18:07<57:03:38, 57.67s/it]

Processed D:/gbif_downloads\0215706-220831081235567.csv
Deleted extracted file D:/gbif_downloads\0215706-220831081235567.csv
Deleted D:/gbif_downloads\0215706-220831081235567.zip
Saving DOI 10.1007/s10750-024-05554-x to D:/gbif_skip_files/processed_dois.txt
Saved DOI 10.1007/s10750-024-05554-x to skip file
Downloading https://api.gbif.org/v1/occurrence/download/request/0197090-230224095556074.zip
Downloaded D:/gbif_downloads\0197090-230224095556074.zip
Saving downloaded key 0197090-230224095556074 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0197090-230224095556074.zip
Extracted files: ['0197090-230224095556074.csv']
Processing D:/gbif_downloads\0197090-230224095556074.csv


Downloading and processing GBIF data:   0%|          | 11/3572 [18:09<44:18:31, 44.79s/it]

Processed D:/gbif_downloads\0197090-230224095556074.csv
Deleted extracted file D:/gbif_downloads\0197090-230224095556074.csv
Deleted D:/gbif_downloads\0197090-230224095556074.zip
Saving DOI 10.1016/j.ecoinf.2024.102604 to D:/gbif_skip_files/processed_dois.txt
Saved DOI 10.1016/j.ecoinf.2024.102604 to skip file
Downloading https://api.gbif.org/v1/occurrence/download/request/0077952-230224095556074.zip


Downloading and processing GBIF data:   0%|          | 12/3572 [18:10<33:26:29, 33.82s/it]

Downloaded D:/gbif_downloads\0077952-230224095556074.zip
Saving downloaded key 0077952-230224095556074 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0077952-230224095556074.zip
Extracted files: ['0077952-230224095556074.csv']
Processing D:/gbif_downloads\0077952-230224095556074.csv
Processed D:/gbif_downloads\0077952-230224095556074.csv
Deleted extracted file D:/gbif_downloads\0077952-230224095556074.csv
Deleted D:/gbif_downloads\0077952-230224095556074.zip
Saving DOI 10.1093/jee/toae013 to D:/gbif_skip_files/processed_dois.txt
Saved DOI 10.1093/jee/toae013 to skip file
Downloading https://api.gbif.org/v1/occurrence/download/request/0016030-230828120925497.zip


Downloading and processing GBIF data:   0%|          | 13/3572 [18:10<24:46:15, 25.06s/it]

Downloaded D:/gbif_downloads\0016030-230828120925497.zip
Saving downloaded key 0016030-230828120925497 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0016030-230828120925497.zip
Extracted files: ['0016030-230828120925497.csv']
Processing D:/gbif_downloads\0016030-230828120925497.csv
Processed D:/gbif_downloads\0016030-230828120925497.csv
Deleted extracted file D:/gbif_downloads\0016030-230828120925497.csv
Deleted D:/gbif_downloads\0016030-230828120925497.zip
Saving DOI 10.3897/zookeys.1196.116144 to D:/gbif_skip_files/processed_dois.txt
Saved DOI 10.3897/zookeys.1196.116144 to skip file
Downloading https://api.gbif.org/v1/occurrence/download/request/0054160-200221144449610.zip


Downloading and processing GBIF data:   0%|          | 14/3572 [18:11<18:06:40, 18.32s/it]

Downloaded D:/gbif_downloads\0054160-200221144449610.zip
Saving downloaded key 0054160-200221144449610 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0054160-200221144449610.zip
Extracted files: ['0054160-200221144449610.csv']
Processing D:/gbif_downloads\0054160-200221144449610.csv
Processed D:/gbif_downloads\0054160-200221144449610.csv
Deleted extracted file D:/gbif_downloads\0054160-200221144449610.csv
Deleted D:/gbif_downloads\0054160-200221144449610.zip
Saving DOI 10.37828/em.2024.72.20 to D:/gbif_skip_files/processed_dois.txt
Saved DOI 10.37828/em.2024.72.20 to skip file
Downloading https://api.gbif.org/v1/occurrence/download/request/0417875-210914110416597.zip


Downloading and processing GBIF data:   0%|          | 15/3572 [18:11<13:09:50, 13.32s/it]

Downloaded D:/gbif_downloads\0417875-210914110416597.zip
Saving downloaded key 0417875-210914110416597 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0417875-210914110416597.zip
Extracted files: ['0417875-210914110416597.csv']
Processing D:/gbif_downloads\0417875-210914110416597.csv
Processed D:/gbif_downloads\0417875-210914110416597.csv
Deleted extracted file D:/gbif_downloads\0417875-210914110416597.csv
Deleted D:/gbif_downloads\0417875-210914110416597.zip
Saving DOI 10.7494/geom.2024.18.3.45 to D:/gbif_skip_files/processed_dois.txt
Saved DOI 10.7494/geom.2024.18.3.45 to skip file
Downloading https://api.gbif.org/v1/occurrence/download/request/0081205-240229165702484.zip


Downloading and processing GBIF data:   0%|          | 16/3572 [18:11<9:29:40,  9.61s/it] 

Downloaded D:/gbif_downloads\0081205-240229165702484.zip
Saving downloaded key 0081205-240229165702484 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0081205-240229165702484.zip
Extracted files: ['0081205-240229165702484.csv']
Processing D:/gbif_downloads\0081205-240229165702484.csv
Processed D:/gbif_downloads\0081205-240229165702484.csv
Deleted extracted file D:/gbif_downloads\0081205-240229165702484.csv
Deleted D:/gbif_downloads\0081205-240229165702484.zip
Saving DOI 10.1007/s10113-024-02222-7 to D:/gbif_skip_files/processed_dois.txt
Saved DOI 10.1007/s10113-024-02222-7 to skip file
Downloading https://api.gbif.org/v1/occurrence/download/request/0034182-231120084113126.zip


Downloading and processing GBIF data:   0%|          | 17/3572 [18:12<6:51:38,  6.95s/it]

Downloaded D:/gbif_downloads\0034182-231120084113126.zip
Saving downloaded key 0034182-231120084113126 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0034182-231120084113126.zip
Extracted files: ['0034182-231120084113126.csv']
Processing D:/gbif_downloads\0034182-231120084113126.csv
Processed D:/gbif_downloads\0034182-231120084113126.csv
Deleted extracted file D:/gbif_downloads\0034182-231120084113126.csv
Deleted D:/gbif_downloads\0034182-231120084113126.zip
Saving DOI 10.13057/biodiv/d250328 to D:/gbif_skip_files/processed_dois.txt
Saved DOI 10.13057/biodiv/d250328 to skip file
Skipping already processed DOI: 10.1002/ajb2.16322
Downloading https://api.gbif.org/v1/occurrence/download/request/0227647-230224095556074.zip


Downloading and processing GBIF data:   1%|          | 19/3572 [18:12<3:51:18,  3.91s/it]

Downloaded D:/gbif_downloads\0227647-230224095556074.zip
Saving downloaded key 0227647-230224095556074 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0227647-230224095556074.zip
Extracted files: ['0227647-230224095556074.csv']
Processing D:/gbif_downloads\0227647-230224095556074.csv
Processed D:/gbif_downloads\0227647-230224095556074.csv
Deleted extracted file D:/gbif_downloads\0227647-230224095556074.csv
Deleted D:/gbif_downloads\0227647-230224095556074.zip
Saving DOI 10.1111/eea.13451 to D:/gbif_skip_files/processed_dois.txt
Saved DOI 10.1111/eea.13451 to skip file
Downloading https://api.gbif.org/v1/occurrence/download/request/0193290-220831081235567.zip
Downloaded D:/gbif_downloads\0193290-220831081235567.zip
Saving downloaded key 0193290-220831081235567 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0193290-220831081235567.zip
Extracted files: ['0193290-220831081235567.csv']
Processing D:/gbif_downloads\0193290-220831081235567.csv


Downloading and processing GBIF data:   1%|          | 20/3572 [18:58<13:52:42, 14.07s/it]

Processed D:/gbif_downloads\0193290-220831081235567.csv
Deleted extracted file D:/gbif_downloads\0193290-220831081235567.csv
Deleted D:/gbif_downloads\0193290-220831081235567.zip
Saving DOI 10.1111/oik.10217 to D:/gbif_skip_files/processed_dois.txt
Saved DOI 10.1111/oik.10217 to skip file
Downloading https://api.gbif.org/v1/occurrence/download/request/0407383-210914110416597.zip
Downloaded D:/gbif_downloads\0407383-210914110416597.zip
Saving downloaded key 0407383-210914110416597 to D:/gbif_skip_files/downloaded_keys.txt
Unzipping D:/gbif_downloads\0407383-210914110416597.zip
Extracted files: ['0407383-210914110416597.csv']
Processing D:/gbif_downloads\0407383-210914110416597.csv


Downloading and processing GBIF data:   1%|          | 21/3572 [19:01<11:09:50, 11.32s/it]

Processed D:/gbif_downloads\0407383-210914110416597.csv
Deleted extracted file D:/gbif_downloads\0407383-210914110416597.csv
Deleted D:/gbif_downloads\0407383-210914110416597.zip
Saving DOI 10.1016/j.scitotenv.2024.172519 to D:/gbif_skip_files/processed_dois.txt
Saved DOI 10.1016/j.scitotenv.2024.172519 to skip file
Downloading https://api.gbif.org/v1/occurrence/download/request/0014654-230224095556074.zip


Downloading and processing GBIF data:   1%|          | 22/3572 [19:01<8:14:13,  8.35s/it] 

Failed to download data for key 0014654-230224095556074: 404 Not Found
Skipping already processed DOI: 10.1002/tax.13173
Downloading https://api.gbif.org/v1/occurrence/download/request/0202277-220831081235567.zip
