In [None]:
import os

import pandas as pd
import requests

### Load file and get dataset IDs

In [None]:
# Filtered and subsampled Pangaea export, located one directory above the notebook.
file = "../pangaea_2022-01-02_filtered_subsampled-1.25m-1200-1000_remove-core-surf.csv"
pangaea_df = pd.read_csv(file, low_memory=False)

# Keep only the text after the last "-" of every distinct `dataset` value;
# presumably that trailing part is the numeric Pangaea dataset ID (verify against the CSV).
ds_ids = [
    label.rsplit("-", 1)[-1]
    for label in pangaea_df.dataset.unique()
]

### Get BibTeX citation

In [None]:
def get_bibtex(ds_id: str, verbose: bool = False, timeout: float = 30.0) -> str:
    """Get the BibTeX citation of a Pangaea dataset using the dataset ID.

    Args:
        ds_id: Dataset ID that is appended to the ``10.1594/PANGAEA.`` DOI prefix.
        verbose: If True, print the HTTP status code of the response.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as text (the BibTeX entry on success).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    bib_url = f"https://doi.pangaea.de/10.1594/PANGAEA.{ds_id}?format=citation_bibtex"
    # Without a timeout a single stalled connection would block the caller forever.
    resp = requests.get(bib_url, timeout=timeout)
    if verbose:
        print("\tStatus code:", resp.status_code)
    # Fail loudly on error responses so an error page is never mistaken for a citation.
    resp.raise_for_status()
    return resp.text

### Write to file

In [None]:
citations_file = "../pangaea-citations.bib"
failed_ids = []  # dataset IDs whose citation could not be downloaded

# Citation text may contain non-ASCII characters, so pin the encoding
# instead of relying on the platform default.
with open(citations_file, "w", encoding="utf-8") as f:
    for i, ds_id in enumerate(ds_ids, start=1):
        try:
            bibtex = get_bibtex(ds_id)
        except requests.RequestException as err:
            # One unreachable dataset should not abort the whole export;
            # remember it and report it after the loop.
            failed_ids.append(ds_id)
            print(f"{i}/{len(ds_ids)} failed (dataset {ds_id}): {err}")
            continue
        f.write(bibtex)
        # Keep consecutive entries on separate lines even when the
        # response body does not end with a newline.
        if not bibtex.endswith("\n"):
            f.write("\n")
        print(f"{i}/{len(ds_ids)} complete.")

print(f"All dataset BibTex citations written to file: '{citations_file}'")
if failed_ids:
    print(f"Could not retrieve {len(failed_ids)} citation(s): {failed_ids}")