<a href="https://colab.research.google.com/github/JR-1991/NetCDF-Example/blob/main/UploadNetCDF2DemoDV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import sys

# Install dependencies and pyDataverse patch.
# pip is invoked through sys.executable so the packages are installed for the
# interpreter that runs this notebook. The three steps are:
#   1. install netcdf4 and easyDataverse (branch "flexible-connect")
#   2. uninstall pydataverse (presumably pulled in as an easyDataverse dependency)
#   3. install pyDataverse from the JR-1991 fork, i.e. the patched version
# NOTE(review): none of the installs is pinned to a version or commit, so the
# environment may change between runs — consider pinning.
!{sys.executable} -m pip install netcdf4 git+https://github.com/gdcc/easyDataverse.git@flexible-connect
!{sys.executable} -m pip uninstall pydataverse -y
!{sys.executable} -m pip install git+https://github.com/JR-1991/pyDataverse.git


In [None]:
# Fetch the example NetCDF dataset (bi_1985.nc) from the web.
# Later cells open and upload the file by the relative name "bi_1985.nc", so it
# has to be stored in the current working directory.
# NOTE(review): running this cell again downloads the file again; wget usually
# keeps the first copy and stores the new one under a numbered name — confirm
# whether that matters for your environment.
!wget https://www.northwestknowledge.net/metdata/data/bi_1985.nc

In [3]:
import re
import netCDF4 as nc

from easyDataverse import Dataset

In [4]:
# Regular expression used to split the NetCDF "author" attribute into its
# parts. The text is expected to look like "<name>-<affiliation>,<e-mail>";
# surrounding whitespace is captured and has to be stripped by the caller.
AUTHOR_PATTERN = (
    r"([A-Za-z\s]*)\-"         # group 1: author name, followed by a dash
    r"([A-Za-z\s]*)\,"         # group 2: affiliation, followed by a comma
    r"([A-Za-z\s0-9\@\.]*)"    # group 3: e-mail address
)

In [5]:
# Open the downloaded NetCDF file for reading; its global attributes
# (author, notes, geospatial extent) are read in the cells below.
ds = nc.Dataset("bi_1985.nc", mode="r")

In [15]:
# Initialize the EasyDataverse dataset by connecting to a Dataverse installation.
# Replace the API_TOKEN placeholder with your own token before running, and
# avoid saving or sharing the notebook with a real token in it.
# The trailing "#@param" markers are Colab form annotations; keep each one on
# the same line as the value it describes.
dataset = Dataset.connect(
    url="https://demo.dataverse.org", #@param {type: "string"}
    API_TOKEN="Enter API Token" #@param {type:"string"}
)

In [7]:
# Extract author information from the file's global "author" attribute.
# AUTHOR_PATTERN captures, in order: name, affiliation and e-mail address.
match = re.match(AUTHOR_PATTERN, ds.author)

# re.match returns None when the text does not fit the pattern. Fail with a
# readable message instead of an AttributeError on `None.group(...)`.
if match is None:
    raise ValueError(
        f"Could not parse the author attribute {ds.author!r}; "
        "expected the form '<name> - <affiliation>, <e-mail>'."
    )

# The captured groups still carry the whitespace around the separators.
name, university, mail = (part.strip() for part in match.group(1, 2, 3))

In [8]:
# Build the text for the notes field: take every entry of ds.__dict__ whose
# key starts with "note" (optionally followed by digits, e.g. note1, note2)
# and join the values with line breaks.
# NOTE(review): assumes ds.__dict__ exposes the file's global attributes and
# that the matching values are strings — confirm against netCDF4.
notes = "\n".join(
    attr_value
    for attr_name, attr_value in ds.__dict__.items()
    if re.match(r"note\d*", attr_name)
)

In [9]:
# Populate the Citation metadatablock of the dataset.
citation = dataset.citation

# Title and description are fixed demo texts.
citation.title = "NetCDF Test"
citation.add_ds_description(
    value="This is a test for NetCDF",
)

# Author name and affiliation come from the NetCDF "author" attribute.
citation.add_author(
    name=name,
    affiliation=university,
)

# The contact is a placeholder, not the e-mail parsed from the file.
citation.add_dataset_contact(
    name="John Doe",
    email="jonhdoe@doetastic.com",
)

citation.subject = ["Earth and Environmental Sciences"]
citation.notes_text = notes

In [10]:
# Determine the direction in which longitude and latitude are counted: it is
# the last space-separated word of the respective units attribute, lower-cased.
longitude_dir = ds.geospatial_lon_units.split(" ")[-1].lower()
latitude_dir = ds.geospatial_lat_units.split(" ")[-1].lower()

# Longitude limits: when values are counted towards the east, the maximum is
# the eastern edge and the minimum the western edge; otherwise the roles swap.
longitude_bounds = (
    {
        "east_longitude": ds.geospatial_lon_max,
        "west_longitude": ds.geospatial_lon_min,
    }
    if longitude_dir == "east"
    else {
        "west_longitude": ds.geospatial_lon_max,
        "east_longitude": ds.geospatial_lon_min,
    }
)

# Latitude limits: same idea, with "north" as the reference direction.
# NOTE(review): the latitude limits are stored under "*_longitude" keys;
# presumably this mirrors the field names of the target geospatial block —
# confirm against the connected Dataverse installation.
latitude_bounds = (
    {
        "north_longitude": ds.geospatial_lat_max,
        "south_longitude": ds.geospatial_lat_min,
    }
    if latitude_dir == "north"
    else {
        "south_longitude": ds.geospatial_lat_max,
        "north_longitude": ds.geospatial_lat_min,
    }
)

# Keyword arguments for the bounding box, longitude entries first.
parameters = {**longitude_bounds, **latitude_bounds}

In [11]:
# Add the bounding box to the Geospatial metadatablock, passing the limits
# collected in `parameters` as keyword arguments.
# NOTE(review): presumably each call adds another bounding box, so re-running
# this cell without re-creating `dataset` may produce duplicates — confirm.
dataset.geospatial.add_bounding_box(**parameters)

In [12]:
# Attach the downloaded NetCDF file to the dataset, together with a short
# description. The path is relative to the current working directory, i.e.
# the same file that was opened as `ds` above.
dataset.add_file("./bi_1985.nc", description="Corresponding NetCDF file")

In [None]:
# Upload the dataset to a Dataverse collection. Replace the placeholder with
# the name of the target collection before running; the "#@param" marker keeps
# the value editable as a Colab form field and must stay on the same line.
dataset.upload(
    dataverse_name="Enter collection name" #@param {type:"string"}
)