In [1]:
import uuid
import hashlib
import datetime
import io

from d1_client.mnclient_2_0 import *
from d1_common.types import dataoneTypes
from d1_common.resource_map import createSimpleResourceMap
from d1_common.types.exceptions import DataONEException, InvalidToken

def generate_system_metadata(pid: str, format_id: str, science_object: bytes, orcid: str):
    """
    Generates a system metadata document.
    :param pid: The pid that the object will have
    :param format_id: The format of the object (e.g text/csv)
    :param science_object: The object that is being described
    :return:
    """

    # Check that the science_object is unicode, attempt to convert it if it's a str
    if not isinstance(science_object, bytes):
        if isinstance(science_object, str):
            science_object = science_object.encode("utf-8")
        else:
            raise ValueError('Supplied science_object is not unicode')

    size = len(science_object)
    md5 = hashlib.md5()
    md5.update(science_object)
    md5 = md5.hexdigest()
    now = datetime.datetime.now()
    sys_meta = generate_sys_meta(pid, format_id, size, md5, now, orcid)
    return sys_meta


def generate_sys_meta(pid: str, format_id: str, size: int, md5, now, orcid: str):
    """
    Fills out the system metadata object with the needed properties
    :param pid: The pid of the system metadata document
    :param format_id: The format of the document being described
    :param size: The size of the document that is being described
    :param md5: The md5 hash of the document being described
    :param now: The current time
    :param orcid: The uploader's orcid
    """

    sys_meta = dataoneTypes.systemMetadata()
    sys_meta.identifier = str(pid)
    sys_meta.formatId = format_id
    sys_meta.size = size
    sys_meta.rightsHolder = orcid

    sys_meta.checksum = dataoneTypes.checksum(str(md5))
    sys_meta.checksum.algorithm = 'MD5'
    sys_meta.dateUploaded = now
    sys_meta.dateSysMetadataModified = now
    sys_meta.accessPolicy = generate_public_access_policy()
    return sys_meta


def generate_public_access_policy():
    """
    Creates the access policy for the object. Note that the permission is set to 'read'.
    """

    accessPolicy = dataoneTypes.accessPolicy()
    accessRule = dataoneTypes.AccessRule()
    accessRule.subject.append(d1_common.const.SUBJECT_PUBLIC)
    permission = dataoneTypes.Permission('read')
    accessRule.permission.append(permission)
    accessPolicy.append(accessRule)
    return accessPolicy


def create_minimum_eml() -> bytes:
    """
    Ugly method that creates a bare minimum EML record for a package.
    This includes the title, creator, and contact. Ideally the EML shouldn't need
    to be generated in python.

    :param tale: The tale that is being packaged.
    :return: The EML document
    """

    # XML declaration
    top = '<?xml version="1.0" encoding="UTF-8"?>'
    namespace = '<eml:eml xmlns:eml="eml://ecoinformatics.org/eml-2.1.1" xmlns:stmml="http://www.xml-cml.org/schema/stmml-1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" packageId="test_pkg" system="test_system" xsi:schemaLocation="eml://ecoinformatics.org/eml-2.1.1 eml.xsd">'

    dataset = '<dataset>\n'
    title = '<title>{0}</title>\n'.format("Test Data Package")

    # The uploader's surname
    individualName = '<individualName>\n<surName>\n{0}\n</surName>\n</individualName>'.format("Test User")

    # Create an EML creator record
    creator = '<creator>\n{0}\n</creator>\n'.format(individualName)
    # Create an EML contact record
    contact = '<contact>\n{0}\n</contact>\n'.format(individualName)
    dataset_close = '</dataset>\n'
    eml_close = '</eml:eml>'

    # Append the above xml strings together to form the EML document
    xml = top + namespace + dataset + title + creator + contact + dataset_close + eml_close

    return xml.encode("utf-8")

def create_package(orcid):

    # Create and upload the EML
    eml_pid = str(uuid.uuid4())
    eml_bytes = create_minimum_eml()
    meta_sm = generate_system_metadata(pid=eml_pid,
                                       format_id='eml://ecoinformatics.org/eml-2.1.1',
                                       science_object=eml_bytes,
                                       orcid=orcid)
    client.create(eml_pid, eml_bytes, meta_sm)

    # Create and upload the data
    data = "data"
    data_pid = str(uuid.uuid4())
    data_bytes = data.encode('utf-8')
    data_sm = generate_system_metadata(pid=data_pid,
                                       format_id='text/plain',
                                       science_object=data_bytes,
                                       orcid=orcid)
    client.create(data_pid, data_bytes, data_sm)

    # Create and upload the resource map
    ore_pid = str(uuid.uuid4())
    ore = createSimpleResourceMap(ore_pid, eml_pid, [data_pid])
    ore_meta = generate_system_metadata(pid=ore_pid,
                                        format_id='http://www.openarchives.org/ore/terms',
                                        science_object=ore.serialize(),
                                        orcid=orcid)
    client.create(ore_pid, ore.serialize(), ore_meta)
    return eml_pid


if __name__ == "__main__":
    """
        Paste your auth token into 'auth_token' and your orcid into 'orcid'
    """
    auth_token: str = ""
    # Set the token in the request header
    options: dict = {"headers": {"Authorization": "Bearer " + auth_token}}
    # Create the Member Node Client
    client: MemberNodeClient_2_0 = MemberNodeClient_2_0('https://dev.nceas.ucsb.edu/knb/d1/mn/', **options)
    # Set your ORCID
    orcid: str = "http://orcid.org/0000-0002-1756-2128"

    # Create & upload a default package to dataone
    print("Creating initial package....")
    eml_pid: str = create_package(orcid)
    print("Created package with pid {}".format(eml_pid))

    # Create a pid for the new EML document
    new_eml_pid: str = str(uuid.uuid4())
    # Create the new EML
    new_eml_bytes: bytes = create_minimum_eml()

    try:
        new_eml_system_metadata = generate_system_metadata(pid=new_eml_pid,
                                           format_id='eml://ecoinformatics.org/eml-2.1.1',
                                           science_object=new_eml_bytes,
                                           orcid=orcid)

        client.update(eml_pid, io.BytesIO(new_eml_bytes), new_eml_pid, new_eml_system_metadata)
        print("Obsoleted {} with {}".format(eml_pid, new_eml_pid))
    except DataONEException as e:
        print('Error obsoleting package {} with {}. {}'.format(eml_pid, new_eml_pid, e))

Creating initial package....


InvalidToken: name: InvalidToken
errorCode: 401
detailCode: 1110
description: Session is required to WRITE to the Node.
traceInformation:
  < Accept-Encoding: gzip, deflate
  < Accept: */*
  < Authorization: Bearer 
  < Charset: utf-8
  < Connection: keep-alive
  < Content-Length: 1555
  < Content-Type: multipart/form-data; boundary=3fe41f709d5740c1a510e899b8933410
  < Host: dev.nceas.ucsb.edu
  < POST /knb/d1/mn/v2/object HTTP/1.1
  < User-Agent: DataONE-Python/3.5.0 +http://dataone.org/
  
  
  
  << Request body is not a string-like type >>
  > HTTP/1.1 401 401
  > Date: Mon, 10 Aug 2020 11:18:30 GMT
  > Server: Apache/2.4.29 (Ubuntu)
  > X-Frame-Options: SAMEORIGIN
  > X-Frame-Options: sameorigin
  > Vary: User-Agent,Authorization
  > Set-Cookie: JSESSIONID=83F4607F1F676FCC7155ED4759F56345; Path=/knb; Secure
  > Content-Length: 182
  > Access-Control-Allow-Origin: 
  > Access-Control-Allow-Headers: Authorization, Content-Type, Origin, Cache-Control
  > Access-Control-Allow-Methods: GET, POST, PUT, OPTIONS
  > Access-Control-Allow-Credentials: true
  > Keep-Alive: timeout=5, max=100
  > Connection: Keep-Alive
  > Content-Type: text/xml
  > 
  <?xml version="1.0" encoding="UTF-8"?><error detailCode="1110" errorCode="401" name="InvalidToken">
      <description>Session is required to WRITE to the Node.</description>
  </error>
identifier: <unset>
nodeId: <unset>
