# Creating CIM v2 documents from CMIP6 spreadsheet

## Setup

In [1]:
import glob
import inspect
import os
import uuid

import pyesdoc
import pyesdoc.ontologies.cim as cim
import xlrd

ES-DOC :: pyesdoc :: INFO > Loading pyesdoc config from: /Users/macg/dev/esdoc/ops/config/pyesdoc.conf


In [2]:
# Path to test folder.
_HOME = os.path.join(os.path.expanduser("~"), "esdoc-test")

# Path to workbook being converted to CIM v2 documents.
_WORKBOOK_FPATH = os.path.join(_HOME, "CMIP6Experiments.xlsx")

# Workbook being converted to CIM v2 documents.
_WORKBOOK = None

# Name of relevant worksheets to be found within workbook.
_WS_ENSEMBLE_REQUIREMENT = "EnsembleRequirement"
_WS_EXPERIMENT = "experiment"
_WS_FORCING_CONSTRAINT = "ForcingConstraint"
_WS_PARTY = "party"
_WS_REFERENCES = "references"
_WS_REQUIREMENT = "requirement"
_WS_TEMPORAL_CONSTRAINT = "TemporalConstraint"
_WS_URL = "url"

# Default document project code.
_DOC_PROJECT = 'CMIP6-TEST'

# Default document source.
_DOC_SOURCE = 'test-script'

# Default document author.
_DOC_AUTHOR = pyesdoc.create(cim.v2.Party,
                             source=_DOC_SOURCE,
                             uid='253825f3-fbc8-43fb-b1f6-cc575dc693eb')
_DOC_AUTHOR.email = u"charlotte.pascoe@stfc.ac.uk"
_DOC_AUTHOR.name = u"Charlotte Pascoe"

## Helper functions to extract data from workbook

In [3]:
def _get_workbook():
    """Returns pointer to workbook object.
    
    """
    global _WORKBOOK
    
    if _WORKBOOK is None:
        _WORKBOOK = xlrd.open_workbook(_WORKBOOK_FPATH)

    return _WORKBOOK


def _get_ws(ws_name):
    """Returns pointer to a named worksheet.
    
    """
    return _get_workbook().sheet_by_name(ws_name)


def _get_ws_rows(ws_name):
    """Returns collection of rows within a named worksheet.
    
    """
    return enumerate(_get_ws(ws_name).get_rows())


def _get_ws_data(ws_name):
    """Returns collection of rows within a named worksheet that correspond to actual data.
    
    """
    for idx, row in _get_ws_rows(ws_name):
        if idx > 0 and len(row[0].value) > 0:
            yield row

            
def _get_ws_col_map(ws_name):
    """Returns map of column index to column names - supports situation when user reorders columns.
    
    """
    for idx, row in _get_ws_rows(ws_name):
        if idx == 0:
            return {col.value: col_idx for col_idx, col in enumerate(row)} 


def _get_ws_document(row, col_map, doc_type, doc_mappings):
    """Returns a cim document from a spreadsheet row.
    
    """
    # Create document.
    doc = pyesdoc.create(doc_type,
                         project=_DOC_PROJECT,
                         source=_DOC_SOURCE,
                         author=_DOC_AUTHOR)
    
    # Apply attribute mappings.
    for mapping in doc_mappings:
        # Unpack mapping info.
        cell_value_convertor = None
        if isinstance(mapping, tuple):
            mapping, cell_value_convertor = mapping
        mapping = mapping.split(":")
        doc_attr = mapping[0]
        col_name = mapping[0] if len(mapping) == 1 else mapping[1]
        
        # Get cell value.
        cell_value = row[col_map[col_name]].value
        if cell_value_convertor:
            cell_value = cell_value_convertor(cell_value)
                    
        # Set document attribute.
        setattr(doc, doc_attr, cell_value)

    return doc


def _get_ws_documents(ws_name, doc_type, doc_mappings):
    """Returns set of cim documents within a spreadsheet."""
    result = list()
    col_map = _get_ws_col_map(ws_name)
    for row in _get_ws_data(ws_name):
        result.append(_get_ws_document(row, col_map, doc_type, doc_mappings))

    return result

## Declare cell value convertors

In [4]:
def _convert_to_bool(value):
    """Converts a cell value to a boolean."""
    if len(value) == 0:
        return False
    return unicode(value).lower() in [u'true', u't', u'yes', u'y', u"1"]

def _convert_to_time_period(value):
    """Converts a cell value to a cim.v2.TimePeriod instance"""
    d = cim.v2.TimePeriod()
    d.length = int(value.split(" ")[0])
    d.units = value.split(" ")[1]
    d.date_type = u'unused'

    return d

## Map worksheets to CIM v2 documents

In [5]:
def _get_parties():
    """Returns set of resposible parties definied within workbook.
    
    """
    def _convert_url(value):
        """Converts a cell value to a cim.v2.OnlineResource instance"""
        if len(value) == 0:
            return None
        d = cim.v2.OnlineResource()
        d.name = value
        return d
    
    mappings = [
        ("address"),
        ("email"),
        ("name"),
        ("organisation", _convert_to_bool),
        ("url", _convert_url)
    ]
    
    return _get_ws_documents(_WS_PARTY, cim.v2.Party, mappings) 

In [6]:
def _get_temporal_constraints():
    """Returns set of temporal constraints definied within workbook.
    
    """
    mappings = [
        ("canonical_name"),
        ("conformance_is_requested:conformance_requested", _convert_to_bool),
        ("duration:required_duration", _convert_to_time_period),
        ("name"),
    ]
    
    return _get_ws_documents(_WS_TEMPORAL_CONSTRAINT, cim.v2.TemporalConstraint, mappings)

In [7]:
def _get_forcing_constraints():
    """Returns set of temporal constraints definied within workbook.
    
    """
    mappings = [
        ("canonical_name"),
        ("conformance_is_requested:conformance_requested", _convert_to_bool),        
        ("forcing_type"),
        ("name"),
    ]
    
    return _get_ws_documents(_WS_FORCING_CONSTRAINT, cim.v2.ForcingConstraint, mappings)

## Display document validation errors

In [8]:
def _validate(target):
    """Displays document validation errors."""
    # Invoke document factory functions.
    if inspect.isfunction(target):
        target = target()

    # Set document collection.
    try:
        iter(target)
    except TypeError:
        docs = [target]
    else:
        docs = target
    
    # Validate document collection.
    for doc in docs:
        for err_idx, err in enumerate(pyesdoc.validate(doc)):
            if err_idx == 0:
                print("Document errors: {} :: {} :: v{}".format(doc.type_key, doc.meta.id, doc.meta.version))
            print("\t{}".format(err))    

In [9]:
_validate(_get_parties)

Document errors: cim.2.shared.Party :: e1772b67-71f5-441e-b9e5-0c94c971bf52 :: v0
	doc.url.linkage --> is null
Document errors: cim.2.shared.Party :: 4377800e-e93a-45be-8cc1-9fe10a8d0deb :: v0
	doc.url.linkage --> is null
Document errors: cim.2.shared.Party :: 52d5636e-bf6c-4bfa-a353-8f89b5a3aed7 :: v0
	doc.url.linkage --> is null
Document errors: cim.2.shared.Party :: f10313a3-7c15-48db-b815-7cf930b39dff :: v0
	doc.url.linkage --> is null
Document errors: cim.2.shared.Party :: b9fee1d6-cd0f-438e-874f-eb4fb2336368 :: v0
	doc.url.linkage --> is null
Document errors: cim.2.shared.Party :: a746b1fc-5722-47c7-b3e2-4c0d28dd8f34 :: v0
	doc.url.linkage --> is null
Document errors: cim.2.shared.Party :: 5ec01ba0-c6bf-46f1-8ce5-aed30543ed8b :: v0
	doc.url.linkage --> is null
Document errors: cim.2.shared.Party :: 91f051d5-0ce5-4940-96b5-5f7f001eda5c :: v0
	doc.url.linkage --> is null
Document errors: cim.2.shared.Party :: 718a1e5c-63d2-46e6-a415-6f10ae22ac12 :: v0
	doc.url.linkage --> is null
D

In [10]:
_validate(_get_temporal_constraints)

In [11]:
_validate(_get_forcing_constraints)

Document errors: cim.2.activity.ForcingConstraint :: d50536c8-df6b-4511-8b32-e7b5201e30ed :: v0
	doc.category --> is null
	doc.code --> is null
Document errors: cim.2.activity.ForcingConstraint :: afae1b19-22c6-4e6e-8106-2ee962466167 :: v0
	doc.category --> is null
	doc.code --> is null
Document errors: cim.2.activity.ForcingConstraint :: 0f1e5594-f896-4080-8764-340599a7a533 :: v0
	doc.category --> is null
	doc.code --> is null
Document errors: cim.2.activity.ForcingConstraint :: cce7bebd-a3f3-4fda-9378-1bb7ad279d57 :: v0
	doc.category --> is null
	doc.code --> is null
Document errors: cim.2.activity.ForcingConstraint :: 8dc4095d-bb23-476a-adfb-81d3afbcdcdb :: v0
	doc.category --> is null
	doc.code --> is null
Document errors: cim.2.activity.ForcingConstraint :: fc1376ad-c032-41ba-abe1-fc2f8a2be321 :: v0
	doc.category --> is null
	doc.code --> is null
Document errors: cim.2.activity.ForcingConstraint :: 34518024-5a1b-4317-aa17-ec271e94822d :: v0
	doc.category --> is null
	doc.code --> 

## Save CIM documents to file system 

In [12]:
# Set I/O directory.
pyesdoc.set_option("output_dir", _HOME)

In [13]:
# Build collection of documents to be written to file system.
docs = sorted(_get_temporal_constraints() + _get_forcing_constraints())

In [14]:
# Write document set to file system.
for doc in sorted(docs):
    print pyesdoc.write(doc)

/Users/macg/esdoc-test/cim.2.activity.ForcingConstraint_26357051-e72a-47af-9dfa-cb0a47fe325d_0.json
/Users/macg/esdoc-test/cim.2.activity.ForcingConstraint_aba7c455-c4d0-48a7-a5ee-7414530b7b06_0.json
/Users/macg/esdoc-test/cim.2.activity.ForcingConstraint_3ae36ece-0c02-42f6-a17a-167b24a152ec_0.json
/Users/macg/esdoc-test/cim.2.activity.ForcingConstraint_8c242920-0929-4ee4-ad3c-854398291b2d_0.json
/Users/macg/esdoc-test/cim.2.activity.ForcingConstraint_5927b096-78fa-4533-b5b1-068803f2f30c_0.json
/Users/macg/esdoc-test/cim.2.activity.ForcingConstraint_4e6c7c3a-04f6-4267-975a-731ff68b042a_0.json
/Users/macg/esdoc-test/cim.2.activity.ForcingConstraint_d494ae2c-98b8-4ccc-99e4-f71e100a5878_0.json
/Users/macg/esdoc-test/cim.2.activity.ForcingConstraint_90d1bf1c-4594-475d-a2a1-f977e141500a_0.json
/Users/macg/esdoc-test/cim.2.activity.ForcingConstraint_471184b2-2354-48d6-a88b-6801037ae567_0.json
/Users/macg/esdoc-test/cim.2.activity.ForcingConstraint_0cd377ef-2e4f-4690-b163-bd682b35fcea_0.json


In [15]:
# Read from file system
docs = map(pyesdoc.read, glob.glob(os.path.join(_HOME, "*.json")))
for doc in sorted(docs):
    print doc

<pyesdoc.ontologies.cim.v2.typeset_for_activity_package.ForcingConstraint object at 0x10d718450>
<pyesdoc.ontologies.cim.v2.typeset_for_activity_package.ForcingConstraint object at 0x10d733890>
<pyesdoc.ontologies.cim.v2.typeset_for_activity_package.ForcingConstraint object at 0x10d799190>
<pyesdoc.ontologies.cim.v2.typeset_for_activity_package.ForcingConstraint object at 0x10d7ab350>
<pyesdoc.ontologies.cim.v2.typeset_for_activity_package.ForcingConstraint object at 0x10d7d0390>
<pyesdoc.ontologies.cim.v2.typeset_for_activity_package.ForcingConstraint object at 0x10d7d0490>
<pyesdoc.ontologies.cim.v2.typeset_for_activity_package.ForcingConstraint object at 0x10d7d04d0>
<pyesdoc.ontologies.cim.v2.typeset_for_activity_package.ForcingConstraint object at 0x10d7d0610>
<pyesdoc.ontologies.cim.v2.typeset_for_activity_package.ForcingConstraint object at 0x10d7d0710>
<pyesdoc.ontologies.cim.v2.typeset_for_activity_package.ForcingConstraint object at 0x10d7d0a10>
<pyesdoc.ontologies.cim.v2.typ

In [16]:
# Clean up file system.
for fpath in glob.glob(os.path.join(_HOME, "*.json")):
    os.remove(fpath)