# Extract Distribution information from ISO 19139 metadata

This notebook is opened with a `documentId` query parameter that is used to pull an ISO XML record from the CINERGI catalog.
The record is parsed to extract distribution information and generate a dispatchList object.

The dispatchList object is passed to a dispatcher, which uses a mapping between endpoint applications and the application profiles in the dispatchList.


In [4]:
%%javascript
function getQueryStringValue (key)
{  
    return unescape(window.location.search.replace(new RegExp("^(?:.*[&\\?]" + escape(key).replace(/[\.\+\*]/g, "\\$&") + "(?:\\=([^&]*))?)?.*$", "i"), "$1"));
}
IPython.notebook.kernel.execute("documentID='".concat(getQueryStringValue("documentId")).concat("'"));
IPython.notebook.kernel.execute("user='".concat(getQueryStringValue("user")).concat("'"));
IPython.notebook.kernel.execute("full_notebook_url='" + window.location + "'"); 

<IPython.core.display.Javascript object>

In [6]:
# Base URL of the catalog the metadata record is fetched from.
catalogURL = "http://cinergi.sdsc.edu/geoportal/"

# use hardwired values for testing
#documentID="e3619c5df2644204b67f51f48525a0b1"
DEFAULT_DOCUMENT_ID = "4db8156abb6d4119aa5c35aa39514b42"

# documentID is expected to be assigned by the %%javascript cell. Fall back to
# the test record when it is empty (no documentId in the query string) or not
# defined at all (the JavaScript assignment has not reached the kernel), instead
# of failing with a NameError.
documentID = globals().get("documentID") or DEFAULT_DOCUMENT_ID

In [7]:
# Split the notebook URL around this notebook's file name; the first element of
# the resulting 3-tuple is the part of the URL that precedes the file name.
url_partitioned = full_notebook_url.partition('ISOmetadata-ExtractDistributions.ipynb')
base_url = url_partitioned[0]

# Echo the values received from the browser so they can be checked by eye.
for label, value in (
    ("User: ", user),
    ("DocumentID: ", documentID),
    ("full notebook url partition", url_partitioned),
    ("full notebook url", full_notebook_url),
):
    print(label, value)

User:  
DocumentID:  4db8156abb6d4119aa5c35aa39514b42
full notebook url partition ('http://suave-jupyterhub.com/user/zeppelin-v/notebooks/DispatchTesting/', 'ISOmetadata-ExtractDistributions.ipynb', '')
full notebook url http://suave-jupyterhub.com/user/zeppelin-v/notebooks/DispatchTesting/ISOmetadata-ExtractDistributions.ipynb


In [8]:

#import xmltodict
import requests
import json
from lxml import etree  #supposed to be better than xml.etree
from io import StringIO,BytesIO

#get the url to retrieve xml record from catalog
metadataURLx=catalogURL + 'rest/metadata/item/' + documentID + '/xml'

print ("metadata URL: ", metadataURLx)

#get the xml record
# A timeout keeps the notebook from hanging indefinitely on an unresponsive
# catalog; raise_for_status() surfaces HTTP errors (e.g. unknown documentID)
# here rather than as a confusing XML parse failure later on.
the_page = requests.get(metadataURLx, timeout=30)
the_page.raise_for_status()


metadata URL:  http://cinergi.sdsc.edu/geoportal/rest/metadata/item/4db8156abb6d4119aa5c35aa39514b42/xml


In [9]:
# use this to generate JSON representation of the metadata record
#the_isojson = json.loads(json.dumps(xmltodict.parse(the_page.text)))

#print(the_isojson.keys())
#print(the_isojson["gmi:MI_Metadata"])

In [10]:
# Prefix -> namespace URI map for ISO metadata records; passed as the
# `namespaces` argument to the find/findall calls so that paths can use
# prefixes such as "gmd:" and "gco:".
NSMAP = dict(
    gmi="http://www.isotc211.org/2005/gmi",
    gco="http://www.isotc211.org/2005/gco",
    gmd="http://www.isotc211.org/2005/gmd",
    gml="http://www.opengis.net/gml",
    gmx="http://www.isotc211.org/2005/gmx",
    gts="http://www.isotc211.org/2005/gts",
    srv="http://www.isotc211.org/2005/srv",
    xlink="http://www.w3.org/1999/xlink",
)

In [11]:
# Parse the record that was already downloaded into the_page instead of
# letting etree.parse() fetch metadataURLx a second time. The raw bytes
# (.content) are used so the parser can honour the encoding declared in the
# XML prolog.
#tree is an element tree
tree = etree.parse(BytesIO(the_page.content))
root = tree.getroot()
docinfo = tree.docinfo
print(docinfo.xml_version)
# ".//" searches all descendants of the root; this is the explicit spelling of
# the former "//" path.
print(tree.findall(".//gmd:MD_DigitalTransferOptions",namespaces=NSMAP))



1.0
[<Element {http://www.isotc211.org/2005/gmd}MD_DigitalTransferOptions at 0x7f1af0e33f88>]


In [12]:
#iterate through digital transfer options and set up dispatch object
# dispatch list is a list of 'options' consisting of
# {an application profile (string, from EC resource registry) that the dispatcher will use to identify target notebooks,
#   the URL for the information resource input to the target for that profile}
# e.g. dispatchlist = [{"profile":"profile1","url":"url1"}, {"profile":"profile2","url":"url2"}]

GMD_TRANSFER_OPTIONS = "{http://www.isotc211.org/2005/gmd}MD_DigitalTransferOptions"
GMD_ONLINE_RESOURCE = "{http://www.isotc211.org/2005/gmd}CI_OnlineResource"


def _child_text(parent, path):
    """Return the text of the first element matching `path` below `parent`.

    `path` is resolved with the NSMAP prefixes. Returns '' both when no
    element matches and when the matching element is empty (its .text is
    None), so callers can always apply string methods to the result.
    """
    node = parent.find(path, namespaces=NSMAP)
    if node is None or node.text is None:
        return ''
    return node.text


def _dispatch_options(url, protocol, description):
    """Return the dispatch options that apply to one online resource.

    Parameters are the resource's URL, protocol and description strings.
    The result is a list of {"profile": ..., "url": ...} dicts, in the order
    WFS, WMS, KML; it is empty when no test matches.
    """
    url_lc = url.lower()
    protocol_lc = protocol.lower()
    options = []

    #check for OGC WFS Web feature service
    if 'wfs' in protocol_lc or 'service=wfs' in url_lc:
        # wfs dispatcher gets the base URL for the service
        options.append({"profile":"wfsclient","url":url.split('?')[0]})

    #check for OGC WMS; open in QGIS, ArcGIS, or OpenLayers web client
    #kml test is because of GeoServer handling of kml response for wms
    if 'wms' in protocol_lc or ('service=wms' in url_lc and 'request=kml' not in url_lc):
        # wms dispatcher gets the base URL for the service
        options.append({"profile":"wmsclient","url":url.split('?')[0]})

    # KML client-- open in GoogleEarth or ?OpenLayers? kml client
    #kml test for GeoServer handling of kml response for wms
    if ('kml download' in description.lower() or
            ('request=kml' in url_lc and 'mode=download' in url_lc) or
            '.kml' in url_lc or '.kmz' in url_lc):
        # kml dispatcher gets the full URL, query string included
        options.append({"profile":"kmlclient","url":url})

    # TODO: other http URL-- check if the URL works. The previous code for this
    # case was a verbatim copy of the KML test and appended every KML resource
    # to the dispatch list a second time, so it has been removed.
    return options


dispatchlist = []

for elt in tree.iter(GMD_TRANSFER_OPTIONS):
    # only want OnlineResources that are in distribution//MD_DigitalTransferOptions
    #  TBD-- figure out what to do with CI_OnlineResource inside SV_OperationMetadata
    #iterate through CI_OnlineResource elements
    for onlineres in elt.iter(GMD_ONLINE_RESOURCE):
        theURL = _child_text(onlineres, "gmd:linkage/gmd:URL").strip()
        if not theURL:
            continue #don't bother if there's no URL!

        thename = _child_text(onlineres, "gmd:name/gco:CharacterString")
        thedescription = _child_text(onlineres, "gmd:description/gco:CharacterString")
        theprotocol = _child_text(onlineres, "gmd:protocol/gco:CharacterString")
        theappprofile = _child_text(onlineres, "gmd:applicationProfile/gco:CharacterString")

        # The function code and its text come from the same element, so look
        # it up once. Both values are only printed below.
        functionelt = onlineres.find("gmd:function/gmd:CI_OnLineFunctionCode",namespaces=NSMAP)
        if functionelt is not None:
            thefunctioncode = functionelt.get("codeListValue")
            thefunctiontext = functionelt.text
        else:
            thefunctioncode = ''
            thefunctiontext = ''

        print(theURL,thename,thedescription,theprotocol,theappprofile,thefunctioncode,thefunctiontext)

        # series of tests to determine what application profiles are applicable for this online resource
        dispatchlist.extend(_dispatch_options(theURL, theprotocol, thedescription))

print(dispatchlist)

https://www.sciencebase.gov/catalog/item/5032ab9de4b0d64661a77224 ScienceBase Item Summary Page Link to the ScienceBase Item Summary page for the item described by this metadata record WWW:LINK-1.0-http--link Web Browser information information
https://www.sciencebase.gov/catalogMaps/mapping/ows/5032ab9de4b0d64661a77224?mode=download&request=kml&service=wms&layers=WYSagegrouse_currentdistribution KML Service KML Download KML Web Browser information information
https://www.sciencebase.gov/catalogMaps/mapping/ows/5032ab9de4b0d64661a77224?service=wms&request=getcapabilities&version=1.3.0 ScienceBase WMS Service OGC Service Capabilities URL OGC:WMS Web Browser information information
https://www.sciencebase.gov/catalogMaps/mapping/ows/5032ab9de4b0d64661a77224?service=wfs&request=getcapabilities&version=1.0.0 ScienceBase WFS Service OGC Service Capabilities URL OGC:WFS Web Browser information information
https://www.sciencebase.gov/catalog/file/get/5032ab9de4b0d64661a77224 Download Attached

Call the dispatcher with the dispatchlist.
The dispatcher will need to access a registry that maps application profile values to endpoints that can 'open' the URL associated with that profile in the dispatch option.

In the long run, the dispatcher should be a separate component accessed via URL; for now it is hard-wired here.



In [13]:
# Hard-wired dispatcher stub: walk the dispatch options and report each one
# whose profile is 'wfsclient'.
for option in dispatchlist:
    if option['profile'] != 'wfsclient':
        continue
    #offer links for apps that consume generic WFS
    print('got wfs')

got wfs
