From 3a751d007d29f514f7f93b89f954c9c3e2947200 Mon Sep 17 00:00:00 2001 From: Matt Jones Date: Fri, 29 Jun 2018 00:03:01 -0800 Subject: [PATCH] Completed fields for semantic annotation. See discussion in issue #25. --- src/test/resources/eml-data-paper.xml | 6 +- src/test/resources/eml-sample.xml | 56 ++++--- src/test/resources/eml-semantics.xml | 54 +++---- xsd/eml-attribute.xsd | 87 ++++++----- xsd/eml-entity.xsd | 7 +- xsd/eml-resource.xsd | 9 +- xsd/eml-semantics.xsd | 209 ++++++++++++++++---------- xsd/eml.xsd | 53 +++++++ 8 files changed, 290 insertions(+), 191 deletions(-) diff --git a/src/test/resources/eml-data-paper.xml b/src/test/resources/eml-data-paper.xml index 88076301..1ae69277 100644 --- a/src/test/resources/eml-data-paper.xml +++ b/src/test/resources/eml-data-paper.xml @@ -149,10 +149,8 @@ - http://purl.org/dc/elements/1.1/subject - Subject - http://purl.obolibrary.org/obo/ENVO_01000177 - grassland biome + http://purl.org/dc/elements/1.1/subject + http://purl.obolibrary.org/obo/ENVO_01000177 diff --git a/src/test/resources/eml-sample.xml b/src/test/resources/eml-sample.xml index 38d580b0..85a23f6d 100644 --- a/src/test/resources/eml-sample.xml +++ b/src/test/resources/eml-sample.xml @@ -48,6 +48,13 @@ 208-236-2933 inouye@lter.umn.edu + + + Mr. 
+ Adam + Shepherd + + Old field grassland biomass @@ -56,10 +63,8 @@ species richness - http://purl.org/dc/elements/1.1/subject - Subject - http://purl.obolibrary.org/obo/ENVO_01000177 - grassland biome + http://purl.org/dc/elements/1.1/subject + http://purl.obolibrary.org/obo/ENVO_01000177 clarence.lehman @@ -84,7 +89,7 @@ https://www.nsf.gov/awardsearch/showAward?AWD_ID=1546024 - + CDR LTER-patterns among communities.txt patterns amoung communities at CDR @@ -301,22 +306,16 @@ - http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#ofCharacteristic - Characteristic - http://ecoinformatics.org/oboe/oboe.1.2/oboe-characteristics.owl#Mass - Mass + http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#ofCharacteristic + http://ecoinformatics.org/oboe/oboe.1.2/oboe-characteristics.owl#Mass - http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard - unit - http://ecoinformatics.org/oboe/oboe.1.2/oboe-standards.owl#Kilogram - Kilogram + http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard + http://ecoinformatics.org/oboe/oboe.1.2/oboe-standards.owl#Kilogram - http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#ofEntity - entity - http://example.com/example-vocab-1.owl#PlantSample - Plant Sample + http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#ofEntity + http://example.com/example-vocab-1.owl#PlantSample @@ -362,6 +361,20 @@ 22 + + + http://purl.org/dc/elements/1.1/subject + http://purl.obolibrary.org/obo/ENVO_01000177 + + + http://www.w3.org/1999/02/22-rdf-syntax-ns#type + https://schema.org/Person + + + https://schema.org/memberOf + https://doi.org/10.17616/R37P4C + + + + adam.shepherd + + + https://schema.org/memberOf + https://doi.org/10.17616/R37P4C + + + diff --git a/src/test/resources/eml-semantics.xml b/src/test/resources/eml-semantics.xml index 7d49771b..03b1d484 100644 --- a/src/test/resources/eml-semantics.xml +++ b/src/test/resources/eml-semantics.xml @@ -3,40 +3,34 @@ system="http://knb.ecoinformatics.org" 
xmlns:dt="eml://ecoinformatics.org/dataTable-2.2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="eml://ecoinformatics.org/dataTable-2.2.0 - xsd/eml-dataTable.xsd"> + xsi:schemaLocation="eml://ecoinformatics.org/dataTable-2.2.0 xsd/eml-dataTable.xsd"> TEMPID234 Test Data This is a test entity that is fake. This is test data - - tmpair - Air Temperature - Air temperature at 1m from ground. - - float - - - celsius - 0.5 - - real - - - - - http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#ofCharacteristic - characteristic - http://ecoinformatics.org/oboe/oboe.1.2/oboe-characteristics.owl#Temperature - Temperature - - - http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#ofEntity - entity - http://purl.obolibrary.org/obo/ENVO_00002005 - air - - + + tmpair + Air Temperature + Air temperature at 1m from ground. + float + + + celsius + 0.5 + + real + + + + + http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#ofCharacteristic + http://ecoinformatics.org/oboe/oboe.1.2/oboe-characteristics.owl#Temperature + + + http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#ofEntity + http://purl.obolibrary.org/obo/ENVO_00002005 + + diff --git a/xsd/eml-attribute.xsd b/xsd/eml-attribute.xsd index 83566d08..c1863118 100644 --- a/xsd/eml-attribute.xsd +++ b/xsd/eml-attribute.xsd @@ -132,7 +132,7 @@ seems to fall in a gray area between the logical and physical aspects of stored data. Neither comfortable with eliminating it nor with making it required, the authors left it available but optional - under attribute. In addition, it is repeatable so that different + under attribute. In addition, it is repeatable so that different storage types can be provided for various systems (e.g., different databases might use different types for columns, even though the domain of the attribute is the same regardless of which database @@ -147,7 +147,7 @@ interoperability of dateTime formats. 
EML 2.0 tries to provide an unambiguous mechanism for describing the format of dateTime values by providing a separate category for date and time values. This - "dateTime" measurement scale allows users to explicitly label + "dateTime" measurement scale allows users to explicitly label attributes that contain Gregorian date and time values, and allows them to provide the information needed to parse these values into their appropriate components (e.g., days, months, years)./ @@ -287,7 +287,7 @@ Storage Type Storage type hint for this field This element describes the storage type, - for data in a RDBMS (or other data management system) field. + for data in a RDBMS (or other data management system) field. As many systems do not provide for fine-grained restrictions on types, this type will often be a superset of the allowed domain defined in @@ -376,7 +376,7 @@ Nominal scale - Characteristics used to define nominal + Characteristics used to define nominal (categorical) scale attributes This field is used for defining the characteristics of this variable if it is a @@ -398,7 +398,7 @@ Ordinal scale - Characteristics used to define ordinal + Characteristics used to define ordinal (ordered) scale attributes This field is used for defining the characteristics of this variable if it is an @@ -423,7 +423,7 @@ Interval scale - Characteristics used to define interval + Characteristics used to define interval scale attributes This field is used for defining the characteristics of this variable if it is an @@ -449,7 +449,7 @@ Ratio scale - Characteristics used to define ratio + Characteristics used to define ratio scale attributes This field is used for defining the characteristics of this variable if it is a @@ -476,7 +476,7 @@ Date/Time scale - Characteristics used to define date and + Characteristics used to define date and time attributes The dateTime field is used for defining the @@ -489,12 +489,12 @@ a fixed point) but they sometimes only behave as ordinals (because the 
calendar is not predetermined, for some dateTime values one can only find out the order of - the points and not the magnitude of the duration + the points and not the magnitude of the duration between those points). Thus, the dateTime scale provides the information necessary to properly understand and - parse date and time values without improperly + parse date and time values without improperly labeling them under one of the more traditional scales. - Date and time values are unlike any other measured values. + Date and time values are unlike any other measured values. Note that the dateTime field would not be used if one is recording time durations. In that case, one should use a standard unit such as seconds, nominalMinute or nominalDay, or a @@ -601,7 +601,7 @@ string for dateTime values allows one to accurately calculate the duration in SI second units between two measured dateTime values, assuming that the conversion software has a detailed knowledge of - the Gregorian calendar. + the Gregorian calendar. YYYY-MM-DDThh:mm:ss @@ -773,14 +773,11 @@ Each annotation consists of a propertyURI and valueURI that define the property and value of the statement. The subject of the statement is the eml:attribute that contains the - annotation. The propertyLabel and valueLabel can be used + annotation. The associated labels can be used to display the statement to users. Each URI is resolvable to a controlled vocabulary that provides a definition, relationships to other terms, and multiple - labels for displaying the statement. The propertyURI and propertyLabel - fields are optional. When they are omitted for an attribute defniition, - then users should assume thatthe property being asserted is - oboe:MeasurementType. + labels for displaying the statement. 
@@ -890,7 +887,7 @@ Standard Unit The name of a standard unit used to make this measurement - Use the standardUnit element if the unit for this attribute has + Use the standardUnit element if the unit for this attribute has been defined in the Standard Unit Dictionary. The list of "standard" units includes the SI base units and many compound units based on SI, plus and some commonly used units which are not SI. The list is by no means @@ -910,7 +907,7 @@ The name of a custom unit used to make this measurement. The customUnit element is for units that are - not part of the standard list provided with EML. The customUnit + not part of the standard list provided with EML. The customUnit must correspond to an id in the document where its definition is provided using the STMML syntax. The customUnit definition will most likely be in @@ -926,14 +923,14 @@ Precision The precision of the measurement. - Precision indicates how close together or how - repeatable measurements are. A precise measuring instrument will give - very nearly the same result each time it is used. This means that - someone interpreting the data should expect that if a measurement were - repeated, most measured values would fall within the interval specified + Precision indicates how close together or how + repeatable measurements are. A precise measuring instrument will give + very nearly the same result each time it is used. This means that + someone interpreting the data should expect that if a measurement were + repeated, most measured values would fall within the interval specified by the precision. The value of precision should be expressed in the - same unit as the measurement. For example, for an attribute with unit - "meter", a precision of "0.1" would be interpreted to mean that most + same unit as the measurement. 
For example, for an attribute with unit + "meter", a precision of "0.1" would be interpreted to mean that most repeat measurements would fall within an interval of 1/10th of a meter. 0.1 @@ -962,9 +959,9 @@ text domain is usually used for comment and notes attributes, and other character attributes that don't have a precise set of constrained values. This is an important field for post processing - and error checking of the dataset. It represents a formal - specification of the value space for the attribute, and so there - should never be a value for the attribute that falls outside of + and error checking of the dataset. It represents a formal + specification of the value space for the attribute, and so there + should never be a value for the attribute that falls outside of the set of values prescribed by the domain. @@ -1027,10 +1024,10 @@ Definition of the associated code The definition describes the - code with which it is associated in enough + code with which it is associated in enough detail for scientists to interpret the meaning of the coded values. - high density, above 10 per square + high density, above 10 per square meter @@ -1060,13 +1057,13 @@ Order - Mechanism for specifying what the - order of the code-definitions included should + Mechanism for specifying what the + order of the code-definitions included should be - Ordinal scale measurements have a discrete list - of values with a specific ordering of those values. This attributes - specifies that order from low to high. For example, for LOW, - MEDIUM, HIGH, the order attribute might be "LOW=1, MEDIUM=2 and + Ordinal scale measurements have a discrete list + of values with a specific ordering of those values. This attributes + specifies that order from low to high. For example, for LOW, + MEDIUM, HIGH, the order attribute might be "LOW=1, MEDIUM=2 and HIGH=3". @@ -1148,7 +1145,7 @@ The entityCodeList is a list of codes and their definitions in a data entity that is present in this dataset. 
The fields - specify exactly which entity it is, and which + specify exactly which entity it is, and which attributes of that entity contain the codes, their definitions, and the order of the values. @@ -1406,7 +1403,7 @@ description for the attribute. For example, if the format string is "YYYY-MM-DD", then a valid minimum in the domain would be "2001-05-29". The "bounds" element is optional, and - if it is missing then any legitimate value from the Gregorian + if it is missing then any legitimate value from the Gregorian calendar system is allowed in the attribute as long as its representation matches its corresponding formatString. @@ -1430,7 +1427,7 @@ minimum and maximum values of a numeric attribute. These are theoretical or permitted values (ie. prescriptive), and not necessarily the actual minimum and maximum observed in - a given data set (descriptive). + a given data set (descriptive). @@ -1445,7 +1442,7 @@ minimum and maximum values of a numeric attribute. These are theoretical or permitted values (ie. prescriptive), and not necessarily the actual minimum and maximum observed in - a given data set (descriptive). Either or both a minimum and maximum may + a given data set (descriptive). Either or both a minimum and maximum may be set, and each has an attribute "exclusive" to define how the value should be interpreted. @@ -1553,7 +1550,7 @@ For example, if the format string is "YYYY-MM-DD", then a valid minimum in the domain would be "2001-05-29". The "bounds" element is optional, and - if it is missing then any legitimate value from the Gregorian + if it is missing then any legitimate value from the Gregorian calendar system is allowed in the attribute as long as its representation matches its corresponding formatString. @@ -1570,7 +1567,7 @@ minimum and maximum dates of a dateTime attribute. These are theoretical or permitted values (ie. prescriptive), and not necessarily the actual minimum and maximum observed in - a given data set (descriptive). 
Either or both a minimum and maximum may + a given data set (descriptive). Either or both a minimum and maximum may be set, and each has an attribute "exclusive" to define how the value should be interpreted. @@ -1705,7 +1702,7 @@ The number type for this attribute consists of the 'integer' numbers, which are the natural numbers, plus the - zero value, plus the negatives of the natural numbers: ..., -4, -3, + zero value, plus the negatives of the natural numbers: ..., -4, -3, -2, -1, 0, 1, 2, 3, 4, ... @@ -1719,7 +1716,7 @@ The number type for this attribute consists of the 'real' numbers, which contains both the rational numbers - that can be expressed as fractions and the irrational numbers + that can be expressed as fractions and the irrational numbers that can not be expressed as fractions (such as the square root of 2). 4.1516 diff --git a/xsd/eml-entity.xsd b/xsd/eml-entity.xsd index 7a979863..8abc6647 100644 --- a/xsd/eml-entity.xsd +++ b/xsd/eml-entity.xsd @@ -294,15 +294,10 @@ is used to associate precise measurement semantics with the entity. Each annotation consists of a propertyURI and valueURI, which define a property and a value that apply to the entity. - The propertyLabel and valueLabel can be used to display the + The associated labels can be used to display the property and value to users. Each URI is resolvable to a controlled vocabulary that provides a definition, relationships to other terms, and multiple labels for displaying the statement. - The propertyURI and propertyLabel fields are optional. When - they are omitted for an entity defniition, - then users should assume that the property being asserted is - from the Dublin Core Element Set vocabulary and represents the - dc:subject for the entity. diff --git a/xsd/eml-resource.xsd b/xsd/eml-resource.xsd index 1f45881c..aea7a242 100644 --- a/xsd/eml-resource.xsd +++ b/xsd/eml-resource.xsd @@ -438,16 +438,11 @@ resource. 
Each annotation consists of a propertyURI and valueURI that define a property and value that apply to the resource. The subject of the statement is implicitly the - eml:resource that contains the annotation. The propertyLabel - and valueLabel can be used to display the statement to users. + eml:resource that contains the annotation. The associated + labels can be used to display the statement to users. Each URI is resolvable to a controlled vocabulary that provides a definition, relationships to other terms, and multiple labels for displaying the statement. - The propertyURI and propertyLabel fields are optional. When - they are omitted for a resource, - then users should assume that the property being asserted is - from the Dublin Core Element Set vocabulary and represents the - dc:subject for the entity. diff --git a/xsd/eml-semantics.xsd b/xsd/eml-semantics.xsd index b81b2268..d4712786 100644 --- a/xsd/eml-semantics.xsd +++ b/xsd/eml-semantics.xsd @@ -25,7 +25,8 @@
- The eml-semantics module - Semantic annotations for formalizing meaning + The eml-semantics module - Semantic annotations for formalized + statements about EML components The eml-semantics module defines types and elements for @@ -39,6 +40,14 @@ semantically that the attribute is measuring the property "Temperature" from a sample of the entity "Air", where both of those terms are defined precisely in controlled vocabularies. + The eml-semantics module defines an 'annotation' element and + associated type that can be used within EML resources (dataset, + software, etc.), EML Entities (dataTable, spatialRaster, + spatialVector, otherEntity), and EML Attributes. They can also + be applied within the EML additionalMetadata field to label + arbitrary structures within EML, in which case the subject of + the annotation is the element listed in the describes element + within the additionalMetadata field.
@@ -63,6 +72,14 @@ clarify the measurement semantics of an attribute (variable) within a data set, or associate a specific statement with a data table or dataset. + The eml-semantics module defines an 'annotation' element and + associated type that can be used within EML resources (dataset, + software, etc.), EML Entities (dataTable, spatialRaster, + spatialVector, otherEntity), and EML Attributes. They can also + be applied within the EML additionalMetadata field to label + arbitrary structures within EML, in which case the subject of + the annotation is the element listed in the describes element + within the additionalMetadata field. @@ -71,55 +88,77 @@ - - - - Property URI - The persistent URI used to identify a property from a vocabulary. - - The URI for a property that is drawn from a controlled - vocabulary, and that links the subject of this annotation - (which is implied by its parent elements) to the annotation value. - The propertyURI represents a semantically well-defined concept - that one wants to apply to an element within EML. The propertyURI - and its associated label are optional. If omitted, then the interpretation - of the associated valueURI depends on where the annotation occurs. - - http://ecoinformatics.org/oboe/oboe.1.2/oboe-characteristics.owl#ofCharacteristic - http://ecoinformatics.org/oboe/oboe.1.2/oboe-standards.owl#usesStandard - - - - - - - Property Label - - A human readable representation of the controlled property. - - - A label that provides a human readable representation of - the controlled property. The label is often used to represent - the controlled property when displaying annotations in - software. The label is often redundant with one or more labels - that are defined in the controlled vocabulary itself, - but is repreated here within the EML document to make it easy - for consumers to display the annotation to users. 
Other - labels, including labels in multiple languages, may often be - accessible from within the ontology or controlled - vocabularly accessible at the propertyURI. Property definitions - and relationships to other properties are also typically - provided within the vocabularly accessible at the propertyURI. - Software and people may be able to resolve the URI to find out - the definition and retrieve additional labels and other metadata - for presentation to users. - - has characteristic - uses protocol - - + + + + Property URI + The persistent URI used to identify a property from a vocabulary. + + The URI for a property that is drawn from a controlled + vocabulary, and that links the subject of this annotation + (which is implied by its parent element) to the annotation value. + The propertyURI represents a semantically well-defined concept + that one wants to apply to an element within EML. + These properties should be precisely defined, + including defining various relationships to related concepts. + Property URIs are typically drawn from formal ontologies that + provide well-defined logical semantics and provide for various + types of reasoning about equivalence and overlap. + Property URIs should be persistent and unique over decades, + and the meaning of the URI should + be stable over time as well. These URIs are usually constructed + using an additional abstraction layer via link resolvers, + rather than directly resolving to the term definition itself. + Thus, when resolving the propertyURI, software user agents should + be prepared to follow multiple redirects before finally + resolving the current location of the definition. + However, there is no requirement that propertyURIs need to be + resolvable, and metadata parsers should expect to find + propertyURIs that do not resolve and are only used as unique + URIs for the property (i.e., a name). 
+ + + http://ecoinformatics.org/oboe/oboe.1.2/oboe-characteristics.owl#ofCharacteristic + http://ecoinformatics.org/oboe/oboe.1.2/oboe-standards.owl#usesStandard + + + + + + + + + Property Label + + A human readable representation of the controlled property. + + + A label that provides a human readable representation of + the controlled property. The label is often used to represent + the controlled property when displaying annotations in + software. The label is often redundant with one or more labels + that are defined in the controlled vocabulary itself, + but is repeated here within the EML document to make it easy + for consumers to display the annotation to users. Other + labels, including labels in multiple languages, may often be + accessible from within the ontology or controlled + vocabulary accessible at the propertyURI. Property definitions + and relationships to other properties are also typically + provided within the vocabulary accessible at the propertyURI. + Software and people may be able to resolve the URI to find out + the definition and retrieve additional labels and other metadata + for presentation to users. + + has characteristic + uses protocol + + + + + + - + Value URI @@ -146,44 +185,50 @@ However, there is no requirement that valueURIs need to be resolvable, and metadata parsers should expect to find valueURIs that do not resolve and are only used as unique - URIs for the value. + URIs for the value (i.e., a name). http://ecoinformatics.org/oboe/oboe.1.2/oboe-characteristics.owl#Mass http://ecoinformatics.org/oboe/oboe.1.2/oboe-standards.owl#Kilogram - - - - - Value Label - - A human readable representation of the controlled value. - - - A label that provides a human readable representation of - the controlled value. The label is often used to represent the - controlled value when displaying annotations in software. 
The - label is often redundant with one or more labels that are - defined in the controlled vocabulary itself, but is repreated - here within the EML document to make it easy for consumers to - display the annotation to users. Other labels, including - labels in multiple languages, may often be accessible from - within the ontology or controlled vocabularly accessible at - the valueURI. Value definitions and relationships to - other terms are also typically provided within the vocabularly - accessible at the valueURI. Software and people can resolve - the value URI to find out the definition of the value and - retrieve additional labels and other metadata about the value - for presentation to users. - - Mass - Kilogram - Net Primary Production - Carbon - Density - - + + + + + + + Value Label + + A human readable representation of the controlled value. + + + A label that provides a human readable representation of + the controlled value. The label is often used to represent the + controlled value when displaying annotations in software. The + label is often redundant with one or more labels that are + defined in the controlled vocabulary itself, but is repeated + here within the EML document to make it easy for consumers to + display the annotation to users. Other labels, including + labels in multiple languages, may often be accessible from + within the ontology or controlled vocabulary accessible at + the valueURI. Value definitions and relationships to + other terms are also typically provided within the vocabulary + accessible at the valueURI. Software and people can resolve + the value URI to find out the definition of the value and + retrieve additional labels and other metadata about the value + for presentation to users. 
+ + Mass + Kilogram + Net Primary Production + Carbon + Density + + + + + + diff --git a/xsd/eml.xsd b/xsd/eml.xsd index 5b7ff0e5..187fdf04 100644 --- a/xsd/eml.xsd +++ b/xsd/eml.xsd @@ -11,6 +11,7 @@ xmlns:ds="eml://ecoinformatics.org/dataset-2.2.0" xmlns:cit="eml://ecoinformatics.org/literature-2.2.0" xmlns:sw="eml://ecoinformatics.org/software-2.2.0" + xmlns:sem="eml://ecoinformatics.org/semantics-2.2.0" targetNamespace="eml://ecoinformatics.org/eml-2.2.0"> @@ -19,6 +20,7 @@ + '$RCSfile: eml.xsd,v $' Copyright: 1997-2002 Regents of the University of @@ -176,6 +178,57 @@ + + + + Semantic Annotation List + A list of precisely-defined semantic statements about this resource. + + A list of annotations defining precise semantics + for parts of this resource. The list consists of a set + of annotation elements, each referring to a subject within + the EML document using its references attribute, which provides + a pointer to the id of the element being annotated. + + + + + + + + + Semantic Annotation + A precisely-defined semantic statement about + an element in the EML document. + + An annotation represents a precisely-defined semantic + statement that applies to this resource. This semantic + statement is used to associate precise semantics with a + particular element in the EML document. The id of the + element being annotated is listed in the references attribute, + and must point to a unique id within the EML document. + Each annotation consists of a propertyURI and + valueURI that define a property and value that apply to the + resource. The subject of the statement is implicitly the + element that is listed in the references attribute. + The associated labels can be used to display the statement to users. + Each URI is resolvable to a controlled vocabulary + that provides a definition, relationships to other terms, and + multiple labels for displaying the statement. + + + + + + + + + + + + + +