From 74963d64d1294f4dbc61341021967254535d46da Mon Sep 17 00:00:00 2001 From: grammarware Date: Mon, 9 Aug 2010 13:38:15 +0000 Subject: [PATCH] XPath (and W3C in general) mapping progresses git-svn-id: https://slps.svn.sourceforge.net/svnroot/slps@819 ab42f6e0-554d-0410-b580-99e487e6eeb2 --- shared/xsd/ldf.xsd | 1572 +++++++++++++------------- topics/documents/xpath/spec2ldf.xslt | 436 ++++--- 2 files changed, 1053 insertions(+), 955 deletions(-) diff --git a/shared/xsd/ldf.xsd b/shared/xsd/ldf.xsd index aa79730e..70a828a5 100644 --- a/shared/xsd/ldf.xsd +++ b/shared/xsd/ldf.xsd @@ -1,13 +1,13 @@ - - - - - + + + + + Language Document Format - + This is basically a metamodel for language documents or documentations. Sample program tags: @@ -16,95 +16,95 @@ - Generation request for given grammar and nonterminal (and perhaps version, more control) - Sample suite extraction request (what are the constraints on the requested sample?) - - - - - - A document is essentially a sequence of several parts, such as title page, front matter, - lexical and syntax sections, placeholders for generated content and various lists. - - - - - - - - - - - - - - - - - - - - The ``title page'' can in reality be rendered as several pages, but it contains the basic - information that helps to identify this particular language definition and to distinguish - it from similar documents. In our experience, language standards are either marked as - organisation-created or person-authored ones. In the former case, the document must contain - the name of the organisation and the standard reference number within it (e.g, ISO 10279). - In the latter case, one or more authors are specified. It is also possible to mark some sections - as having been authored by a specific set of authors, but the ones defined here are the principal - authors that identify the specific standard. 
+ + + + + + A document is essentially a sequence of several parts, such as title page, front matter, + lexical and syntax sections, placeholders for generated content and various lists. - - The ``topic'' of the language document is its pure textual title without the reference number - and status: e.g., ``Programming Language REXX'', - ``Information technology --- Programming languages --- Full BASIC'', etc. - Either version or edition follows. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + The ``title page'' can in reality be rendered as several pages, but it contains the basic + information that helps to identify this particular language definition and to distinguish + it from similar documents. In our experience, language standards are either marked as + organisation-created or person-authored ones. In the former case, the document must contain + the name of the organisation and the standard reference number within it (e.g, ISO 10279). + In the latter case, one or more authors are specified. It is also possible to mark some sections + as having been authored by a specific set of authors, but the ones defined here are the principal + authors that identify the specific standard. + + + The ``topic'' of the language document is its pure textual title without the reference number + and status: e.g., ``Programming Language REXX'', + ``Information technology --- Programming languages --- Full BASIC'', etc. + Either version or edition follows. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + According to W3C Process Document, each viable specification goes through the stages of Working Draft (WD), Candidate Recommendation (CR), Proposed Recommendation (PR), W3C Recommendation (REC), with possible continuation to Proposed Edited Recommendation and decline to Rescinded Recommendation. There has also been a ``Note'' status in the past for internal drafts. 
- + IEEE uses different publication types, they are: Changed Designation, Collection, @@ -121,7 +121,7 @@ Superseded, Withdrawn (IEEE Status Report). - + ISO/IEC operates with the following standard ratification stages: Approved Work Item (AWI), Working Draft (WD), Committee Draft (CD), @@ -137,584 +137,590 @@ Technical Report (ISO TR), Technical Specification (ISO TS). - + Some standardisation bodies like ANSI or ECMA do not have a long list of stages, the standard there is just either approved or not. Of course, it still can be revised, reaffirmed, withdrawn or be put into a category of technical reports. - + We summarise these and possibly other sets of statuses by the following enumeration. Conceptually it provides functionality for the same categories, but the concrete wording may vary (i.e., ``errata'' vs ``corrigenda'', ``obsolete'' vs ``rescinded''). If necessary, the schema can be extended with more standard publication types or even adapted to fit completely in some specific standardisation body classification. - - - - - - - - - - - - - - - - - - - - - - - - Previous versions of the same standard. - - - - - - - - - - - - - - - - - Document part. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Placeholders are used to designate places where generated content - should be inserted. - - - - - - - - - - - - - - - - - - - - - - Entities such as table of contents can be generated by the infrastructure automatically, - ensuring internal consistency and coherence. However, the language documentation creators - should be able to specify the places where such data needs to be inserted. + + + + + + + + + + + + + + + + + + + + + + + + Previous versions of the same standard. + + + + + + + + + + + + + + + + + Document part. 
- - - - - - - - - - - - - - - - Innermost sections do not have to belong to a certain pre-defined type - (like ``synopsis'' or ``design goals''), but they can have a title - and a possible list of authors which contributed directly to this section. - - - - - - - - - - - - - - - Just like scientific papers, some of language documents can have a very short - abstract that summarises their goals and covered topics in one or two - sentences. - - - - - - - Whatever the authors deem to be important enough to be put on one of the first - pages. For example, in C\# specifications Foreword is about the differences brought - to the language by the current standard, while in the Scheme specification - Foreword discusses programming languages design and demonstrates its principles - applied to the forthcoming document. - - - Technically speaking, Foreword is not a part of the specification. Instead, it - precedes the specification and introduces it by putting in the proper context. - - - - - - - Conformance (compliance) section defines several levels of compliance by - explaining what is a conforming program and a conforming - implementation with respect to this standard. - - - Definitions for meta-terms like "shall" and "should" - and their relation to the compliance issue explained above. - - - - - - - While conformance/compliance define how external artifacts should - conform to this standard, this section defines how this standard - complies with previously existing ones. - - - - - - - The goals of language design are sometimes encountered being explicitly - stated in the language document in one of the informative sections - of the front matter part. - - - For example: - "C\# is intended to be a simple, modern, general-purpose, object-oriented - programming language." - (from ECMA 334 3rd edition, page xvii) - - - - - - - This section informally describes how the document is organised, divided - into parts and chapters. 
Sometimes it explicitly states which sections are - normative and which are informative. - - - - - - - - - Formally lined up references to all other standards that are used or referenced to - from within the document. - - - - - - - Notation section defines grammar definition formalism used in the document: - mostly it is about the EBNF dialect. - - - - - - - Scope section explains the context for the language document. - - - - - - - A list of changes brought to the language by the current specification - replacing the previous one. - - - - - - - - - - Sections describing lexical structure tend to be shorter, less structured inside, - and very limited in scope: there is usually one lexical section dedicated to whitespace, - one lexical section about tokens, one about literals, one about comments, etc. - - - See the section on grammar notation for more details about how broad even the smallest - aspects (e.g., about line continuations) can vary. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - In big documents it is not uncommon to find one topic divided into - several subtopics, each one dedicated to a separate issue and each one - structured in the same way its parent section is. - DSLs and 4GLs specification authors often find it easier to lay out - different clauses of one language construct in different sections. - When the language has a lot of parametrised constructs, it makes sense - to dedicate a special subsection for each field. - Parameters, types, methods, operations, participants---whatever categories - inspire these subtopics, each of them is a fully structured section in itself. - - - - - - - - - - - - Even hardbound standards contain hypertext-like elements. 
- One of them is attaching a list of links - and backlinks to every section---that way, one can easily - find any related language constructs when learning the - language or debugging a particular feature. - - - - - - - Some subsections can be generated, especially those that consist - of structured content that is possible to derive automatically - from the information stored in or collected from other sections. - - - - - - - A singled named value can be bound to a language document section: it can be - a type of a parameter that is being described here, or an alternative name, - or a superclass, or anything else that is atomic and non-structured. - - - - - - - - - - - - - - - Normative sections are obligatory and usually contain strict content - that needs to be implemented by compiler developers or satisfied - by language end users in order to comply to this standard. - - - For instance, if a section with a grammar production is marked as - normative, this production must hold for the implemented language. - - - - - - - Informative sections are supplementary and provide some useful information - that can be omitted or overridden if deemed appropriately. - - - For instance, if all sections with code samples are marked as informative, - we cannot rely on the set of examples extracted from the standard to be - the test set for the language. Apparently, we can still run some analyses - on the basis of this, but it is not legitimate to make any conclusions - about standard inconsistencies based on the informative sections, nor can - they be legitimately be used to resolve inner conflicts of the documentation. - - - - - - - - - - - - - Normative sections form the core of the language standard: for each core section they belong to, - they define the purpose of the language construct, provide a description, a syntax definition, - list use constraints and other semantic details, etc. 
- - - - - - - A separate subsection named ``purpose'' is only encountered in some 4GL - language manuals (e.g., JCL). However, it is quite common for the first - paragraph of any new section of any language document to briefly introduce - the purpose of the language construct that is about to be described. - - - - - - - Description is the core of the parent section, containing the main details about the - defined topic, information about its usage, motivation behind its design. - - - - - - - Syntax sections consist of one or more BGF productions, possibly complemented by textual descriptions. - - - - - - - This section can list requirements needed for using a specific language construct, - applicability constraints and other restrictions. - - - - - - - Related language constructs can be named and referenced here. - It is not a simple list of references, but rather a comprehensive - overview on the kind of consequences other parts of the language - can bear if this one is used. - - - - - - - A section on semantics explains in plain English, if no other specific formalism - is used, how exactly the language construct works, what happens inside the system when - it is utilised. It also describes the context in which the introduced language construct - can be encountered and in which it should or should not be used. - - - - - - - It is quite common in the sections that describe an optional parameter - to tell the reader what will happen in the case nothing was specified. - ``There is no default'' can be as valid a definition as a real default value. - - - - - - - - - - - Unlike normative sections that impose some conformance constraints that need - to be satisfied by a language processor to claim compatibility with the standard, - the informative sections are only presented to provide some additional information - to the reader. - - - - - - - A subsection with an example can contain a code sample as well - as some accompanying text. 
- - - - - - - A rationale or a note usually lists some narrow places of - non-obvious usage, implementation details, incompatibility issues, - coding standards, common programming practices, etc. It is a - subsection of secondary importance, contributing some information - on a minor point that can still be interesting and useful for some - readers. Notes usually tell the readers how to use certain language - constructs or tell compiler vendors how to implement them. - - - - - - - It is quite uncommon practice, but some standardisation bodies really - put the information about language evolution directly into each section - that changed since the last public version of the language document. - - - If this is done consistently and carefully, it is possible to generate - the global ``What's new'' section automatically. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + In big documents it is not uncommon to find one topic divided into + several subtopics, each one dedicated to a separate issue and each one + structured in the same way its parent section is. + DSLs and 4GLs specification authors often find it easier to lay out + different clauses of one language construct in different sections. + When the language has a lot of parametrised constructs, it makes sense + to dedicate a special subsection for each field. + Parameters, types, methods, operations, participants---whatever categories + inspire these subtopics, each of them is a fully structured section in itself. + + + + + + + + + + + + Even hardbound standards contain hypertext-like elements. + One of them is attaching a list of links + and backlinks to every section---that way, one can easily + find any related language constructs when learning the + language or debugging a particular feature. 
+ + + + + + + Some subsections can be generated, especially those that consist + of structured content that is possible to derive automatically + from the information stored in or collected from other sections. + + + + + + + A single named value can be bound to a language document section: it can be + a type of a parameter that is being described here, or an alternative name, + or a superclass, or anything else that is atomic and non-structured. + + + + + + + + + + + + + + + Normative sections are obligatory and usually contain strict content + that needs to be implemented by compiler developers or satisfied + by language end users in order to comply with this standard. + + + For instance, if a section with a grammar production is marked as + normative, this production must hold for the implemented language. + + + + + + + Informative sections are supplementary and provide some useful information + that can be omitted or overridden if deemed appropriate. + + + For instance, if all sections with code samples are marked as informative, + we cannot rely on the set of examples extracted from the standard to be + the test set for the language. Apparently, we can still run some analyses + on the basis of this, but it is not legitimate to make any conclusions + about standard inconsistencies based on the informative sections, nor can + they legitimately be used to resolve inner conflicts of the documentation. + + + + + + + + + + + + + Normative sections form the core of the language standard: for each core section they belong to, + they define the purpose of the language construct, provide a description, a syntax definition, + list use constraints and other semantic details, etc. + + + + + + + A separate subsection named ``purpose'' is only encountered in some 4GL + language manuals (e.g., JCL).
However, it is quite common for the first + paragraph of any new section of any language document to briefly introduce + the purpose of the language construct that is about to be described. + + + + + + + Description is the core of the parent section, containing the main details about the + defined topic, information about its usage, motivation behind its design. + + + + + + + Syntax sections consist of one or more BGF productions, possibly complemented by textual descriptions. + + + + + + + This section can list requirements needed for using a specific language construct, + applicability constraints and other restrictions. + + + + + + + Related language constructs can be named and referenced here. + It is not a simple list of references, but rather a comprehensive + overview on the kind of consequences other parts of the language + can bear if this one is used. + + + + + + + A section on semantics explains in plain English, if no other specific formalism + is used, how exactly the language construct works, what happens inside the system when + it is utilised. It also describes the context in which the introduced language construct + can be encountered and in which it should or should not be used. + + + + + + + It is quite common in the sections that describe an optional parameter + to tell the reader what will happen in the case nothing was specified. + ``There is no default'' can be as valid a definition as a real default value. + + + + + + + + + + + Unlike normative sections that impose some conformance constraints that need + to be satisfied by a language processor to claim compatibility with the standard, + the informative sections are only presented to provide some additional information + to the reader. + + + + + + + A subsection with an example can contain a code sample as well + as some accompanying text. 
+ + + + + + + A rationale or a note usually lists some narrow places of + non-obvious usage, implementation details, incompatibility issues, + coding standards, common programming practices, etc. It is a + subsection of secondary importance, contributing some information + on a minor point that can still be interesting and useful for some + readers. Notes usually tell the readers how to use certain language + constructs or tell compiler vendors how to implement them. + + + + + + + It is quite uncommon practice, but some standardisation bodies really + put the information about language evolution directly into each section + that changed since the last public version of the language document. + + + If this is done consistently and carefully, it is possible to generate + the global ``What's new'' section automatically. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - A figure in LDF can include several presentation variants in different formats. - Any of them can be chosen by the rendering tool: for example, in our prototype, - \TeX\ generators prefer PDF figures while HTML generators tend to prefer PNG ones. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Tables have header rows (optional) and regular rows, with each row filled with table cells. + + + + + + + + + + + + + + + + + + A figure in LDF can include several presentation variants in different formats. + Any of them can be chosen by the rendering tool: for example, in our prototype, + \TeX\ generators prefer PDF figures while HTML generators tend to prefer PNG ones. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tables have header rows (optional) and regular rows, with each row filled with table cells. + + + + + + + + + + + + + + + + + A list in LDF is nothing more sophisticated than a sequence of textual items. 
- - - - - - - - - - - - - - - - A list in LDF is nothing more sophisticated than a sequence of textual items. - - - - - - - - - + + + + + + + + Any unstructured element of the language document belongs to a so called mixed type: i.e., it is plain text with some keywords marked. - - - - - - - - - + + + + + + + + + Formulae can be used in language documentation in many ways. The internal representation format is MathML which is W3C Recommendation, but the external representation can vary, in our case there are two: \TeX\ and HTML. - - - - - - - - - - + + + + + + + + + + Keywords are usually printed in bolder font weight. They need to be marked as such for two purposes: for presentation and for meta-information. The former goal serves as a basis for typesetting and hyperlinking, while the latter allows for correct indexing and searching facilities. - - - - - + + + + + Inlined pieces of code are usually printed in a typewriter-like font. They are frequently incomplete, mostly nothing more than simple literals, and can only be checked to be correct tokens of the language in a lexical sense. - - - - - - Internal links are pairs of text that will become clickable in hypertext - presentation forms or precede the reference itself when this is the only - option. The reference points to a section or a subsection of the same - document that the link should refer to. If the explicit text is omitted, - its default value is the name of the section being referenced. - - - - - - - - - - - - - - - + + + + + + Internal links are pairs of text that will become clickable in hypertext + presentation forms or precede the reference itself when this is the only + option. The reference points to a section or a subsection of the same + document that the link should refer to. If the explicit text is omitted, + its default value is the name of the section being referenced. 
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/topics/documents/xpath/spec2ldf.xslt b/topics/documents/xpath/spec2ldf.xslt index 0dc36688..7a6cde39 100644 --- a/topics/documents/xpath/spec2ldf.xslt +++ b/topics/documents/xpath/spec2ldf.xslt @@ -1,174 +1,266 @@ - - - - - - w3c - - - - - - - - - - - - - - - approved - - - unknown - - - - - 1.0 - - - 2.0 - - - - - - - - - - - <xsl:choose> - <xsl:when test="substring(text(),1,7) = 'http://'"> - <xsl:value-of select="substring-after(substring-after(substring-after(text(),'http://www.w3.org/'),'/'),'/')"/> - </xsl:when> - <xsl:otherwise> - <xsl:value-of select="text()"/> - </xsl:otherwise> - </xsl:choose> - - - - - - - - - - - - - - - - - front-matter - - - - scope - - Status of this document - - - - - - - - - - - abstract - - - - - - - - - - - list-of-contents - - - - core-part - - - - - - - foreword - - - - - - - - <xsl:value-of select="head"/> - - - - - - - - - - - - - - - - - - - - - <xsl:value-of select="head"/> - - - - description - - abstract - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + w3c + + + + + + + + + + + + + + + approved + + + unknown + + + + + 1.0 + + + 2.0 + + + + + + + + + + + <xsl:choose> + <xsl:when test="substring(text(),1,7) = 'http://'"> + <xsl:value-of select="substring-after(substring-after(substring-after(text(),'http://www.w3.org/'),'/'),'/')"/> + </xsl:when> + <xsl:otherwise> + <xsl:value-of select="text()"/> + </xsl:otherwise> + </xsl:choose> + + + + + + + + + + + + + + + + + front-matter + + + + scope + + Status of this document + + + + + + + + + + + abstract + + + + + + + + + + + list-of-contents + + + + core-part + + + + + + + foreword + + + + + + + + <xsl:value-of select="head"/> + + + + + + + + + + + + + + + + + <xsl:value-of select="head"/> + + + + description + + abstract + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [ + + ] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file