Permalink
Browse files

first commit

  • Loading branch information...
0 parents commit d9f5238e5c0e1e2cfac76e3fdab40307d51b1450 @sramsay sramsay committed Nov 6, 2011
Showing with 8,749 additions and 0 deletions.
  1. +2 −0 CHANGES
  2. +7 −0 LICENSE
  3. +75 −0 README
  4. +75 −0 README.bak
  5. BIN abbot-0.1.0-SNAPSHOT-standalone.jar
  6. BIN abbot-0.1.0-SNAPSHOT.jar
  7. BIN classes/clojure/tools/cli$build_doc.class
  8. BIN classes/clojure/tools/cli$cli$fn__152.class
  9. BIN classes/clojure/tools/cli$cli$fn__154.class
  10. BIN classes/clojure/tools/cli$cli$fn__157.class
  11. BIN classes/clojure/tools/cli$cli$fn__160.class
  12. BIN classes/clojure/tools/cli$cli.class
  13. BIN classes/clojure/tools/cli$flag_for.class
  14. BIN classes/clojure/tools/cli$group$fn__144$fn__145.class
  15. BIN classes/clojure/tools/cli$group$fn__144.class
  16. BIN classes/clojure/tools/cli$group.class
  17. BIN classes/clojure/tools/cli$help_and_quit.class
  18. BIN classes/clojure/tools/cli$loading__4505__auto__.class
  19. BIN classes/clojure/tools/cli$name_for.class
  20. BIN classes/clojure/tools/cli$opt_QMARK_.class
  21. BIN classes/clojure/tools/cli$optional$fn__127$fn__128.class
  22. BIN classes/clojure/tools/cli$optional$fn__127$fn__130.class
  23. BIN classes/clojure/tools/cli$optional$fn__127$fn__133.class
  24. BIN classes/clojure/tools/cli$optional$fn__127.class
  25. BIN classes/clojure/tools/cli$optional.class
  26. BIN classes/clojure/tools/cli$parse_args$fn__106.class
  27. BIN classes/clojure/tools/cli$parse_args$fn__112.class
  28. BIN classes/clojure/tools/cli$parse_args.class
  29. BIN classes/clojure/tools/cli$parse_spec$fn__118.class
  30. BIN classes/clojure/tools/cli$parse_spec.class
  31. BIN classes/clojure/tools/cli$path_for.class
  32. BIN classes/clojure/tools/cli$print_and_fail.class
  33. BIN classes/clojure/tools/cli$required.class
  34. BIN classes/clojure/tools/cli$show_help$fn__55.class
  35. BIN classes/clojure/tools/cli$show_help$fn__57.class
  36. BIN classes/clojure/tools/cli$show_help$iter__59__63$fn__64$fn__65.class
  37. BIN classes/clojure/tools/cli$show_help$iter__59__63$fn__64.class
  38. BIN classes/clojure/tools/cli$show_help$iter__59__63.class
  39. BIN classes/clojure/tools/cli$show_help$iter__72__76$fn__77$fn__78$fn__79.class
  40. BIN classes/clojure/tools/cli$show_help$iter__72__76$fn__77$fn__78.class
  41. BIN classes/clojure/tools/cli$show_help$iter__72__76$fn__77$fn__82.class
  42. BIN classes/clojure/tools/cli$show_help$iter__72__76$fn__77.class
  43. BIN classes/clojure/tools/cli$show_help$iter__72__76.class
  44. BIN classes/clojure/tools/cli$show_help.class
  45. BIN classes/clojure/tools/cli$strip_parents.class
  46. BIN classes/clojure/tools/cli__init.class
  47. BIN classes/edu/unl/abbot/core$_main.class
  48. BIN classes/edu/unl/abbot/core$convert_files$fn__177.class
  49. BIN classes/edu/unl/abbot/core$convert_files$fn__179.class
  50. BIN classes/edu/unl/abbot/core$convert_files.class
  51. BIN classes/edu/unl/abbot/core$input_files$fn__166.class
  52. BIN classes/edu/unl/abbot/core$input_files$fn__168.class
  53. BIN classes/edu/unl/abbot/core$input_files$fn__170$fn__171.class
  54. BIN classes/edu/unl/abbot/core$input_files$fn__170.class
  55. BIN classes/edu/unl/abbot/core$input_files.class
  56. BIN classes/edu/unl/abbot/core$loading__4505__auto__.class
  57. BIN classes/edu/unl/abbot/core.class
  58. BIN classes/edu/unl/abbot/core__init.class
  59. BIN classes/edu/unl/abbot/stylesheets$convert.class
  60. BIN classes/edu/unl/abbot/stylesheets$fn__45.class
  61. BIN classes/edu/unl/abbot/stylesheets$loading__4505__auto__.class
  62. BIN classes/edu/unl/abbot/stylesheets__init.class
  63. BIN classes/edu/unl/abbot/utils$has_xml_extension_QMARK_.class
  64. BIN classes/edu/unl/abbot/utils$loading__4505__auto__.class
  65. BIN classes/edu/unl/abbot/utils__init.class
  66. +2,857 −0 config/abbot_config.xml
  67. BIN lib/clojure-1.3.0.jar
  68. BIN lib/clojure-contrib-1.2.0.jar
  69. BIN lib/clojure-saxon-0.9.2.jar
  70. BIN lib/saxon9-9.1.0.8.jar
  71. BIN lib/saxon9-s9api-9.1.0.8.jar
  72. BIN lib/tools.cli-0.1.0.jar
  73. +9 −0 project.clj
  74. +1 −0 source
  75. +58 −0 src/edu/unl/abbot/core.clj
  76. +41 −0 src/edu/unl/abbot/stylesheets.clj
  77. +29 −0 src/edu/unl/abbot/utils.clj
  78. +5,046 −0 target/tei-xl.rng
  79. +549 −0 xslt/metaStylesheetForRNGschemas.xsl
@@ -0,0 +1,2 @@
+
+0.1.0 Initial Github commit.
@@ -0,0 +1,7 @@
+Copyright © 2007-2011 The Board of Trustees of the University of Illinois, The Board of Regents of the University of Nebraska at Lincoln, The University of Georgia Research Foundation, Inc., University of Maryland College Park, McMaster University, University of Alberta and The National Center for Supercomputing Applications. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal with the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimers. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimers in the documentation and/or other materials provided with the distribution. Neither the names of The MONK Project, the names of any institution listed in the above copyright notice, nor the names of its contributors may be used to endorse or promote products derived from this Software without specific prior written permission.
+
+THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
75 README
@@ -0,0 +1,75 @@
+
+Abbot
+=====
+
+A sepulchral voice reverberating through the monastery, says . . .
+
+*This is pre-release software.* It may fail to compile. It might have unimplemented features. It certainly has undocumented features. It definitely has bugs. You have been warned. See CHANGES for details on how things are going.
+
+Description
+-----------
+
+Abbot is a tool for undertaking large-scale conversion of XML document collections in order to make them interoperable with one another. It can help you make one or more collections conform to a particular schema (including a schema used to define one of those collections). In the simplest case, where the "target schema" is a proper subset of the schema(s) used to define the collections in question, Abbot operates more-or-less automatically. More complicated cases require you to define specific transformations in a configuration file, but that configuration file uses a simple language unrelated to either XSLT or a schema language.
+
+[Note: The DSL used in the config file represents one of the major areas of volatility in this pre-release. In fact, it won't actually exist for a while. In the meantime, you can still use XSLT.]
+
+By default, Abbot converts documents into TEI Analytics -- a TEI subset designed for text analysis applications.
+
+Abbot is designed to be furiously fast (it will automatically parallelize the conversion across n processor cores), and might be the right solution for jobs that don't explicitly involve interoperability, but merely seek to perform XSL transformations on massive document collections quickly.
+
+Abbot is designed to run on UNIX-like systems, and is being developed on Linux. Patches that make it play nice on other platforms will be warmly welcomed.
+
+Quickstart
+----------
+
+(We'll assume that you have one or more collections that do not conform to TEI-A and would like to make them conform).
+
+Everything you need to use abbot (aside from an up-to-date JVM) is contained in the file abbot-X.X.X-SNAPSHOT-standalone.jar.
+
+1. Set the environment variable ABBOT_HOME to this directory (the root directory of the Abbot distribution).
+
+2. Put the files you want to convert into $ABBOT_HOME/input.
+
+3. type "java -jar abbot-X.X.X-SNAPSHOT-standalone.jar"
+
+4. Behold your converted files in $ABBOT_HOME/output
+
+[Note: Passing -h after the jar will show you some command-line switches that allow you to change the input and output directories.]
+
+Slowstart
+---------
+
+In order to use Abbot effectively, it helps to understand a bit about how it works.
+
+Abbot reads a schema (we'll call it the "target schema") and automatically generates a stylesheet designed to convert a set of XML documents into a form that (ideally) validates against that schema. In its most basic form, it simply assumes that any element that appears in the document and which is also legal according to the target schema can be passed through unchanged. So if the target schema describes a proper subset of the elements used by the documents, Abbot can perform the conversion without much effort on your part.
+
+This works well for a surprising number of cases in which interoperability is the goal, but obviously won't work in more complicated cases. When the mapping from some element in the documents to some element described in the schema is less obvious, you'll need to provide Abbot with a description of how that transformation should be undertaken.
+
+The key files, then, are the target schema (which is kept in $ABBOT_HOME/target) and the abbot_config.xml file (which is kept in $ABBOT_HOME/config). The config file simply contains XSLT templates (under the <custom-transformations> element). Each one of these will be added at runtime to the stylesheet that is automatically generated from the target schema.
+
+[Note: Once again, replacing this config file with a simplified language is one of the major development goals of Abbot. In the meantime, you have to provide your own templates. Whether this is still worth it in your particular case really depends on how much customization is required, though it may be that having Abbot take care of the trivial cases for you helps you quite a bit.]
+
+So if you want to start playing with different target schemas and custom mappings, those are the two files you want to replace and/or fiddle with.
+
+Building from (and Tinkering with) the Source
+---------------------------------------------
+
+Abbot is built using a combination of XSLT and Clojure using the Leiningen build tool. So assuming you have both Clojure 1.3 or greater and the current copy of Leiningen, you should be able to type:
+
+lein deps
+lein uberjar
+
+to generate the current SNAPSHOT.
+
+The abbot "runtime" is substantially written in Clojure, but that code is mainly concerned with compiling the "metastylesheet" (the stylesheet that generates the stylesheet that does the conversion -- or, as we call it, the "conversion stylesheet"). Once the conversion stylesheet is built (in the first few seconds of a typical run), abbot will proceed to apply it to all the documents in the input directory.
+
+So, if you're interested in tinkering with the XSLT part of things, you want to look in $ABBOT_HOME/xslt (where the metastylesheet resides). All other parts of the system -- the compilation of the stylesheets, parallelized conversion, etc. -- are in the src directory.
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,75 @@
+
+Abbot
+=====
+
+A sepulchral voice reverberating through the cave, says . . .
+
+*This is pre-release software.* It may fail to compile. It quite possibly has unimplemented features. It certainly has undocumented features. It definitely has bugs. You have been warned. See CHANGES for details on how things are going.
+
+Description
+-----------
+
+Abbot is a tool for undertaking large-scale conversion of XML document collections in order to make them interoperable with one another. It can help you make one or more collections conform to a particular schema (including a schema used to define one of those collections). In the simplest case, where the "target schema" is a proper subset of the schema(s) used to define the collections in question, Abbot operates more-or-less automatically. More complicated cases require you to define specific transformations in a configuration file, but that configuration file uses a simple language unrelated to either XSLT or a schema language.
+
+[Note: The DSL used in the config file represents one of the major areas of volatility in this pre-release. In fact, it won't actually exist for a while. In the meantime, you can still use XSLT.]
+
+By default, Abbot converts documents into TEI Analytics -- a TEI subset designed for text analysis applications.
+
+Abbot is designed to be furiously fast (it will automatically parallelize the conversion across n processor cores), and might be the right solution for jobs that don't explicitly involve interoperability, but merely seek to perform XSL transformations on massive document collections quickly.
+
+Abbot is designed to run on UNIX-like systems, and is being developed on Linux. Patches that make it play nice on other platforms will be warmly welcomed.
+
+Quickstart
+----------
+
+(We'll assume that you have one or more collections that do not conform to TEI-A and would like to make them conform).
+
+Everything you need to use abbot (aside from an up-to-date JVM) is contained in the file abbot-X.X.X-SNAPSHOT-standalone.jar.
+
+1. Set the environment variable ABBOT_HOME to this directory (the root directory of the Abbot distribution).
+
+2. Put the files you want to convert into $ABBOT_HOME/input.
+
+3. type "java -jar abbot-X.X.X-SNAPSHOT-standalone.jar"
+
+4. Behold your converted files in $ABBOT_HOME/output
+
+[Note: Passing -h after the jar will show you some command-line switches that allow you to change the input and output directories.]
+
+Slowstart
+---------
+
+In order to use Abbot effectively, it helps to understand a bit about how it works.
+
+Abbot reads a schema (we'll call it the "target schema") and automatically generates a stylesheet designed to convert a set of XML documents into a form that (ideally) validates against that schema. In its most basic form, it simply assumes that any element that appears in the document and which is also legal according to the target schema can be passed through unchanged. So if the target schema describes a proper subset of the elements used by the documents, Abbot can perform the conversion without much effort on your part.
+
+This works well for a surprising number of cases in which interoperability is the goal, but obviously won't work in more complicated cases. When the mapping from some element in the documents to some element described in the schema is less obvious, you'll need to provide Abbot with a description of how that transformation should be undertaken.
+
+The key files, then, are the target schema (which is kept in $ABBOT_HOME/target) and the abbot_config.xml file (which is kept in $ABBOT_HOME/config). The config file simply contains XSLT templates (under the <custom-transformations> element). Each one of these will be added at runtime to the stylesheet that is automatically generated from the target schema.
+
+[Note: Once again, replacing this config file with a simplified language is one of the major development goals of Abbot. In the meantime, you have to provide your own templates. Whether this is still worth it in your particular case really depends on how much customization is required, though it may be that having Abbot take care of the trivial cases for you helps you quite a bit.]
+
+So if you want to start playing with different target schemas and custom mappings, those are the two files you want to replace and/or fiddle with.
+
+Building from (and Tinkering with) the Source
+---------------------------------------------
+
+Abbot is built using a combination of XSLT and Clojure using the Leiningen build tool. So assuming you have both Clojure 1.3 or greater and the current copy of Leiningen, you should be able to type:
+
+lein deps
+lein uberjar
+
+to generate the current SNAPSHOT.
+
+The abbot "runtime" is substantially written in Clojure, but that code is mainly concerned with compiling the "metastylesheet" (the stylesheet that generates the stylesheet that does the conversion -- or, as we call it, the "conversion stylesheet"). Once the conversion stylesheet is built (in the first few seconds of a typical run), abbot will proceed to apply it to all the documents in the input directory.
+
+So, if you're interested in tinkering with the XSLT part of things, you want to look in $ABBOT_HOME/xslt (where the metastylesheet resides). All other parts of the system -- the compilation of the stylesheets, parallelized conversion, etc. -- are in the src directory.
+
+
+
+
+
+
+
+
+
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Oops, something went wrong.

0 comments on commit d9f5238

Please sign in to comment.