noid

#!/usr/bin/php
<?php
/**
 * noid - a php script that mints and binds nice opaque identifiers
 *   using the Noid.php class.  This script can be invoked additionally
 *   via a URL interface as "noidu…", which formats output for the web.
 *
 * Author: John A. Kunze, jak@ucop.edu, California Digital Library
 *    Originally created Nov. 2002 at UCSF Center for Knowledge Management
 * Ported to PHP by Daniel Berthereau for Mines ParisTech
 *
 * ---------
 * Copyright (c) 2002-2006 UC Regents
 * Copyright (c) 2016-2019 Daniel Berthereau
 *
 * Permission to use, copy, modify, distribute, and sell this software and
 * its documentation for any purpose is hereby granted without fee, provided
 * that (i) the above copyright notices and this permission notice appear in
 * all copies of the software and related documentation, and (ii) the names
 * of the UC Regents and the University of California are not used in any
 * advertising or publicity relating to the software without the specific,
 * prior written permission of the University of California.
 *
 * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 *
 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE FOR ANY
 * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY
 * THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE
 * OR PERFORMANCE OF THIS SOFTWARE.
 * ---------
 *
 * XXX change 'NOID' as database name to 'dbnoid' (for case insensitive filesys)
 * XXX fix To Do alphabets
 * XXX document an example of how to set up a rewrite rule that responds
 *     to the ? and ?? at the end of an id, and convert to a CGI string
 * XXX add java interface
 * XXX fix env test to suggest that NFS and AFS filesystems not be used
 * XXX why does dbopen fail when doing dbinfo from an account that can't
 *     write the file -- should be doing readonly open
 * XXX record literal noid dbcreate (re)creation command used into README
 */

/**
 * Notes
 *
 * Command line is strict posix: options must be set before arguments, with or
 * without the "--" separator. Perl script allows any order.
 */

/**
 * added by Daniel Berthereau, 2019-07-29 00:00
 *
 * - Restructured with a namespace as a Composer package.
 * - Read all the database interfaces.
 */

namespace Noid;

require_once __DIR__ . '/vendor/autoload.php';

use Exception;
use Noid\Lib\Globals;
use Noid\Lib\Db;
use Noid\Lib\Log;
// use Noid\Noid;
use Noid\Storage\DatabaseInterface;

// Not used.
// $template = NULL;
// $snaa = NULL;
// $total = NULL;

// yyy make a noidmail (email robot interface?)
// yyy location field for redirect should include a discriminant
//     eg, ^c for client choice, ^i for ipaddr, ^f format, ^l language
//     and ^b for browser type, ^xyz for any http header??
// yyy add "file" command, like bind, but stores a file, either as file or
//     in a big concatenation stream (binding offset, length, checksum)?
// yyy figure out whether validate needs to open the database, and if not,
//     what that means

// yyy for locking and transactions: (1) ask Paul Marquess about what
//  Perl interface support for txn and locking really is (2) check into
//  lock and/or transaction timeout, (3) if I use cursors exclusively for
//  storage, that may solve everything(?) (eg, no more simple tied assignments,
//  which are db_put's in disguise, (4) make sure Noid.pm exit block releases
//  locks, aborts transactions, etc.

if(!class_exists('NoidApp')){
    /**
     * Main class - NoidApp
     *
     * @warning function run()
     */
    class NoidApp{
        /**
         * List of possible long options according to short ones.
         *
         * Keys are the short option letters, values the matching long names.
         * NOTE(review): the trailing colon presumably follows the getopt()
         * convention for "option requires a value" -- confirm against getoptreq().
         *
         * @const array OPTS
         */
        private const OPTS = array(
            'd' => 'debug', // flag
            'l:' => 'locktest:', // number of seconds to sleep (cast to int in run())
            'f:' => 'fdbdir:', // filesystem directory name, or path to a php config file
            't:' => 'type:', // db type - bdb, mysql, xml, json…
            'v' => 'version', // flag
            'h' => 'help', // flag
        );

        /**
         * List of valid noid commands.
         *
         * @const array VALID_COMMANDS
         */
        private const VALID_COMMANDS = array(
            'bind',
            'dbinfo',
            'dbcreate',
            'dbimport',
            'fetch',
            'get',
            'hello',
            'help',
            'hold',
            'mint',
            'note',
            'peppermint',
            'queue',
            'validate',
        );

        /**
         * List of valid help topics.
         *
         * The valid commands are added when needed.
         *
         * @var array
         */
        private $_valid_help_topics = array(
            'intro',
            'all',
            'templates',
        );

        /**
         * Content of help topics.
         *
         * @internal purposely empty for now: filled only when needed (usage).
         *
         * @var array $_info
         */
        private $_info;

        /**
         * @var string
         */
        private $settings_file = 'config/settings.php';

        /**
         * @var array
         */
        private $settings;

        /**
         * @var string $data_dir Directory where the bdb and log files are placed.
         *                    In mysql mode, it is still used for logging.
         */
        private $data_dir;

        /**
         * @var resource $stderr Redirect errors for web (redirect stderr to standard output).
         */
        private $stderr;

        /**
         * @var string $contact
         */
        private $contact;

        /**
         * @var bool $is_web Is web context?
         */
        private $is_web;

        /**
         * Main function in main class.
         *
         * Detects the invocation mode from the script name ("noidu…" for the URL
         * interface, "noidr…" for RewriteMap resolving, anything else for the
         * plain command line), parses the options, loads the settings, resolves
         * (and creates when missing) the storage directory, then runs either a
         * single command or, in bulk mode, every command line read from the
         * standard input.
         *
         * @param array $argv arguments array; $argv[0] is the script name
         *
         * @return bool FALSE when the invocation itself is rejected; TRUE once
         *              the command(s) have been dispatched (the status returned
         *              by the command is not propagated).
         */
        public function run($argv)
        {
            // The default date timezone allows to normalize all dates.
            date_default_timezone_set('UTC');

            $this->is_web = FALSE;

            // When initialized first, bulk-command mode is off.
            $bulk_mode = FALSE;

            // The STDIN/STDOUT/STDERR constants are defined by the CLI SAPI only,
            // so fall back to the stream wrappers everywhere else.
            $this->stderr = defined('STDERR') ? STDERR : fopen('php://stderr', 'w');

            // Name under which the script was invoked.
            $script = isset($argv[0]) ? (string) $argv[0] : '';

            // List of arguments from command line ($argv) or web request ($_REQUEST).
            $args = $argv;

            // If called with the URL interface.
            if(preg_match('|noidu[^/]*$|', $script)){
                if(PHP_SAPI === 'cli' || empty($_SERVER['REMOTE_ADDR'])){
                    print 'Not called via http.' . PHP_EOL;
                    return FALSE;
                }
                $this->is_web = TRUE; // orient output for HTTP
                print 'Content-Type: text/plain' . PHP_EOL . PHP_EOL;
                // stderr cannot be re-pointed at stdout in php, so errors are
                // written to the regular output stream instead.
                $this->stderr = fopen('php://output', 'w');
                if(empty($_REQUEST)){
                    print 'No QUERY_STRING (hence no command) defined.' . PHP_EOL;
                    return FALSE;
                }
                $args = $_REQUEST;
            }
            // If called for RewriteMap resolving.
            // See Apache Rewrite mod documentation.
            // TODO Check "noidr" for RewriteMap via Apache.
            else if(preg_match('|noidr[^/]*$|', $script)){
                flush(); // very important to empty the output buffer.
                $bulk_mode = TRUE;
                // yyy should we set a timeout to prevent hanging the server?
            }

            $this->contact = $this->who_are_you($this->is_web);
            if(empty($this->contact)){
                $error = error_get_last();
                fwrite($this->stderr, "Can't tell who you are: " . (isset($error) ? $error['message'] : '[no message]') . PHP_EOL);
                return FALSE;
            }

            $options = $this->getoptreq();
            if($options === FALSE){
                print 'error: GetOptions' . PHP_EOL;
                $this->usage(1, 1, 'intro');
                return FALSE;
            }

            $args = $this->getOnlyArguments($args, $options);
            $options = $this->unifyOptionsAsLong($options);

            if(isset($options['locktest'])){
                $locktest = (int)$options['locktest'];
                if($locktest < 0){
                    print 'error: locktest value must be a positive number of seconds to sleep' . PHP_EOL;
                    // NOTE Why exit 0 for an error in Perl script (and somewhere else)?
                    return FALSE;
                }
                // NOTE(review): this hands the number of seconds to command() as
                // if it were a command name, before the storage directory is
                // known -- kept as is, but it looks unfinished; confirm intent.
                $this->command(array('locktest' => $locktest));
            }

            if($this->is_web && !empty($options['debug'])){
                // Terminate the line so that the command output starts on its own.
                print "contact={$this->contact}, pwd=" . getcwd() . PHP_EOL;
            }

            // Handle -v or -h, and exit early.
            if(isset($options['version'])){
                // We take our version number from the Noid module version.
                printf('This is "noid" version %s.' . PHP_EOL, Globals::VERSION);
                return TRUE;
            }
            if(isset($options['help'])){
                // yyy should we encode help output?   print 'help:' . PHP_EOL;
                $this->usage(0, 0, 'intro');
                return TRUE;
            }

            // NOTE Not clear why checked later in Perl script.
            if($this->is_web && isset($options['fdbdir'])){
                print '-f option not allowed in URL interface.' . PHP_EOL;
                return FALSE;
            }

            // The -f option is either a storage directory or a php config file.
            $this->data_dir = isset($options['fdbdir']) ? $options['fdbdir'] : NULL;

            if ($this->data_dir) {
                if (substr($this->data_dir, 0, 1) !== '/') {
                    $this->data_dir = __DIR__ . DIRECTORY_SEPARATOR . $this->data_dir;
                }
                if (file_exists($this->data_dir)) {
                    if (!is_readable($this->data_dir)) {
                        print sprintf('error: path %s is not readable.', $this->data_dir) . PHP_EOL;
                        return FALSE;
                    }
                    if (is_file($this->data_dir) && !is_dir($this->data_dir)) {
                        if (strtolower((string) pathinfo($this->data_dir, PATHINFO_EXTENSION)) !== 'php') {
                            print sprintf('error: path %s is not a php config file.', $this->data_dir) . PHP_EOL;
                            return FALSE;
                        }
                        // A config file was given: the directory is resolved below.
                        $this->settings_file = $this->data_dir;
                        $this->data_dir = null;
                    }
                }
            }

            if (substr($this->settings_file, 0, 1) !== '/') {
                $this->settings_file = __DIR__ . DIRECTORY_SEPARATOR . $this->settings_file;
            }

            // include returns FALSE (with a php warning) when the file is missing
            // and 1 when it returns nothing: keep only a real settings array, so
            // that the lookups below never index a boolean or an integer.
            $settings = include $this->settings_file;
            $this->settings = is_array($settings) ? $settings : array();

            // get the option "-t/--type" (database type)
            // default - bdb (BerkeleyDB)
            $db_type_option = isset($options['type']) ? $options['type'] : null;
            if (empty($db_type_option) && isset($this->settings['db_type'])) {
                $db_type_option = $this->settings['db_type'];
            }
            if (empty($db_type_option)) {
                $db_type_option = 'bdb';
            } else {
                // Normalize before use: the type is checked case-insensitively,
                // but it also indexes the storage settings below.
                $db_type_option = strtolower((string) $db_type_option);
                if (!in_array($db_type_option, array_keys(Globals::DB_TYPES), true)) {
                    print 'error: invalid DbType (database type).' . PHP_EOL;
                    $this->usage(1, 1, 'intro');
                    return FALSE;
                }
            }

            $settings_storage = isset($this->settings['storage'][$db_type_option])
                ? $this->settings['storage'][$db_type_option]
                : array();

            // Now try to find a database directory string.
            // In the special case of dbcreate, we may create
            // and name the directory on behalf of the user.
            if(empty($this->data_dir)){
                if(getenv('NOID')){ // is NOID env variable defined?
                    $this->data_dir = getenv('NOID');
                }
                // Executable link reveals data_dir?
                // For example "/path/to/noid_test" returns "test".
                // The script name is used, because the arguments of a web
                // request have no element 0.
                else if(preg_match('|_([^/]+)$|', $script, $matches)){
                    $this->data_dir = $matches[1];
                }
                // Use the dir set in the config file.
                else if(!empty($settings_storage['data_dir'])){
                    $dir = $settings_storage['data_dir'];
                    if (substr($dir, 0, 1) !== '/') {
                        $dir = __DIR__ . DIRECTORY_SEPARATOR . $dir;
                    }
                    $this->data_dir = $dir;
                }
                // Else try current directory.
                else{
                    $this->data_dir = __DIR__;
                }

                $this->data_dir = trim($this->data_dir);
                if(empty($this->data_dir)){
                    print 'error: no database directory.' . PHP_EOL;
                    $this->usage(1, 1, 'intro');
                    return FALSE;
                }
            }

            // Now check the validity of $this->data_dir. yyy we can do better?
            if(preg_match('|^(.*)$|', $this->data_dir, $matches)){
                $this->data_dir = $matches[1];
            }
            else{
                print('error: bad database directory.' . PHP_EOL);
                $this->usage(1, 1, 'intro');
                return FALSE;
            }

            if (file_exists($this->data_dir) && !is_dir($this->data_dir)) {
                print sprintf('error: path %s is not a directory.', $this->data_dir) . PHP_EOL;
                return false;
            }
            if (file_exists($this->data_dir) && !is_readable($this->data_dir)) {
                print sprintf('error: path %s is not readable.', $this->data_dir) . PHP_EOL;
                return false;
            }
            if (!file_exists($this->data_dir)) {
                $result = mkdir($this->data_dir, 0775, true);
                if (!$result) {
                    print sprintf('error: cannot create directory %s.', $this->data_dir) . PHP_EOL;
                    return false;
                }
            }

            // Update the settings with real type and data dir.
            $this->settings['db_type'] = $db_type_option;
            $this->settings['storage'][$db_type_option]['data_dir'] = $this->data_dir;

            // Bulk command mode is signified by a single final argument of "-".
            // If we're _not_ in bulk command mode, expect a single command
            // represented by the remaining arguments; do it and exit.
            $bulk_mode = $bulk_mode
                || (count($args) == 2 && isset($args[1]) && $args[1] === '-');

            /* normal execution */
            if(!$bulk_mode){
                // Remove the command itself.
                unset($args[0]);
                $this->command($args);
            }
            /* bulk("endless") command by while-loop */
            else{
                // If we get here, we're in bulk command mode.  Read, tokenize,
                // and execute commands from the standard input.  Test with
                //   curl --data-binary @cmd_file http://dot.ucop.edu/nd/noidu_kt5\?-
                // where cmd_file contains newline-separated commands.
                // XXX make sure to %-decode web QUERY_STRING, so we don't have
                //     to always put +'s for spaces
                //
                // NOTE The delimiter is the '"', even missing, but command line allows "'" too.
                $stdin = defined('STDIN') ? STDIN : fopen('php://stdin', 'r');
                if(!is_resource($stdin)){
                    fwrite($this->stderr, 'error: cannot read the standard input.' . PHP_EOL);
                    return FALSE;
                }
                // The second argument of fgetcsv() is the maximum line length
                // (0 for no limit); the separator comes third.  The escape
                // character is the historical default, passed explicitly.
                while(is_array($args = fgetcsv($stdin, 0, ' ', '"', '\\'))){
                    $this->command($args);
                }
            }
            return TRUE;
        }

        /**
         * Check one noid command and dispatch it to the method of the same name.
         *
         * The first element of $args is the command; the remaining elements are
         * passed positionally to the command method.
         *
         * @param array $args command name followed by its arguments
         *
         * @return int|mixed 0 when the command is missing or rejected, else
         *                   whatever the command method returns
         */
        public function command($args = array())
        {
            // Only the order of the arguments matters.  Keys are dropped because
            // php 8 would pass string keys (web requests are keyed by parameter
            // name) to call_user_func_array() as named arguments.
            $args = array_values((array) $args);

            // Any remaining args should form a noid command.
            // Look at the command part (if any) now, and complain about
            // a non-existent database unless the command is "dbcreate".
            $command = array_shift($args);
            // array_shift() returns NULL on an empty list, and a web request may
            // carry nested arrays: neither is a usable command name.
            $command = is_scalar($command) ? trim((string) $command) : '';
            if(empty($command)){    // if no command arg
                $this->usage(1, 1, 'intro');
                return 0;
            }
            if(!is_dir((string) $this->data_dir)
                && $command != 'dbcreate'
                && $command != 'help'
            ){
                // if the database doesn't exist when it needs to
                fwrite($this->stderr, sprintf(
                    'error: no storage path (%s) -- use dbcreate?',
                    $this->data_dir
                ) . PHP_EOL . PHP_EOL);
                $this->usage(1, 1, 'intro');
                return 0;
            }
            if(!in_array($command, self::VALID_COMMANDS, TRUE)){
                printf('error: no such command: %1$s (%2$s).' . PHP_EOL,
                    $command, implode(' ', $args));
                $this->usage(1, 1, 'intro');
                return 0;
            }
            // Perform extra checks in $this->is_web case.
            if($this->is_web && $command == 'dbcreate'){
                printf('error: command "%s" not allowed in URL interface.' . PHP_EOL, $command);
                $this->usage(1, 1, 'intro');
                return 0;
            }

            return call_user_func_array(array($this, $command), $args);
        }

        /* --- begin almost alphabetic listing of functions --- */

        /**
         * Bind a value to an element of an identifier.
         *
         * The element selects where the element/value pairs come from:
         * - ":"  reads "element: value" pairs from the standard input, up to the
         *        first blank line, and binds each of them;
         * - ":-" reads the rest of the standard input as one element whose
         *        value may span several lines;
         * - any other element starting with ":" is rejected;
         * - otherwise $elem and $value are bound directly.
         *
         * yyy what is the sensible thing to do if (a) no element given,
         *     (b) if no value, or (c) if there are multiple values?
         * yyy vbind(…, template, …)?  nvbind()?
         *
         * yyy what about append at the list vs the string level?
         *
         * @param string $how
         * @param string $id
         * @param string $elem
         * @param string $value
         *
         * @return int 0 (error) or 1 (success); in ":" mode, 1 only when at
         *             least one pair was read and every pair was bound
         * @throws Exception
         */
        public function bind($how = NULL, $id = NULL, $elem = NULL, $value = NULL)
        {
            $validate = 1;
            $noid = Db::dbopen($this->settings, DatabaseInterface::DB_WRITE);
            if(!$noid){
                fwrite($this->stderr, Log::errmsg($noid));
                return 0;
            }
            $report = NULL;
            if(is_null($elem)){
                $elem = '';
            }
            if($elem === ':'){    // expect name/value pairs up to blank line
                if((string) $value !== ''){
                    fwrite($this->stderr, sprintf(
                        'Why give a value (%1$s) with an element "%2$s"?',
                        $value, $elem
                    ) . PHP_EOL);
                    Db::dbclose($noid);
                    return 0;
                }

                // Slurp a paragraph: read lines until the first empty line.
                // The loop also stops at the end of the input, where fgets()
                // returns FALSE; otherwise it would never terminate.
                // The STDIN constant is used when defined, so that the pending
                // buffer is shared with the bulk-mode reader.
                $stdin = defined('STDIN') ? STDIN : fopen('php://stdin', 'r');
                $input = '';
                while(is_resource($stdin) && ($line = fgets($stdin)) !== FALSE){
                    $input .= $line;
                    // A line made of the line ending only (no white space).
                    if($line === "\n" || $line === "\r\n"){
                        break;
                    }
                }
                $para = trim($input);
                // Only the first comment is removed.
                $para = preg_replace('/^#.*\R/', '', $para); // remove comment lines
                $para = preg_replace('/\R\s+/', ' ', $para); // merge continuation lines
                // Keep only lines with at least one ":".
                $elemvals = preg_split('/^([^:]+)\s*:\s*/m', $para, -1, PREG_SPLIT_DELIM_CAPTURE);
                array_shift($elemvals); // throw away first null
                $bound = 0;
                $total = 0;
                while(TRUE){
                    // array_shift() returns NULL once the list is exhausted.
                    $elem = trim((string) array_shift($elemvals));
                    $value = trim((string) array_shift($elemvals));
                    if($elem === '' && $value === ''){
                        break;
                    }
                    $total++;
                    if($elem === ''){
                        Log::addmsg($noid, sprintf(
                            'error: %1$s: bad element associated with value "%2$s".',
                            $id, $value
                        ));
                        break;
                    }
                    $report = Noid::bind($noid, $this->contact, $validate,
                        $how, $id, $elem, $value);
                    if(is_null($report)){
                        // The report is NULL here: the reason is in the log.
                        fwrite($this->stderr, Log::errmsg($noid) . PHP_EOL);
                        $this->usage(1, 1, 'bind');
                        // yyy how/who should log failures in "hard" case
                    }
                    else{
                        $bound++;
                        print $report . PHP_EOL;
                    }
                }
                // yyy summarize for log $total and $bound
                Db::dbclose($noid);
                return ($total > 0 && $bound === $total) ? 1 : 0;
            }
            else if($elem === ':-'){        // expect name/value to be rest of file
                if((string) $value !== ''){
                    fwrite($this->stderr, sprintf('Why give a value (%1$s) with an element "%2$s"?',
                            $value, $elem) . PHP_EOL);
                    Db::dbclose($noid);
                    return 0;
                }

                // Read all of STDIN, split it into lines and trim each of them.
                $input_lines = (string) file_get_contents('php://stdin');
                $input_lines = explode(PHP_EOL, trim($input_lines));
                $input_lines = array_map('trim', $input_lines);

                // Ignore any leading lines that start with a pound sign
                // or contain nothing but white space.
                while(count($input_lines) > 0){
                    if(substr($input_lines[0], 0, 1) == '#'
                        || trim($input_lines[0]) == ''
                    ){
                        array_shift($input_lines);
                        continue;
                    }
                    break;
                }

                // If we don't have any lines, there's a problem.
                if(count($input_lines) == 0){
                    fwrite($this->stderr, 'error:  no non-blank, non-comment input.' . PHP_EOL);
                    Db::dbclose($noid);
                    return 0;
                }

                // There must be an element and a colon on the first line.
                if(!preg_match('/^\s*(\w+)\s*:\s*(.*)$/', $input_lines[0], $matches)){
                    fwrite($this->stderr, 'error:  missing element or colon on first non-blank, non-comment line.' . PHP_EOL);
                    Db::dbclose($noid);
                    return 0;
                }

                // Save the element, and any part of the value that there
                // might be on the first line.
                $elem = $matches[1];
                $value = $matches[2];

                // Remove the first line from the array.
                array_shift($input_lines);

                // Append any additional lines to the value.
                foreach($input_lines as $v){
                    $value .= PHP_EOL . $v;
                }

                // Put on the final newline.
                $value .= PHP_EOL;
                // Now drop through to end of if-elseif clause to real binding.
            }
            // yyy eg, :fragment:Offset:Length:Path
            // yyy eg, :file:Path
            // yyy eg, ":xml",
            else if(strpos($elem, ':') === 0){
                fwrite($this->stderr, sprintf('Binding to element syntax "%s" not supported.', $elem) . PHP_EOL);
                Db::dbclose($noid);
                return 0;
            }
            $report = Noid::bind($noid, $this->contact, $validate, $how, $id, $elem, $value);
            if(empty($report)){
                fwrite($this->stderr, Log::errmsg($noid));
                $this->usage(0, 1, 'bind');
            }
            else{
                print $report . PHP_EOL;
            }
            // yyy make sure return(0)'s do dbclose…
            Db::dbclose($noid);
            // An empty report is the failure case: 0, as for every other error.
            return empty($report) ? 0 : 1;
        }

        /**
         * Create a database from the current settings and print the report.
         *
         * This routine may not make sense in the URL interface.
         *
         * @param string $template
         * @param string $policy
         * @param string $naan
         * @param string $naa
         * @param string $subnaa
         *
         * @return int 0 (error) or 1 (success)
         * @throws Exception
         */
        public function dbcreate($template = NULL, $policy = NULL, $naan = NULL, $naa = NULL, $subnaa = NULL)
        {
            $summary = Db::dbcreate(
                $this->settings,
                $this->contact,
                $template,
                $policy,
                $naan,
                $naa,
                $subnaa
            );

            // Success: the creation report is the output of the command.
            if ($summary) {
                print $summary . PHP_EOL;
                return 1;
            }

            // Failure: show the logged error message instead.
            print Log::errmsg() . PHP_EOL;
            return 0;
        }

        /**
         * Import the content of a database of another type into the current one.
         *
         * The destination is the database type of the current settings; the
         * source type must be a known type and must differ from it.
         *
         * @param string $src_type type of the source database; optional in the
         *                         signature only, so that a missing argument is
         *                         reported with a message instead of an
         *                         ArgumentCountError
         *
         * @return int 0 (error) or 1 (success)
         * @throws Exception
         */
        public function dbimport($src_type = NULL)
        {
            if(!isset($src_type)){
                print "error: missing argument for dbimport (bdb/mysql/sqlite/xml)." . PHP_EOL;
                return 0;
            }

            $src_type = strtolower((string) $src_type);
            if (!in_array($src_type, array_keys(Globals::DB_TYPES), true)) {
                print 'error: invalid source database type.' . PHP_EOL;
                return 0;
            }

            $db_type = $this->settings['db_type'];
            // Importing a database into itself makes no sense.
            if((string) $db_type === $src_type){
                print sprintf('error: the source db type (%1$s) must differ to the destination (%2$s).', $src_type, $db_type) . PHP_EOL;
                return 0;
            }

            // The progress line is completed by the status below.
            print sprintf('Importing from %1$s to %2$s… ', $src_type, $db_type);
            if(!Db::dbimport($this->settings, $src_type)){
                print "[failed]" . PHP_EOL;
                return 0;
            }
            print "[OK]" . PHP_EOL;
            return 1;
        }

        /**
         * Report information about the database.
         *
         * The database is opened read-only, handed to Db::dbinfo() with the
         * requested level, then closed.
         *
         * @param string $level level of detail; "brief" when empty
         *
         * @return int 0 (error) or 1 (success)
         * @throws Exception
         */
        public function dbinfo($level = NULL)
        {
            $detail = empty($level) ? 'brief' : $level;

            $handle = Db::dbopen($this->settings, DatabaseInterface::DB_RDONLY);
            if (!$handle) {
                print Log::errmsg($handle);
                return 0;
            }

            Db::dbinfo($handle, $detail);
            Db::dbclose($handle);

            return 1;
        }

        /**
         * Fetch elements of an identifier (verbose output).
         *
         * @param string       $id
         * @param array|string $elems Either the list of elements, or the first
         *                            element: in that case, every argument after
         *                            $id is taken as an element.
         *
         * @return int 0 (error) or 1 (success)
         * @throws Exception
         */
        public function fetch($id = NULL, $elems = array())
        {
            // Called with a flat argument list: everything after the id.
            $wanted = is_array($elems) ? $elems : array_slice(func_get_args(), 1);

            return $this->_getfetch(1, $id, $wanted);
        }

        /**
         * Get elements of an identifier (non-verbose output).
         *
         * @param string       $id
         * @param array|string $elems Either the list of elements, or the first
         *                            element: in that case, every argument after
         *                            $id is taken as an element.
         *
         * @return int 0 (error) or 1 (success)
         * @throws Exception
         */
        public function get($id = NULL, $elems = array())
        {
            // Called with a flat argument list: everything after the id.
            $wanted = is_array($elems) ? $elems : array_slice(func_get_args(), 1);

            return $this->_getfetch(0, $id, $wanted);
        }

        /**
         * Helper to get/fetch elements.
         *
         * Opens the database read-only, prints what Noid::fetch() returns
         * (followed by a line end in verbose mode) or writes the logged error
         * to the error stream, then closes the database.
         *
         * @param int    $verbose 1 for "fetch", 0 for "get"
         * @param string $id
         * @param array  $elems
         *
         * @return int 0 (error) or 1 (success)
         * @throws Exception
         */
        private function _getfetch($verbose, $id, $elems)
        {
            $noid = Db::dbopen($this->settings, DatabaseInterface::DB_RDONLY);
            if(!$noid){
                fwrite($this->stderr, Log::errmsg($noid));
                return 0;
            }
            $fetched = Noid::fetch($noid, $verbose, $id, $elems);
            // A NULL result is the failure case: report it in the status too,
            // instead of returning success after writing an error.
            $is_fetched = !is_null($fetched);
            if($is_fetched){
                print($fetched);
                if($verbose){
                    print PHP_EOL;
                }
            }
            else{
                fwrite($this->stderr, Log::errmsg($noid));
            }
            Db::dbclose($noid);
            return $is_fetched ? 1 : 0;
        }

        /**
         * Get a user note from a key.
         *
         * Opens the database read-only, prints the note returned by
         * Log::get_note() or writes the logged error to the error stream, then
         * closes the database.
         *
         * @param string $key
         *
         * @return int 0 (error) or 1 (success)
         * @throws Exception
         */
        public function get_note($key)
        {
            $noid = Db::dbopen($this->settings, DatabaseInterface::DB_RDONLY);
            if(!$noid){
                fwrite($this->stderr, Log::errmsg($noid));
                return 0;
            }
            $note = Log::get_note($noid, $key);
            // A NULL note is the failure case: report it in the status too,
            // instead of returning success after writing an error.
            $is_found = !is_null($note);
            if($is_found){
                print($note);
            }
            else{
                fwrite($this->stderr, Log::errmsg($noid));
            }
            Db::dbclose($noid);
            return $is_found ? 1 : 0;
        }

        /**
         * Print a greeting: a trivial command to check that the script runs.
         *
         * @return void
         */
        public function hello()
        {
            $greeting = 'Hello.';
            echo $greeting . PHP_EOL;
        }

        /**
         * Print the help about a topic.
         *
         * Delegates to usage() with both its "in error" and "brief" arguments
         * set to 0.
         *
         * @param string $topic
         *
         * @return int 1 (success)
         */
        public function help($topic = '')
        {
            return $this->usage(0, 0, $topic);
        }

        // yyy what about a "winnow" routine that is either started
        //     from cron or is started when an exiting noid call notices
        //     that there's some harvesting/garbage collecting to do and
        //     schedules it for, say, 10 minutes hence (by not exiting,
        //     but sleeping for 10 minutes and then harvesting)?

        /**
         * Hold ids.
         *
         * @param string       $on_off "hold" or "release"
         * @param array|string $ids    All arguments will be processed if not an array.
         *
         * @return int 0 (error) or 1 (success)
         * @throws Exception
         */
        public function hold($on_off = NULL, $ids = array())
        {
            // Called with a flat argument list: every argument after $on_off is an id.
            if(!is_array($ids)){
                $ids = array_slice(func_get_args(), 1);
            }

            $db = Db::dbopen($this->settings, DatabaseInterface::DB_WRITE);
            if(!$db){
                fwrite($this->stderr, Log::errmsg($db));
                return 0;
            }

            $done = Noid::hold($db, $this->contact, $on_off, $ids);
            if($done){
                // Prints whatever Log::errmsg() currently holds, followed by a
                // newline (original note: no error message at all).
                print(Log::errmsg($db) . PHP_EOL);
                $status = 1;
            }
            else{
                // Failure: logged message, then the brief usage of the command.
                fwrite($this->stderr, Log::errmsg($db));
                $this->usage(1, 1, 'hold');
                $status = 0;
            }

            Db::dbclose($db);
            return $status;
        }

        /**
         * Peppermint a noid.
         *
         * @param int    $n
         * @param string $elem
         * @param string $value
         *
         * @return int 0 (error) or 1 (success)
         * @throws Exception
         */
        public function peppermint($n = 0, $elem = NULL, $value = NULL)
        {
            // Same entry point as mint(), with its pepper flag switched on.
            $pepper = TRUE;
            return $this->mint($n, $elem, $value, $pepper);
        }

        /**
         * Mint a noid.
         *
         * @param int    $n
         * @param string $elem
         * @param string $value
         * @param bool   $pepper
         *
         * @return int 0 (error) or 1 (success)
         * @throws Exception
         */
        public function mint($n = NULL, $elem = NULL, $value = NULL, $pepper = FALSE)
        {
            // Mints $n identifiers and prints each one as an "id: ..." line.
            // $elem and $value are accepted but not used by this implementation.
            if($pepper){
                fwrite($this->stderr,
                    'The peppermint command is not implemented yet.' . PHP_EOL);
                return 0;
            }

            // Accept only a non-negative whole number, given as an int or as a
            // string of digits.  is_numeric() alone also lets "-1" or "1.5"
            // through, and a negative or fractional counter never reaches zero
            // when it is decremented, so the minting loop would not stop by itself.
            $is_count = is_int($n)
                ? $n >= 0
                : (is_string($n) && ctype_digit($n));
            if(!$is_count){
                fwrite($this->stderr, sprintf(
                    'Argument error: expected positive integer, got %s',
                    is_null($n) ? 'nothing' : (is_scalar($n) ? $n : gettype($n))
                ) . PHP_EOL);
                $this->usage(1, 1, 'mint');
                return 0;
            }

            $noid = Db::dbopen($this->settings, DatabaseInterface::DB_WRITE);
            if(!$noid){
                print Log::errmsg($noid);
                return 0;
            }

            for($remaining = (int) $n; $remaining > 0; $remaining--){
                $id = Noid::mint($noid, $this->contact, $pepper);
                if(is_null($id)){
                    // Stop at the first failure; ids already printed stay minted.
                    fwrite($this->stderr, Log::errmsg($noid));
                    Db::dbclose($noid);
                    return 0;
                }
                print "id: $id" . PHP_EOL;
            }
            Db::dbclose($noid);
            // A blank line ends the list of minted ids.
            print PHP_EOL;
            return 1;
        }

        /**
         * Add a note.
         *
         * @param string $key
         * @param string $value
         *
         * @return int 0 (error) or 1 (success)
         * @throws Exception
         */
        public function note($key = '', $value = '')
        {
            // Both parts are mandatory; an empty string counts as missing.
            if(!strlen((string) $key) || !strlen((string) $value)){
                fwrite($this->stderr,
                    'You must supply a key and a value.' . PHP_EOL);
                $this->usage(1, 1, 'note');
                return 0;
            }

            $noid = Db::dbopen($this->settings, DatabaseInterface::DB_WRITE);
            // Same guard as the other commands: without it a failed open would
            // be handed on to Log::note() and Db::dbclose().
            if(!$noid){
                fwrite($this->stderr, Log::errmsg($noid));
                return 0;
            }

            if(!Log::note($noid, $this->contact, $key, $value)){
                // A failed write is reported on standard output and, as before,
                // does not change the return value.
                print Log::errmsg($noid);
            }
            Db::dbclose($noid);
            return 1;
        }

        /**
         * Queue.
         *
         * @param string       $when
         * @param array|string $ids All arguments will be queued if not an array.
         *
         * @return int 0 (error) or 1 (success)
         * @throws Exception
         */
        public function queue($when = NULL, $ids = array())
        {
            // Called with a flat argument list: every argument after $when is an id.
            if(!is_array($ids)){
                $ids = func_get_args();
                array_shift($ids);
            }

            $noid = Db::dbopen($this->settings, DatabaseInterface::DB_WRITE);
            if(!$noid){
                fwrite($this->stderr, Log::errmsg($noid));
                return 0;
            }

            $queued = Noid::queue($noid, $this->contact, $when, $ids);
            if(empty($queued)){
                $retval = 0;
                fwrite($this->stderr, Log::errmsg($noid) . PHP_EOL);
                // The failure value may not be an array (presumably NULL --
                // TODO confirm against Noid::queue); normalise it so that the
                // counting below cannot raise warnings or a TypeError.
                $queued = array();
            }
            else{
                $retval = 1;
                print(join(PHP_EOL, $queued) . PHP_EOL);
            }

            // Every returned line that does not start with "error:" counts as queued.
            $n = count(array_filter($queued, function($v){
                return stripos($v, 'error:') !== 0;
            }));
            printf('note: %1$d %2$s queued' . PHP_EOL, $n, $n == 1 ? 'identifier' : 'identifiers');
            Db::dbclose($noid);
            return $retval;
        }

        /**
         * Returns the number of valid ids.
         *
         * @param string       $template
         * @param array|string $ids All arguments will be validated if not an array.
         *
         * @return int 0 (error), else the number of ids given minus the number reported as errors
         * @throws Exception
         */
        public function validate($template = NULL, $ids = array())
        {
            // Called with a flat argument list: every argument after $template is an id.
            if(!is_array($ids)){
                $ids = array_slice(func_get_args(), 1);
            }

            $db = Db::dbopen($this->settings, DatabaseInterface::DB_RDONLY);
            if(!$db){
                print Log::errmsg($db);
                return 0;
            }

            $results = Noid::validate($db, $template, $ids);
            if(empty($results)){
                fwrite($this->stderr, Log::errmsg($db));
                Db::dbclose($db);
                $this->usage(1, 1, 'validate');
                return 0;
            }

            // Print every result line and count those flagged as errors
            // (prefix "error:", compared case-insensitively).
            $errors = 0;
            foreach($results as $line){
                if(stripos($line, 'error:') === 0){
                    $errors++;
                }
                print($line . PHP_EOL);
            }

            Db::dbclose($db);
            // Number of ids given minus the number of error lines.
            return count($ids) - $errors;
        }

        /**
         * Print a blank (space) in front of every newline.
         * First arg must be a filehandle.
         *
         * @param resource     $out
         * @param string|array $args
         *
         * @return bool|int
         */
        private function _bprint($out, $args)
        {
            // An array of pieces is glued together without any separator.
            $text = $args;
            if(is_array($args)){
                $text = join('', $args);
            }
            // Put one space after every end-of-line, then write the result to
            // $out; the value returned is fwrite()'s.
            $indented = strtr($text, array(PHP_EOL => PHP_EOL . ' '));
            return fwrite($out, $indented);
        }

        /**
         * Print a message about a topic.
         *
         * @param int    $in_error
         * @param int    $brief
         * @param string $topic
         *
         * @return int Always returns 1 so it can be used in boolean blocks.
         */
        private function usage($in_error = NULL, $brief = NULL, $topic = NULL)
        {
            // Prints the help text of a topic.  Everything is written to a single
            // stream: $this->stderr when $in_error is set, STDOUT otherwise.
            // (Parts of the text used to go through "print", which split one
            // message between stderr and stdout in the error case.)
            $info = &$this->_info;

            if(is_null($in_error)){
                $in_error = 1; // default is to treat as error
            }
            if($in_error){
                flush(); // flush any pending output
            }
            // where to send output
            $out = $in_error ? $this->stderr : STDOUT;
            // default is to be brief
            if(is_null($brief)){
                $brief = 1;
            }
            $topic = $topic ? : 'intro';
            $topic = strtolower($topic);

            // Initialize info topics if need be.
            if(empty($info)){
                $this->_init_help();
            }

            // A topic may be abbreviated: keep every known topic it is a prefix of.
            $blurbs = array_filter(
                $this->_valid_help_topics,
                function($v) use ($topic){
                    return strpos($v, $topic) === 0;
                });
            // A full topic name wins even when it is also the prefix of another
            // topic; otherwise that topic could never be requested.
            if(count($blurbs) > 1 && in_array($topic, $blurbs, TRUE)){
                $blurbs = array($topic);
            }
            if(count($blurbs) != 1){
                $msg = count($blurbs) < 1
                    ? sprintf('Sorry: nothing under "%s".', $topic)
                    : sprintf("Help: Your request (%s), matches more than one topic: \n\t(%s).", $topic, join(', ', $blurbs));
                fwrite($out, $msg . PHP_EOL . ' You might try one of these topics:');
                // Layout kept from the previous loop: its counter spent one slot
                // per line on the line break itself, so each tab-indented row
                // holds one topic fewer than $topics_per_line.
                $topics_per_line = 8;
                $rows = array_chunk($this->_valid_help_topics, $topics_per_line - 1);
                foreach($rows as $row){
                    fwrite($out, PHP_EOL . "\t" . ' ' . join(' ', $row));
                }
                fwrite($out, PHP_EOL . PHP_EOL . PHP_EOL);
                return 1;
            }
            // If we get here, $blurbs names one story.
            $blurb = array_shift($blurbs);

            // Big if-elseif clause to switch on requested topic.
            //
            // Note that we try to make the output conform to ANVL syntax;
            // in the case of help output, every line tries to be a continuation
            // line for the value of an element called "Usage".  To do this we
            // pass all output through a routine that just adds a space after
            // every newline.  The end of the output should end the ANVL record,
            // so we print "\n\n" at the end.
            if($blurb == 'intro'){
                $msg = 'Usage:
              noid [-f DbDirOrConfig] [-t DbType] [-v] [-h] Command Arguments';
                if($brief){
                    $msg .= '
              noid -h             (for help with a Command summary).';
                    $msg .= '
              DbType: ';
                    $msg .= implode(', ', array_keys(Globals::DB_TYPES)) . '…';
                    $this->_bprint($out, $msg);
                    fwrite($out, PHP_EOL . PHP_EOL);
                    return 1;
                }
                $msg .= '

DbDirOrConfig defaults to "." if not found from -f/--fdbdir, or a NOID
environment variable, or the param set in file "config/settings.php".
When the path is a config file, its params will be used. The default config
file is "config/settings.php" in other cases.

DbType defaults to "bdb" (means Berkeley DB) if not found from -t/--type or
the param set in config/settings.php.

For more information, try "perldoc noid" or "noid help Command".

Summary:
';
                $this->_bprint($out, $msg);
                // One brief blurb per command that has one.
                foreach(self::VALID_COMMANDS as $t){
                    $i = $t . '/brief';
                    if(!empty($info[$i])){
                        $this->_bprint($out, $info[$i]);
                    }
                }
                $msg = <<<'EOD'

If invoked as "noidu…", output is formatted for a web client.  Give Command
as "-" to run a block of noid Commands read from stdin or from POST data.
EOD;
                $this->_bprint($out, $msg);
                fwrite($out, PHP_EOL . PHP_EOL);
                return 1;
            }
            // Brief blurbs are stored under "<topic>/brief".
            if($brief){
                $blurb .= '/brief';
            }
            if(empty($info[$blurb])){
                fwrite($out, sprintf('Sorry: no information on "%s".', $blurb) . PHP_EOL . PHP_EOL);
                return 1;
            }
            $this->_bprint($out, $info[$blurb]);
            fwrite($out, PHP_EOL);
            return 1;

            // yyy fix these verbose messages
            /**
             * $yyyy = <<<'EOD'
             * Called as "noid", an id generator accompanies every COMMAND.  Called as
             * "noi", the id generator is supplied implicitly by looking first for a
             * NOID environment variable and, failing that, for a file called ".noid" in
             * the current directory.  Examples show the explicit form.  To create a
             * generator, use
             *
             * noid ck8 dbcreate TPL SNAA
             *
             * where you replace TPL with a template that defines the shape and number
             * of all identifiers to be minted by this generator.  You replace SNAA with
             * the name (eg, the initials) of the sub NAA (Name Assigning Authority) that
             * will be responsible for this generator; for example, if the Online Archive
             * of California is the sub-authority for a template, SNAA could be "oac".
             * This example of generator initialization,
             *
             * noid oac.noid dbcreate pd2.wwdwwdc oac
             *
             * sets up the "oac.noid" identifier generator.  It can create "nice opaque
             * identifiers", such as "pd2pq5dk9z", suitable for use as persistent
             * identifiers should the supporting organization wish to provide such a
             * level of commitment.  This generator is also capable of holding a simple
             * sequential counter (starting with 1), which some callers may wish to use
             * as an internal number to keep track of minted external identifiers.
             * [ currently accessible only via the count() routine ]
             *
             * In the example template, "pd2" is a constant prefix for an identifier
             * generator capable of producing 70,728,100 identifiers before it runs out.
             * A template has the form "prefix.mask", where 'prefix' is a literal string
             * prepended to each identifier and 'mask' specifies the form of the generated
             * identifier that will appear after the prefix (but with no '.' between).
             * Mask characters are 'd' (decimal digit), 'w' (limited alpha-numeric
             * digit), 'c' (a generated check character that may only appear in the
             * terminal position).
             *
             * Alternatively, if the mask contains an 's' (and no other letters), dbcreate
             * initializes a generator of sequential numbers.  Instead of seemingly random
             * creates sequentially generated number.  Use '0s'
             * to indicate a constant width number padded on the left with zeroes.
             *
             * EOD;
             */
        }

        /**
         * Helper to manage help messages.
         *
         * @return int 1
         */
        private function _init_help()
        {
            // Fills $this->_info with one entry per help topic: "<command>" is the
            // long text and "<command>/brief" the summary.  Entries left empty are
            // reported by usage() as having no information.
            $info = &$this->_info;

            // For convenient maintenance, we store individual topics in separate
            // array elements.  So as not to slow down script start up, we don't
            // pre-load anything.  In this way only the requester of help info,
            // who does not need speed for this purpose, pays for it.
            //
            // The commands are appended with array_merge(): the "+" union keeps
            // the left-hand entry whenever two lists share a numeric index, which
            // would silently drop commands.  array_unique() keeps the list free
            // of duplicates should this initialisation run more than once.
            $this->_valid_help_topics = array_values(array_unique(
                array_merge($this->_valid_help_topics, self::VALID_COMMANDS)));
            $info = array();
            $info['bind/brief'] = <<<'EOD'

   noid bind How Id Element Value    # to bind an Id's Element, where
      How is set|add|insert|new|replace|mint|append|prepend|delete|purge.
      Use an Id of :idmap/Idpattern, Value=PerlReplacementPattern so that
      fetch returns variable values.  Use ":" as Element to read Elements
      and Values up to a blank line from stdin (up to EOF with ":-").

EOD;
            $info['bind'] = '';
            $info['dbinfo/brief'] = '';
            $info['dbinfo'] = '';
            $info['dbcreate/brief'] = <<<'EOD'

   noid dbcreate [ Template (long|-|short) [ NAAN NAA SubNAA ] ]
      where Template=prefix.Tmask, T=(r|s|z), and mask=string of (e|d|k)

EOD;
            $info['dbcreate'] = <<<'EOD'

To create an identifier minter governed by Template and Term ("long" or "-"),

   noid dbcreate [ Template Term [ NAAN NAA SubNAA ] ]

The Template gives the number and form of generated identifiers.  Examples:

    .rddd        minter of random 3-digit numbers that stops after the 1000th
    .zd          sequential numbers without limit, adding new digits as needed
  bc.sdddd       sequential 4-digit numbers with constant prefix "bc"
    .rdedeede    .7 billion random ids, extended-digits at chars 2, 4, 5 and 7
  fk.rdeeek      .24 million random ids with prefix "fk" and final check char

For persistent identifiers, use "long" for Term, and specify the NAAN, NAA,
and SubNAA.  Otherwise, use "-" for Term or omit it.  The NAAN is a globally
registered Name Assigning Authority Number; for identifiers conforming to the
ARK scheme, this is a 5-digit number registered with ark@cdlib.org, or 00000.
The NAA is the character string equivalent registered for the NAAN; for
example, the NAAN, 13030, corresponds to the NAA, "cdlib.org".  The SubNAA
is also a character string, but it is a locally determined and possibly
structured subauthority string (e.g., "oac", "ucb/dpg", "practice_area") that
is not globally registered.

EOD;
            $info['dbimport/brief'] = <<<'EOD'

   noid dbimport src_db_type    # bdb, mysql or xml
       Import all data from src_db_type format database into the database
       specified by -t option.

EOD;
            // Long help of dbimport.  It used to be stored under the "fetch" key,
            // where the assignment further down overwrote it, so the long
            // dbimport topic was always empty.
            $info['dbimport'] = <<<'EOD'

   noid dbimport src_db_type    # bdb, mysql or xml
       Import all data from src_db_type format database into the database
       specified by -t option.
       Info: src_db_type must differ to -t value.

EOD;
            $info['fetch/brief'] = <<<'EOD'

   noid fetch Id Element …        # fetch/map one or more Elements

EOD;
            $info['fetch'] = <<<'EOD'

To bind,

   noid bind replace fk0wqkb myGoto http://www.cdlib.org/foobar.html

sets "myGoto" element of identifier "fk0wqkb" to a string (here a URL).

EOD;
            $info['get/brief'] = <<<'EOD'

   noid get Id Element …        # fetch/map Elements without labels

EOD;
            $info['get'] = '';
            $info['hello/brief'] = '';
            $info['hello'] = '';
            $info['hold/brief'] = <<<'EOD'

   noid hold (set|release) Id …    # place or remove a "hold" on Id(s)

EOD;
            $info['hold'] = '';
            $info['mint/brief'] = <<<'EOD'

   noid mint N [ Elem Value ]    # to mint N identifiers (optionally binding)

EOD;
            $info['mint'] = '';
            $info['note/brief'] = '';
            $info['note'] = '';
            $info['peppermint/brief'] = '';
            $info['peppermint'] = '';
            $info['queue/brief'] = <<<'EOD'

   noid queue (now|first|lvf|Time) Id …    # queue (eg, recycle) Id(s)
      Time is NU, meaning N units, where U= d(ays) | s(econds).
      With "lvf" (Lowest Value First) lowest value of id will mint first.

EOD;
            $info['queue'] = '';
            $info['validate/brief'] = <<<'EOD'

   noid validate Template Id …    # to check if Ids are valid
      Use Template of "-" to use the minter's native template.

EOD;
            $info['validate'] = '';
            return 1;
        }

        /**
         * Return info about user running noid.
         *
         * @param bool $web
         *
         * @return string
         */
        private function who_are_you($web = FALSE)
        {
            // Builds "user/group", followed by " (euser/egroup)" when the
            // effective user differs from the real one, and preceded by
            // "remoteuser@remotehost " for web requests.  Returns '' when the
            // user cannot be determined.
            $user = NULL;
            if($web){
                // None of these server variables is guaranteed to be set, so
                // test each one before reading it.
                $remoteUser = empty($_SERVER['REMOTE_USER']) ? '' : $_SERVER['REMOTE_USER'];
                $remoteHost = '';
                foreach(array('REMOTE_HOST', 'REMOTE_ADDR') as $name){
                    if(!empty($_SERVER[$name])){
                        $remoteHost = $_SERVER[$name];
                        break;
                    }
                }
                $user = $remoteUser . '@' . $remoteHost;
            }

            // The real and effective ids of the process are only available
            // through the POSIX functions.
            $hasPosix = TRUE;
            foreach(array('posix_getuid', 'posix_getgid', 'posix_geteuid',
                'posix_getegid', 'posix_getpwuid', 'posix_getgrgid') as $function){
                if(!function_exists($function)){
                    $hasPosix = FALSE;
                    break;
                }
            }

            // Look up by REAL_USER_ID first.  get_current_user(), getmyuid() and
            // getmygid() describe the owner of the script file, not the user
            // running it, so they only serve as a fallback.
            $uid = NULL;
            $groupInfo = NULL;
            if($hasPosix){
                $uid = posix_getuid();
                $userInfo = posix_getpwuid($uid);
                $username = $userInfo ? $userInfo['name'] : get_current_user();
                $groupInfo = posix_getgrgid(posix_getgid());
            }
            else{
                $username = get_current_user();
            }
            if(empty($username)){
                return '';
            }
            $groupname = $groupInfo ? $groupInfo['name'] : '';
            $ugid = $username . '/' . $groupname;

            // If EFFECTIVE_USER_ID differs from REAL_USER_ID, get its info too.
            if($hasPosix){
                $euid = posix_geteuid();
                if($uid != $euid){
                    $eUserInfo = posix_getpwuid($euid);
                    if(empty($eUserInfo)){
                        return '';
                    }
                    $eUsername = $eUserInfo['name'];
                    $eGroupInfo = posix_getgrgid(posix_getegid());
                    $eGroupname = $eGroupInfo ? $eGroupInfo['name'] : '';
                    $ugid .= " ($eUsername/$eGroupname)";
                }
            }

            return $user ? "$user $ugid" : $ugid;
        }

        /** These methods replace the Perl dependencies (Getopt::Long, Text::ParseWords). */

        /**
         * Get options from the command line or the web request.
         *
         * @link https://php.net/manual/en/function.getopt.php#113015
         * @return array|bool
         */
        private function getoptreq()
        {
            // Command line: getopt() does all the work; its FALSE result is
            // passed through unchanged.
            $fromCli = PHP_SAPI === 'cli' || empty($_SERVER['REMOTE_ADDR']);
            if($fromCli){
                $short = join('', array_keys(self::OPTS));
                $long = array_values(self::OPTS);
                return getopt($short, $long);
            }

            // Neither the command line nor a web request: error.
            if(!isset($_REQUEST)){
                return FALSE;
            }

            // Web script: look every short and long option up in the request,
            // reading the getopt-style ":" suffixes of each specification.
            $parsed = array();
            $specs = array_merge(array_keys(self::OPTS), array_values(self::OPTS));
            foreach($specs as $spec){
                // "name::" -- optional value: FALSE when present but empty.
                if(substr($spec, -2) === '::'){
                    $name = substr($spec, 0, -2);
                    if(isset($_REQUEST[$name])){
                        $parsed[$name] = empty($_REQUEST[$name]) ? FALSE : $_REQUEST[$name];
                    }
                    continue;
                }
                // "name:" -- required value: ignored unless non-empty.
                if(substr($spec, -1) === ':'){
                    $name = substr($spec, 0, -1);
                    if(!empty($_REQUEST[$name])){
                        $parsed[$name] = $_REQUEST[$name];
                    }
                    continue;
                }
                // "name" -- plain flag: recorded as FALSE when present.
                if(ctype_alnum($spec) && isset($_REQUEST[$spec])){
                    $parsed[$spec] = FALSE;
                }
            }

            return $parsed;
        }

        /**
         * Remove options from the arguments.
         *
         * The command is returned as first value of the array, as in $argv.
         * Adapted from https://php.net/manual/en/function.getopt.php#100573
         *
         * @param array $arguments From $argv or $_REQUEST (with command as first).
         * @param array $options   Extracted options from arguments via getopt().
         *
         * @return array
         */
        private function getOnlyArguments(array $arguments, array $options)
        {
            // Returns $arguments without the option flags and option values that
            // were parsed into $options.  The first argument is never removed.

            // An explicit "--" ends the options: only what follows it is returned.
            // NOTE(review): unlike the path below, this one does not keep the
            // first argument in front -- confirm what the caller expects.
            $separator = array_search('--', $arguments, TRUE);
            if($separator){
                return array_splice($arguments, $separator + 1);
            }

            // One pattern per parsed option ("-x..." or "--name..."), plus the
            // list of values that were given to them.  Flags without a value come
            // as FALSE and have nothing to remove; comparing them loosely used
            // to match the arguments "0" and "".
            $flagPatterns = array();
            $optionValues = array();
            foreach($options as $option => $item){
                $option = (string) $option;
                $flagPatterns[] = '/^' . (strlen($option) > 1 ? '--' : '-')
                    . preg_quote($option, '/') . '/';
                $values = is_array($item) ? $item : array($item);
                foreach($values as $value){
                    if(is_scalar($value) && !is_bool($value)){
                        $optionValues[] = (string) $value;
                    }
                }
            }

            $isFlag = function($chunk) use ($flagPatterns){
                if(!is_string($chunk)){
                    return FALSE;
                }
                foreach($flagPatterns as $pattern){
                    if(preg_match($pattern, $chunk)){
                        return TRUE;
                    }
                }
                return FALSE;
            };

            $kept = array();
            $previous = NULL;
            $isFirst = TRUE;
            foreach($arguments as $chunk){
                $prune = FALSE;
                if(!$isFirst && is_string($chunk)){
                    // A value is removed only when it directly follows a parsed
                    // option flag.  Following any argument that merely starts
                    // with "-" (such as a lone "-") is not enough.
                    $prune = $isFlag($chunk)
                        || (in_array($chunk, $optionValues, TRUE) && $isFlag($previous));
                }
                if(!$prune){
                    $kept[] = $chunk;
                }
                $previous = $chunk;
                $isFirst = FALSE;
            }

            return $kept;
        }

        /**
         * Unify short and long options as long options.
         *
         * @param array $options
         *
         * @return array Unified options.
         */
        private function unifyOptionsAsLong($options)
        {
            // Map of short name => long name, both without the trailing ":"
            // markers used in the getopt specifications of self::OPTS.
            $shortToLong = array();
            foreach(self::OPTS as $short => $long){
                $shortToLong[rtrim($short, ':')] = rtrim($long, ':');
            }

            // Rename every short key to its long form; other keys stay as given.
            $unified = array();
            foreach($options as $name => $value){
                $target = isset($shortToLong[$name]) ? $shortToLong[$name] : $name;
                $unified[$target] = $value;
            }
            return $unified;
        }
    }
}

/** @var object $app */
// Script entry point: build the application object defined above.
$app = new NoidApp();

// Now, let's go.
// run() is handed the raw argument vector of the script ($argv).
$app->run($argv);

// yyy Possible for 'c' mask char:
//     ASCII 33 to 126 (no SPACE, no DEL)  --> 94 (not prime)
//     MINUS the 5 chars:  / \ - % "       --> 89 (prime)
//  or MINUS the 5 chars:  / \ - % .       --> 89 (prime)
//  or MINUS the 5 chars:  / \ - % SPACE   --> 89 (prime)
#
// Note: current (1/2004) restrictions on ARKs are alphanums plus
//    = @ $ _ * + #
// with the following reserved for special purposes
//    / . - %

// yyy noid example:  shuffle play (as in random song list)
// yyy bind is pair-wise or triple-wise?  (how to explain consistently)

// yyy add java class to distro
// yyy add pdf of doc to distro

/**
 * __END__
 *
 * =pod
 *
 * =for roff
 * .nr PS 12p
 * .nr VS 14.4p
 *
 * =head1 NAME
 *
 * noid - nice opaque identifier generator commands
 *
 * =head1 SYNOPSIS
 *
 * B<noid> [ B<-f> I<DbDirOrConfig> ] [ B<-t> I<DbType> ] [ B<-vh> ] I<Command> I<Arguments>
 *
 * =head1 DESCRIPTION
 *
 * The B<noid> utility creates minters (identifier generators) and accepts
 * commands that operate them.  Once created, a minter can be used to produce
 * persistent, globally unique names for documents, databases, images,
 * vocabulary terms, etc.  Properly managed, these identifiers can be used as
 * long term durable information object references within naming schemes such
 * as ARK, PURL, URN, DOI, and LSID.  At the same time, alternative minters
 * can be set up to produce short-lived names for transaction identifiers,
 * compact web server session keys, and other ephemera.
 *
 * A B<noid> minter is a lightweight database designed for efficiently
 * generating, tracking, and binding unique identifiers, which are produced
 * without replacement in random or
 * sequential order, and with or without a check character that can be used
 * for detecting transcription errors.  A minter can bind identifiers to
 * arbitrary element names and element values that are either stored or
 * produced upon retrieval from rule-based transformations of requested
 * identifiers, the latter having application in identifier resolution.  Noid
 * minters are very fast, scalable, easy to create and tear down, and have a
 * relatively small footprint.  They use BerkeleyDB as the underlying database.
 *
 * An identifier generated by a B<noid> minter is also known generically
 * as a "noid" (standing for nice opaque identifier and rhyming with void).
 * While a minter can record and bind any identifiers
 * that you bring to its attention, often it is used to generate, bringing
 * to your attention, identifier strings that carry no widely recognizable
 * meaning.  This semantic opaqueness reduces their vulnerability to era-Z<>
 * and language-specific change, and helps persistence by making for
 * identifiers that can age and travel well.
 *
 * The form, number, and intended longevity of a minter's identifiers are given
 * by a Template and a Term supplied when the generator database is created.
 * A supplied Term of "long" establishes extra restrictions and logging
 * appropriate for the support of persistent identifiers.  Across successive
 * minting operations, the generator "uses up" its namespace (the pool of
 * identifiers it is capable of minting) such that no identifier will ever be
 * generated twice unless the supplied Term is "short" and the namespace is
 * finite and completely exhausted.  The default Term is "medium".
 *
 * The B<noid> utility parameters -- flags, I<DbType> (database type),
 * I<DbDirOrConfig> (database location or config), I<Command>, I<Arguments> --
 * are described later under COMMANDS AND MODES.
 * There are also sections covering persistence, templates, rule-based
 * mapping, URL interface, and name resolution.
 *
 * =head1 TUTORIAL INTRODUCTION
 *
 * Once the noid utility is installed, the command,
 *
 * noid dbcreate s.zd
 *
 * will create a minter for an unlimited number of identifiers.
 * It produces a generator for medium term identifiers (the default) with
 * the Template, C<s.zd>, governing the order, number, and form of minted
 * identifier strings.  These identifiers will begin with the constant part
 * C<s> and end in a digit (the final C<d>), all within an unbounded sequential
 * (C<z>) namespace.  The TEMPLATES section gives a full explanation.
 * This generator will mint the identifiers, in order,
 *
 * s0, s1, s2, …, s9, s10, …, s99, s100, …
 *
 * and never run out.  To mint the first ten identifiers,
 *
 * noid mint 10
 *
 * When you're done, on a UNIX platform you can remove that minter with
 *
 * rm -fr NOID
 *
 * Now let's create a more complex minter.
 *
 * noid dbcreate f5.reedeedk long 13030 cdlib.org oac/cmp
 *
 * This produces a generator for long term identifiers that begin with the
 * constant part C<13030/f5>.  Exactly 70,728,100 identifiers will be minted
 * before running out.
 *
 * The 13030 parameter is the registered Name Assigning Authority Number
 * (NAAN) for the assigning authority known as "cdlib.org", and "oac/cmp"
 * is a string chosen by the person setting up this minter to identify the
 * project that will be operating it.  This particular minter generates
 * identifiers that start with the prefix C<f5> in the 13030 namespace.
 * If long term information retention is within the mission of your
 * organization (this includes national and university libraries and archives),
 * you may register for a globally unique NAAN by sending email to ark at
 * cdlib dot org.
 *
 * Identifiers will emerge in "quasi-random" order, each consisting of six
 * characters matching up one-for-one with the letters C<eedeed>.
 *
 * noid mint 1
 *
 * The first identifier should be C<13030/f54x54g11>, with the namespace
 * ranging from a low of C<13030/f5000000s> to a high of C<13030/f5zz9zz94>.
 * You can create a "locations" element under a noid and bind three URLs
 * to it with the command,
 *
 * noid bind set 13030/f54x54g11 locations \
 * 'http://a.b.org/foo|http://c.d.org/bar|http://e.f.org/zaf'
 *
 * The template's final C<k> causes a computed check character to be added
 * to the end of every generated identifier.  It also accounts for why the
 * lowest and highest noids look a little odd on the end.  The final check
 * character allows detection of the most common transcription errors,
 * namely, incorrect entry of one character and the transposition of two
 * characters.  The next command takes three identifiers that someone
 * might ask you about and determines that, despite appearances, only the
 * first is in the namespace of this minter.
 *
 * noid validate - 13030/f54x54g11 13030/f54y54g11 \
 * 13030/f54x45g11
 *
 * To make way for creation of another minter, you can move the entire
 * minter into a subdirectory with the command,
 *
 * mkdir f57 ; mv NOID f57
 *
 * A minter may be set up on a web server, allowing the NAA organization
 * easily to distribute name assignment to trusted parties operating from
 * remote locations.  The URL INTERFACE section describes the procedure in
 * detail.  Once set up, you could mint one identifier by entering a URL such
 * as the following into your web browser:
 *
 * http://foo.ucop.edu/nd/noidu_f57?mint+1
 *
 * Using a different procedure, you can also make your identifier bindings
 * (e.g., location information) visible to the Internet via a few web server
 * configuration directives.  The NAME RESOLUTION section explains this further.
 *
 * =head1 IDENTIFIER - AN ASSOCIATION SUPPORTED BY BINDINGS
 *
 * An identifier is not a string of character data -- an identifier is an
 * association between a string of data and an object.  This abstraction
 * is necessary because without it a string is just data.  It's
 * nonsense to talk about a string's breaking, or about its being strong,
 * maintained, and authentic.  But as a representative of an association,
 * a string can do, metaphorically, the things that we expect of it.
 *
 * Without regard to whether an object is physical, digital, or conceptual,
 * to identify it is to claim an association between it and a representative
 * string, such as "Jane" or "ISBN 0596000278".  What gives your claim
 * credibility is a set of verifiable assertions, or metadata, about the
 * object, such as age, height, title, or number of pages.  Verifiability is
 * outside the scope of the noid utility, but you can use a minter to record
 * assertions supporting an association by binding arbitrary named elements
 * and values to the identifier.  Noid database elements can be up to 4
 * gigabytes in length, and one noid minter is capable of recording billions
 * of identifiers.
 *
 * You don't have to use the noid binding features at all if you prefer to
 * keep track of your metadata elsewhere, such as in a separate database
 * management system (DBMS) or on a sheet of paper.  In any case, for each
 * noid generated, the minter automatically stores its own lightweight
 * "circulation" record asserting who generated it and when.  If most of
 * your metadata is maintained in a separate database, the minter's own
 * records play a backup role, providing a small amount of redundancy that
 * may be useful in reconstructing database bindings that have become damaged.
 *
 * An arbitrary database system can complement a noid minter without any
 * awareness or dependency on noids.  On computers, identifier bindings are
 * typically managed using methods that at some point map identifier strings
 * to database records and/or to filesystem entries (effectively using the
 * filesystem as a DBMS).  The structures and logistics for bindings
 * maintenance may reside entirely with the minter database, entirely outside
 * the minter database, or anywhere in between.  An individual organization
 * defines whatever maintenance configuration suits it.
 *
 * =head1 PERSISTENCE
 *
 * A persistent identifier is an identifier that an organization commits to
 * retain in perpetuity.  Associations, the I<sine qua non> of identifiers,
 * last only as long as they (in particular, their bindings) are maintained.
 * Often maintaining identifiers goes hand in hand with controlling the
 * objects to which they are bound.  No technology exists that automatically
 * manages objects and associations; persistence is a matter of service
 * commitment, tools that support that commitment, and information that
 * allows users receiving identifiers to make the best judgment regarding
 * an organization's ability and intention to maintain them.
 *
 * It will be normal for organizations to maintain their own assertions
 * about identifiers that you issue, and vice versa.  In general there is
 * nothing to prevent discrepancies among sets of assertions.  Effectively,
 * the association -- the identifier -- is in the eye of the beholder.  As a
 * simple example, authors create bibliography entries for cited works, and
 * in that process they make their claims, often with small errors, about
 * such things as the author and title of the identified thing.  It is common
 * for a provider of an identifier-driven service such as digital object
 * retrieval to allow users to review its own, typically better-maintained
 * sets of identifier assertions (i.e., metadata), even if it minted none
 * of the identifiers
 * that it services.  We call such an organization a Name Mapping Authority
 * (NMA) because it "maps" identifiers to services.  It is possible for an
 * NMA to service identifiers even if it neither hosts nor controls any
 * objects.
 *
 * It will also be normal for archiving organizations to maintain their own
 * peculiar ideas about what persistence means.  Different flavors will
 * exist even within one organization, where, for example, it may be
 * appropriate to apply corrections to persistent objects of one category,
 * to never change objects of another, and to remove objects of a third
 * category with a promise never to re-assign those objects' identifiers.
 * One institution will guarantee persistence for certain
 * things, while the strongest commitment made by some
 * prominent archives will be "reasonable effort".  Given the range of
 * possibilities, a memory organization will need to record not only the
 * identities but also the support policies for objects in its care.
 * Any database, including a noid minter, can be used for this purpose.
 *
 * For persistence across decades or centuries, opinions regarding an
 * object's identity and commitments made to various copies of it will
 * tend naturally to become more diverse.  An object may have been inherited
 * through a chain of stewardship, subtle identity changes, and peaks of
 * renewed interest
 * stretching back to a completely unrelated and now defunct organization
 * that created and named it.  For its original identifier to have persisted
 * across the intervening years, it must look the same as when first minted.
 * At that particular time, global uniqueness required the minted identifier
 * to bear the imprint of the issuing organization (the NAA, or Name
 * Assigning Authority), which long ago ceased to have any responsibility
 * for its persistence.  There is thus no conflict in a mapping authority
 * (NMA) servicing identifiers that originate in many different assigning
 * authorities.
 *
 * These notions of flavors of persistence and separation of name authority
 * function are built into the ARK (Archival Resource Key) identifier scheme
 * that the B<noid> utility was partly created to support.  By design, noid
 * minters also work within other schemes in recognition that persistence
 * has nothing to do with identifier syntax.  Opaque identifiers can be used
 * by any application needing to reduce the liability created when identifier
 * strings contain linguistic fragments that, however appropriate or even
 * meaningless they are today, may one day create confusion, give offense,
 * or infringe on a trademark as the semantic environment around us and our
 * communities evolves.  If employed for persistence, noid minters ease the
 * unavoidable costs of long term maintenance by having a small technical
 * footprint and by being implemented completely as open source software.
 * For more information on ARKs, please see L<http://ark.cdlib.org/> .
 *
 * =head1 COMMANDS AND MODES
 *
 * Once again, the overall utility summary is
 *
 * =over 5
 *
 * B<noid> [ B<-f> I<DbDirOrConfig> ] [ B<-t> I<DbType> ] [ B<-vh> ] I<Command> I<Arguments>
 *
 * =back
 *
 * In all invocations, output is intended to be both human- and
 * machine-readable.  Batch operations are possible, allowing multiple
 * minting and binding commands within one invocation.  In particular,
 * if I<Command> is given as a "-" argument, then actual I<Commands>
 * are read in bulk from the standard input.
 *
 * The string, I<DbDirOrConfig>, specifies the directory where the database
 * resides. If it is a config file, its settings are used (the default config
 * file is "config/settings.php").
 * To protect database coherence, it should not be located on a filesystem
 * such as NFS or AFS that doesn't support POSIX file locking semantics.
 * I<DbDirOrConfig> may be given with the B<NOID>
 * environment variable, overridable with the B<-f> option.  If those strings
 * are empty, the name or link name of the B<noid> executable (argv[0] for C
 * programmers) is checked to see if it reveals I<DbDirOrConfig>.  If that check
 * (described next) fails, I<DbDirOrConfig> is taken to be the current directory.
 *
 * To check the name of the executable for I<DbDirOrConfig>, the final pathname
 * component (tail) is examined and split at the first "_" encountered.  If none,
 * the check fails.  Otherwise, the check is considered successful and the
 * latter half is taken as naming I<DbDirOrConfig> relative to the current
 * directory.
 * This mechanism is designed for cases when it is inconvenient to specify
 * I<DbDirOrConfig> (such as in the URL interface) or when you are running
 * several minters at once.  As an example, F</usr/bin/noid_fk9> specifies a
 * I<DbDirOrConfig> of F<fk9>.
 *
 * All files associated with a minter will be organized in a subdirectory,
 * F<NOID>, of I<DbDirOrConfig>; this has the consequence that there can be at
 * most one minter in a directory.  To allow B<noid> to create a new minter in
 * a directory already containing a F<NOID> subdirectory, remove or rename
 * the entire F<NOID> subdirectory.
 *
 * The B<noid> utility may be run as a URL-driven web server application,
 * such as in a CGI that allows name assignment via remote operator.
 * If the executable begins B<noidu…>, the noid URL mode is in effect.
 * Input parameters, separated by a "+" sign, are expected to arrive
 * embedded in the query part of a URL, and output will be formatted
 * for display on an ordinary web browser.  An executable of B<noidu_xk4>,
 * for example, would turn on URL mode and set I<DbDirOrConfig> to F<xk4>.
 * This is further described under URL INTERFACE.
 *
 * The B<noid> utility may be run as a name resolver running behind a web
 * server.  If the executable begins B<noidr…>, the noid resolver mode is
 * in effect, which means that commands will be read from the standard input
 * (as if only the "-" argument had been given) and the script output will
 * be unbuffered.  This mode is designed for machine interaction and is
 * intended to be operated by rewriting rules listed in a web server
 * configuration file as described later under NAME RESOLUTION AND
 * REDIRECTION INTERFACE.
 *
 * At minter creation time, a report summarizing its properties is produced
 * and stored in the file, F<NOID/README>.  This report may be useful to the
 * organization articulating the operating policy of the minter.  In a formal
 * context, such as the creation of a minter for long term identifiers, that
 * organization is the Name Assigning Authority.
 *
 * The B<-v> option prints the current version of the B<noid> utility and
 * B<-h> prints a help message.
 *
 * In the I<Command> list below, capitalized symbols indicate values to be
 * replaced by the caller.
 * Optional arguments are in [brackets] and (A|B|C) means one of A or B or C.
 *
 * =over 4
 *
 * =item B<noid dbcreate> [ I<Template> [ I<Term> [ I<NAAN NAA SubZ<>NAA> ] ] ]
 *
 * Create a database that will mint (generate) identifiers according to the
 * given Template and Term.  As a side-effect this causes the creation of
 * a directory, F<NOID>, within I<DbDirOrConfig>.  If you have several
 * generators, it may be convenient to operate each from within a
 * I<DbDirOrConfig> that uniquely identifies each Template; for example, you
 * might change to a directory that you named F<fk6> after the Template
 * C<fk.rdeedde> ("fk" followed by 6 variable characters) of the minter that
 * resides there.
 *
 * The Term declares whether the identifiers are intended to be "long",
 * "medium" (the default), or "short".  A short term identifier minter is
 * the only one that will re-mint identifiers after the namespace is
 * exhausted, simply returning the oldest previously minted identifier.
 * As mentioned earlier, however, some namespaces are unbounded and never
 * run out of identifiers.
 *
 * If Term is "long", the arguments NAAN, NAA, and SubZ<>NAA are required,
 * and all minted identifiers will be returned with the NAAN and a "/"
 * prepended to them.  The NAAN is a namespace identifier and should be a
 * globally unique Name Assigning Authority (NAA) number.  Apply for one
 * by email to ark@cdlib.org, or for testing purposes, use "00000" as a
 * non-unique NAAN.
 *
 * The NAA argument is the character string equivalent for the NAAN; for
 * example, 13960 corresponds to the NAA, "archive.org".  The SubNAA argument
 * is also a character string, but is a locally determined and possibly
 * structured subauthority string (e.g., "oac", "ucb/dpg", "practice_area")
 * that is not globally registered.
 *
 * If Template is not supplied, the minter freely binds any identifier that
 * you submit without validating it first.  In this case it also mints
 * medium term identifiers under the default Template, C<.zd>.
 *
 * =item B<noid mint> I<N> [ I<Element Value> ]
 *
 * Generate N identifiers.  If other arguments are specified, for each
 * generated noid, add the given Element and bind it to the given Value.
 * [Element-Value binding upon minting is not implemented yet.]
 *
 * There is no "unmint" command.  Once an identifier has been circulated in
 * the outside world, it may be hard to withdraw because external users and
 * systems will have bound it with their own assertions.
 * Even within the minting organization, removing all of the identifier's
 * supporting bindings could entail actions such as file deletion that are
 * outside the scope of the minter.  While there is no command capable of
 * withdrawing a circulated identifier, it is nonetheless easy to B<queue>
 * an identifier for reminting and to B<hold> it against the possibility of
 * minting at all.  Identifiers that are long term should be treated as
 * non-renewable resources except when you are absolutely sure about
 * recycling them.
 *
 * =item B<noid peppermint> I<N> [ I<Element Value> ]
 *
 * [This command is not implemented yet.]
 * Generate N "peppered" identifiers.  A peppered identifier is a regular
 * identifier concatenated with a "!" character and a randomly generated
 * cookie -- the pepper -- which serves as a kind of per-identifier password.
 * (Salt is a technical term for some extra data that makes it harder to
 * crack encrypted values; we use pepper for some extra data that makes it
 * harder to crack unencrypted values.)  To provide an extra level of
 * database security, the base identifier, which is everything up to the "!",
 * should be used in all public communication, but the complete peppered
 * identifier is required for all noid operations that would change values
 * in the database.
 *
 * As with the B<mint> command, if other arguments are specified, for each
 * generated noid, add the given Element and bind it to the given Value.
 *
 * =item B<noid bind> I<How Id Element Value>
 *
 * For the given Id, bind the Element to Value according to How.  The
 * Element and Value may be arbitrary strings.  There are two reserved
 * Element names allowing Values to be entered that are too large or
 * syntactically inconvenient (depending on the calling environment's
 * quoting restrictions) to pass in as command-line tokens.
 *
 * If the Element is ":" and no Value is present, lines are read
 * from the standard input up to a blank line; they will contain
 * Element-colon-Value pairs in essentially email header format,
 * with long values continued on indented lines.  If the Element is ":-"
 * and no Value is present, lines are read from the standard input up
 * to end-of-file; the first non-comment, non-blank line must have an
 * Element-colon to specify an Element name, and all the remaining input
 * (up to EOF) is taken as its corresponding Value.  Lines beginning
 * with "#" are considered "comment" lines and are skipped.
 *
 * =for later XXX test the "and no Value is present" in both cases above
 *
 * The I<How> argument specifies one of the following kinds of binding.
 * Of these, the B<set>, B<add>, B<insert>, and B<purge> kinds "don't care"
 * if there is no current binding.
 *
 * =over 6
 *
 * =item B<new>
 *
 * Only if Element does not exist, create a new binding.
 *
 * =item B<replace>
 *
 * Only if Element exists, undo any old bindings and create a new binding.
 *
 * =item B<set>
 *
 * Means B<new> or, failing that, B<replace>.
 *
 * =item B<append>
 *
 * Only if Element exists, place Value at the end of the old binding.
 *
 * =item B<add>
 *
 * Means B<new> or, failing that, B<append>.
 *
 * =item B<prepend>
 *
 * Only if Element exists, place Value at the beginning of the old binding.
 *
 * =item B<insert>
 *
 * Means B<new> or, failing that, B<prepend>.
 *
 * =item B<delete>
 *
 * Remove any trace of Element, returning an error if it did not exist to
 * begin with.
 *
 * =item B<purge>
 *
 * Remove any trace of Element, returning success whether or not it existed
 * to begin with.
 *
 * =item B<mint>
 *
 * Means B<new>, but ignore the Id argument (actually, confirm that it
 * was given as B<new>) and mint a new Id first.
 *
 * =item B<peppermint>
 *
 * [This kind of binding is not implemented yet.]
 * Means B<new>, but ignore the Id argument (B<new>)
 * and peppermint a new Id first.
 *
 * =back
 *
 * The RULEZ<>-Z<>BASED MAPPING section explains how to set up retrieval
 * using non-stored values.
 *
 * =item B<noid fetch> I<Id> [ I<Element> … ]
 *
 * For the noid, Id, print with labels all bindings for the given
 * Elements.  If no Element is given, find and print all bindings for the
 * given Id.  This is the verbose version of the B<get> command, in that it
 * prints headers and labels for everything it finds.
 *
 * =item B<noid get> I<Id> [ I<Element> … ]
 *
 * For the noid, Id, print without labels all bindings for the given
 * Elements.  If no Element is given, find and print all bindings for the
 * given Id.  This is the quiet version of the B<fetch> command, in that it
 * suppresses all headers and labels.  Between each Element requested,
 * the output will be separated by a blank line.
 *
 * =item B<noid hold> (B<set>|B<release>) I<Id> …
 *
 * Place or remove a B<hold> on one or more Ids.  A hold placed on an Id that
 * has not been minted will cause it to be skipped when its turn to be minted
 * comes around.  A hold placed on an Id that has been minted will make it
 * impossible to queue (typically for recycling).  Minters of long term
 * identifiers automatically place a hold on every minted noid.
 * Holds can be placed or removed manually at any time.
 *
 * =item B<noid queue> (B<now>|B<first>|B<lvf>|I<Time>) I<Id> …
 *
 * Queue one or more Ids for minting.  Time is a number followed by units,
 * which can be B<d> for days or B<s> for seconds (the default units).  This
 * can be used to recycle noids B<now> or after a delay period.  With
 * B<first>, the Id(s) will be queued such that they will be minted before
 * any of the time-delayed entries.  With B<lvf> (Lowest Value First), the
 * lowest valued identifier (intended for use with numeric identifiers) will
 * be taken from the queue for minting before all others.  [ needs testing ]
 *
 * =for comment XXX is lvf tested enough?
 *
 * =item B<noid validate> (I<Template>|B<->) I<Id> …
 *
 * Validate one or more Ids against a given Template, which, if given as "-",
 * causes the minter's native I<Template> to be used.
 *
 * =back
 *
 * =head1 TEMPLATES
 *
 * A Template is a coded string of the form Prefix.Mask that is given to
 * the noid B<dbcreate> command to govern how identifiers will be minted.
 * The Prefix, which may be empty, specifies an initial constant string.
 * For example, upon database creation, in the Template
 *
 * tb7r.zdd
 *
 * the Prefix says that every minted identifier will begin with the literal
 * string C<tb7r>.  Each identifier will end in at least two digits (C<dd>),
 * and because of the C<z> they will be sequentially generated without limit.
 * Beyond the first 100 mint operations, more digits will be added as needed.
 * The minted noids will be, in order,
 *
 * tb7r00, tb7r01, …, tb7r100, tb7r101, …, tb7r1000, …
 *
 * The period (".") in the Template does not appear in the identifiers but
 * serves to separate the constant first part (Prefix) from the variable
 * second part (Mask).  In the Mask, the first letter determines either
 * random or sequential ordering and the remaining letters each match up
 * with characters in a generated identifier.  Perhaps the best way to
 * introduce templates is with a series of increasingly complex examples.
 *
 * =over 12
 *
 * =item C<.rddd>
 *
 * to mint random 3-digit numbers, stopping after 1000th
 *
 * =item C<.sdddddd>
 *
 * to mint sequential 6-digit numbers, stopping after millionth
 *
 * =item C<.zd>
 *
 * sequential numbers without limit, adding new digits as needed
 *
 * =item C<bc.rdddd>
 *
 * random 4-digit numbers with constant prefix C<bc>
 *
 * =item C<8rf.sdd>
 *
 * sequential 2-digit numbers with constant prefix C<8rf>
 *
 * =item C<.se>
 *
 * sequential extended-digits (from 0123456789bcdfghjkmnpqrstvwxz)
 *
 * =item C<h9.reee>
 *
 * random 3-extended-digit numbers with constant prefix C<h9>
 *
 * =item C<.zeee>
 *
 * unlimited sequential numbers with at least 3 extended-digits
 *
 * =item C<.rdedeedd>
 *
 * random 7-char numbers, extended-digits at chars 2, 4, and 5
 *
 * =item C<.zededede>
 *
 * unlimited mixed digits, adding new extended-digits as needed
 *
 * =item C<sdd.sdede>
 *
 * sequential 4-mixed-digit numbers with constant prefix C<sdd>
 *
 * =item C<.rdedk>
 *
 * random 3 mixed digits plus final (4th) computed check character
 *
 * =item C<.sdeeedk>
 *
 * 5 sequential mixed digits plus final extended-digit check char
 *
 * =item C<.zdeek>
 *
 * sequential digits plus check char, new digits added as needed
 *
 * =item C<63q.redek>
 *
 * prefix plus random 4 mixed digits, one of them a check char
 *
 * =back
 *
 * The first letter of the Mask, the generator type, determines the order
 * and boundedness of the namespace.  For example, in the Template C<.sddd>,
 * the Prefix is empty and the C<s> says that the namespace is sequentially
 * generated but bounded.  The generator type may be one of,
 *
 * =over 4
 *
 * =item C<r>
 *
 * for quasi-randomly generated identifiers,
 *
 * =item C<s>
 *
 * for sequentially generated identifiers, limited in length and
 * number by the length of the Mask,
 *
 * =item C<z>
 *
 * for sequentially generated identifiers, unlimited in length or
 * number, re-using the most significant mask character (the second
 * character of the Mask) as needed.
 *
 * =back
 *
 * Although the order of minting is not obvious for C<r> type minters,
 * it is "quasi-random" in the sense that on your machine a minter created
 * with the same Template will always produce the same sequence of noids
 * over its lifetime.  Quasi-random is a shade more predictable than
 * pseudo-random (which, technically, is as random as computers get).
 * This is a feature designed to help noid managers in case they are
 * forced to start minting again from scratch; they simply process their
 * objects over in the same order as before to recover the original
 * assignments.
 *
 * After the generator type, the rest of the Mask determines the form of
 * the non-Prefix part, matching up letter-for-character with each generated
 * noid character (an exception for the C<z> case is described below).
 * In the case of the Template C<xv.sdddd>, the last four C<d> Mask characters
 * say that all identifiers will end with four digits, so the last identifier
 * in the namespace is C<xv9999>.
 *
 * When C<z> is used, the namespace is unbounded and therefore identifiers
 * will eventually need to grow in length.  To accommodate the growth, the
 * second character (C<e> or C<d>) of the Mask will be repeated as often as
 * needed; for instance, when all 4-digit numbers are used up, a 5th digit
 * will be added.  After the generator type character, Mask characters have
 * the following meanings:
 *
 * =over 4
 *
 * =item C<d>
 *
 * a pure digit, one of { 0123456789 }
 *
 * =item C<e>
 *
 * an "extended digit", one of { 0123456789bcdfghjkmnpqrstvwxz }
 * (lower case only)
 *
 * =item C<k>
 *
 * a computed extended digit check character;
 * if present, it must be the final Mask character
 *
 * =back
 *
 * The set of extended digits is designed to help create more compact
 * noids (a larger namespace for the same length of identifier) and
 * discourage "accidental semantics", namely, the introduction of strings that
 * have unintended but commonly recognized meanings.  Opaque identifiers are
 * desirable in many situations and the absence of vowels in extended digits
 * is a step in that direction.  To reduce visual mismatches, there is also
 * no letter "l" (ell) because it is often mistaken for the digit "1".
 *
 * The optional C<k> Mask character, which may only appear at the end, enables
 * detection of cases when a single character is mistyped and when two
 * adjacent characters have been transposed -- the most common transcription
 * errors.  A final C<k> in the Mask will cause a check character to be
 * appended after first computing it on the entire identifier generated so
 * far, including the NAAN if one was specified at database creation time.
 * For example, the final digit C<1> in
 *
 * 13030/f54x54g11
 *
 * was first computed over the string C<13030/f54x54g1> and then added to the end.
 *
 * =head1 RULEZ<>-Z<>BASED MAPPING
 *
 * Any Element may be bound to a class of Ids such that retrieval
 * against that Element for any Id in the class returns a computed value when
 * no stored value exists.  The class of Ids is specified via a regular
 * expression (Perl-style) that will be checked for a match against Ids
 * submitted via a retrieval operation (B<get> or B<fetch>) that names any
 * Element bound in this manner.  If the match succeeds, the element Value
 * that was bound with the Id class is used as the right-hand side of a Perl
 * substitution, and the resulting transformation is returned.  We call this
 * rule-based mapping, and it is probably best explained by working through
 * the examples below.
 *
 * To set up rule-based mapping for an Id class, construct a B<bind> operation
 * with an Id of the form C<:idmap/>I<Idpattern>, where I<Idpattern> is a Perl
 * regular expression.  Then choose an Element name that you wish to have
 * trigger the pattern match check whenever that Element is requested via a
 * retrieval operation and a stored value does NOT exist; any Element will
 * work as long as you use it for both binding and retrieving.  Finally,
 * specify a Value to be used as replacement text that transforms matching
 * Ids into computed values via a Perl s/// substitution.  As a simple example,
 *
 * noid bind set :idmap/^ft redirect g7h
 *
 * would cause any subsequent retrieval request against the Element named
 * "redirect" to try pattern matching when no stored value is found.
 * If the Id begins with "ft", it would then try to replace the "ft" with
 * "g7h" and return the result as if it were a stored value.  So if the Id
 * were C<ft89xr2t>, the command
 *
 * noid get ft89xr2t redirect
 *
 * would return C<g7h89xr2t>.  Fancier substitutions are possible, including
 * replacement patterns that reference subexpressions in the original
 * matching I<Idpattern>.  For example, the second command below,
 *
 * noid bind set ':idmap/^ft([^x]+)x(.*)' my_elem '$2/g7h/$1'
 * noid get ft89xr2t my_elem
 *
 * would return C<r2t/g7h/89>.  For ease of implementation, internally this kind
 * of binding is stored and reported (which can be confusing) as the special
 * noid, C<:idmap/>I<Element>, under element name I<Idpattern>.
 *
 * =head1 URL INTERFACE
 *
 * Any number of minters can be operated behind a web server from a browser
 * or any tool that activates URLs.  This section describes a one-time set up
 * procedure to make your server aware of minters, followed by another set up
 * procedure for each minter.  The one-time procedure involves creating a
 * directory in your web server document tree where you will place one or
 * more noid minter databases.  In this example, the directory is F<htdocs/nd>
 * and we'll assume the B<noid> script was originally installed in
 * F</usr/local/bin>.
 *
 * mkdir htdocs/nd
 * cp -p /usr/local/bin/noid htdocs/nd/
 *
 * The second command above creates an executable copy of the noid script
 * that will be linked to for each minter you intend to expose to the web.
 * To make your server recognize such links, include the line
 *
 * ScriptAliasMatch ^/nd/noidu(.*) "/srv/www/htdocs/nd/noidu$1"
 *
 * in your server configuration file and restart the server before trying the
 * commands that follow.  If you did not install the supporting F<Noid.pm>
 * module normally, you may also have to store a copy of it next to the script.
 * This completes the one-time server set up.
 *
 * Thereafter, for each minter that you wish to expose, it must first be
 * allowed to write to its own database when invoked via the web server.
 * Because it will be running under a special user at that time, before you
 * create it, first become the user that your server runs under.  In this
 * example that user is "wwwrun".
 *
 * cd htdocs/nd
 * su wwwrun
 * noid dbcreate kt.reeded
 * mkdir kt5
 * mv NOID kt5/
 * ln noid noidu_kt5
 *
 * The third command above creates a minter for noids beginning with
 * C<kt> followed by 5 characters.  The minter is then moved into its own
 * directory within F<htdocs/nd>.  Finally, the last command makes a hard
 * link (not a soft link) to the noid script, which for this minter will
 * be invoked under the name B<noidu_kt5>.
 *
 * The URL interface is similar to the command line interface, but
 * I<Commands> are passed in via the query string of a URL where by
 * convention a plus sign ("+") is used instead of spaces to separate
 * arguments.  You will likely want to set up access restrictions (e.g.,
 * with an F<.htaccess> file) so that only the parties you designate
 * can generate identifiers.  There is also no B<dbcreate> command
 * available from the URL interface.
 *
 * To mint one identifier, you could enter the following URL into your
 * web browser, but replace "foo.ucop.edu" with your server's name:
 *
 * http://foo.ucop.edu/nd/noidu_kt5?mint+1
 *
 * Reload to mint again. If you change the 1 to 20, you get twenty new and
 * different noids.
 *
 * http://foo.ucop.edu/nd/noidu_kt5?mint+20
 *
 * To bind some data to an element called "myGoto" under one of the noids
 * already minted,
 *
 * http://foo.ucop.edu/nd/noidu_kt5?
 * bind+set+13030/kt639k9+myGoto+http://foo.ucsd.edu/
 *
 * In this case we stored a URL in "myGoto".  This kind of convention can
 * underlie a redirection mechanism that is part of an organization's overall
 * identifier resolution strategy.  To retrieve that stored data,
 *
 * http://foo.ucop.edu/nd/noidu_kt5?get+13030/kt639k9+myGoto
 *
 * Bulk operations can be performed over the web by invoking the URL with a
 * query string of just "-", which will cause the minter to look for noid
 * commands, one per line, in the POST data part of the HTTP request.  If
 * you put noid commands in a file F<myCommands> and run the Unix utility
 *
 * curl --data-binary @myCommands \
 * 'http://foo.ucop.edu/nd/noidu_kt5?-'
 *
 * you could, for example, change the "myGoto" bindings for 500 noids
 * in that one shell command.  The output from each command in the file
 * will be separated from the next (on the standard output) by a blank line.
 *
 * =head1 NAME RESOLUTION AND REDIRECTION INTERFACE
 *
 * In a URI context, I<name resolution> is a computation, sometimes
 * multi-stage, that translates a name into associated information of a
 * particular type, often another name or an address.  A I<resolver> is
 * a system that can perform one or more stages of a resolution.  Noid
 * minters can be set up as resolvers.
 *
 * In our case, we're interested in automatically translating access
 * requests for each of a number of identifiers into requests for another
 * kind of identifier.  This is one tool in the persistent access strategy
 * for naming schemes such as URL, ARK, PURL, Handle, DOI, and URN.  You
 * can use a noid minter to bind a second name to each identifier, even
 * to identifiers that the minter did not generate.  In principle, this
 * will work with names from any scheme.
 *
 * With web browsers, a central mechanism for name resolution is known as the
 * server redirect, and mainstream web servers can easily be configured to
 * redirect a half million different names without suffering in performance.
 * You might choose not to use native web server redirects if you require
 * resolution of several million names, or if you require software and
 * infrastructure for non-URL-based names.  Whatever your choice, maintaining
 * a table that maps the first name to the second is an unavoidable burden.
 *
 * As with the URL interface, any number of resolvers (minters underneath)
 * can be operated behind a web server from a browser or a tool that
 * activates URLs.  This section describes a one-time set up procedure to
 * make your server aware of resolvers, followed by another set up procedure
 * for each resolver.  The one-time procedure involves creating a directory
 * in your web server document tree where you will place one or more noid
 * resolver databases.  In this example (and in the previous example),
 * we use F<htdocs/nd>:
 *
 * mkdir htdocs/nd
 * cp -p /usr/local/bin/noid htdocs/nd/
 *
 * The second command above creates an executable copy of the noid script
 * that will be linked to for each resolver you intend to expose.  To make
 * your server recognize such links, include the line (this is slightly
 * different from the similar line in the previous section),
 *
 * ScriptAliasMatch ^/nd/noidr(.*) "/srv/www/htdocs/nd/noidr$1"
 *
 * in your server configuration file.  If you did not install the supporting
 * F<Noid.pm> module normally, you may also have to store a copy of it next
 * to the script.  Then include the following lines in the configuration file;
 * they form the start of a rewriting rule section that you will add to later
 * for each resolver that you set up.
 *
 * RewriteEngine on
 * # These next two files and their containing
 * # directory should be owned by "wwwrun".
 * RewriteLock   /var/log/rewrite/lock
 * RewriteLog    /var/log/rewrite/log
 * ## RewriteLogLevel 9
 *
 * The non-comment lines above initialize the rewriting system, identify the
 * lock file used to synchronize access to the resolver, and identify the log
 * file which can help in finalizing the exact rewrite rules that you use;
 * disable logging with the default RewriteLogLevel value of 0, or set it as
 * high as 9, with higher numbers producing more detailed information.
 * This completes the one-time server set up for resolvers.
 *
 * Thereafter, for each resolver that you wish to run, you need to set up
 * a noid database and create a link of the form B<noidr…> so that the
 * noid script can be invoked in resolution mode.  Unlike the URL interface,
 * the resolution interface does not itself mint from the underlying minter.
 * A separate URL interface may still be set up to mint and bind identifiers
 * in the resolver database, or minting and binding can take place off the net.
 *
 * In what follows, we will assume that you have set up a noid database with
 * the same location and template as in the previous section.  As before, the
 * server is assumed to run under the user "wwwrun" and the database resides
 * in F<htdocs/nd/kt5>.  As if our intentions included persistent identification,
 * the minter in this example is for generating long term identifiers.
 *
 * cd htdocs/nd
 * noid dbcreate kt.reeded long 13030 cdlib.org dpg
 * mkdir kt5
 * mv NOID kt5/
 * ln noid noidr_kt5
 *
 * The last command makes a new hard link (not a soft link) to the noid
 * script, which for this resolver will be invoked under the name B<noidr_kt5>.
 * The resolution interface is not called by a URL directly, but is invoked
 * once upon server startup, where the B<noidr…> prefix tells it to run in
 * resolution mode.  In this mode it loops, waiting for and responding to
 * individual resolution attempts from the server itself.
 *
 * To set up an individual resolver, define a Rewrite Map followed by a set
 * of Rewrite Rules.  This is done using server configuration file lines as
 * shown in the next example.  As with any change to the file, you will need
 * to restart the server before it will have the desired effect.
 *
 * # External resolution; start program once on server start
 * RewriteMap  rslv           prg:/srv/www/htdocs/nd/noidr_kt5
 * # Main lookup; add artificial prefix for subsequent testing
 * RewriteRule ^/ark:/(13030/.*)$ "_rslv_${rslv:get $1 myGoto}"
 *
 * # Test: redirect [R] if it looks like a redirect
 * RewriteRule ^_rslv_([^:]*://.*)$    $1 [R]
 * # Test: strip prefix; pass through [PT] if intended for us
 * RewriteRule ^_rslv_(/.*)$           $1 [PT]
 * # Test: restore value if lookup failed; let come what may
 * RewriteRule ^_rslv_$                %{REQUEST_URI}
 * # Alternative: redirect failed lookup to a global resolver
 *
 * When a request received by the server matches a Rewrite Rule, an attempt
 * to resolve it via the running B<noidr…> script is made.  In this example,
 * we will need to have bound a string representing a URL to the value for
 * the fixed element name "myGoto" under each identifier that we wish to be
 * resolvable.  Building on the example from the previous section, assume the
 * element "myGoto" holds the same URL as before for the noid C<13030/kt639k9>.
 * A browser retrieval request made by entering or clicking on
 *
 * http://foo.ucop.edu/ark:/13030/kt639k9
 *
 * would then result in a server redirect to
 *
 * http://foo.ucsd.edu/
 *
 * The resolution result for an identifier is whatever the B<get> returns,
 * which could as easily have retrieved a stored value as a rule-based
 * value (allowing you to redirect many similar identifiers with one rule).
 *
 * This approach to resolution does not address resolver discovery.
 * An identifier found in the wild need not easily reveal whether it is
 * actionable or resolvable, let alone which system or resolver to ask.
 * The usual strategy for contemporary (web era) identifier schemes relies
 * on well-known, scheme-dependent resolvers and web proxying of identifiers
 * embedded in URLs.  For example, global resolution for a non-proxied URN
 * or Handle uses an undisclosed internet address, hard-coded into the
 * resolver program, from which to start the resolution process.  An ARK,
 * PURL, or proxied Handle or URN tends to rely on a disclosed starting point.
 * Whatever method is used for discovery, a noid resolver can in principle
 * be used to resolve identifiers from any scheme.
 *
 * =head1 NOID CHECK DIGIT ALGORITHM
 *
 * The following describes the Noid Check Digit Algorithm (NCDA).  Digits
 * in question are actually "extended digits", or I<xdigits>, which form
 * an ordered set of I<R> digits and characters.  This set has radix I<R>.
 * In the examples below, we use a specific set of I<R=29> xdigits.
 *
 * When applied to substrings of well-formed identifiers, where the length
 * of the substring is less than I<R>, the NCDA is "perfect" for single digit
 * and transposition errors, by far the most common user transcription errors
 * (see David Bressoud, Stan Wagon, "Computational Number Theory", 2000, Key
 * College Publishing).  The NCDA is complemented by well-formedness rules
 * that confirm the placement of constant data, including fixed labels and
 * any characters that are not extended digits.  After running the NCDA on
 * the selected substring, the resulting check digit, an xdigit actually,
 * is used either for comparing with a received check digit or for appending
 * to the substring prior to issuing the identifier that will contain it.
 *
 * For the algorithm to work, the substring in question must be less than
 * I<R> characters.  The extended digit set used in the current instance is
 * a sequence of I<R=29> printable characters defined as follows:
 *
 * xdigit:  0  1  2  3  4  5  6  7  8  9  b  c  d  f  g
 *  value:  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14
 *
 * xdigit:  h  j  k  m  n  p  q  r  s  t  v  w  x  z
 *  value: 15 16 17 18 19 20 21 22 23 24 25 26 27 28
 *
 * Each xdigit in the identifier has the corresponding ordinal value shown.
 * Any character not in the xdigit set is considered in the algorithm to
 * have an ordinal value of zero.
 *
 * A check digit is an xdigit computed from the base substring and then
 * appended to form the "checked substring" (less than I<R+1> characters long).
 * To determine if a received identifier has been corrupted by a single
 * digit or transposition error, the relevant substring is extracted and
 * its last character is compared to the result of the same computation
 * performed on the preceding substring characters.
 *
 * The computation has three steps.  Consider a base substring (no check
 * digit appended) such as
 *
 * C<    13030/xf93gt2        >(base substring)
 *
 * Step 1.  Check that the substring is well-formed, that is, that all
 * non-xdigit characters (often constant character data) are exactly
 * where expected; if not, the substring is not well-formed and the
 * computation aborts.  (This step is required to accommodate characters
 * such as "/" that contribute nothing to the overall computation.)
 *
 * Step 2.  Multiply each character's ordinal value by its position number
 * (starting at position 1), and sum the products.  For example,
 *
 * char:  1   3   0   3   0   /   x   f   9   3   g   t   2
 *  ord:  1   3   0   3   0   0  27  13   9   3  14  24   2
 *  pos:  1   2   3   4   5   6   7   8   9  10  11  12  13
 * prod:  1 + 6 + 0 +12 + 0 + 0+189+104 +81 +30+154+288 +26=891
 *
 * Step 3.  The check digit is the xdigit whose ordinal value is that sum
 * modulo I<R> (divide the sum by I<R> and take the remainder).
 *
 * In the example, I<891 = 21> mod I<R> (29) and so the check digit is C<q>.
 * This is appended to obtain the "checked substring", which is
 *
 * C<    13030/xf93gt2q       >(substring with check digit appended)
 *
 * What follows is a two-part proof that this algorithm is "perfect" with
 * respect to single digit and transposition errors.
 *
 * B<Lemma 1:>  The NCDA is guaranteed against single-character errors.
 *
 * Proof:  We must prove that if two strings differ in one single character,
 * then the check digit (xdigit) also differs.  If the I<n>-th xdigit's
 * ordinal is I<d> in one string and I<e> in another, the sums of products
 * differ only by
 *
 * (… + nd + …) - (… + ne + …) = n(d - e)
 *
 * The check digits differ only if I<n(d - e)> is not I<0> mod I<R>.  Assume
 * (contrapositively) that I<n(d - e)> does equal I<0> mod I<R>.  First, we
 * know that I<n(d - e)> is not zero because I<n> is positive and I<d> is
 * different from I<e>.  Therefore, there must be at least one positive
 * integer I<i> such that
 *
 * n(d - e) = Ri     =>     (n/i)(d - e) = R
 *
 * Now, because I<R> is prime,
 *
 * either   (a)  n/i = 1    and   d - e = R
 * or       (b)  n/i = R    and   d - e = 1
 *
 * But (a) cannot hold because xdigit ordinals differ by at most I<R-1>.
 * This leaves (b), which implies that there is an integer I<i = n/R>.
 * But since I<R> is prime and I<n> (a position number) is a positive
 * integer less than I<R>, then I<S<0 E<lt> i E<lt> 1>>, which cannot be true.
 * So the check digits must differ.
 *
 * B<Lemma 2:>  The NCDA is guaranteed against transposition of two single
 * characters.
 *
 * Proof:  Non-contributing characters (non-xdigits) transposed with other
 * characters will be detected in Step 1 when checking the constraints for
 * well-formedness (e.g., the "/" must be at position 6 and only at position
 * 6).  Therefore we need only consider transposition of two xdigit
 * characters.  We must prove that if one string has an xdigit of ordinal
 * I<e> in position I<i> and an xdigit of ordinal I<d> in position I<j>,
 * and if another string is the same except for having I<d> in position I<i>
 * and I<e> in position I<j>, then the check digits also differ.  The sums
 * of the products differ by
 *
 * (… + ie + … + jd + …) - (… + id + … + je + …)
 * = (ie + jd) - (id + je) = e(i - j) + d(j - i)
 * =  d(j - i) - e(j - i)  = n(d - e)
 *
 * where I<< n = j - i > 0 >> and I<< n < R >>.  The check digits differ
 * only if I<n(d - e)> is not I<0> mod I<R>.  This reduces to the central statement
 * of Lemma 1, which has been proven.
 *
 * =head1 TO DO
 *
 * Add features that are documented but not implemented yet:  Element-Value
 * binding upon minting; the B<peppermint> command.  The B<append> and
 * B<prepend> kinds of binding currently have string-level semantics
 * (new data is added as characters to an existing element); should there
 * also be list-level semantics (new data added as an extra subelement)?
 *
 * Add extra options for B<dbcreate>.  An option to specify one or more
 * identifier labels to strip from requests, and one canonical label to
 * add upon minting and reporting.  An option to set the initial seed for
 * quasi-random ordering.  Utilize the granular BerkeleyDB transaction and
 * locking protection mechanisms.
 *
 * Extend the Template Mask to allow for other character repertoires with
 * prime numbers of elements.  These would trade some eye-friendliness
 * for much more compact identifiers (cf. UUID/GUID), possibly also a way
 * of asking that the last character of the repertoire only appear in the
 * check character (e.g., for i and x below).
 *
 * { 0-9 x }             cardinality 11, mask char i
 * { 0-9 a-f _ }             cardinality 17, mask char x
 * { 0-9 a-z _ }             cardinality 37, mask char v
 * { 1-9 b-z B-Z } - {l, vowels}     cardinality 47, mask char E
 * { 0-9 a-z A-Z # * + @ _ }     cardinality 67, mask char w
 * Visible ASCII - { % - . / \ }      cardinality 89, mask char c
 *
 * Add support for simple file management associated with identifiers.
 * For example, minting (and reminting) the noid C<xv8t984> could result in
 * the creation (and re-creation) of a corresponding canonical directory
 * C<xv/8t/98/4/>.
 *
 * =head1 BETA SOFTWARE
 *
 * This utility is in the beta phase of development.  It is open source
 * software written in the Perl scripting language with strictest type,
 * value, and security checking enabled.  While its readiness for
 * long term application is still being evaluated, it comes with a
 * growing suite of regression tests (currently about 250).
 *
 * =head1 COPYRIGHT AND LICENSE
 *
 * Copyright 2002-2006 UC Regents.  BSD-type open source license.
 *
 * =head1 BUGS
 *
 * Under case-insensitive file systems (e.g., Mac OS X), there is a chance
 * for conflicts between the directory name F<NOID>, script name F<noid>,
 * and module documentation requested (via perldoc) as F<Noid>.
 *
 * Not yet platform-independent.
 *
 * Please report bugs to jak at ucop dot edu.
 *
 * =head1 FILES
 *
 * =over 17
 *
 * =item F<NOID>
 *
 * directory containing all database files related to a minter
 *
 * =item F<NOID/noid.bdb>
 *
 * the BerkeleyDB database file at the heart of a minter
 *
 * =item F<NOID/README>
 *
 * the creation record containing minter analysis details
 *
 * =back
 *
 * =head1 SEE ALSO
 *
 * L<dbopen(3)>, L<perl(1)>, L<uuidgen(1)>, L<http://www.cdlib.org/inside/diglib/ark/>
 *
 * =head1 AUTHORS
 *
 * John A. Kunze, Michael A. Russell
 *
 * =head1 PREREQUISITES
 *
 * Perl Modules: L<Noid>, L<BerkeleyDB>, L<Config>, L<Text::ParseWords>,
 * L<Getopt::Long>, L<Fcntl>, L<Sys::Hostname>
 *
 * Script Categories:
 *
 * =pod SCRIPT CATEGORIES
 *
 * CGI
 * UNIX : System_administration
 * Web
 *
 * =cut
 */