Major restructuring of all Text_Wiki parsing and rendering rules is r…

…eflected by the addition of these files. Of note: * Rules are now split up: one class for parse, another class for each render-format * Formats themselves (XHTML, DocBook, Plain, etc) have their own class and get their own pre- and post-render methods * Instead of set/getRuleConf(), mostly use set/getRenderConf() instead * Wikilinks parser uses numbers as lower-case letters * List parser has better token type names * Xhtml List renders with proper semantics for nested lists (per notes from Michael Wallner and others) * Xhtml List and Table renderers apply CSS classes now instead of inline attributes * Renamed all Entities rules to Translatehtml (more accurately descriptive) * In Translatehtml parser, added conf option for translation table type (HTML_ENTITIES, HTML_SPECIALCHARS, etc) -- helps with i18n * In abstract parser, renamed getOptions() to getAttrs() (more accurately descriptive for parsing markup attributes) git-svn-id: https://svn.php.net/repository/pear/packages/Text_Wiki/trunk@160658 c90b9560-bf6c-de11-be94-00142212c4b1
horde · Jun 6, 2004 · 241808f · 241808f
1 parent c38d93f
commit 241808f
Show file tree

Hide file tree

Showing 71 changed files with 5,143 additions and 0 deletions.
diff --git a/Text/Wiki/Parse.php b/Text/Wiki/Parse.php
@@ -0,0 +1,253 @@
+<?php
+
+/**
+* 
+* Baseline rule class for extension into a "real" parser component.
+* 
+* Text_Wiki_Rule classes do not stand on their own; they are called by a
+* Text_Wiki object, typically in the transform() method. Each rule class
+* performs three main activities: parse, process, and render.
+* 
+* The parse() method takes a regex and applies it to the whole block of
+* source text at one time. Each match is sent as $matches to the
+* process() method.
+* 
+* The process() method acts on the matched text from the source, and
+* then processes the source text in some way.  This may mean the
+* creation of a delimited token using addToken().  In every case, the
+* process() method returns the text that should replace the matched text
+* from parse().
+* 
+* @author Paul M. Jones <pmjones@ciaweb.net>
+* 
+* @package Text_Wiki
+* 
+* $Id$
+* 
+*/
+
+class Text_Wiki_Parse {
+
+
+	/**
+	*
+	* Configuration options for this parser rule.
+	*
+	* The constructor merges into this array any entry found under this
+	* rule's name in the calling object's $parseConf array.
+	*
+	* @access public
+	*
+	* @var array
+	*
+	*/
+
+	var $conf = array();
+
+
+	/**
+	*
+	* Regular expression to find matching text for this rule.
+	*
+	* Used by parse() as the pattern for preg_replace_callback(); it is
+	* null here and must be supplied before parse() is called.
+	*
+	* @access public
+	*
+	* @var string
+	*
+	* @see parse()
+	*
+	*/
+
+	var $regex = null;
+
+
+	/**
+	*
+	* The name of this rule for new token array elements.
+	*
+	* Set by the constructor from the class name.
+	*
+	* @access public
+	*
+	* @var string
+	*
+	*/
+
+	var $rule = null;
+
+
+	/**
+	*
+	* A reference to the calling Text_Wiki object.
+	*
+	* This is needed so that each rule has access to the same source
+	* text, token set, URLs, interwiki maps, page names, etc.
+	*
+	* @access public
+	*
+	* @var object
+	*
+	*/
+
+	var $wiki = null;
+
+
+	/**
+	*
+	* Constructor for this parser rule.
+	*
+	* Stores a reference to the calling object, derives the rule name
+	* from the class name, and merges any rule-specific configuration
+	* over the defaults in $conf.
+	*
+	* @access public
+	*
+	* @param object &$obj The calling "parent" Text_Wiki object.
+	*
+	*/
+
+	function Text_Wiki_Parse(&$obj)
+	{
+		// set the reference to the calling Text_Wiki object;
+		// this allows us access to the shared source text, token
+		// array, etc.
+		$this->wiki =& $obj;
+
+		// set the name of this rule; generally used when adding
+		// to the tokens array. strip off the Text_Wiki_Parse_ portion
+		// (the first 16 characters of the class name), then lower-case
+		// the remainder and capitalize its first letter.
+		// text_wiki_parse_
+		// 0123456789012345
+		$tmp = substr(get_class($this), 16);
+		$this->rule = ucwords(strtolower($tmp));
+
+		// override config options for the rule if specified; values
+		// from the calling object win over the defaults in $this->conf
+		if (isset($this->wiki->parseConf[$this->rule]) &&
+			is_array($this->wiki->parseConf[$this->rule])) {
+
+			$this->conf = array_merge(
+				$this->conf,
+				$this->wiki->parseConf[$this->rule]
+			);
+
+		}
+	}
+
+
+	/**
+	*
+	* Abstract method to parse source text for matches.
+	*
+	* Applies the rule's regular expression to the source text, passes
+	* every match to the process() method, and replaces the matched text
+	* with the results of the processing.  The result is written back
+	* to the calling object's $source property.
+	*
+	* @access public
+	*
+	* @return void
+	*
+	* @see Text_Wiki_Parse::process()
+	*
+	*/
+
+	function parse()
+	{
+		$this->wiki->source = preg_replace_callback(
+			$this->regex,
+			array(&$this, 'process'),
+			$this->wiki->source
+		);
+	}
+
+
+	/**
+	*
+	* Abstract method to generate replacements for matched text.
+	*
+	* @access public
+	*
+	* @param array $matches An array of matches from the parse() method
+	* as generated by preg_replace_callback.  $matches[0] is the full
+	* matched string, $matches[1] is the first matched pattern,
+	* $matches[2] is the second matched pattern, and so on.
+	*
+	* @return string The processed text replacement; defaults to the
+	* full matched string (i.e., no changes to the text).
+	*
+	* @see Text_Wiki_Parse::parse()
+	*
+	*/
+
+	function process(&$matches)
+	{
+		return $matches[0];
+	}
+
+
+	/**
+	*
+	* Simple method to safely get configuration key values.
+	*
+	* @access public
+	*
+	* @param string $key The configuration key.
+	*
+	* @param mixed $default If the key does not exist (or its value is
+	* null), return this value instead.
+	*
+	* @return mixed The configuration key value (if it exists) or the
+	* default value (if not).
+	*
+	*/
+
+	function getConf($key, $default = null)
+	{
+		if (isset($this->conf[$key])) {
+			return $this->conf[$key];
+		}
+
+		return $default;
+	}
+
+
+	/**
+	*
+	* Extract 'attribute="value"' portions of wiki markup.
+	*
+	* This kind of markup is typically used only in macros, but is useful
+	* anywhere.
+	*
+	* The syntax is pretty strict; there can be no spaces between the
+	* option name, the equals, and the first double-quote; the value
+	* must be surrounded by double-quotes.  You can escape characters in
+	* the value with a backslash, and the backslash will be stripped for
+	* you.
+	*
+	* If a value has no closing double-quote, the rest of that section
+	* is taken as the value and any sections after it are ignored,
+	* because the name of the next attribute cannot be determined.
+	*
+	* @access public
+	*
+	* @param string $text The "attributes" portion of markup.
+	*
+	* @return array An associative array of key-value pairs where the
+	* key is the option name and the value is the option value.
+	*
+	*/
+
+	function getAttrs($text)
+	{
+		// find the =" sections; the first piece is the first key, and
+		// each later piece holds a value, its closing quote, and then
+		// (optionally) the name of the next key.
+		$tmp = explode('="', trim($text));
+
+		// basic setup
+		$attrs = array();
+		$key = null;
+
+		// loop through the sections
+		foreach ($tmp as $i => $val) {
+
+			// first element is always the first key
+			if ($i == 0) {
+				$key = trim($val);
+				continue;
+			}
+
+			// an earlier unterminated value left us without a key
+			// to attach this section to, so skip it.
+			if ($key === null) {
+				continue;
+			}
+
+			// find the last double-quote in the value.
+			$pos = strrpos($val, '"');
+
+			// no closing quote at all: strrpos() returned false, which
+			// must not be used as an offset.  keep the unterminated
+			// text as the value and stop carrying a key forward.
+			if ($pos === false) {
+				$attrs[$key] = stripslashes($val);
+				$key = null;
+				continue;
+			}
+
+			// the part to the left is the value for the last key,
+			// the part to the right is the next key name
+			$attrs[$key] = stripslashes(substr($val, 0, $pos));
+			$key = trim(substr($val, $pos+1));
+
+		}
+
+		return $attrs;
+
+	}
+}
+?>