Browse files

Small performance optimisation to normalise() function. It's called a …

…lot in the parser, so worth speeding up.
  • Loading branch information...
1 parent c4cb779 commit 718846f87dcb602021ce86feb12bc6456e442310 dgheath committed Aug 5, 2009
Showing with 5 additions and 9 deletions.
  1. +5 −9 page.php
View
14 page.php
@@ -530,16 +530,12 @@ function getField($selector) {
* @access public
*/
static function normalise($html) {
- $text = preg_replace('|<!--.*?-->|si', '', $html);
- $text = preg_replace('|<script[^>]*>.*?</script>|si', '', $text);
- $text = preg_replace('|<option[^>]*>.*?</option>|si', '', $text);
- $text = preg_replace('|<textarea[^>]*>.*?</textarea>|si', '', $text);
- $text = preg_replace('|<img[^>]*alt\s*=\s*"([^"]*)"[^>]*>|', ' \1 ', $text);
- $text = preg_replace('|<img[^>]*alt\s*=\s*\'([^\']*)\'[^>]*>|', ' \1 ', $text);
- $text = preg_replace('|<img[^>]*alt\s*=\s*([a-zA-Z_]+)[^>]*>|', ' \1 ', $text);
- $text = preg_replace('|<[^>]*>|', '', $text);
+ $text = preg_replace('#<!--.*?-->#si', '', $html);
+ $text = preg_replace('#<(script|option|textarea)[^>]*>.*?</\1>#si', '', $text);
+ $text = preg_replace('#<img[^>]*alt\s*=\s*("([^"]*)"|\'([^\']*)\'|([a-zA-Z_]+))[^>]*>#', ' \2\3\4 ', $text);
+ $text = preg_replace('#<[^>]*>#', '', $text);
$text = html_entity_decode($text, ENT_QUOTES);
- $text = preg_replace('|\s+|', ' ', $text);
+ $text = preg_replace('#\s+#', ' ', $text);
return trim(trim($text), "\xA0"); // TODO: The \xA0 is a &nbsp;. Add a test for this.
}
}

0 comments on commit 718846f

Please sign in to comment.