/
tidyword.phx.php
79 lines (59 loc) · 2.34 KB
/
tidyword.phx.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
<?php
// Clean up HTML pasted from Microsoft Word: drop <font> tags, turn multi-byte
// UTF-8 characters into numeric HTML entities, strip inline styles and remove
// the class="MsoNormal" attribute. The cleaned markup is returned to whatever
// included this file.

// Get the Word infested input.
// NOTE(review): $output is not defined in this file; presumably it is supplied
// by the including context - confirm against the caller.
$text = $output;

// Remove font tags (the text they wrap is kept).
$text = strip_selected_tags($text, "<font>");

// Remove weird quotes and accents by rewriting every multi-byte UTF-8 sequence
// as a decimal numeric entity (&#NNNN;).
// Based on http://uk3.php.net/manual/en/function.preg-replace.php#64828
// The original used the /e (eval) pattern modifier, which was removed in
// PHP 7.0; preg_replace_callback() performs the same arithmetic without eval.
// Continuation bytes are required to be in 0x80-0xBF so that stray high bytes
// in malformed input are left untouched instead of swallowing the characters
// that follow them.
$text = preg_replace_callback(
    '/[\xc2-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf4][\x80-\xbf]{3}/',
    function ($match) {
        $bytes = $match[0];
        $length = strlen($bytes);
        // Payload bits of the lead byte: 5 bits for a 2-byte sequence,
        // 4 bits for a 3-byte sequence, 3 bits for a 4-byte sequence.
        $code = ord($bytes[0]) & (0xff >> ($length + 1));
        // Every continuation byte contributes its low 6 bits.
        for ($i = 1; $i < $length; $i++) {
            $code = ($code << 6) | (ord($bytes[$i]) & 0x3f);
        }
        return '&#' . $code . ';';
    },
    $text
);

// Strip inline styles
$text = strip_styles($text);

// Remove class="MsoNormal"
$text = str_replace('class="MsoNormal"', '', $text);

// Return it
return $text;
/**
 * strip_selected_tags ( string str [, string strip_tags[, strip_content flag]] )
 * ---------------------------------------------------------------------
 * Like strip_tags() but inverse; the strip_tags tags will be stripped, not kept.
 * Based on http://uk3.php.net/manual/en/function.preg-replace.php#71266
 *
 * Tag names are matched case-insensitively and as whole names, so stripping
 * "<b>" leaves "<br>" and "<body>" alone. Opening and closing tags are removed
 * independently of each other, so nested or unbalanced occurrences are
 * stripped as well.
 *
 * @param string $str          Markup to clean.
 * @param string $tags         Tags to strip, ex: "<a><p><quote>" etc.
 * @param bool   $stripContent TRUE will also strip everything between an
 *                             opening tag and the next matching closing tag.
 *
 * @return string The markup without the selected tags.
 */
function strip_selected_tags($str, $tags = "", $stripContent = false)
{
    preg_match_all('/<([^>]+)>/', $tags, $allTags, PREG_PATTERN_ORDER);

    foreach ($allTags[1] as $tag) {
        $tag = trim($tag);
        if ($tag === '') {
            continue;
        }

        // Escape the name so characters such as ":" in Word's "o:p" (or the
        // "%" delimiter itself) cannot alter the pattern.
        $name = preg_quote($tag, '%');

        if ($stripContent) {
            // Drop each element together with everything it wraps.
            $result = preg_replace(
                '%<' . $name . '(?=[\s/>])[^>]*>.*?</' . $name . '\s*>%is',
                '',
                $str
            );
            if ($result !== null) {
                $str = $result;
            }
        }

        // Remove the tags themselves, keeping the text between them. In
        // content-stripping mode this only cleans up tags left without a
        // partner. The lookahead makes sure the name ends here, so a longer
        // tag name that merely starts with $tag is not touched.
        $result = preg_replace('%</?' . $name . '(?=[\s/>])[^>]*>%i', '', $str);
        if ($result !== null) {
            // preg_replace() returns null on a PCRE error; keep the input then.
            $str = $result;
        }
    }

    return $str;
}
// Remove styles
// Based on http://uk3.php.net/manual/en/function.preg-replace.php#63219
/**
 * Strip inline CSS declarations ("property: value;") from a piece of markup
 * and then remove any attribute that ends up empty (e.g. style="").
 *
 * Declarations whose property name is listed in $exceptions are kept.
 *
 * @param string|null $source Markup to clean; NULL is treated as an empty string.
 *
 * @return string The markup with the style declarations removed.
 */
function strip_styles($source=NULL) {
    // Property names that must survive; separate several names with "|".
    $exceptions = 'text-align';

    $source = (string) $source;
    if ($source === '') {
        return $source;
    }

    /* First we want to fix anything that might potentially break the style stripper, so we
     * replace in-text instances of ":" (everything between a ">" and the next "<") with its
     * HTML entity. Otherwise the declaration pattern below would treat ordinary text that
     * contains a colon as a style declaration and delete it.
     *
     * A closure is used instead of a nested named function: a named function declared
     * inside this body would be redeclared - a fatal error - on the second call.
     */
    $source = preg_replace_callback(
        '@>(.*)<@Us',
        function ($match) {
            return str_replace(':', '&#58;', $match[0]);
        },
        $source
    );

    // Match "property: value;" unless the property is an exception or the colon
    // starts a URL-like "://…/" sequence or is directly followed by "<" or ">".
    $regexp = '@([^;"]+)?(?<!'. $exceptions. ')(?<!\>\w):(?!\/\/(.+?)\/|<|>)((.*?)[^;"]+)(;)?@is';
    $source = preg_replace($regexp, '', $source);

    // Drop attributes that are now empty, such as style="".
    $source = preg_replace('@[a-z]*=""@is', '', $source);

    return $source;
}
?>
?>