/
htmlex-filter.pl
executable file
·85 lines (75 loc) · 1.41 KB
/
htmlex-filter.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#! /usr/bin/env perl
# htmlex-filter.pl
# Copyright (C) 2003 by David Capello
#
# This file is released under the terms of the MIT license.
# Read LICENSE.txt for more information.
#
# Usage:
# htmlex file.htex | htmlex-filter.pl > file.html
#
# Collapse redundant whitespace outside <pre>...</pre> blocks.
use strict;
use warnings;

# filter_line($line, $pre_ref)
#
# Filter one input line and return the text to emit for it (possibly the
# empty string, when the line is dropped).
#
#   $line    - one line of input, with or without its terminator.
#   $pre_ref - reference to the current <pre> nesting depth; updated in
#              place when the line opens or closes <pre> elements.
#
# Outside <pre>: blank lines are dropped, leading and trailing blanks are
# removed and every run of whitespace is reduced to its first character
# (a tab is emitted as a space).  Inside <pre>: characters are passed
# through unchanged, except that trailing blanks at the end of the line
# are still removed.
#
# Only the bare tags "<pre>" and "</pre>" (in any letter case) are
# recognised; they are always emitted in lower case by the slow path.
sub filter_line {
    my ($line, $pre_ref) = @_;

    # Set the line terminator aside so that "\r\n" survives as a unit and
    # trailing blanks in front of it can be stripped just like for "\n".
    my $eol = '';
    if ($line =~ s/(\r?\n)\z//) {
        $eol = $1;
    }

    # remove trailing whitespace
    $line =~ s/[ \t]+\z//;

    # fast mode: not inside <pre>
    if (!$$pre_ref) {
        # nothing in this line?
        return '' if $line !~ /[^ \t\r\n]/;

        # the line does not open a <pre> block, so regexes are enough
        if ($line !~ /<pre>/i) {
            $line =~ s/^[ \t]+//;      # remove leading whitespace
            $line =~ s/[ \t]+/ /g;     # squeeze runs of blanks
            return $line . $eol;
        }
    }

    # slow mode: walk the line token by token, tracking <pre> nesting
    my $out      = '';
    my $in_space = 1;    # true at start of line and right after whitespace

    while ($line =~ m{\G(?:(<pre>)|(</pre>)|([ \t\r\n])|(.))}gis) {
        my ($open, $close, $blank, $other) = ($1, $2, $3, $4);

        if (defined $open) {
            $in_space = 0;
            $out .= '<pre>';
            $$pre_ref++;
        }
        elsif (defined $close) {
            $in_space = 0;
            $out .= '</pre>';
            # Never go below zero: a surplus </pre> must not switch
            # whitespace collapsing off for the rest of the input.
            $$pre_ref-- if $$pre_ref > 0;
        }
        elsif (defined $blank) {
            if ($$pre_ref) {
                # inside <pre>: keep whitespace verbatim
                $out .= $blank;
            }
            elsif (!$in_space) {
                # first whitespace of a run outside <pre>
                $in_space = 1;
                $out .= ($blank eq "\t") ? ' ' : $blank;
            }
        }
        else {
            $in_space = 0;
            $out .= $other;
        }
    }

    return $out . $eol;
}

# Main loop: filter STDIN to STDOUT line by line.
my $pre_depth = 0;
while (my $line = <STDIN>) {
    my $filtered = filter_line($line, \$pre_depth);
    print $filtered;
}