@@ -3,177 +3,150 @@ var sys = require('sys'),
3
3
util = require ( 'util' ) ,
4
4
_ = require ( 'underscore' ) ,
5
5
htmlparser = require ( 'htmlparser2' ) ,
6
- xml2js = require ( 'xml2js' ) ,
7
- html = require ( 'html' ) ;
6
+ html = require ( 'html' ) ,
7
+ schema = require ( './schema' ) ,
8
+ elements = schema [ "xs:schema" ] [ "xs:element" ] ,
9
+ complex = schema [ "xs:schema" ] [ "xs:complexType" ] ;
8
10
9
- var xml_parser = new xml2js . Parser ( ) ,
10
- schema ,
11
- elements ,
12
- headers = [ 'h1' , 'h2' , 'h3' , 'h4' , 'h5' , 'h6' ] ,
11
// Tag names that traverse() treats as the start of a new section.
var headers = [
  'h1',
  'h2',
  'h3',
  'h4',
  'h5',
  'h6'
];

// Values written into the data-type attribute of generated <section> tags,
// looked up by numeric level (see section_starter).
var heirarchy = [
  'bookmaindiv',
  'sect1',
  'sect2',
  'sect3',
  'sect4',
  'sect5',
  'sect6'
];
14
13
15
- var helpers = {
16
- existy : function ( x ) {
17
- return x != null ;
18
- } ,
19
- // Converts an integer to 2 space indentation.
20
- indentation : function ( n ) {
14
+ ( function ( ) {
15
// Small general-purpose utilities used by the converter.
var helpers = {
  // Returns true for every value except null and undefined
  // (loose != null deliberately matches both).
  existy: function (x) {
    return x != null;
  },
  // Converts an integer nesting depth to indentation, two spaces per level.
  // A negative, fractional or non-numeric depth is clamped/floored so the
  // result is always a (possibly empty) string rather than an exception.
  indentation: function (n) {
    var depth = Math.max(0, Math.floor(Number(n)) || 0);
    // Joining depth + 1 empty slots yields exactly `depth` separators.
    return new Array(depth + 1).join("  ");
  }
};
26
+
27
// Removes skipped heading levels so that no entry is more than one level
// deeper than the entry before it, e.g. [1, 3, 5] -> [1, 2, 3].
//   arr: array of numeric heading levels in document order.
// Returns a new array of the same length; the input is not modified.
// A null/undefined input yields an empty array.
var normalize_headings = function (arr) {
  var normalized = [];
  if (arr == null) {
    return normalized;
  }
  for (var i = 0; i < arr.length; i++) {
    var level = arr[i];
    // Compare with the already-normalized predecessor: comparing with the raw
    // predecessor lets consecutive jumps (1, 3, 5) leave a gap (1, 2, 4).
    if (i > 0 && level > normalized[i - 1] + 1) {
      level = normalized[i - 1] + 1;
    }
    normalized.push(level);
  }
  return normalized;
};
37
+
38
// Repeats `str` n times and glues the copies together with `connector`.
var concat_times = function (n, str, connector) {
  var copies = _.times(n, function () {
    return str;
  });
  return copies.join(connector);
};
25
- }
26
-
27
- var normalize_headings = function ( arr ) {
28
- return _ . map ( arr , function ( n , i ) {
29
- if ( i == 0 )
30
- return n
31
- else if ( n > arr [ i - 1 ] + 1 )
32
- return arr [ i - 1 ] + 1
33
- else
34
- return n
35
- } ) ;
36
- }
37
-
38
- var concat_times = function ( n , str , connector ) {
39
- return _ . times ( n , function ( ) {
40
- return str
41
- } ) . join ( connector )
42
- }
43
-
44
- var close_sections = function ( dom ) {
45
- doc_headings = [ ] ;
46
-
47
- _ . forEach ( dom , function ( node ) {
48
- if ( helpers . existy ( node . name ) && _ . contains ( headers , node . name ) ) {
49
- doc_headings . push ( parseInt ( node . name . substr ( 1 ) ) ) ;
50
- }
51
- } ) ;
52
-
53
- console . log ( 'normalize' , normalize_headings ( doc_headings ) ) ;
54
- }
55
-
56
- // Parse the html input and pass off to the traverse callback
57
- var parse = function ( raw_html , callback ) {
58
- var handler = new htmlparser . DomHandler ( function ( error , dom ) {
59
- if ( error ) {
60
- console . log ( 'error dog' ) ; process . exit ( 1 )
61
- } else {
62
- closings = 0 ;
63
- openings = 0 ;
64
- console . log ( callback ( dom ) + concat_times ( openings - closings , "</section>" , "\n" ) ) ;
65
- }
66
- } ) ;
67
- var parser = new htmlparser . Parser ( handler ) ;
68
- parser . write ( raw_html ) ;
69
- parser . done ( )
70
- }
71
-
72
- // Converts an object of attributes to a string.
73
- // TODO: verify that all attributes are getting converted properly.
74
- var attribs_to_string = function ( obj ) {
75
- if ( ! helpers . existy ( obj ) )
76
- return ""
77
-
78
- return _ . reduce ( _ . pairs ( obj ) , function ( memo , v ) {
79
- return memo + " " + v [ 0 ] + "='" + v [ 1 ] + "'"
80
- } , "" ) ;
81
- }
82
-
83
- // Construct an opening tag with the specified attributes.
84
- var open_tag = function ( node ) {
85
- return "<" + node . name + attribs_to_string ( node . attribs ) + ">"
86
- }
87
-
88
- // TODO: Self closing tags
89
- var close_tag = function ( node ) {
90
- return "</" + node . name + ">"
91
- }
92
-
93
- var section_starter = function ( diff , level ) {
94
- return _ . times ( diff , function ( ) { return "</section>" } ) . join ( "\n" ) + "\n<section data-type='" + heirarchy [ level ] + "'>"
95
- }
96
-
97
- var compare_headings = function ( book_section , book_heading , html_heading ) {
98
- var book_val = parseInt ( book_heading . substr ( 1 ) ) ;
99
- var html_val = parseInt ( html_heading . substr ( 1 ) ) ;
100
-
101
- if ( book_section === "chapter" ) {
102
- return { heading : "h1" , closings : 0 , heirarchy : 1 }
43
+
44
// Produces one "</section>" per section that was opened but not yet closed,
// separated by newlines.
//   o: count of sections opened, c: count of sections already closed
//   (parse() passes the openings and closings counters).
var close_sections = function (o, c) {
  var unclosed = o - c;
  return concat_times(unclosed, "</section>", "\n");
};
104
- else if ( book_val === html_val ) {
105
- return { heading : "h" + book_val , closings : 1 , heirarchy : _ . indexOf ( heirarchy , "sect" + book_val ) }
47
+
48
// Converter object wrapping the raw HTML that parse() later feeds to the parser.
//   input: HTML source string, stored on the instance as `input`.
//   opts, callback: accepted for callers that pass them; nothing in this
//   constructor reads them.
function HTMLBook(input, opts, callback) {
  // Tolerate a call without `new`, which would otherwise write `input` onto
  // whatever `this` happens to be (or throw in strict mode).
  if (!(this instanceof HTMLBook)) {
    return new HTMLBook(input, opts, callback);
  }
  this.input = input;
}
107
- else if ( book_val < html_val ) {
108
- return { heading : "h" + ( book_val + 1 ) , closings : 0 , heirarchy : _ . indexOf ( heirarchy , "sect" + ( book_val + 1 ) ) }
51
+
52
// Parses this.input with htmlparser2, converts the resulting DOM with
// traverse(), appends any still-missing </section> closers, and prints the
// result with console.log. Takes no arguments and returns nothing.
// On a parser error the error is reported on stderr and the process exits
// with status 1.
HTMLBook.prototype.parse = function () {
  var handler = new htmlparser.DomHandler(function (error, dom) {
    if (error) {
      // Include the error itself and use stderr so the failure is not mixed
      // into (or mistaken for) the converted document written to stdout.
      console.error('error dog', error);
      process.exit(1);
      return;
    }

    // These counters are assigned without `var`, so they are shared with
    // traverse(), which increments openings and adds to closings while it
    // walks the tree. They are reset here before each conversion.
    closings = 0;
    openings = 0;

    // traverse() must run first: close_sections() reads the counters it updates.
    var converted = traverse(dom);
    var trailing = close_sections(openings, closings);
    console.log(converted + trailing);
  });

  var parser = new htmlparser.Parser(handler);
  parser.write(this.input);
  parser.done();
};
110
- else if ( book_val > html_val ) {
111
- return { heading : "h" + html_val , closings :( book_val - html_val + 1 ) , heirarchy : _ . indexOf ( heirarchy , "sect" + html_val ) }
67
+
68
// Converts an object of attributes to a string.
//   obj: map of attribute name -> value, or null/undefined.
// Returns "" when obj is missing; otherwise " name='value'" for each own
// enumerable key, in key order. Values are emitted inside single quotes, so
// any single quote within a value is written as &#39; to keep it from ending
// the attribute early.
var attribs_to_string = function (obj) {
  if (obj == null) {
    return "";
  }

  var names = Object.keys(obj);
  var result = "";
  for (var i = 0; i < names.length; i++) {
    var name = names[i];
    var value = String(obj[name]).replace(/'/g, "&#39;");
    result += " " + name + "='" + value + "'";
  }
  return result;
};
77
+
78
// Builds the opening tag for a node: "<", the node's name, its attributes
// rendered by attribs_to_string, then ">".
var open_tag = function (node) {
  var rendered_attribs = attribs_to_string(node.attribs);
  return "<" + node.name + rendered_attribs + ">";
};
82
+
83
// Builds the closing tag matching a node's name, e.g. "</p>".
var close_tag = function (node) {
  var tag_name = node.name;
  return "</" + tag_name + ">";
};
86
+
87
// Emits `diff` closing </section> tags (newline-separated), then a newline
// and an opening <section> whose data-type is heirarchy[level].
var section_starter = function (diff, level) {
  var closers = _.times(diff, function () {
    return "</section>";
  });
  var opener = "<section data-type='" + heirarchy[level] + "'>";
  return closers.join("\n") + "\n" + opener;
};
113
- }
114
-
115
- var traverse = function ( dom_tree , htmlbook_tracker ) {
116
- // Set depth if not passed.
117
- if ( ! helpers . existy ( htmlbook_tracker ) )
118
- htmlbook_tracker = { "heirarchy" : 0 }
119
- output = ""
120
-
121
- _ . forEach ( dom_tree , function ( node , i ) {
122
- // When the node is a text type, it has no children, just return it.
123
- if ( node . type === "text" ) {
124
- output += node . data
125
- // Check to see if this node is a header, which should signal the start of
126
- // a new section.
127
- } else if ( _ . contains ( headers , node . name ) ) {
128
- openings += 1
129
- // output += section_starter(htmlbook_tracker, node);
130
- bookpart = _ . find ( complex , function ( el ) {
131
- return el [ "$" ] [ "name" ] === heirarchy [ htmlbook_tracker . heirarchy ] ;
132
- } ) ;
133
- bookpart_heading = bookpart [ "xs:sequence" ] [ 0 ] [ "xs:element" ] [ 0 ] [ "$" ] [ 'ref' ]
134
- bookpart_name = bookpart [ "$" ] [ "name" ]
135
-
136
- if ( bookpart_name === "bookmaindiv" )
137
- bookpart_name = "chapter"
138
-
139
- r = compare_headings ( bookpart_name , bookpart_heading , node . name )
140
-
141
- htmlbook_tracker . heirarchy = r . heirarchy
142
- closings += r . closings
143
-
144
- node . name = r . heading
145
-
146
- output += section_starter ( r . closings , r . heirarchy ) + "\n" + open_tag ( node ) + traverse ( node . children , htmlbook_tracker ) + close_tag ( node )
147
-
148
- } else if ( helpers . existy ( node . children ) ) {
149
- // Something here to parse the tag and adjust its attribs to align with
150
- //
151
- output += open_tag ( node ) + traverse ( node . children , htmlbook_tracker ) + close_tag ( node ) ;
152
- }
153
- } ) ;
154
- return output ;
155
- }
156
-
157
- var htmlbook = function ( input ) {
158
- parse ( input , traverse ) ;
159
- }
160
-
161
- // When parsing finishes
162
- xml_parser . addListener ( 'end' , function ( result ) {
163
- // save the result
164
- schema = result ;
165
- elements = schema [ "xs:schema" ] [ "xs:element" ] ;
166
- complex = schema [ "xs:schema" ] [ "xs:complexType" ] ;
167
90
168
- fs . writeFile ( "schema.js" , JSON . stringify ( schema , null , 2 ) ) ;
91
// Decides how an incoming HTML heading maps onto the current book section.
//   book_section: name of the current section ("chapter" is special-cased).
//   book_heading: heading tag of the current section, e.g. "h2".
//   html_heading: heading tag met in the document, e.g. "h4".
// Returns { heading, closings, heirarchy }:
//   heading   - tag name the heading should be rewritten to,
//   closings  - how many </section> tags to emit first,
//   heirarchy - index into the heirarchy array for the new section.
// Returns undefined when either heading has no numeric level after its first
// character.
var compare_headings = function (book_section, book_heading, html_heading) {
  // Explicit radix so the level is always read as decimal.
  var book_val = parseInt(book_heading.substr(1), 10);
  var html_val = parseInt(html_heading.substr(1), 10);

  // Index of "sect<level>" in heirarchy, or -1 when there is no such entry.
  var section_index = function (level) {
    return heirarchy.indexOf("sect" + level);
  };

  if (book_section === "chapter") {
    return { heading: "h1", closings: 0, heirarchy: 1 };
  }
  if (book_val === html_val) {
    // Same level: close the current section and open a sibling.
    return { heading: "h" + book_val, closings: 1, heirarchy: section_index(book_val) };
  }
  if (book_val < html_val) {
    // Deeper heading: descend exactly one level, whatever the requested depth.
    return { heading: "h" + (book_val + 1), closings: 0, heirarchy: section_index(book_val + 1) };
  }
  if (book_val > html_val) {
    // Shallower heading: close every section down to and including that level.
    return { heading: "h" + html_val, closings: (book_val - html_val + 1), heirarchy: section_index(html_val) };
  }

  // Only reachable when a level parsed to NaN (every comparison is false).
  return undefined;
};
108
+
109
// Recursively serialises a list of parsed DOM nodes back to markup, starting
// a new <section> whenever a heading tag (h1-h6) is met.
//   dom_tree:         array of nodes to serialise.
//   htmlbook_tracker: object whose `heirarchy` property is the index of the
//                     current section in the heirarchy array; created with
//                     index 0 when omitted, and updated in place.
// Returns the markup for dom_tree as a string.
// Side effects: increments the shared `openings` counter and adds to the
// shared `closings` counter (both reset by parse()), and renames heading
// nodes in place.
var traverse = function (dom_tree, htmlbook_tracker) {
  // Set depth if not passed.
  if (!helpers.existy(htmlbook_tracker)) {
    htmlbook_tracker = { "heirarchy": 0 };
  }

  // Must be local: this function recurses, and a shared accumulator would be
  // reset by every nested call.
  var output = "";

  _.forEach(dom_tree, function (node) {
    if (node.type === "text") {
      // A text node has no children; emit its data as-is.
      output += node.data;
    } else if (_.contains(headers, node.name)) {
      // A heading signals the start of a new section.
      openings += 1;

      // Look up the schema entry for the section we are currently inside.
      var bookpart = _.find(complex, function (el) {
        return el["$"]["name"] === heirarchy[htmlbook_tracker.heirarchy];
      });
      var bookpart_heading = bookpart["xs:sequence"][0]["xs:element"][0]["$"]['ref'];
      var bookpart_name = bookpart["$"]["name"];

      if (bookpart_name === "bookmaindiv") {
        bookpart_name = "chapter";
      }

      var r = compare_headings(bookpart_name, bookpart_heading, node.name);

      htmlbook_tracker.heirarchy = r.heirarchy;
      closings += r.closings;

      // Rewrite the heading to the level chosen for the new section.
      node.name = r.heading;

      output += section_starter(r.closings, r.heirarchy) + "\n" + open_tag(node) + traverse(node.children, htmlbook_tracker) + close_tag(node);
    } else if (helpers.existy(node.children)) {
      // Any other element: emit it unchanged around its serialised children.
      // Nodes that are neither text nor have children are dropped.
      output += open_tag(node) + traverse(node.children, htmlbook_tracker) + close_tag(node);
    }
  });

  return output;
};
175
150
176
- // Start by parsing schema
177
- fs . readFile ( "../HTMLBook/schema/htmlbook.xsd" , function ( err , data ) {
178
- xml_parser . parseString ( data ) ;
179
- } ) ;
151
+ module . exports = function ( str ) { return new HTMLBook ( str ) } ;
152
+ } ) . call ( this ) ;
0 commit comments