Skip to content

Commit 2ca5e9b

Browse files
author
Steve Klise
committed
once again using export
1 parent ae225a8 commit 2ca5e9b

17 files changed

+7861
-340
lines changed

index.js

+135-162
Original file line number | Diff line number | Diff line change
@@ -3,177 +3,150 @@ var sys = require('sys'),
33
util = require('util'),
44
_ = require('underscore'),
55
htmlparser = require('htmlparser2'),
6-
xml2js = require('xml2js'),
7-
html = require('html');
6+
html = require('html'),
7+
schema = require('./schema'),
8+
elements = schema["xs:schema"]["xs:element"],
9+
complex = schema["xs:schema"]["xs:complexType"];
810

9-
var xml_parser = new xml2js.Parser(),
10-
schema,
11-
elements,
12-
headers = ['h1','h2','h3','h4','h5','h6'],
11+
var headers = ['h1','h2','h3','h4','h5','h6'],
1312
heirarchy = ['bookmaindiv', 'sect1', 'sect2', 'sect3', 'sect4', 'sect5', 'sect6'];
1413

15-
var helpers = {
16-
existy: function (x) {
17-
return x != null;
18-
},
19-
// Converts an integer to 2 space indentation.
20-
indentation: function (n) {
14+
(function () {
15+
var helpers = {
16+
existy: function (x) {
17+
return x != null;
18+
},
19+
// Converts an integer to 2 space indentation.
20+
indentation: function (n) {
21+
return _.times(n, function () {
22+
return " "
23+
}).join("")
24+
}
25+
}
26+
27+
var normalize_headings = function (arr) {
28+
return _.map(arr, function (n, i) {
29+
if (i == 0)
30+
return n
31+
else if (n > arr[i-1]+1)
32+
return arr[i-1] + 1
33+
else
34+
return n
35+
});
36+
}
37+
38+
var concat_times = function (n, str, connector) {
2139
return _.times(n, function () {
22-
return " "
23-
}).join("")
40+
return str
41+
}).join(connector)
2442
}
25-
}
26-
27-
var normalize_headings = function (arr) {
28-
return _.map(arr, function (n, i) {
29-
if (i == 0)
30-
return n
31-
else if (n > arr[i-1]+1)
32-
return arr[i-1] + 1
33-
else
34-
return n
35-
});
36-
}
37-
38-
var concat_times = function (n, str, connector) {
39-
return _.times(n, function () {
40-
return str
41-
}).join(connector)
42-
}
43-
44-
var close_sections = function (dom) {
45-
doc_headings = [];
46-
47-
_.forEach(dom, function(node) {
48-
if (helpers.existy(node.name) && _.contains(headers,node.name)) {
49-
doc_headings.push(parseInt(node.name.substr(1)));
50-
}
51-
});
52-
53-
console.log('normalize', normalize_headings(doc_headings));
54-
}
55-
56-
// Parse the html input and pass off to the traverse callback
57-
var parse = function (raw_html, callback) {
58-
var handler = new htmlparser.DomHandler(function (error, dom) {
59-
if (error){
60-
console.log('error dog'); process.exit(1)
61-
} else {
62-
closings = 0;
63-
openings = 0;
64-
console.log(callback(dom) + concat_times(openings - closings, "</section>", "\n") );
65-
}
66-
});
67-
var parser = new htmlparser.Parser(handler);
68-
parser.write(raw_html);
69-
parser.done()
70-
}
71-
72-
// Converts an object of attributes to a string.
73-
// TODO: verify that all attributes are getting converted properly.
74-
var attribs_to_string = function (obj) {
75-
if (!helpers.existy(obj))
76-
return ""
77-
78-
return _.reduce(_.pairs(obj), function (memo, v) {
79-
return memo + " " + v[0] + "='" + v[1]+ "'"
80-
}, "");
81-
}
82-
83-
// Construct an opening tag with the specified attributes.
84-
var open_tag = function (node) {
85-
return "<" + node.name + attribs_to_string(node.attribs) + ">"
86-
}
87-
88-
// TODO: Self closing tags
89-
var close_tag = function (node) {
90-
return "</" + node.name + ">"
91-
}
92-
93-
var section_starter = function (diff, level) {
94-
return _.times(diff, function() {return "</section>"}).join("\n") + "\n<section data-type='" + heirarchy[level] + "'>"
95-
}
96-
97-
var compare_headings = function (book_section, book_heading, html_heading) {
98-
var book_val = parseInt(book_heading.substr(1));
99-
var html_val = parseInt(html_heading.substr(1));
100-
101-
if (book_section === "chapter"){
102-
return {heading: "h1", closings: 0, heirarchy: 1}
43+
44+
var close_sections = function (o, c) {
45+
return concat_times(o - c, "</section>", "\n");
10346
}
104-
else if (book_val === html_val){
105-
return {heading: "h" + book_val, closings: 1, heirarchy: _.indexOf(heirarchy, "sect"+ book_val)}
47+
48+
function HTMLBook (input, opts, callback) {
49+
this.input = input;
10650
}
107-
else if (book_val < html_val){
108-
return {heading: "h" + (book_val+1), closings: 0, heirarchy: _.indexOf(heirarchy, "sect"+ (book_val+1))}
51+
52+
// Parse the html input and pass off to the traverse callback
53+
HTMLBook.prototype.parse = function () {
54+
var handler = new htmlparser.DomHandler(function (error, dom) {
55+
if (error){
56+
console.log('error dog'); process.exit(1)
57+
} else {
58+
closings = 0;
59+
openings = 0;
60+
console.log(traverse(dom) + close_sections(openings,closings));
61+
}
62+
});
63+
var parser = new htmlparser.Parser(handler);
64+
parser.write(this.input);
65+
parser.done()
10966
}
110-
else if (book_val > html_val){
111-
return {heading: "h" + html_val, closings:(book_val - html_val + 1), heirarchy: _.indexOf(heirarchy, "sect"+ html_val)}
67+
68+
// Converts an object of attributes to a string.
69+
var attribs_to_string = function (obj) {
70+
if (!helpers.existy(obj))
71+
return ""
72+
73+
return _.reduce(_.pairs(obj), function (memo, v) {
74+
return memo + " " + v[0] + "='" + v[1]+ "'"
75+
}, "");
76+
}
77+
78+
// Construct an opening tag with the specified attributes.
79+
var open_tag = function (node) {
80+
return "<" + node.name + attribs_to_string(node.attribs) + ">"
81+
}
82+
83+
var close_tag = function (node) {
84+
return "</" + node.name + ">"
85+
}
86+
87+
var section_starter = function (diff, level) {
88+
return _.times(diff, function() {return "</section>"}).join("\n") + "\n<section data-type='" + heirarchy[level] + "'>"
11289
}
113-
}
114-
115-
var traverse = function (dom_tree, htmlbook_tracker) {
116-
// Set depth if not passed.
117-
if (!helpers.existy(htmlbook_tracker))
118-
htmlbook_tracker = {"heirarchy" : 0}
119-
output = ""
120-
121-
_.forEach(dom_tree, function (node, i) {
122-
// When the node is a text type, it has no children, just return it.
123-
if (node.type === "text") {
124-
output += node.data
125-
// Check to see if this node is a header, which should signal the start of
126-
// a new section.
127-
} else if (_.contains(headers, node.name)) {
128-
openings += 1
129-
// output += section_starter(htmlbook_tracker, node);
130-
bookpart = _.find(complex, function (el) {
131-
return el["$"]["name"] === heirarchy[htmlbook_tracker.heirarchy];
132-
});
133-
bookpart_heading = bookpart["xs:sequence"][0]["xs:element"][0]["$"]['ref']
134-
bookpart_name = bookpart["$"]["name"]
135-
136-
if (bookpart_name === "bookmaindiv")
137-
bookpart_name = "chapter"
138-
139-
r = compare_headings(bookpart_name, bookpart_heading, node.name)
140-
141-
htmlbook_tracker.heirarchy = r.heirarchy
142-
closings += r.closings
143-
144-
node.name = r.heading
145-
146-
output += section_starter(r.closings, r.heirarchy) + "\n" + open_tag(node)+ traverse(node.children, htmlbook_tracker) + close_tag(node)
147-
148-
} else if (helpers.existy(node.children)) {
149-
// Something here to parse the tag and adjust its attribs to align with
150-
//
151-
output += open_tag(node) + traverse(node.children, htmlbook_tracker) + close_tag(node);
152-
}
153-
});
154-
return output;
155-
}
156-
157-
var htmlbook = function (input) {
158-
parse(input, traverse);
159-
}
160-
161-
// When parsing finishes
162-
xml_parser.addListener('end', function(result) {
163-
// save the result
164-
schema = result;
165-
elements = schema["xs:schema"]["xs:element"];
166-
complex = schema["xs:schema"]["xs:complexType"];
16790

168-
fs.writeFile("schema.js", JSON.stringify(schema, null, 2));
91+
var compare_headings = function (book_section, book_heading, html_heading) {
92+
var book_val = parseInt(book_heading.substr(1));
93+
var html_val = parseInt(html_heading.substr(1));
16994

170-
// Read the source, start conversion
171-
fs.readFile("test/documents/test.html", "utf-8", function (e,d) {
172-
htmlbook(d);
173-
});
174-
});
95+
if (book_section === "chapter"){
96+
return {heading: "h1", closings: 0, heirarchy: 1}
97+
}
98+
else if (book_val === html_val){
99+
return {heading: "h" + book_val, closings: 1, heirarchy: _.indexOf(heirarchy, "sect"+ book_val)}
100+
}
101+
else if (book_val < html_val){
102+
return {heading: "h" + (book_val+1), closings: 0, heirarchy: _.indexOf(heirarchy, "sect"+ (book_val+1))}
103+
}
104+
else if (book_val > html_val){
105+
return {heading: "h" + html_val, closings:(book_val - html_val + 1), heirarchy: _.indexOf(heirarchy, "sect"+ html_val)}
106+
}
107+
}
108+
109+
var traverse = function (dom_tree, htmlbook_tracker) {
110+
// Set depth if not passed.
111+
if (!helpers.existy(htmlbook_tracker))
112+
htmlbook_tracker = {"heirarchy" : 0}
113+
output = ""
114+
115+
_.forEach(dom_tree, function (node, i) {
116+
// When the node is a text type, it has no children, just return it.
117+
if (node.type === "text") {
118+
output += node.data
119+
// Check to see if this node is a header, which should signal the start of
120+
// a new section.
121+
} else if (_.contains(headers, node.name)) {
122+
openings += 1;
123+
// output += section_starter(htmlbook_tracker, node);
124+
bookpart = _.find(complex, function (el) {
125+
return el["$"]["name"] === heirarchy[htmlbook_tracker.heirarchy];
126+
});
127+
bookpart_heading = bookpart["xs:sequence"][0]["xs:element"][0]["$"]['ref']
128+
bookpart_name = bookpart["$"]["name"]
129+
130+
if (bookpart_name === "bookmaindiv")
131+
bookpart_name = "chapter"
132+
133+
r = compare_headings(bookpart_name, bookpart_heading, node.name)
134+
135+
htmlbook_tracker.heirarchy = r.heirarchy
136+
closings += r.closings;
137+
138+
node.name = r.heading
139+
140+
output += section_starter(r.closings, r.heirarchy) + "\n" + open_tag(node)+ traverse(node.children, htmlbook_tracker) + close_tag(node)
141+
142+
} else if (helpers.existy(node.children)) {
143+
// Something here to parse the tag and adjust its attribs to align with
144+
//
145+
output += open_tag(node) + traverse(node.children, htmlbook_tracker) + close_tag(node);
146+
}
147+
});
148+
return output;
149+
}
175150

176-
// Start by parsing schema
177-
fs.readFile("../HTMLBook/schema/htmlbook.xsd", function(err, data) {
178-
xml_parser.parseString(data);
179-
});
151+
module.exports = function (str) {return new HTMLBook(str)};
152+
}).call(this);

make_schema.js

+14
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,14 @@
1+
var xml2js = require('xml2js'),
2+
fs = require('fs'),
3+
xml_parser = new xml2js.Parser();
4+
5+
// When parsing finishes
6+
xml_parser.addListener('end', function(result) {
7+
// save the result
8+
fs.writeFile("schema.js", "module.exports = " + JSON.stringify(result, null, 2));
9+
});
10+
11+
// Start by parsing schema
12+
fs.readFile("../HTMLBook/schema/htmlbook.xsd", function(err, data) {
13+
xml_parser.parseString(data);
14+
});

package.json

+5-3
Original file line number | Diff line number | Diff line change
@@ -17,12 +17,14 @@
1717
"author": "O'Reilly Media",
1818
"license": "MIT",
1919
"dependencies": {
20-
"jquery": "1.8.3",
2120
"marked": "0.2.9",
2221
"optparse": "1.0.4",
2322
"html": "0.0.7",
2423
"underscore":"",
25-
"htmlparser2":"",
26-
"xml2js": ""
24+
"htmlparser2":""
25+
},
26+
"devDependencies": {
27+
"xml2js": "",
28+
"jasmine-node": ""
2729
}
2830
}

readme.md

+3-14
Original file line number | Diff line number | Diff line change
@@ -1,25 +1,15 @@
11
# htmlbook.js
22

3-
Parses HTML into [HTMLBook](https://github.com/oreillymedia/htmlbook).
3+
Converts Markdown to [HTMLBook](https://github.com/oreillymedia/htmlbook).
44

5-
## Setup
6-
7-
### Browser
8-
9-
Add the htmlbook.js script to a page with jQuery (tested with jQuery 1.10.2) and [Marked](https://github.com/chjj/marked).
10-
11-
### Node.js
5+
## Installation
126

137
Install with npm: `npm install -g htmlbook`
148

159
## Usage
1610

17-
### Browser & Node.js
18-
19-
For both the browser and Node.js, usage is the same
20-
2111
```
22-
var input = "HTML TEXT OR JQUERY OBJECT"
12+
var input = "MARKDOWN CONTENT"
2313
var output = HTMLBook(input).parse(opts);
2414
```
2515

@@ -37,7 +27,6 @@ Additionally, type `$ htmlbook --help` for all options.
3727

3828
Below is a list of available options; the default value for each is emphasized.
3929

40-
- sourceFormat: _html_ or markdown
4130
- fragment: _false_ or true
4231
- level: _chapter_ or book
4332
- debug: boolean -- default for command line is 'false'

0 commit comments

Comments
 (0)