Skip to content

Commit

Permalink
Add option for rendering CDATA as separate property
Browse files Browse the repository at this point in the history
  • Loading branch information
amitguptagwl committed Feb 13, 2018
1 parent b6d3a5e commit 9981838
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 11 deletions.
19 changes: 18 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,19 @@ You can use this library online (press try me button above), or as command from
<img src="https://opencollective.com/fast-xml-parser/donate/button@2x.png?color=blue" width=300 />
</a>

### Main Features

* Works with node packages, in browser, and in CLI
* Faster than any pure JS implementation
* It can handle big files (tested up to 100mb)
* You can parse CDATA as a separate property.
* You can prefix attributes or group them into a separate property, or ignore them in the result completely.
* You can parse a tag's or an attribute's value to a primitive type: string, integer, float, or boolean. You can also optionally decode HTML character references.
* You can remove namespace from tag name while parsing
* It supports boolean attributes, if configured.



### How to use
**Installation**

Expand Down Expand Up @@ -58,7 +71,7 @@ var options = {
//from 3.0.0
var options = {
attributeNamePrefix : "@_",
attrNodeName: false,
attrNodeName: "attr", //default is 'false'
textNodeName : "#text",
ignoreAttributes : true,
ignoreNameSpace : false,
Expand All @@ -67,6 +80,8 @@ var options = {
parseAttributeValue : false,
trimValues: true,
decodeHTMLchar: false,
cdataTagName: "__cdata", //default is 'false'
cdataPositionChar: "\\c",
};
if(fastXmlParser.validate(xmlData)=== true){//optional
var jsonObj = fastXmlParser.parse(xmlData,options);
Expand All @@ -88,6 +103,8 @@ var jsonObj = fastXmlParser.convertToJson(tObj,options);
* **parseAttributeValue** : Parse the value of an attribute to float, integer, or boolean.
* **trimValues** : trim string values of an attribute or node
* **decodeHTMLchar** : decodes any named and numerical character HTML references excluding CDATA part.
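* **cdataTagName** : If specified, the parser renders CDATA as a nested property with this name instead of appending its value to the parent tag's text.
* **cdataPositionChar** : Marks where the CDATA appeared within the parent tag's text. It helps to convert JSON back to XML without losing the CDATA position.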

To use from command line
```bash
Expand Down
67 changes: 67 additions & 0 deletions spec/cdata_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,73 @@ describe("XMLParser", function () {
expect(result).toEqual(expected);
});

it("should parse CDATA as separate tag", function () {
    // Only `cdataTagName` is supplied, so `cdataPositionChar` is left at
    // whatever the parser defaults it to; the expected "#text" values below
    // carry a "\\c" marker where each CDATA section appeared.
    var parseOptions = {
        ignoreAttributes: false,
        cdataTagName : "__cdata"
    };

    // Covers: CDATA only (a), plain text only (b), CDATA followed by text (c),
    // and an empty CDATA section surrounded by text (d).
    var source = [
        "<xml>",
        " <a><![CDATA[text]]></a>",
        " <b>\n text \n</b>",
        " <c> <![CDATA[text]]>after </c>",
        " <d>23<![CDATA[]]> 24</d>",
        "</xml>"
    ].join("");

    var actual = parser.parse(source, parseOptions);

    expect(actual).toEqual({
        "xml": {
            "a": { "__cdata" : "text" },
            "b": "text",
            "c": { "#text" : "\\cafter", "__cdata" : "text" },
            "d": { "#text" : "23\\c24", "__cdata" : "" }
        }
    });
});

it("should parse CDATA as separate tag without preserving cdata position", function () {
    // `cdataPositionChar` is set to the empty string, so the expected "#text"
    // values below contain no marker where a CDATA section appeared.
    var parseOptions = {
        ignoreAttributes: false,
        cdataTagName : "__cdata",
        cdataPositionChar : ""
    };

    // Covers: CDATA only (a), plain text only (b), CDATA followed by text (c),
    // and an empty CDATA section surrounded by text (d).
    var source = [
        "<xml>",
        " <a><![CDATA[text]]></a>",
        " <b>\n text \n</b>",
        " <c> <![CDATA[text]]>after </c>",
        " <d>23<![CDATA[]]> 24</d>",
        "</xml>"
    ].join("");

    var actual = parser.parse(source, parseOptions);

    expect(actual).toEqual({
        "xml": {
            "a": { "__cdata" : "text" },
            "b": "text",
            "c": { "#text" : "after", "__cdata" : "text" },
            "d": { "#text" : "2324", "__cdata" : "" }
        }
    });
});

it("should validate XML with repeated multiline CDATA and comments", function () {
var fs = require("fs");
var path = require("path");
Expand Down
33 changes: 23 additions & 10 deletions src/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ var defaultOptions = {
arrayMode : false,
trimValues: true, //Trim string values of tag and attributes
decodeHTMLchar: false,
cdataTagName: false,
cdataPositionChar: "\\c"
//decodeStrict: false,
};

Expand All @@ -36,6 +38,7 @@ var buildOptions = function (options){
"parseAttributeValue",
"arrayMode",
"trimValues",
"cdataPositionChar",
];
var len = props.length;
for (var i = 0; i < len; i++) {
Expand Down Expand Up @@ -69,10 +72,20 @@ var getTraversalObj =function (xmlData,options){

currentNode = currentNode.parent;
}else if(tagType === TagType.CDATA){
//no attribute
//add text to parent node
//add parsed data to parent node
currentNode.val = (currentNode.val || "") + (tag[3] || "") + processTagValue(tag[14],options);
if(options.cdataTagName){
//add cdata node
var childNode = new xmlNode( options.cdataTagName,currentNode,tag[3]);
childNode.attrsMap = buildAttributesMap(tag[8],options);
currentNode.addChild(childNode);
//for backtracking
currentNode.val = util.getValue(currentNode.val) + options.cdataPositionChar;
//add rest value to parent node
if(tag[14]){
currentNode.val += processTagValue(tag[14],options);
}
}else{
currentNode.val = (currentNode.val || "") + (tag[3] || "") + processTagValue(tag[14],options);
}
}else if(tagType === TagType.SELF){
var childNode = new xmlNode( options.ignoreNameSpace ? tag[7] : tag[5],currentNode, "");
if(tag[8] && tag[8].length > 1){
Expand Down Expand Up @@ -125,7 +138,7 @@ var fakeCallNoReturn = function() {}

var xml2json = function (xmlData,options){
options = buildOptions(options);
return convertToJson(getTraversalObj(xmlData,options), options.textNodeName, options.arrayMode);
return convertToJson(getTraversalObj(xmlData,options), options);
};


Expand Down Expand Up @@ -202,13 +215,13 @@ function buildAttributesMap(attrStr,options){
}
}

var convertToJson = function (node, textNodeName,arrayMode){
var convertToJson = function (node, options){
var jObj = {};

//traver through all the children
for (var index = 0; index < node.child.length; index++) {
var prop = node.child[index].tagname;
var obj = convertToJson(node.child[index],textNodeName, arrayMode);
var obj = convertToJson(node.child[index],options);
if(typeof jObj[prop] !== "undefined"){
if(!Array.isArray(jObj[prop])){
var swap = jObj[prop];
Expand All @@ -217,7 +230,7 @@ var convertToJson = function (node, textNodeName,arrayMode){
}
jObj[prop].push(obj);
}else{
jObj[prop] = arrayMode ? [obj] : obj;
jObj[prop] = options.arrayMode ? [obj] : obj;
}
}
util.merge(jObj,node.attrsMap);
Expand All @@ -226,8 +239,8 @@ var convertToJson = function (node, textNodeName,arrayMode){
return util.isExist(node.val)? node.val : "";
}else{
if(util.isExist(node.val)){
if(!(typeof node.val === "string" && node.val === "")){
jObj[textNodeName] = node.val;
if(!(typeof node.val === "string" && (node.val === "" || node.val === options.cdataPositionChar))){
jObj[options.textNodeName] = node.val;
}
}
}
Expand Down

0 comments on commit 9981838

Please sign in to comment.