Skip to content

Commit

Permalink
Merge pull request #74 from owncloud/fix-xml-processing
Browse the repository at this point in the history
Fix xml processing
  • Loading branch information
Vincent Petry committed Feb 21, 2014
2 parents 82713c7 + ed584c3 commit 478de4b
Show file tree
Hide file tree
Showing 13 changed files with 58 additions and 10 deletions.
2 changes: 2 additions & 0 deletions PHPExcel/Classes/PHPExcel/Reader/Excel2003XML.php
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ public function listWorksheetNames($pFilename)

$worksheetNames = array();

libxml_disable_entity_loader(true);
$xml = simplexml_load_file($pFilename);
$namespaces = $xml->getNamespaces(true);

Expand Down Expand Up @@ -165,6 +166,7 @@ public function listWorksheetInfo($pFilename)

$worksheetInfo = array();

libxml_disable_entity_loader(true);
$xml = simplexml_load_file($pFilename);
$namespaces = $xml->getNamespaces(true);

Expand Down
26 changes: 20 additions & 6 deletions PHPExcel/Classes/PHPExcel/Reader/Excel2007.php
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ public function canRead($pFilename)
$zip = new ZipArchive;
if ($zip->open($pFilename) === true) {
// check if it is an OOXML archive
libxml_disable_entity_loader(true);
$rels = simplexml_load_string($this->_getFromZipArchive($zip, "_rels/.rels"));
if ($rels !== false) {
foreach ($rels->Relationship as $rel) {
Expand Down Expand Up @@ -131,12 +132,14 @@ public function listWorksheetNames($pFilename)
$zip->open($pFilename);

// The files we're looking at here are small enough that simpleXML is more efficient than XMLReader
libxml_disable_entity_loader(true);
$rels = simplexml_load_string(
$this->_getFromZipArchive($zip, "_rels/.rels")
); //~ http://schemas.openxmlformats.org/package/2006/relationships");
foreach ($rels->Relationship as $rel) {
switch ($rel["Type"]) {
case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument":
libxml_disable_entity_loader(true);
$xmlWorkbook = simplexml_load_string(
$this->_getFromZipArchive($zip, "{$rel['Target']}")
); //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main");
Expand Down Expand Up @@ -173,11 +176,12 @@ public function listWorksheetInfo($pFilename)

$zip = new ZipArchive;
$zip->open($pFilename);

libxml_disable_entity_loader(true);
$rels = simplexml_load_string($this->_getFromZipArchive($zip, "_rels/.rels")); //~ http://schemas.openxmlformats.org/package/2006/relationships");
foreach ($rels->Relationship as $rel) {
if ($rel["Type"] == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument") {
$dir = dirname($rel["Target"]);
libxml_disable_entity_loader(true);
$relsWorkbook = simplexml_load_string($this->_getFromZipArchive($zip, "$dir/_rels/" . basename($rel["Target"]) . ".rels")); //~ http://schemas.openxmlformats.org/package/2006/relationships");
$relsWorkbook->registerXPathNamespace("rel", "http://schemas.openxmlformats.org/package/2006/relationships");

Expand All @@ -186,8 +190,8 @@ public function listWorksheetInfo($pFilename)
if ($ele["Type"] == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet") {
$worksheets[(string) $ele["Id"]] = $ele["Target"];
}
}

}
libxml_disable_entity_loader(true);
$xmlWorkbook = simplexml_load_string($this->_getFromZipArchive($zip, "{$rel['Target']}")); //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main");
if ($xmlWorkbook->sheets) {
$dir = dirname($rel["Target"]);
Expand Down Expand Up @@ -354,13 +358,14 @@ public function load($pFilename)
$zip->open($pFilename);

// Read the theme first, because we need the colour scheme when reading the styles
libxml_disable_entity_loader(true);
$wbRels = simplexml_load_string($this->_getFromZipArchive($zip, "xl/_rels/workbook.xml.rels")); //~ http://schemas.openxmlformats.org/package/2006/relationships");
foreach ($wbRels->Relationship as $rel) {
switch ($rel["Type"]) {
case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme":
$themeOrderArray = array('lt1','dk1','lt2','dk2');
$themeOrderAdditional = count($themeOrderArray);

libxml_disable_entity_loader(true);
$xmlTheme = simplexml_load_string($this->_getFromZipArchive($zip, "xl/{$rel['Target']}"));
if (is_object($xmlTheme)) {
$xmlThemeName = $xmlTheme->attributes();
Expand Down Expand Up @@ -390,11 +395,12 @@ public function load($pFilename)
break;
}
}

libxml_disable_entity_loader(true);
$rels = simplexml_load_string($this->_getFromZipArchive($zip, "_rels/.rels")); //~ http://schemas.openxmlformats.org/package/2006/relationships");
foreach ($rels->Relationship as $rel) {
switch ($rel["Type"]) {
case "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties":
libxml_disable_entity_loader(true);
$xmlCore = simplexml_load_string($this->_getFromZipArchive($zip, "{$rel['Target']}"));
if (is_object($xmlCore)) {
$xmlCore->registerXPathNamespace("dc", "http://purl.org/dc/elements/1.1/");
Expand All @@ -414,6 +420,7 @@ public function load($pFilename)
break;

case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties":
libxml_disable_entity_loader(true);
$xmlCore = simplexml_load_string($this->_getFromZipArchive($zip, "{$rel['Target']}"));
if (is_object($xmlCore)) {
$docProps = $excel->getProperties();
Expand All @@ -425,6 +432,7 @@ public function load($pFilename)
break;

case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties":
libxml_disable_entity_loader(true);
$xmlCore = simplexml_load_string($this->_getFromZipArchive($zip, "{$rel['Target']}"));
if (is_object($xmlCore)) {
$docProps = $excel->getProperties();
Expand All @@ -445,11 +453,13 @@ public function load($pFilename)

case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument":
$dir = dirname($rel["Target"]);
libxml_disable_entity_loader(true);
$relsWorkbook = simplexml_load_string($this->_getFromZipArchive($zip, "$dir/_rels/" . basename($rel["Target"]) . ".rels")); //~ http://schemas.openxmlformats.org/package/2006/relationships");
$relsWorkbook->registerXPathNamespace("rel", "http://schemas.openxmlformats.org/package/2006/relationships");

$sharedStrings = array();
$xpath = self::array_item($relsWorkbook->xpath("rel:Relationship[@Type='http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings']"));
libxml_disable_entity_loader(true);
$xmlStrings = simplexml_load_string($this->_getFromZipArchive($zip, "$dir/$xpath[Target]")); //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main");
if (isset($xmlStrings) && isset($xmlStrings->si)) {
foreach ($xmlStrings->si as $val) {
Expand All @@ -471,6 +481,7 @@ public function load($pFilename)
$styles = array();
$cellStyles = array();
$xpath = self::array_item($relsWorkbook->xpath("rel:Relationship[@Type='http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles']"));
libxml_disable_entity_loader(true);
$xmlStyles = simplexml_load_string($this->_getFromZipArchive($zip, "$dir/$xpath[Target]")); //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main");
$numFmts = null;
if ($xmlStyles && $xmlStyles->numFmts[0]) {
Expand Down Expand Up @@ -568,7 +579,7 @@ public function load($pFilename)
}
}
}

libxml_disable_entity_loader(true);
$xmlWorkbook = simplexml_load_string($this->_getFromZipArchive($zip, "{$rel['Target']}")); //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main");

// Set base date
Expand Down Expand Up @@ -612,6 +623,7 @@ public function load($pFilename)
// reverse
$docSheet->setTitle((string) $eleSheet["name"],false);
$fileWorksheet = $worksheets[(string) self::array_item($eleSheet->attributes("http://schemas.openxmlformats.org/officeDocument/2006/relationships"), "id")];
libxml_disable_entity_loader(true);
$xmlSheet = simplexml_load_string($this->_getFromZipArchive($zip, "$dir/$fileWorksheet")); //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main");

$sharedFormulas = array();
Expand Down Expand Up @@ -1194,6 +1206,7 @@ public function load($pFilename)
if (!$this->_readDataOnly) {
// Locate hyperlink relations
if ($zip->locateName(dirname("$dir/$fileWorksheet") . "/_rels/" . basename($fileWorksheet) . ".rels")) {
libxml_disable_entity_loader(true);
$relsWorksheet = simplexml_load_string($this->_getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . "/_rels/" . basename($fileWorksheet) . ".rels") ); //~ http://schemas.openxmlformats.org/package/2006/relationships");
foreach ($relsWorksheet->Relationship as $ele) {
if ($ele["Type"] == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink") {
Expand Down Expand Up @@ -1235,6 +1248,7 @@ public function load($pFilename)
if (!$this->_readDataOnly) {
// Locate comment relations
if ($zip->locateName(dirname("$dir/$fileWorksheet") . "/_rels/" . basename($fileWorksheet) . ".rels")) {
libxml_disable_entity_loader(true);
$relsWorksheet = simplexml_load_string($this->_getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . "/_rels/" . basename($fileWorksheet) . ".rels") ); //~ http://schemas.openxmlformats.org/package/2006/relationships");
foreach ($relsWorksheet->Relationship as $ele) {
if ($ele["Type"] == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments") {
Expand Down
1 change: 1 addition & 0 deletions PHPExcel/Classes/PHPExcel/Reader/Gnumeric.php
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ public function loadIntoExisting($pFilename, PHPExcel $objPHPExcel)
// echo htmlentities($gFileData,ENT_QUOTES,'UTF-8');
// echo '</pre><hr />';
//
libxml_disable_entity_loader(true);
$xml = simplexml_load_string($gFileData);
$namespacesMeta = $xml->getNamespaces(true);

Expand Down
3 changes: 3 additions & 0 deletions PHPExcel/Classes/PHPExcel/Reader/OOCalc.php
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ public function canRead($pFilename)
if ($stat && ($stat['size'] <= 255)) {
$mimeType = $zip->getFromName($stat['name']);
} elseif($stat = $zip->statName('META-INF/manifest.xml')) {
libxml_disable_entity_loader(true);
$xml = simplexml_load_string($zip->getFromName('META-INF/manifest.xml'));
$namespacesContent = $xml->getNamespaces(true);
if (isset($namespacesContent['manifest'])) {
Expand Down Expand Up @@ -337,6 +338,7 @@ public function loadIntoExisting($pFilename, PHPExcel $objPHPExcel)
}

// echo '<h1>Meta Information</h1>';
libxml_disable_entity_loader(true);
$xml = simplexml_load_string($zip->getFromName("meta.xml"));
$namespacesMeta = $xml->getNamespaces(true);
// echo '<pre>';
Expand Down Expand Up @@ -421,6 +423,7 @@ public function loadIntoExisting($pFilename, PHPExcel $objPHPExcel)


// echo '<h1>Workbook Content</h1>';
libxml_disable_entity_loader(true);
$xml = simplexml_load_string($zip->getFromName("content.xml"));
$namespacesContent = $xml->getNamespaces(true);
// echo '<pre>';
Expand Down
1 change: 1 addition & 0 deletions getid3/getid3.lib.php
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,7 @@ public static function array_min($arraydata, $returnkey=false) {
public static function XML2array($XMLstring) {
if (function_exists('simplexml_load_string')) {
if (function_exists('get_object_vars')) {
libxml_disable_entity_loader(true);
$XMLobject = simplexml_load_string($XMLstring);
return self::SimpleXMLelement2array($XMLobject);
}
Expand Down
11 changes: 11 additions & 0 deletions phpdocx/classes/CreateDocx.inc
Original file line number Diff line number Diff line change
Expand Up @@ -1229,6 +1229,7 @@ class CreateDocx extends CreateDocument
PhpdocxLogger::logger($e->getMessage(), 'fatal');
}
$baseDocument = new DOMDocument();
libxml_disable_entity_loader(true);
$baseDocument->loadXML($baseTemplateDocumentT);
$docXpath = new DOMXPath($baseDocument);
$docXpath->registerNamespace('w', 'http://schemas.openxmlformats.org/wordprocessingml/2006/main');
Expand Down Expand Up @@ -1264,6 +1265,7 @@ class CreateDocx extends CreateDocument
PhpdocxLogger::logger($e->getMessage(), 'fatal');
}
$this->_contentTypeT = new DOMDocument();
libxml_disable_entity_loader(true);
$this->_contentTypeT->loadXML($baseTemplateContentTypeT);

//We are going to include the standard image defaults
Expand All @@ -1287,6 +1289,7 @@ class CreateDocx extends CreateDocument
}

$this->_wordRelsDocumentRelsT = new DOMDocument();
libxml_disable_entity_loader(true);
$this->_wordRelsDocumentRelsT->loadXML($baseTemplateDocumentRelsT);
$relationships = $this->_wordRelsDocumentRelsT->getElementsByTagName('Relationship');

Expand Down Expand Up @@ -2304,6 +2307,7 @@ class CreateDocx extends CreateDocument
if ($this->_debug->getActive() == 1) {
PhpdocxLogger::logger('Debug is active, add messages to objDebug.', 'debug');
libxml_use_internal_errors(true);
libxml_disable_entity_loader(true);
simplexml_load_string(
$this->_wordDocumentT, 'SimpleXMLElement', LIBXML_NOWARNING
);
Expand Down Expand Up @@ -2448,6 +2452,7 @@ class CreateDocx extends CreateDocument
}
//let's parse the different styles via XPath
$newStylesDoc = new DOMDocument();
libxml_disable_entity_loader(true);
$newStylesDoc->loadXML($newStyles);
$stylesXpath = new DOMXPath($newStylesDoc);
$stylesXpath->registerNamespace('w', 'http://schemas.openxmlformats.org/wordprocessingml/2006/main');
Expand All @@ -2468,6 +2473,7 @@ class CreateDocx extends CreateDocument
PhpdocxLogger::logger($e->getMessage(), 'fatal');
}
$stylesDocument = new DomDocument();
libxml_disable_entity_loader(true);
$stylesDocument->loadXML($this->_wordStylesT);
$baseNode = $stylesDocument->documentElement;
$stylesDocumentXPath = new DOMXPath($stylesDocument);
Expand Down Expand Up @@ -2651,6 +2657,7 @@ class CreateDocx extends CreateDocument
PhpdocxLogger::logger($e->getMessage(), 'fatal');
}
$stylesDocument = new DomDocument();
libxml_disable_entity_loader(true);
$stylesDocument->loadXML($this->_wordStylesT);
$langNode = $stylesDocument->getElementsByTagName('lang');
$langNode->item(0)->setAttribute('w:val', $lang);
Expand Down Expand Up @@ -2704,6 +2711,7 @@ class CreateDocx extends CreateDocument
{

$templateStylesheet = new DomDocument();
libxml_disable_entity_loader(true);
$templateStylesheet->loadXML($templateStyles);
//let's parse the different styles via XPath

Expand All @@ -2714,6 +2722,7 @@ class CreateDocx extends CreateDocument

//Let's get the original styles as a DOMNode
$stylesDocument = new DomDocument();
libxml_disable_entity_loader(true);
$stylesDocument->loadXML($templateStyles);
$baseNode = $stylesDocument->documentElement;

Expand Down Expand Up @@ -3028,6 +3037,7 @@ class CreateDocx extends CreateDocument
}

$this->_wordSettingsT = new DOMDocument();
libxml_disable_entity_loader(true);
$this->_wordSettingsT->loadXML($baseTemplateSettingsT);
$selectedElements = $this->_wordSettingsT->documentElement->getElementsByTagName($tag);
if($selectedElements->length == 0){
Expand Down Expand Up @@ -3703,6 +3713,7 @@ class CreateDocx extends CreateDocument
}

$settingsDoc = new DOMDocument();
libxml_disable_entity_loader(true);
$settingsDoc->loadXML($baseSettings);
$settings = $settingsDoc->documentElement;

Expand Down
1 change: 1 addition & 0 deletions phpdocx/classes/CreateMath.inc
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ class CreateMath extends CreateElement
$mathML = str_replace($arrDeleteStrsMML, $arrDeleteToStrsMML, $mathML);

$rscXML = new DOMDocument();
libxml_disable_entity_loader(true);
$rscXML->loadXML($mathML);
$objXSLTProc = new XSLTProcessor();
$objXSL = new DOMDocument();
Expand Down
5 changes: 5 additions & 0 deletions phpdocx/classes/Docx2Text.inc
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ class Docx2Text
}

$this->domDocument = new DomDocument();
libxml_disable_entity_loader(true);
$this->domDocument->loadXML($this->_document);
//get the body node to check the content of all its children
$bodyNode = $this->domDocument->getElementsByTagNameNS('http://schemas.openxmlformats.org/wordprocessingml/2006/main', 'body');
Expand Down Expand Up @@ -321,6 +322,7 @@ class Docx2Text
}
if (!empty($this->_endnote)) {
$domDocument = new DomDocument();
libxml_disable_entity_loader(true);
$domDocument->loadXML($this->_endnote);
$endnotes = $domDocument->getElementsByTagNameNS('http://schemas.openxmlformats.org/wordprocessingml/2006/main', 'endnote');
foreach ($endnotes as $endnote) {
Expand All @@ -344,6 +346,7 @@ class Docx2Text
}
if (!empty($this->_footnote)) {
$domDocument = new DomDocument();
libxml_disable_entity_loader(true);
$domDocument->loadXML($this->_footnote);
$footnotes = $domDocument->getElementsByTagNameNS('http://schemas.openxmlformats.org/wordprocessingml/2006/main', 'footnote');
foreach ($footnotes as $footnote) {
Expand All @@ -368,6 +371,7 @@ class Docx2Text
if(!empty($this->_numbering)){
//we use the domdocument to iterate the children of the numbering tag
$domDocument = new DomDocument();
libxml_disable_entity_loader(true);
$domDocument->loadXML($this->_numbering);
$numberings = $domDocument->getElementsByTagNameNS('http://schemas.openxmlformats.org/wordprocessingml/2006/main', 'numbering');
//there is only one numbering tag in the numbering.xml
Expand Down Expand Up @@ -418,6 +422,7 @@ class Docx2Text
$this->_relations = $this->docx->getFromName('word/_rels/document.xml.rels');
}
$domDocument = new DomDocument();
libxml_disable_entity_loader(true);
$domDocument->loadXML($this->_relations);
$relations = $domDocument->getElementsByTagName('Relationships');
$relations = $relations->item(0);
Expand Down
6 changes: 5 additions & 1 deletion phpdocx/classes/TransformDoc.inc
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ class TransformDoc
echo 'Unable to find the DOCX file';
exit();
}
libxml_disable_entity_loader(true);
$relations = simplexml_load_string(
$package->getFromName('_rels/.rels')
);
Expand All @@ -259,6 +260,7 @@ class TransformDoc
$xml = preg_replace(
'/(<w:wordDocument)+(.)*(><w:body>)/', '<w:body>', $xml
);
libxml_disable_entity_loader(true);
@$xmlDOM->loadXML($xml);
$xsl = new DOMDocument();
$xsl->load(dirname(__FILE__) . '/../xsl/docx2html.xsl');
Expand All @@ -281,6 +283,7 @@ class TransformDoc
$idImgs[] = substr($datFiltered[1], 0, -1);
}
}
libxml_disable_entity_loader(true);
$relationsImgs = simplexml_load_string(
$package->getFromName('word/_rels/document.xml.rels')
);
Expand Down Expand Up @@ -367,6 +370,7 @@ class TransformDoc
private function _extractProps()
{
$xmlDOM = new DOMDocument();
libxml_disable_entity_loader(true);
$xmlDOM->loadXML($this->getDocument());
//Get the page size and orientation
$node = $xmlDOM->getElementsByTagName('pgSz');
Expand Down Expand Up @@ -399,4 +403,4 @@ class TransformDoc
$this->setDocProps($docProps);
}

}
}
1 change: 1 addition & 0 deletions phpdocx/classes/WordML.inc
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class WordML extends CreateElement
$namespaces = 'xmlns:ve="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" ';
$wordML = '<?xml version="1.0" encoding="UTF-8" standalone="yes" ?><w:root '.$namespaces.'>'.$this->_wordML;
$wordML = $wordML.'</w:root>';
libxml_disable_entity_loader(true);
$wordMLChunk->loadXML($wordML);
$wordMLXpath = new DOMXPath($wordMLChunk);
$wordMLXpath->registerNamespace('w', 'http://schemas.openxmlformats.org/wordprocessingml/2006/main');
Expand Down
Loading

0 comments on commit 478de4b

Please sign in to comment.