Skip to content

Commit

Permalink
Merge pull request #399 from MarcusBarnes/issue-391
Browse files Browse the repository at this point in the history
Work on #391.
  • Loading branch information
MarcusBarnes authored Jun 8, 2017
2 parents 9d5eae6 + 4693399 commit b3027ed
Show file tree
Hide file tree
Showing 2 changed files with 198 additions and 0 deletions.
154 changes: 154 additions & 0 deletions extras/scripts/shutdownhooks/create_structure_files.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
<?php
/**
* @file
* MIK shutdown script that generates a "structure file"
* for each compound object arranged under a root directory:
*
* Path_to_directory_containing_compound_objects\
* compound_object_1\
* child_1\
* child_2\
* compound_object_2\
* child_1\
* child_2\
* child_3\
* compound_object_3\
* [...]
*
* This script must be run to prepare compound objects for ingesting using
* Islandora Compound Batch.
*
* Based on https://github.com/MarcusBarnes/islandora_compound_batch/blob/master/extras/scripts/create_structure_files.php.
*/


// Entry point. The first command-line argument is the path to the .ini
// configuration file; its [WRITER] output_directory value is the wrapper
// directory whose subdirectories are the compound objects.
if (!isset($argv[1]) || trim($argv[1]) === '') {
    exit("Please provide the path to the configuration file as the first argument." . PHP_EOL);
}
$config_path = trim($argv[1]);

// parse_ini_file() returns FALSE when the file is missing or malformed, so
// check both the parse result and the key before using it.
$config = parse_ini_file($config_path, TRUE);
if ($config === FALSE || !isset($config['WRITER']['output_directory'])) {
    exit("Cannot read the [WRITER] output_directory setting from the configuration file ($config_path)." . PHP_EOL);
}
$target_directory = $config['WRITER']['output_directory'];

if (!is_dir($target_directory)) {
    exit("Please check that the [WRITER] output_directory setting ($target_directory) is a full path to an existing directory." . PHP_EOL);
}

// Resolved relative to the current working directory.
$path_to_xsl = "extras/scripts/shutdownhooks/tree_to_compound_object.xsl";
if (!file_exists($path_to_xsl)) {
    exit("Cannot find the required XSLT file ($path_to_xsl)." . PHP_EOL);
}

// For use with get_dir_name(), which is called from inside the XSLT.
// Initialised before the scan because scanWrapperDirectory() updates it for
// every compound object it processes.
$compound_obj_path = '';

scanWrapperDirectory($target_directory, 'structure', $path_to_xsl);

/**
 * Writes a structure file into every immediate subdirectory of the wrapper
 * directory.
 *
 * Each subdirectory is treated as one compound object: its directory tree is
 * serialised to XML, transformed with the XSLT stylesheet, and the result is
 * saved as "<structurefilename>.xml" inside that subdirectory. Only the
 * immediate children of $target_directory are visited here; the recursion
 * into each compound object happens while its tree XML is being built.
 *
 * @param string $target_directory
 *   Wrapper directory containing one subdirectory per compound object.
 * @param string $structurefilename
 *   Basename (without extension) of the file written into each subdirectory.
 * @param string $path_to_xsl
 *   Path to the XSLT stylesheet. Defaults to the stylesheet path this script
 *   uses, relative to the current working directory. The default exists so
 *   that no optional parameter precedes a required one, which PHP 8
 *   deprecates.
 */
function scanWrapperDirectory($target_directory, $structurefilename = 'structure', $path_to_xsl = 'extras/scripts/shutdownhooks/tree_to_compound_object.xsl') {
    // get_dir_name() (called from the stylesheet) reads this global to learn
    // which compound object is currently being transformed.
    global $compound_obj_path;

    // Filenames to exclude.
    $exclude_array = array('..', '.DS_Store', 'Thumbs.db', '.');

    $entries = scandir($target_directory);
    if ($entries === FALSE) {
        trigger_error("Cannot read directory ($target_directory).", E_USER_WARNING);
        return;
    }

    foreach ($entries as $entry) {
        if (in_array($entry, $exclude_array, TRUE)) {
            continue;
        }
        $objpath = $target_directory . DIRECTORY_SEPARATOR . $entry;
        if (!is_dir($objpath)) {
            continue;
        }

        $compound_obj_path = $objpath;

        // Build the tree XML for this compound object, then apply the XSLT.
        $tree_xml = compoundObjectStructureXML($objpath);
        $structure_xml = treeToCompound($path_to_xsl, $tree_xml);

        // A failed transformation yields no string; skip the write rather
        // than leaving an empty structure file behind.
        if (!is_string($structure_xml)) {
            trigger_error("Could not generate structure XML for ($objpath).", E_USER_WARNING);
            continue;
        }

        $output_file_path = $objpath . DIRECTORY_SEPARATOR . $structurefilename . '.xml';
        if (file_put_contents($output_file_path, $structure_xml) === FALSE) {
            trigger_error("Could not write structure file ($output_file_path).", E_USER_WARNING);
        }
    }
}

/**
 * Transforms a tree XML string with the XSLT stylesheet at the given path.
 *
 * PHP functions are registered with the processor so that the stylesheet can
 * call back into this script through php:function().
 *
 * @param string $path_to_xsl
 *   Path to the XSLT stylesheet file.
 * @param string $tree_output_xml
 *   The XML to transform, as a string.
 *
 * @return string|false|null
 *   Whatever XSLTProcessor::transformToXML() returns for the input.
 */
function treeToCompound($path_to_xsl, $tree_output_xml) {
    $stylesheet = new DOMDocument();
    $stylesheet->load($path_to_xsl);

    $source = new DOMDocument();
    $source->loadXML($tree_output_xml);

    $processor = new XSLTProcessor();
    $processor->importStylesheet($stylesheet);
    $processor->registerPHPFunctions();

    return $processor->transformToXML($source);
}

/**
 * Returns the last segment of the current compound object's path.
 *
 * Called from within the XSLT stylesheet via php:function('get_dir_name').
 * The path is read from the global $compound_obj_path, which
 * scanWrapperDirectory() sets before each transformation.
 *
 * basename() is used instead of regular expressions so that dots inside the
 * directory name (e.g. "vol.1") are kept, and so that no pattern is built
 * from DIRECTORY_SEPARATOR (a bare backslash there is not a valid pattern).
 *
 * @return string
 *   The directory name without any leading path segments; trailing
 *   separators are ignored.
 */
function get_dir_name() {
    global $compound_obj_path;
    return basename((string) $compound_obj_path);
}

/**
 * Recursively creates an XML string describing a directory tree.
 * Based on pseudo-code from http://stackoverflow.com/a/15096721/850828.
 *
 * The result is a <directory name='...'> element containing one <file/>
 * element per regular file and one nested <directory> element per
 * subdirectory, in the order returned by scandir().
 *
 * Names are XML-escaped before being placed in attributes, so file or
 * directory names containing an apostrophe, "&" or "<" still produce
 * well-formed XML.
 *
 * @param string $directory_path
 *   Path of the directory to describe.
 * @param string|null $state
 *   Name of the parent directory. When given, the name attribute becomes
 *   "$state/<basename>"; only the immediate parent's name is prefixed.
 *
 * @return string
 *   The XML fragment for this directory.
 */
function directoryXML($directory_path, $state = NULL) {
    // basenames to exclude.
    $exclude_array = array('..', '.DS_Store', 'Thumbs.db', '.');
    $escape_flags = ENT_QUOTES | ENT_XML1 | ENT_SUBSTITUTE;

    $dir_name = basename($directory_path);
    $name_attribute = is_null($state) ? $dir_name : $state . "/" . $dir_name;
    $xml = "<directory name='" . htmlspecialchars($name_attribute, $escape_flags, 'UTF-8') . "'>";

    // An unreadable directory is emitted as an empty element.
    $entries = scandir($directory_path);
    if ($entries === FALSE) {
        $entries = array();
    }

    foreach ($entries as $entry) {
        if (in_array($entry, $exclude_array, TRUE)) {
            continue;
        }
        $entry_path = $directory_path . DIRECTORY_SEPARATOR . $entry;
        if (is_file($entry_path)) {
            $xml .= "<file name='" . htmlspecialchars($entry, $escape_flags, 'UTF-8') . "' />";
        }
        elseif (is_dir($entry_path)) {
            // Children are labelled with this directory's name as prefix.
            $xml .= directoryXML($entry_path, $dir_name);
        }
    }

    $xml .= "</directory>";
    return $xml;
}

/**
 * Builds the pretty-printed tree XML document for one compound object.
 *
 * The directory listing produced by directoryXML() is wrapped in a <tree>
 * root element, parsed into a DOMDocument, and serialised again with output
 * formatting enabled.
 *
 * @param string $dir_path
 *   Path of the compound object's directory.
 *
 * @return string|false
 *   The result of DOMDocument::saveXML() for the parsed document.
 */
function compoundObjectStructureXML($dir_path) {
    $document = new DOMDocument("1.0");
    $document->loadXML("<tree>" . directoryXML($dir_path) . "</tree>");
    $document->formatOutput = TRUE;

    return $document->saveXML();
}
44 changes: 44 additions & 0 deletions extras/scripts/shutdownhooks/tree_to_compound_object.xsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:php="http://php.net/xsl" xsl:extension-element-prefixes="php" exclude-result-prefixes="xsl xsi php">

<!-- XSLT stylesheet to convert the output of the 'tree' utility (or equivalent)
into XML that represents the structure of an Islandora compound object. Called
by the create_structure_files.php script that is part of the Islandora Compound
Batch module. The calling script must register PHP functions on the XSLT
processor, because the title attribute below is filled in by calling
php:function('get_dir_name'). -->

<xsl:output method="xml" encoding="utf-8" indent="yes"/>
<xsl:strip-space elements="*"/>

<!-- Top-level directory (a 'directory' element that is a direct child of 'tree'):
emits an explanatory comment followed by the 'islandora_compound_object' root
element, whose title comes from the PHP callback, and then processes the
directory's children. -->
<xsl:template match="tree/directory">
<xsl:comment>Islandora compound structure file used by the Compound Batch module. On batch ingest,
'islandora_compound_object' elements become compound objects, and 'child' elements become their
children. Files in directories named in child elements' 'content' attribute will be added as their
datastreams. If 'islandora_compound_object' elements do not contain a MODS.xml file, the value of
the 'title' attribute will be used as the parent's title/label.</xsl:comment>

<islandora_compound_object >
<xsl:attribute name="title">
<xsl:value-of select="php:function('get_dir_name')" />
</xsl:attribute>
<xsl:apply-templates/>
</islandora_compound_object>
</xsl:template>

<!-- We aren't interested in these nodes, so apply an empty template to them. -->
<xsl:template match="report|directories|files"/>

<!-- Any other directory: when it contains more than one 'file' element it is
emitted as an empty 'child' element whose 'content' attribute is the directory's
name, and anything produced from its own children follows as siblings of that
element. Otherwise it is emitted as a 'parent' element, titled with the
directory's name, wrapping whatever its children produce.
NOTE(review): a directory holding exactly one file takes the 'otherwise' branch;
confirm that "more than one" rather than "at least one" is intended. -->
<xsl:template match="directory">
<xsl:choose>
<xsl:when test="count(file) > 1">
<child content="{./@name}"/>
<xsl:apply-templates/>
</xsl:when>
<xsl:otherwise>
<parent title="{./@name}">
<xsl:apply-templates/>
</parent>
</xsl:otherwise>
</xsl:choose>
</xsl:template>

</xsl:stylesheet>

0 comments on commit b3027ed

Please sign in to comment.