Skip to content

Commit

Permalink
Work on #7.
Browse files Browse the repository at this point in the history
  • Loading branch information
mjordan committed Oct 24, 2017
1 parent fa7aecd commit 877ea5f
Show file tree
Hide file tree
Showing 3 changed files with 192 additions and 159 deletions.
15 changes: 15 additions & 0 deletions modules/islandora_doi_datacite/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,21 @@ This module provides the option of using an object's PID as the DOI suffix or us

There is also an option to use both the object's DC.creator and DC.contributor values to populate DataCite's required 'creator' element. Because of the way that the Library of Congress' MODS-to-DC stylesheet generates DC datastreams, many Islandora objects' DC datastreams contain 'contributor' elements rather than 'creator' elements. Enabling this option will reduce the number of validation failures based on the lack of values for the DataCite 'creator' element.

## Assigning DataCite DOIs from a list of PIDs

This module includes a drush script that can assign DOIs from a list of PIDs. The script provides two commands, `islandora_doi_datacite_assign_dois_preflight` and `islandora_doi_datacite_assign_dois`.

The preflight command checks each object identified in the PID file to confirm that its DC datastream contains the values required by the DataCite metadata schema, specifically dc.title, dc.creator, and dc.publisher. It also checks the dc.date field for a YYYY year. Running the command produces two output files, named after the PID file with `.passed` and `.errors` appended. The 'passed' file contains the PIDs of objects that had all the required values, and the 'errors' file contains a log of the missing elements in each object. For example:

```
drush -u 1 islandora_doi_datacite_assign_dois_preflight --pid_file=/tmp/dois.pids
```

The `islandora_doi_datacite_assign_dois` command assigns DOIs to each object listed in the PID file, skipping any objects that lack the required DC values. It requires the `--resource_type` option, whose value must be from the list above. For example:

```
drush -u 1 islandora_doi_datacite_assign_dois --pid_file=/tmp/dois.pids --resource_type=Text
```

## Maintainer

Expand Down
150 changes: 150 additions & 0 deletions modules/islandora_doi_datacite/includes/utilities.inc
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,126 @@ function islandora_doi_datacite_post_new($pid, $form, $form_state) {
}
}

/**
 * Registers a DOI and associated metadata with DataCite via drush.
 *
 * Also checks to make sure that the source DC datastream
 * contains the elements required by the DataCite metadata
 * schema.
 *
 * The resource's metadata must be registered via the DataCite MDS API
 * first, then its URL. See https://datacite.readme.io/docs/mds-2 for
 * additional info.
 *
 * The DOI is built from the configured prefix plus either the object's PID
 * or a generated UUID, depending on the
 * 'islandora_doi_datacite_suffix_source' variable. Any other value of that
 * variable is treated as a configuration error: it is logged and FALSE is
 * returned without contacting DataCite.
 *
 * @param string $pid
 *   The PID of the object.
 * @param array $form_state
 *   An array containing values in the same structure as the
 *   islandora_doi_framework_manage_doi form.
 *
 * @return string|bool
 *   The DOI if the request was successful, FALSE if not.
 */
function islandora_doi_datacite_post_new_via_drush($pid, $form_state) {
  $symbol = variable_get('islandora_doi_datacite_username', 'CISTI.FOO');
  $password = variable_get('islandora_doi_datacite_password', '');
  $suffix_source = variable_get('islandora_doi_datacite_suffix_source', 'pid');
  $prefix = variable_get('islandora_doi_datacite_prefix', '10.99999/');

  if ($suffix_source == 'pid') {
    $doi = trim($prefix . $pid);
  }
  elseif ($suffix_source == 'uuid') {
    $doi = trim($prefix . islandora_doi_datacite_get_uuid());
  }
  else {
    // Without a recognized suffix source there is no DOI to register, so
    // bail out instead of sending an undefined DOI to DataCite.
    watchdog(
      'islandora_doi_datacite',
      'Unknown DOI suffix source "!source", so cannot mint DOI for object !pid',
      array(
        '!source' => $suffix_source,
        '!pid' => $pid,
      ),
      WATCHDOG_ERROR
    );
    drupal_set_message(t("Can't mint DOI for object !pid. See the Drupal log for more information.", array('!pid' => $pid)), 'error');
    return FALSE;
  }

  // Both API calls share the same base URL and HTTP Basic credentials.
  $api_base_url = variable_get('islandora_doi_datacite_api_url', 'https://mds.datacite.org/');
  $authorization = 'Basic ' . base64_encode($symbol . ':' . $password);

  // POST the metadata first. This is a requirement of the API.
  // The DOI for the resource must be encoded in the XML in the
  // <identifier identifierType="DOI"> element.
  $metadata_xml = islandora_doi_datacite_generate_metadata($pid, $doi, $form_state);

  $response = drupal_http_request($api_base_url . 'metadata', array(
    'headers' => array(
      'Content-Type' => 'application/xml;charset=UTF-8',
      'Authorization' => $authorization,
    ),
    'method' => 'POST',
    'data' => $metadata_xml,
  ));
  if ($response->code != 201) {
    // When the connection itself fails, drupal_http_request() populates
    // 'error' rather than 'status_message', and 'data' may be absent.
    $message = isset($response->status_message) ? $response->status_message : (isset($response->error) ? $response->error : '');
    if ($response->code == '400' && isset($response->data)) {
      // The DataCite API provides specific error messages in the response body.
      $message .= ': ' . $response->data;
    }
    watchdog(
      'islandora_doi_datacite',
      'Error registering metadata for object !pid, so cannot mint DOI: !code, !message',
      array(
        '!pid' => $pid,
        '!code' => $response->code,
        '!message' => $message,
      ),
      WATCHDOG_ERROR
    );
    drupal_set_message(t("Can't post DataCite metadata for object !pid. See the Drupal log for more information.", array('!pid' => $pid)), 'error');
    return FALSE;
  }

  // Then POST the resource's URL to mint the DOI.
  global $base_url;
  $url = $base_url . '/islandora/object/' . $pid;
  $response = drupal_http_request($api_base_url . 'doi', array(
    'headers' => array(
      'Content-Type' => 'text/plain;charset=UTF-8',
      'Authorization' => $authorization,
    ),
    'method' => 'POST',
    'data' => 'doi=' . $doi . PHP_EOL . 'url=' . $url,
  ));
  if ($response->code == 201) {
    drupal_set_message(t('DOI !doi successfully minted for object !pid', array('!doi' => $doi, '!pid' => $pid)));
    return $doi;
  }

  // Same defensive handling as above for transport-level failures.
  $message = isset($response->status_message) ? $response->status_message : (isset($response->error) ? $response->error : '');
  if ($response->code == '400' && isset($response->data)) {
    // The DataCite API provides specific error messages in the response body.
    $message .= ': ' . $response->data;
  }
  watchdog(
    'islandora_doi_datacite',
    'Error minting DOI !doi for object !pid: !code, !message',
    array(
      '!doi' => $doi,
      '!pid' => $pid,
      '!code' => $response->code,
      '!message' => $message,
    ),
    WATCHDOG_ERROR
  );
  drupal_set_message(t("Can't mint DOI for object !pid. See the Drupal log for more information.", array('!pid' => $pid)), 'error');
  return FALSE;
}

/**
* Updates an object's DOI with updated metadata and/or URL.
*
* @param string $pid
* The object's PID.
* @param string $doi
* The object's DOI.
* @param array $form_state
* The form state of islandora_doi_framework_manage_doi form on submission.
*
* @return bool
* TRUE if the DOI was updated, FALSE if not.
Expand Down Expand Up @@ -218,6 +331,8 @@ function islandora_doi_datacite_update_doi($pid, $doi, $form_state) {
* The PID of the object to generate the metadata for.
* @param string $doi
* The DOI of the object to generate the metadata for.
* @param array $form_state
* The form state of islandora_doi_framework_manage_doi form on submission.
*
* @return string
* The DataCite metadata XML.
Expand Down Expand Up @@ -267,6 +382,41 @@ function islandora_doi_datacite_get_dc_values($xml) {
return $dc_values;
}

/**
 * Checks for required values in the object's DC datastream.
 *
 * Each required element (title, creator, publisher, date) is reported at
 * most once: as 'missing' if the element has no entry in $dc_values, as
 * 'empty' if its first value is absent or a zero-length string, and, for
 * date only, as lacking a YYYY year if its first value contains no run of
 * four digits.
 *
 * @param array $dc_values
 *   An associative array containing the object's DC values. The first
 *   value of each element is read from index 0.
 *
 * @return array
 *   An array of arrays, each of which has as its first element
 *   a DC element name and as its second element an error message.
 *   Empty if all required values are present.
 */
function islandora_doi_datacite_check_required_values($dc_values) {
  // We don't include type because the user supplies it on the command line.
  $required_elements = array('title', 'creator', 'publisher', 'date');
  $ret = array();
  foreach ($required_elements as $element) {
    if (!array_key_exists($element, $dc_values)) {
      $ret[] = array(
        $element,
        t('missing'),
      );
      continue;
    }

    // Guard the [0] lookup so an element with no values is reported as
    // empty rather than triggering an undefined-offset notice.
    $first_value = isset($dc_values[$element][0]) ? $dc_values[$element][0] : '';
    if (strlen($first_value) == 0) {
      $ret[] = array(
        $element,
        t('empty'),
      );
      continue;
    }

    // Parse out a date to see if we can get a yyyy value from it. Only done
    // when a date value exists, so a missing or empty date is not reported
    // a second time.
    if ($element == 'date' && !preg_match('/(\d\d\d\d)/', $first_value)) {
      $ret[] = array('date', t('does not have a YYYY year'));
    }
  }
  return $ret;
}

/**
* Very lazy generation of a UUID.
*
Expand Down
Loading

0 comments on commit 877ea5f

Please sign in to comment.