Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add hOCR functionality #1006

Merged
merged 13 commits into from
Apr 19, 2024
12 changes: 12 additions & 0 deletions modules/islandora_iiif/config/schema/islandora_iiif.schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,17 @@ views.style.iiif_manifest:
mapping:
iiif_tile_field:
type: sequence
label: "Tile source field(s)"
sequence:
type: string
iiif_ocr_file_field:
type: sequence
label: "Structured OCR data file field"
sequence:
type: string
structured_text_term_uri:
type: string
label: "Structured text term"
search_endpoint:
type: string
label: "Search endpoint path"
135 changes: 124 additions & 11 deletions modules/islandora_iiif/src/Plugin/views/style/IIIFManifest.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
use Drupal\Core\Form\FormStateInterface;
use Drupal\Core\Messenger\MessengerInterface;
use Drupal\Core\Url;
use Drupal\islandora\IslandoraUtils;
use Drupal\taxonomy\TermInterface;
use Drupal\views\Plugin\views\style\StylePluginBase;
use Drupal\views\ResultRow;
use GuzzleHttp\Client;
Expand All @@ -35,6 +37,13 @@
*/
class IIIFManifest extends StylePluginBase {

/**
* Islandora utility functions.
*
* @var \Drupal\islandora\IslandoraUtils
*/
protected $utils;

/**
* {@inheritdoc}
*/
Expand Down Expand Up @@ -108,10 +117,24 @@ class IIIFManifest extends StylePluginBase {
*/
protected $moduleHandler;

/**
* Memoized structured text term.
*
* @var \Drupal\taxonomy\TermInterface|null
*/
protected ?TermInterface $structuredTextTerm;

/**
* Flag to track if we _have_ attempted a lookup, as the value is nullable.
*
* @var bool
*/
protected bool $structuredTextTermMemoized = FALSE;

/**
* {@inheritdoc}
*/
public function __construct(array $configuration, $plugin_id, $plugin_definition, SerializerInterface $serializer, Request $request, ImmutableConfig $iiif_config, EntityTypeManagerInterface $entity_type_manager, FileSystemInterface $file_system, Client $http_client, MessengerInterface $messenger, ModuleHandlerInterface $moduleHandler) {
public function __construct(array $configuration, $plugin_id, $plugin_definition, SerializerInterface $serializer, Request $request, ImmutableConfig $iiif_config, EntityTypeManagerInterface $entity_type_manager, FileSystemInterface $file_system, Client $http_client, MessengerInterface $messenger, ModuleHandlerInterface $moduleHandler, IslandoraUtils $utils) {
parent::__construct($configuration, $plugin_id, $plugin_definition);

$this->serializer = $serializer;
Expand All @@ -121,6 +144,7 @@ public function __construct(array $configuration, $plugin_id, $plugin_definition
$this->fileSystem = $file_system;
$this->httpClient = $http_client;
$this->messenger = $messenger;
$this->utils = $utils;
$this->moduleHandler = $moduleHandler;
}

Expand All @@ -139,7 +163,8 @@ public static function create(ContainerInterface $container, array $configuratio
$container->get('file_system'),
$container->get('http_client'),
$container->get('messenger'),
$container->get('module_handler')
$container->get('module_handler'),
$container->get('islandora.utils')
);
}

Expand Down Expand Up @@ -217,6 +242,9 @@ public function render() {

$content_type = 'json';

// Add a search endpoint if one is defined.
$this->addSearchEndpoint($json, $url_components);

// Give other modules a chance to alter the manifest.
$this->moduleHandler->alter('islandora_iiif_manifest', $json, $this);

Expand Down Expand Up @@ -300,7 +328,7 @@ protected function getTileSourceFromRow(ResultRow $row, $iiif_address, $iiif_bas
],
];

if ($ocr_url = $this->getOcrUrl($entity, $row, $i)) {
if ($ocr_url = $this->getOcrUrl($entity)) {
$tmp_canvas['seeAlso'] = [
'@id' => $ocr_url,
'format' => 'text/vnd.hocr+html',
Expand Down Expand Up @@ -380,30 +408,38 @@ protected function getCanvasDimensions(string $iiif_url, FieldItemInterface $ima
*
* @param \Drupal\Core\Entity\EntityInterface $entity
* The entity at the current row.
* @param \Drupal\views\ResultRow $row
* Result row.
* @param int $delta
* The delta in case there are multiple canvases on one media.
*
* @return string|false
* The absolute URL of the current row's structured text,
* or FALSE if none.
*/
protected function getOcrUrl(EntityInterface $entity, ResultRow $row, $delta) {
protected function getOcrUrl(EntityInterface $entity) {
$ocr_url = FALSE;
$iiif_ocr_file_field = !empty($this->options['iiif_ocr_file_field']) ? array_filter(array_values($this->options['iiif_ocr_file_field'])) : [];
$ocrField = count($iiif_ocr_file_field) > 0 ? $this->view->field[$iiif_ocr_file_field[0]] : NULL;
if ($ocrField) {
$ocr_entity = $ocrField->getEntity($row);
$ocr_entity = $entity;
$ocr_field_name = $ocrField->definition['field_name'];
if (!is_null($ocr_field_name)) {
$ocrs = $ocr_entity->{$ocr_field_name};
$ocr = isset($ocrs[$delta]) ? $ocrs[$delta] : FALSE;
$ocr = $ocrs[0] ?? FALSE;
if ($ocr) {
$ocr_url = $ocr->entity->createFileUrl(FALSE);
}
}
}
elseif ($structured_text_term = $this->getStructuredTextTerm()) {
$parent_node = $this->utils->getParentNode($entity);
$ocr_entity_array = $this->utils->getMediaReferencingNodeAndTerm($parent_node, $structured_text_term);
$ocr_entity_id = is_array($ocr_entity_array) ? array_shift($ocr_entity_array) : NULL;
$ocr_entity = $ocr_entity_id ? $this->entityTypeManager->getStorage('media')->load($ocr_entity_id) : NULL;
if ($ocr_entity) {
$ocr_file_source = $ocr_entity->getSource();
$ocr_fid = $ocr_file_source->getSourceFieldValue($ocr_entity);
$ocr_file = $this->entityTypeManager->getStorage('file')->load($ocr_fid);
$ocr_url = $ocr_file->createFileUrl(FALSE);
}
}

return $ocr_url;
}
Expand Down Expand Up @@ -448,6 +484,29 @@ protected function defineOptions() {
return $options;
}

/**
* Add the configured search endpoint to the manifest.
*
* @param array $json
* The IIIF manifest.
* @param array $url_components
* The search endpoint URL as array.
*/
protected function addSearchEndpoint(array &$json, array $url_components) {
$url_base = $this->getRequest()->getSchemeAndHttpHost();
$hocr_search_path = $this->options['search_endpoint'];
$hocr_search_url = $url_base . '/' . ltrim($hocr_search_path, '/');

$hocr_search_url = str_replace('%node', $url_components[1], $hocr_search_url);

$json['service'][] = [
"@context" => "http://iiif.io/api/search/0/context.json",
"@id" => $hocr_search_url,
"profile" => "http://iiif.io/api/search/0/search",
"label" => t("Search inside this work"),
];
}

/**
* {@inheritdoc}
*/
Expand Down Expand Up @@ -504,10 +563,27 @@ public function buildOptionsForm(&$form, FormStateInterface $form_state) {
'#title' => $this->t('Structured OCR data file field'),
'#type' => 'checkboxes',
'#default_value' => $this->options['iiif_ocr_file_field'],
'#description' => $this->t('The source of structured OCR text for each entity.'),
'#description' => $this->t("If the hOCR is a field on the same entity as the image source field above, select it here. If it's found in a related entity via the term below, leave this blank."),
'#options' => $field_options,
'#required' => FALSE,
];

$form['structured_text_term'] = [
'#type' => 'entity_autocomplete',
'#target_type' => 'taxonomy_term',
'#title' => $this->t('Structured OCR text term'),
'#default_value' => $this->getStructuredTextTerm(),
'#required' => FALSE,
'#description' => $this->t('Term indicating the media that holds structured text, such as hOCR, for the given object. Use this if the text is on a separate media from the tile source.'),
];

$form['search_endpoint'] = [
'#type' => 'textfield',
'#title' => $this->t("Search endpoint path."),
'#description' => $this->t("If there is a search endpoint to search within the book that returns IIIF annotations, put it here. Use %node substitution where needed.<br>E.g., paged-content-search/%node"),
'#default_value' => $this->options['search_endpoint'],
'#required' => FALSE,
];
}

/**
Expand All @@ -520,4 +596,41 @@ public function getFormats() {
return ['json' => 'json'];
}

/**
* Submit handler for options form.
*
* Used to store the structured text media term by URL instead of Ttid.
*
* @param array $form
* The form.
* @param \Drupal\Core\Form\FormStateInterface $form_state
* The form state object.
*/
// @codingStandardsIgnoreStart
public function submitOptionsForm(&$form, FormStateInterface $form_state) {
// @codingStandardsIgnoreEnd
$style_options = $form_state->getValue('style_options');
$tid = $style_options['structured_text_term'];
unset($style_options['structured_text_term']);
$term = $this->entityTypeManager->getStorage('taxonomy_term')->load($tid);
joecorall marked this conversation as resolved.
Show resolved Hide resolved
$style_options['structured_text_term_uri'] = $this->utils->getUriForTerm($term);
$form_state->setValue('style_options', $style_options);
parent::submitOptionsForm($form, $form_state);
}

/**
* Get the structured text term.
*
* @return \Drupal\taxonomy\TermInterface|null
* The term if it could be found; otherwise, NULL.
*/
protected function getStructuredTextTerm() : ?TermInterface {
if (!$this->structuredTextTermMemoized) {
$this->structuredTextTermMemoized = TRUE;
$this->structuredTextTerm = $this->utils->getTermForUri($this->options['structured_text_term_uri']);
}

return $this->structuredTextTerm;
}

}
Loading