Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add hOCR functionality #1006

Merged
merged 13 commits into from
Apr 19, 2024
12 changes: 12 additions & 0 deletions modules/islandora_iiif/config/schema/islandora_iiif.schema.yml
alxp marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,17 @@ views.style.iiif_manifest:
mapping:
iiif_tile_field:
type: sequence
label: "Tile source field(s)"
sequence:
type: string
iiif_ocr_file_field:
type: sequence
label: "Structured OCR data file field"
sequence:
type: string
structured_text_term:
type: string
label: "Structured text term"
search_endpoint:
type: string
label: "Search endpoint path"
122 changes: 106 additions & 16 deletions modules/islandora_iiif/src/Plugin/views/style/IIIFManifest.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
use Drupal\Core\Form\FormStateInterface;
use Drupal\Core\Messenger\MessengerInterface;
use Drupal\Core\Url;
use Drupal\islandora\IslandoraUtils;
use Drupal\taxonomy\TermInterface;
use Drupal\views\Plugin\views\style\StylePluginBase;
use Drupal\views\ResultRow;
use GuzzleHttp\Client;
Expand All @@ -35,6 +37,13 @@
*/
class IIIFManifest extends StylePluginBase {

/**
* Islandora utility functions.
*
* @var \Drupal\islandora\IslandoraUtils
*/
protected $utils;

/**
* {@inheritdoc}
*/
Expand Down Expand Up @@ -111,7 +120,7 @@ class IIIFManifest extends StylePluginBase {
/**
* {@inheritdoc}
*/
public function __construct(array $configuration, $plugin_id, $plugin_definition, SerializerInterface $serializer, Request $request, ImmutableConfig $iiif_config, EntityTypeManagerInterface $entity_type_manager, FileSystemInterface $file_system, Client $http_client, MessengerInterface $messenger, ModuleHandlerInterface $moduleHandler) {
public function __construct(array $configuration, $plugin_id, $plugin_definition, SerializerInterface $serializer, Request $request, ImmutableConfig $iiif_config, EntityTypeManagerInterface $entity_type_manager, FileSystemInterface $file_system, Client $http_client, MessengerInterface $messenger, ModuleHandlerInterface $moduleHandler, IslandoraUtils $utils) {
parent::__construct($configuration, $plugin_id, $plugin_definition);

$this->serializer = $serializer;
Expand All @@ -121,6 +130,7 @@ public function __construct(array $configuration, $plugin_id, $plugin_definition
$this->fileSystem = $file_system;
$this->httpClient = $http_client;
$this->messenger = $messenger;
$this->utils = $utils;
$this->moduleHandler = $moduleHandler;
}

Expand All @@ -139,7 +149,8 @@ public static function create(ContainerInterface $container, array $configuratio
$container->get('file_system'),
$container->get('http_client'),
$container->get('messenger'),
$container->get('module_handler')
$container->get('module_handler'),
$container->get('islandora.utils')
);
}

Expand Down Expand Up @@ -188,6 +199,11 @@ public function render() {
$label = $this->t("IIIF Manifest");
}

/**
* @var \Drupal\taxonomy\TermInterface|null
*/
$structured_text_term = $this->utils->getTermForUri($this->options['structured_text_term_uri']);
alxp marked this conversation as resolved.
Show resolved Hide resolved

// @see https://iiif.io/api/presentation/2.1/#manifest
$json += [
'@type' => 'sc:Manifest',
Expand All @@ -207,7 +223,7 @@ public function render() {
// For each row in the View result.
foreach ($this->view->result as $row) {
// Add the IIIF URL to the image to print out as JSON.
$canvases = $this->getTileSourceFromRow($row, $iiif_address, $iiif_base_id);
$canvases = $this->getTileSourceFromRow($row, $iiif_address, $iiif_base_id, $structured_text_term);
foreach ($canvases as $tile_source) {
$json['sequences'][0]['canvases'][] = $tile_source;
}
Expand All @@ -217,6 +233,9 @@ public function render() {

$content_type = 'json';

// Add a search endpoint if one is defined.
$this->addSearchEndpoint($json, $url_components);

// Give other modules a chance to alter the manifest.
$this->moduleHandler->alter('islandora_iiif_manifest', $json, $this);

Expand All @@ -233,11 +252,13 @@ public function render() {
* @param string $iiif_base_id
* The URL for the request, minus the last part of the URL,
* which is likely "manifest".
* @param \Drupal\taxonomy\TermInterface $structured_text_term
* The term representing the media use.
*
* @return array
* List of IIIF URLs to display in the Openseadragon viewer.
*/
protected function getTileSourceFromRow(ResultRow $row, $iiif_address, $iiif_base_id) {
protected function getTileSourceFromRow(ResultRow $row, $iiif_address, $iiif_base_id, TermInterface $structured_text_term) {
$canvases = [];
foreach (array_filter(array_values($this->options['iiif_tile_field'])) as $iiif_tile_field) {
$viewsField = $this->view->field[$iiif_tile_field];
Expand Down Expand Up @@ -300,7 +321,7 @@ protected function getTileSourceFromRow(ResultRow $row, $iiif_address, $iiif_bas
],
];

if ($ocr_url = $this->getOcrUrl($entity, $row, $i)) {
if ($ocr_url = $this->getOcrUrl($entity, $structured_text_term)) {
$tmp_canvas['seeAlso'] = [
'@id' => $ocr_url,
'format' => 'text/vnd.hocr+html',
Expand Down Expand Up @@ -380,28 +401,36 @@ protected function getCanvasDimensions(string $iiif_url, FieldItemInterface $ima
*
* @param \Drupal\Core\Entity\EntityInterface $entity
* The entity at the current row.
* @param \Drupal\views\ResultRow $row
* Result row.
* @param int $delta
* The delta in case there are multiple canvases on one media.
* @param \Drupal\taxonomy\TermInterface $structured_text_term
* The term representing the media use.
*
* @return string|false
* The absolute URL of the current row's structured text,
* or FALSE if none.
*/
protected function getOcrUrl(EntityInterface $entity, ResultRow $row, $delta) {
protected function getOcrUrl(EntityInterface $entity, TermInterface $structured_text_term) {
$ocr_url = FALSE;
$iiif_ocr_file_field = !empty($this->options['iiif_ocr_file_field']) ? array_filter(array_values($this->options['iiif_ocr_file_field'])) : [];
$ocrField = count($iiif_ocr_file_field) > 0 ? $this->view->field[$iiif_ocr_file_field[0]] : NULL;
if ($ocrField) {
$ocr_entity = $ocrField->getEntity($row);
$ocr_entity = $entity;
$ocr_field_name = $ocrField->definition['field_name'];
if (!is_null($ocr_field_name)) {
$ocrs = $ocr_entity->{$ocr_field_name};
$ocr = isset($ocrs[$delta]) ? $ocrs[$delta] : FALSE;
if ($ocr) {
$ocr_url = $ocr->entity->createFileUrl(FALSE);
}
$ocr = $ocrs[0] ?? FALSE;
$ocr_url = $ocr->entity->createFileUrl(FALSE);
}
}
elseif ($structured_text_term) {
alxp marked this conversation as resolved.
Show resolved Hide resolved
$parent_node = $this->utils->getParentNode($entity);
$ocr_entity_array = $this->utils->getMediaReferencingNodeAndTerm($parent_node, $structured_text_term);
$ocr_entity_id = is_array($ocr_entity_array) ? array_shift($ocr_entity_array) : NULL;
$ocr_entity = $ocr_entity_id ? $this->entityTypeManager->getStorage('media')->load($ocr_entity_id) : NULL;
if ($ocr_entity) {
$ocr_file_source = $ocr_entity->getSource();
$ocr_fid = $ocr_file_source->getSourceFieldValue($ocr_entity);
$ocr_file = $this->entityTypeManager->getStorage('file')->load($ocr_fid);
$ocr_url = $ocr_file->createFileUrl(FALSE);
}
}

Expand Down Expand Up @@ -448,6 +477,29 @@ protected function defineOptions() {
return $options;
}

/**
 * Add the configured search endpoint to the manifest.
 *
 * Appends an IIIF Content Search service entry to $json['service'] when the
 * 'search_endpoint' style option holds a non-empty path. When no endpoint is
 * configured the manifest is left untouched, so viewers are not pointed at a
 * search URL that does not exist.
 *
 * @param array $json
 *   The IIIF manifest, modified in place.
 * @param array $url_components
 *   The components of the request URL. Element 1 is substituted for the
 *   '%node' placeholder in the configured path (presumably the node ID;
 *   confirm against the caller in render()).
 */
protected function addSearchEndpoint(array &$json, array $url_components) {
  $hocr_search_path = trim((string) ($this->options['search_endpoint'] ?? ''));

  // The option is not required on the settings form, so it may be empty.
  if ($hocr_search_path === '') {
    return;
  }

  $url_base = $this->getRequest()->getSchemeAndHttpHost();
  $hocr_search_url = $url_base . '/' . ltrim($hocr_search_path, '/');

  // Fall back to an empty replacement rather than reading an undefined
  // offset when the URL has fewer components than expected.
  $node_component = $url_components[1] ?? '';
  $hocr_search_url = str_replace('%node', $node_component, $hocr_search_url);

  $json['service'][] = [
    "@context" => "http://iiif.io/api/search/0/context.json",
    "@id" => $hocr_search_url,
    "profile" => "http://iiif.io/api/search/0/search",
    // Use the plugin's translation method, as the rest of this class does.
    "label" => $this->t("Search inside this work"),
  ];
}

/**
* {@inheritdoc}
*/
Expand Down Expand Up @@ -504,10 +556,27 @@ public function buildOptionsForm(&$form, FormStateInterface $form_state) {
'#title' => $this->t('Structured OCR data file field'),
'#type' => 'checkboxes',
'#default_value' => $this->options['iiif_ocr_file_field'],
'#description' => $this->t('The source of structured OCR text for each entity.'),
'#description' => $this->t("If the hOCR is a field on the same entity as the image source field above, select it here. If it's found in a related entity via the term below, leave this blank."),
'#options' => $field_options,
'#required' => FALSE,
];

$form['structured_text_term'] = [
'#type' => 'entity_autocomplete',
'#target_type' => 'taxonomy_term',
'#title' => $this->t('Structured OCR text term'),
'#default_value' => $this->utils->getTermForUri($this->options['structured_text_term_uri']),
'#required' => FALSE,
'#description' => $this->t('Term indicating the media that holds structured text, such as hOCR, for the given object. Use this if the text is on a separate media from the tile source.'),
];

$form['search_endpoint'] = [
'#type' => 'textfield',
'#title' => $this->t("Search endpoint path."),
'#description' => $this->t("If there is a search endpoint to search within the book that returns IIIF annotations, put it here. Use %node substitution where needed.<br>E.g., paged-content-search/%node"),
'#default_value' => $this->options['search_endpoint'],
'#required' => FALSE,
];
}

/**
Expand All @@ -520,4 +589,25 @@ public function getFormats() {
return ['json' => 'json'];
}

/**
 * Submit handler for options form.
 *
 * Used to store the structured text media term by URI instead of by term ID.
 * The 'structured_text_term' element is optional, so the term is only looked
 * up when a value was submitted; otherwise an empty URI is stored, which also
 * clears any previously saved value.
 *
 * @param array $form
 *   The form.
 * @param \Drupal\Core\Form\FormStateInterface $form_state
 *   The form state object.
 */
// @codingStandardsIgnoreStart
public function submitOptionsForm(&$form, FormStateInterface $form_state) {
  // @codingStandardsIgnoreEnd
  $style_options = $form_state->getValue('style_options');
  $tid = $style_options['structured_text_term'] ?? NULL;

  // Avoid calling load() with an empty ID when the autocomplete is blank.
  $term = empty($tid)
    ? NULL
    : $this->entityTypeManager->getStorage('taxonomy_term')->load($tid);

  // load() may also return NULL for a stale ID; never pass that to the
  // URI helper, which is expected to receive a term entity.
  $style_options['structured_text_term_uri'] = $term
    ? $this->utils->getUriForTerm($term)
    : '';

  $form_state->setValue('style_options', $style_options);
  parent::submitOptionsForm($form, $form_state);
}

}
Loading