From a7d2d684da8fe2569e0ca831841edd86c655eff4 Mon Sep 17 00:00:00 2001 From: jazairi <16103405+jazairi@users.noreply.github.com> Date: Thu, 16 Oct 2025 16:52:17 -0400 Subject: [PATCH] Refactor how TIMDEX records are normalized Why these changes are being introduced: Prior to USE, TIMDEX records were largely normalized in the view layer. While not ideal, this made some sense given that TIMDEX was the sole source we were working with in the UI. Now that we have introduced Primo results in USE, we should normalize TIMDEX records similarly, such that the two share a similar data structure. Relevant ticket(s): * [USE-73](https://mitlibraries.atlassian.net/browse/USE-73) How this addresses that need: This introduces NormalizeTimdexResults and NormalizeTimdexRecord models to parallel the Primo models. The normalization models share a structure to the extent that it is meaningful. Source-specific fields that will be used in the application are indicated as such. Side effects of this change: * Modifications to various parts of the view layer were necessary to retrofit this change. * In some cases, we are only mapping the necessary data. (E.g., TIMDEX links array only includes source link.) This is subject to change as we learn more about UX requirements. * We are still using separate result partials for TIMDEX and Primo, until we have a better sense of how different the requirements are for those sources. * The Primo result partial has been minimized to more closely resemble the TIMDEX partial. This is also subject to change based on UX requirements. 
--- app/controllers/search_controller.rb | 14 +- app/helpers/search_helper.rb | 4 +- app/models/normalize_primo_record.rb | 12 +- app/models/normalize_timdex_record.rb | 142 ++++++++++++ app/models/normalize_timdex_results.rb | 15 ++ app/views/search/_result.html.erb | 4 +- app/views/search/_result_geo.html.erb | 18 +- app/views/search/_result_primo.html.erb | 65 ++---- app/views/search/results.html.erb | 9 +- app/views/search/results_geo.html.erb | 2 +- app/views/shared/_geo_data_info.html.erb | 2 +- test/fixtures/timdex/full_record.json | 32 +++ test/fixtures/timdex/minimal_record.json | 5 + test/helpers/search_helper_test.rb | 8 +- test/models/normalize_timdex_record_test.rb | 221 +++++++++++++++++++ test/models/normalize_timdex_results_test.rb | 88 ++++++++ 16 files changed, 560 insertions(+), 81 deletions(-) create mode 100644 app/models/normalize_timdex_record.rb create mode 100644 app/models/normalize_timdex_results.rb create mode 100644 test/fixtures/timdex/full_record.json create mode 100644 test/fixtures/timdex/minimal_record.json create mode 100644 test/models/normalize_timdex_record_test.rb create mode 100644 test/models/normalize_timdex_results_test.rb diff --git a/app/controllers/search_controller.rb b/app/controllers/search_controller.rb index b8afd2fd..6de03e29 100644 --- a/app/controllers/search_controller.rb +++ b/app/controllers/search_controller.rb @@ -47,8 +47,11 @@ def load_gdt_results # Handle errors @errors = extract_errors(response) - @pagination = Analyzer.new(@enhanced_query, response).pagination if @errors.nil? - @results = extract_results(response) + return unless @errors.nil? 
+ + @pagination = Analyzer.new(@enhanced_query, response).pagination + raw_results = extract_results(response) + @results = NormalizeTimdexResults.new(raw_results, @enhanced_query[:q]).normalize @filters = extract_filters(response) end @@ -77,8 +80,11 @@ def load_timdex_results response = query_timdex(query) @errors = extract_errors(response) - @pagination = Analyzer.new(@enhanced_query, response).pagination if @errors.nil? - @results = extract_results(response) + return unless @errors.nil? + + @pagination = Analyzer.new(@enhanced_query, response).pagination + raw_results = extract_results(response) + @results = NormalizeTimdexResults.new(raw_results, @enhanced_query[:q]).normalize end def active_filters diff --git a/app/helpers/search_helper.rb b/app/helpers/search_helper.rb index 7fa94080..335aec27 100644 --- a/app/helpers/search_helper.rb +++ b/app/helpers/search_helper.rb @@ -15,9 +15,9 @@ def format_highlight_label(field_name) end def view_online(result) - return unless result['sourceLink'].present? + return unless result['source_link'].present? - link_to 'View online', result['sourceLink'], class: 'button button-primary' + link_to 'View online', result['source_link'], class: 'button button-primary' end def view_record(record_id) diff --git a/app/models/normalize_primo_record.rb b/app/models/normalize_primo_record.rb index bd1cedfe..a7b016a2 100644 --- a/app/models/normalize_primo_record.rb +++ b/app/models/normalize_primo_record.rb @@ -1,4 +1,4 @@ -# Transforms a PNX doc from Primo Search API into a normalized record. +# Transforms a Primo Search API result into a normalized record. 
class NormalizePrimoRecord def initialize(record, query) @record = record @@ -7,6 +7,7 @@ def initialize(record, query) def normalize { + # Core fields 'title' => title, 'creators' => creators, 'source' => source, @@ -14,15 +15,16 @@ def normalize 'format' => format, 'links' => links, 'citation' => citation, - 'container' => container_title, 'identifier' => record_id, 'summary' => summary, - 'numbering' => numbering, - 'chapter_numbering' => chapter_numbering, - 'thumbnail' => thumbnail, 'publisher' => publisher, 'location' => best_location, 'subjects' => subjects, + # Primo-specific fields + 'container' => container_title, + 'numbering' => numbering, + 'chapter_numbering' => chapter_numbering, + 'thumbnail' => thumbnail, 'availability' => best_availability, 'other_availability' => other_availability? } diff --git a/app/models/normalize_timdex_record.rb b/app/models/normalize_timdex_record.rb new file mode 100644 index 00000000..cf17efd5 --- /dev/null +++ b/app/models/normalize_timdex_record.rb @@ -0,0 +1,142 @@ +# Transforms a TIMDEX result into a normalized record. 
+class NormalizeTimdexRecord
+  def initialize(record, query)
+    @record = record
+    @query = query
+  end
+
+  def normalize
+    {
+      # Core fields
+      'title' => title,
+      'creators' => creators,
+      'source' => source,
+      'year' => year,
+      'format' => format,
+      'links' => links,
+      'citation' => citation,
+      'identifier' => identifier,
+      'summary' => summary,
+      'publisher' => publisher,
+      'location' => location,
+      'subjects' => subjects,
+      # TIMDEX-specific fields
+      'content_type' => content_type,
+      'dates' => dates,
+      'contributors' => contributors,
+      'highlight' => highlight,
+      'source_link' => source_link
+    }
+  end
+
+  private
+
+  def title
+    @record['title'] || 'Unknown title'
+  end
+
+  def creators
+    return [] unless @record['contributors']
+
+    # Convert TIMDEX contributors to Primo-style creators format
+    @record['contributors']
+      .select { |c| %w[Creator Author].include?(c['kind']) }
+      .map { |creator| { 'value' => creator['value'], 'link' => nil } }
+  end
+
+  def source
+    return 'Unknown source' unless @record['source']
+
+    @record['source']
+  end
+
+  def year
+    # Extract a four-digit year from dates; returns nil when no year can be found
+    return nil unless @record['dates']
+
+    pub_date = @record['dates'].find { |date| date['kind'] == 'Publication date' }
+    return pub_date['value']&.match(/\d{4}/)&.to_s if pub_date
+
+    # Fallback to the first date with a year (nil, not the dates array, when none match)
+    years = @record['dates'].filter_map do |date|
+      date['value']&.match(/\d{4}/)&.to_s
+    end
+    years.first
+  end
+
+  def format
+    return '' unless @record['contentType']
+
+    @record['contentType'].map { |type| type['value'] }.join(' ; ')
+  end
+
+  def links
+    links = []
+
+    # Add source link if available
+    if @record['sourceLink']
+      links << {
+        'kind' => 'full record',
+        'url' => @record['sourceLink'],
+        'text' => 'View full record'
+      }
+    end
+
+    links
+  end
+
+  def citation
+    @record['citation']
+  end
+
+  def summary
+    return nil unless @record['summary']
+
+    @record['summary'].is_a?(Array) ? @record['summary'].join(' ') : @record['summary']
+  end
+
+  def publisher
+    # Use the value of the first contributor whose kind is 'Publisher'
+    return nil unless @record['contributors']
+
+    publisher = @record['contributors'].find { |c| c['kind'] == 'Publisher' }
+    publisher&.dig('value')
+  end
+
+  def location
+    return nil unless @record['locations']
+
+    @record['locations'].map { |loc| loc['value'] }.compact.join('; ')
+  end
+
+  def subjects
+    return [] unless @record['subjects']
+
+    @record['subjects'].map { |subject| subject['value'] }
+  end
+
+  def identifier
+    @record['timdexRecordId']
+  end
+
+  # TIMDEX-specific methods
+  def content_type
+    @record['contentType']
+  end
+
+  def dates
+    @record['dates']
+  end
+
+  def contributors
+    @record['contributors']
+  end
+
+  def highlight
+    @record['highlight']
+  end
+
+  def source_link
+    @record['sourceLink']
+  end
+end
diff --git a/app/models/normalize_timdex_results.rb b/app/models/normalize_timdex_results.rb
new file mode 100644
index 00000000..1dd94fb6
--- /dev/null
+++ b/app/models/normalize_timdex_results.rb
@@ -0,0 +1,15 @@
+# Batch normalization for TIMDEX API results
+class NormalizeTimdexResults
+  def initialize(results, query)
+    @results = results
+    @query = query
+  end
+
+  def normalize
+    return [] unless @results.is_a?(Array)
+
+    @results.filter_map do |doc|
+      NormalizeTimdexRecord.new(doc, @query).normalize
+    end
+  end
+end
diff --git a/app/views/search/_result.html.erb b/app/views/search/_result.html.erb
index da115f61..fad44d90 100644
--- a/app/views/search/_result.html.erb
+++ b/app/views/search/_result.html.erb
@@ -1,11 +1,11 @@
- <%= result['contentType']&.each { |type| type['value'] }&.join(' ; ') %>
+ <%= result['content_type']&.each { |type| type['value'] }&.join(' ; ') %>
<% result['dates']&.each do |date| %>
<%= date['value'] if date['kind'] == 'Publication date' %>
diff --git a/app/views/search/_result_geo.html.erb b/app/views/search/_result_geo.html.erb
index a06f9350..4f543366 100644
--- a/app/views/search/_result_geo.html.erb
+++ b/app/views/search/_result_geo.html.erb
@@ -1,34 +1,38 @@
- Summary: <%= result_geo['summary'].join(' ') %>
+ Summary: <%= result_geo['summary'] %>
- <%= result_primo['format'] %>
- <%= result_primo['year'] %>
+ <%= result['format'] %>
+ <%= result['year'] %>
- Published in:
- <%= result_primo['container'] %>
-
- Citation:
- <%= result_primo['citation'] %>
-
- Summary:
- <%= truncate(result_primo['summary'], length: 300) %>
-
- Subjects:
- <%= result_primo['subjects'].join('; ') %>
-
- Availability:
- <%= result_primo['availability'] %>
-
- Title: <%= link_to(result_geo['title'], record_path(result_geo['timdexRecordId'])) %>
+ Title: <%= link_to(result_geo['title'], record_path(result_geo['identifier'])) %>
- <%= render partial: 'shared/contributors', locals: { contributors: result_geo['contributors'] } %>
+
+ <% contributors = result_geo['creators'].present? ? result_geo['creators'] : result_geo['contributors'] %>
+ <% if contributors %>
+ <%= render partial: 'shared/contributors', locals: { contributors: contributors } %>
+ <% end %>
<% end %>
- <% if result_geo['summary'] %>
+ <% if result_geo['summary'].present? %>
Title:
- <% if result_primo['links']&.find { |link| link['kind'] == 'full record' } %>
- <%= link_to(result_primo['title'], result_primo['links'].find { |link| link['kind'] == 'full record' }['url']) %>
+ <% if result['links']&.find { |link| link['kind'] == 'full record' } %>
+ <%= link_to(result['title'], result['links'].find { |link| link['kind'] == 'full record' }['url']) %>
<% else %>
- <%= result_primo['title'] %>
+ <%= result['title'] %>
<% end %>
- <% result_primo['creators'].each do |creator| %>
+ <% result['creators'].each do |creator| %>
<% end %>
- <% if result_primo['container'].present? %>
-
- <% result_primo['links'].each do |link| %>
-
<%= results_summary(@pagination[:hits]) %> returned
- <% if @active_tab == 'primo' %>
- <%= render(partial: 'search/result_primo', collection: @results) %>
- <% else %>
- <%= render(partial: 'search/result', collection: @results) %>
+ <% case @active_tab %>
+ <% when 'primo' %>
+ <%= render(partial: 'search/result_primo', collection: @results, as: :result) %>
+ <% when 'timdex' %>
+ <%= render(partial: 'search/result', collection: @results, as: :result) %>
<% end %>
<% elsif @errors.blank? %>
diff --git a/app/views/search/results_geo.html.erb b/app/views/search/results_geo.html.erb
index 6a7a6f74..812d686c 100644
--- a/app/views/search/results_geo.html.erb
+++ b/app/views/search/results_geo.html.erb
@@ -46,7 +46,7 @@
<% if @results.present? && @errors.blank? %>
<%= results_summary(@pagination[:hits]) %> returned
- <%= render(partial: 'search/result_geo', collection: @results) %>
+ <%= render(partial: 'search/result_geo', collection: @results, as: :result_geo) %>
<% elsif @errors.blank? %>