From a7d2d684da8fe2569e0ca831841edd86c655eff4 Mon Sep 17 00:00:00 2001 From: jazairi <16103405+jazairi@users.noreply.github.com> Date: Thu, 16 Oct 2025 16:52:17 -0400 Subject: [PATCH] Refactor how TIMDEX records are normalized Why these changes are being introduced: Prior to USE, TIMDEX records were largely normalized in the view layer. While not ideal, this made some sense given that TIMDEX was the sole source we were working with in the UI. Now that we have introduced Primo results in USE, we should normalize TIMDEX records similarly, such that the two share a similar data structure. Relevant ticket(s): * [USE-73](https://mitlibraries.atlassian.net/browse/USE-73) How this addresses that need: This introduces NormalizeTimdexResults and NormalizeTimdexRecord models to parallel the Primo models. The normalization models share a structure to the extent that it is meaningful. Source-specific fields that will be used in the application are indicated as such. Side effects of this change: * Modifications to various parts of the view layer were necessary to retrofit this change. * In some cases, we are only mapping the necessary data. (E.g., TIMDEX links array only includes source link.) This is subject to change as we learn more about UX requirements. * We are still using separate result partials for TIMDEX and Primo, until we have a better sense of how different the requirements are for those sources. * The Primo result partial has been minimized to more closely resemble the TIMDEX partial. This is also subject to change based on UX requirements. 
--- app/controllers/search_controller.rb | 14 +- app/helpers/search_helper.rb | 4 +- app/models/normalize_primo_record.rb | 12 +- app/models/normalize_timdex_record.rb | 142 ++++++++++++ app/models/normalize_timdex_results.rb | 15 ++ app/views/search/_result.html.erb | 4 +- app/views/search/_result_geo.html.erb | 18 +- app/views/search/_result_primo.html.erb | 65 ++---- app/views/search/results.html.erb | 9 +- app/views/search/results_geo.html.erb | 2 +- app/views/shared/_geo_data_info.html.erb | 2 +- test/fixtures/timdex/full_record.json | 32 +++ test/fixtures/timdex/minimal_record.json | 5 + test/helpers/search_helper_test.rb | 8 +- test/models/normalize_timdex_record_test.rb | 221 +++++++++++++++++++ test/models/normalize_timdex_results_test.rb | 88 ++++++++ 16 files changed, 560 insertions(+), 81 deletions(-) create mode 100644 app/models/normalize_timdex_record.rb create mode 100644 app/models/normalize_timdex_results.rb create mode 100644 test/fixtures/timdex/full_record.json create mode 100644 test/fixtures/timdex/minimal_record.json create mode 100644 test/models/normalize_timdex_record_test.rb create mode 100644 test/models/normalize_timdex_results_test.rb diff --git a/app/controllers/search_controller.rb b/app/controllers/search_controller.rb index b8afd2fd..6de03e29 100644 --- a/app/controllers/search_controller.rb +++ b/app/controllers/search_controller.rb @@ -47,8 +47,11 @@ def load_gdt_results # Handle errors @errors = extract_errors(response) - @pagination = Analyzer.new(@enhanced_query, response).pagination if @errors.nil? - @results = extract_results(response) + return unless @errors.nil? 
+ + @pagination = Analyzer.new(@enhanced_query, response).pagination + raw_results = extract_results(response) + @results = NormalizeTimdexResults.new(raw_results, @enhanced_query[:q]).normalize @filters = extract_filters(response) end @@ -77,8 +80,11 @@ def load_timdex_results response = query_timdex(query) @errors = extract_errors(response) - @pagination = Analyzer.new(@enhanced_query, response).pagination if @errors.nil? - @results = extract_results(response) + return unless @errors.nil? + + @pagination = Analyzer.new(@enhanced_query, response).pagination + raw_results = extract_results(response) + @results = NormalizeTimdexResults.new(raw_results, @enhanced_query[:q]).normalize end def active_filters diff --git a/app/helpers/search_helper.rb b/app/helpers/search_helper.rb index 7fa94080..335aec27 100644 --- a/app/helpers/search_helper.rb +++ b/app/helpers/search_helper.rb @@ -15,9 +15,9 @@ def format_highlight_label(field_name) end def view_online(result) - return unless result['sourceLink'].present? + return unless result['source_link'].present? - link_to 'View online', result['sourceLink'], class: 'button button-primary' + link_to 'View online', result['source_link'], class: 'button button-primary' end def view_record(record_id) diff --git a/app/models/normalize_primo_record.rb b/app/models/normalize_primo_record.rb index bd1cedfe..a7b016a2 100644 --- a/app/models/normalize_primo_record.rb +++ b/app/models/normalize_primo_record.rb @@ -1,4 +1,4 @@ -# Transforms a PNX doc from Primo Search API into a normalized record. +# Transforms a Primo Search API result into a normalized record. 
class NormalizePrimoRecord def initialize(record, query) @record = record @@ -7,6 +7,7 @@ def initialize(record, query) def normalize { + # Core fields 'title' => title, 'creators' => creators, 'source' => source, @@ -14,15 +15,16 @@ def normalize 'format' => format, 'links' => links, 'citation' => citation, - 'container' => container_title, 'identifier' => record_id, 'summary' => summary, - 'numbering' => numbering, - 'chapter_numbering' => chapter_numbering, - 'thumbnail' => thumbnail, 'publisher' => publisher, 'location' => best_location, 'subjects' => subjects, + # Primo-specific fields + 'container' => container_title, + 'numbering' => numbering, + 'chapter_numbering' => chapter_numbering, + 'thumbnail' => thumbnail, 'availability' => best_availability, 'other_availability' => other_availability? } diff --git a/app/models/normalize_timdex_record.rb b/app/models/normalize_timdex_record.rb new file mode 100644 index 00000000..cf17efd5 --- /dev/null +++ b/app/models/normalize_timdex_record.rb @@ -0,0 +1,142 @@ +# Transforms a TIMDEX result into a normalized record. 
+class NormalizeTimdexRecord + def initialize(record, query) + @record = record + @query = query + end + + def normalize + { + # Core fields + 'title' => title, + 'creators' => creators, + 'source' => source, + 'year' => year, + 'format' => format, + 'links' => links, + 'citation' => citation, + 'identifier' => identifier, + 'summary' => summary, + 'publisher' => publisher, + 'location' => location, + 'subjects' => subjects, + # TIMDEX-specific fields + 'content_type' => content_type, + 'dates' => dates, + 'contributors' => contributors, + 'highlight' => highlight, + 'source_link' => source_link + } + end + + private + + def title + @record['title'] || 'Unknown title' + end + + def creators + return [] unless @record['contributors'] + + # Convert TIMDEX contributors to Primo-style creators format + @record['contributors'] + .select { |c| %w[Creator Author].include?(c['kind']) } + .map { |creator| { 'value' => creator['value'], 'link' => nil } } + end + + def source + return 'Unknown source' unless @record['source'] + + @record['source'] + end + + def year + # Extract year from dates + return nil unless @record['dates'] + + pub_date = @record['dates'].find { |date| date['kind'] == 'Publication date' } + return pub_date['value']&.match(/\d{4}/)&.to_s if pub_date + + # Fallback to any date with a year + @record['dates'].each do |date| + year_match = date['value']&.match(/\d{4}/) + return year_match.to_s if year_match + end + end + + def format + return '' unless @record['contentType'] + + @record['contentType'].map { |type| type['value'] }.join(' ; ') + end + + def links + links = [] + + # Add source link if available + if @record['sourceLink'] + links << { + 'kind' => 'full record', + 'url' => @record['sourceLink'], + 'text' => 'View full record' + } + end + + links + end + + def citation + @record['citation'] || nil + end + + def summary + return nil unless @record['summary'] + + @record['summary'].is_a?(Array) ? 
@record['summary'].join(' ') : @record['summary'] + end + + def publisher + # Extract from contributors or other fields + return nil unless @record['contributors'] + + publisher = @record['contributors'].find { |c| c['kind'] == 'Publisher' } + publisher&.dig('value') + end + + def location + return nil unless @record['locations'] + + @record['locations'].map { |loc| loc['value'] }.compact.join('; ') + end + + def subjects + return [] unless @record['subjects'] + + @record['subjects'].map { |subject| subject['value'] } + end + + def identifier + @record['timdexRecordId'] + end + + # TIMDEX-specific methods + def content_type + @record['contentType'] + end + + def dates + @record['dates'] + end + + def contributors + @record['contributors'] + end + + def highlight + @record['highlight'] + end + + def source_link + @record['sourceLink'] + end +end diff --git a/app/models/normalize_timdex_results.rb b/app/models/normalize_timdex_results.rb new file mode 100644 index 00000000..1dd94fb6 --- /dev/null +++ b/app/models/normalize_timdex_results.rb @@ -0,0 +1,15 @@ +# Batch normalization for TIMDEX API results +class NormalizeTimdexResults + def initialize(results, query) + @results = results + @query = query + end + + def normalize + return [] unless @results.is_a?(Array) + + @results.filter_map do |doc| + NormalizeTimdexRecord.new(doc, @query).normalize + end + end +end diff --git a/app/views/search/_result.html.erb b/app/views/search/_result.html.erb index da115f61..fad44d90 100644 --- a/app/views/search/_result.html.erb +++ b/app/views/search/_result.html.erb @@ -1,11 +1,11 @@
  • - Title: <%= link_to(result['title'], record_path(result['timdexRecordId'])) %> + Title: <%= link_to(result['title'], record_path(result['identifier'])) %>

    - <%= result['contentType']&.each { |type| type['value'] }&.join(' ; ') %> + <%= result['content_type']&.each { |type| type['value'] }&.join(' ; ') %> <% result['dates']&.each do |date| %> <%= date['value'] if date['kind'] == 'Publication date' %> diff --git a/app/views/search/_result_geo.html.erb b/app/views/search/_result_geo.html.erb index a06f9350..4f543366 100644 --- a/app/views/search/_result_geo.html.erb +++ b/app/views/search/_result_geo.html.erb @@ -1,34 +1,38 @@

  • - Title: <%= link_to(result_geo['title'], record_path(result_geo['timdexRecordId'])) %> + Title: <%= link_to(result_geo['title'], record_path(result_geo['identifier'])) %>

    <%= render partial: 'shared/geo_data_info', locals: { metadata: result_geo } %>
    - <% if result_geo['contributors'] %> + <% if result_geo['creators'].present? || result_geo['contributors'].present? %> Contributors: <% end %> - <% if result_geo['summary'] %> + <% if result_geo['summary'].present? %>

    - Summary: <%= result_geo['summary'].join(' ') %> + Summary: <%= result_geo['summary'] %>

    <% end %> <% if result_geo['highlight'] %>
    - <%= render partial: 'search/highlights', locals: { result: result_geo } %> + <%= render partial: 'search/highlights', locals: { result: { 'highlight' => result_geo['highlight'] } } %>
    <% end %>
    - <%= view_record(result_geo['timdexRecordId']) %> + <%= view_record(result_geo['identifier']) %>
  • diff --git a/app/views/search/_result_primo.html.erb b/app/views/search/_result_primo.html.erb index b3acdaa5..bd4c5bba 100644 --- a/app/views/search/_result_primo.html.erb +++ b/app/views/search/_result_primo.html.erb @@ -2,22 +2,22 @@

    Title: - <% if result_primo['links']&.find { |link| link['kind'] == 'full record' } %> - <%= link_to(result_primo['title'], result_primo['links'].find { |link| link['kind'] == 'full record' }['url']) %> + <% if result['links']&.find { |link| link['kind'] == 'full record' } %> + <%= link_to(result['title'], result['links'].find { |link| link['kind'] == 'full record' }['url']) %> <% else %> - <%= result_primo['title'] %> + <%= result['title'] %> <% end %>

    - <%= result_primo['format'] %> - <%= result_primo['year'] %> + <%= result['format'] %> + <%= result['year'] %>

    - <% if result_primo['creators'].present? %> + <% if result['creators'].present? %> Contributors: <% end %> - <% if result_primo['container'].present? %> -

    - Published in: - <%= result_primo['container'] %> -

    - <% end %> - - <% if result_primo['citation'].present? %> -

    - Citation: - <%= result_primo['citation'] %> -

    - <% end %> - - <% if result_primo['summary'].present? %> -

    - Summary: - <%= truncate(result_primo['summary'], length: 300) %> -

    - <% end %> - - <% if result_primo['subjects'].present? %> -

    - Subjects: - <%= result_primo['subjects'].join('; ') %> -

    - <% end %> - - <% if result_primo['links'].present? %> - - <% end %> - - <% if result_primo['availability'].present? %> -

    - Availability: - <%= result_primo['availability'] %> -

    - <% end %> + <% end %> +
    - \ No newline at end of file + diff --git a/app/views/search/results.html.erb b/app/views/search/results.html.erb index d400175e..fc11b3a1 100644 --- a/app/views/search/results.html.erb +++ b/app/views/search/results.html.erb @@ -38,10 +38,11 @@ <% if @results.present? && @errors.blank? %>

    <%= results_summary(@pagination[:hits]) %> returned

      - <% if @active_tab == 'primo' %> - <%= render(partial: 'search/result_primo', collection: @results) %> - <% else %> - <%= render(partial: 'search/result', collection: @results) %> + <% case @active_tab %> + <% when 'primo' %> + <%= render(partial: 'search/result_primo', collection: @results, as: :result) %> + <% when 'timdex' %> + <%= render(partial: 'search/result', collection: @results, as: :result) %> <% end %>
    <% elsif @errors.blank? %> diff --git a/app/views/search/results_geo.html.erb b/app/views/search/results_geo.html.erb index 6a7a6f74..812d686c 100644 --- a/app/views/search/results_geo.html.erb +++ b/app/views/search/results_geo.html.erb @@ -46,7 +46,7 @@ <% if @results.present? && @errors.blank? %>

    <%= results_summary(@pagination[:hits]) %> returned

      - <%= render(partial: 'search/result_geo', collection: @results) %> + <%= render(partial: 'search/result_geo', collection: @results, as: :result_geo) %>
    <% elsif @errors.blank? %>
    diff --git a/app/views/shared/_geo_data_info.html.erb b/app/views/shared/_geo_data_info.html.erb index e4b6ef18..19362195 100644 --- a/app/views/shared/_geo_data_info.html.erb +++ b/app/views/shared/_geo_data_info.html.erb @@ -11,4 +11,4 @@ <% elsif access_type(metadata) == 'MIT authentication required' %>
  • <%= access_type(metadata) %>
  • <% end %> - + \ No newline at end of file diff --git a/test/fixtures/timdex/full_record.json b/test/fixtures/timdex/full_record.json new file mode 100644 index 00000000..58927aa8 --- /dev/null +++ b/test/fixtures/timdex/full_record.json @@ -0,0 +1,32 @@ +{ + "timdexRecordId": "test-record-123", + "title": "Sample TIMDEX Record for Testing", + "source": "Test Repository", + "contentType": [ + {"value": "Dataset"}, + {"value": "Geospatial data"} + ], + "dates": [ + {"kind": "Publication date", "value": "2023-01-15"}, + {"kind": "Coverage", "value": "2020-2023"} + ], + "contributors": [ + {"kind": "Creator", "value": "Smith, Jane"}, + {"kind": "Author", "value": "Doe, John"}, + {"kind": "Publisher", "value": "MIT Libraries"} + ], + "subjects": [ + {"value": "Geographic Information Systems"}, + {"value": "Remote Sensing"} + ], + "summary": "This is a comprehensive test record with all possible fields populated for testing normalization.", + "citation": "Smith, J. & Doe, J. (2023). Sample TIMDEX Record for Testing. 
Test Repository.", + "sourceLink": "https://example.com/source/record/123", + "locations": [ + {"value": "Cambridge, MA"} + ], + "highlight": { + "title": ["Sample TIMDEX Record"], + "summary": ["comprehensive test record"] + } +} diff --git a/test/fixtures/timdex/minimal_record.json b/test/fixtures/timdex/minimal_record.json new file mode 100644 index 00000000..d63f15cb --- /dev/null +++ b/test/fixtures/timdex/minimal_record.json @@ -0,0 +1,5 @@ +{ + "timdexRecordId": "minimal-record-456", + "title": "Minimal Test Record", + "source": "Test Repository" +} diff --git a/test/helpers/search_helper_test.rb b/test/helpers/search_helper_test.rb index c743c4c3..0ab09dd9 100644 --- a/test/helpers/search_helper_test.rb +++ b/test/helpers/search_helper_test.rb @@ -31,13 +31,13 @@ def setup { 'matchedField' => 'citation', 'matchedPhrases' => 'Datascientist, Jane' }], trim_highlights(result) end - test 'renders view_online link if sourceLink is present' do - result = { 'title' => 'A record', 'sourceLink' => 'https://example.org' } + test 'renders view_online link if source_link is present' do + result = { 'title' => 'A record', 'source_link' => 'https://example.org' } assert_equal 'View online', view_online(result) end - test 'does not render view_online link if sourceLink is absent' do + test 'does not render view_online link if source_link is absent' do result = { 'title' => 'A record' } assert_nil view_online(result) end @@ -167,7 +167,7 @@ def setup @test_strategy.switch!(:gdt, true) query = { - contributors: 'person, sample', + contributors: 'person, sample' } assert_equal ['Authors: person, sample'], applied_advanced_terms(query) end diff --git a/test/models/normalize_timdex_record_test.rb b/test/models/normalize_timdex_record_test.rb new file mode 100644 index 00000000..58addee7 --- /dev/null +++ b/test/models/normalize_timdex_record_test.rb @@ -0,0 +1,221 @@ +require 'test_helper' + +class NormalizeTimdexRecordTest < ActiveSupport::TestCase + def full_record + 
JSON.parse(File.read(Rails.root.join('test/fixtures/timdex/full_record.json'))) + end + + def minimal_record + JSON.parse(File.read(Rails.root.join('test/fixtures/timdex/minimal_record.json'))) + end + + test 'normalizes title' do + normalized = NormalizeTimdexRecord.new(full_record, 'test').normalize + assert_equal 'Sample TIMDEX Record for Testing', normalized['title'] + end + + test 'handles missing title' do + record_without_title = minimal_record + record_without_title.delete('title') + normalized = NormalizeTimdexRecord.new(record_without_title, 'test').normalize + assert_equal 'Unknown title', normalized['title'] + end + + test 'normalizes creators from contributors' do + normalized = NormalizeTimdexRecord.new(full_record, 'test').normalize + expected_creators = [ + { 'value' => 'Smith, Jane', 'link' => nil }, + { 'value' => 'Doe, John', 'link' => nil } + ] + assert_equal expected_creators, normalized['creators'] + end + + test 'handles missing creators' do + normalized = NormalizeTimdexRecord.new(minimal_record, 'test').normalize + assert_empty normalized['creators'] + end + + test 'normalizes source' do + normalized = NormalizeTimdexRecord.new(full_record, 'test').normalize + assert_equal 'Test Repository', normalized['source'] + end + + test 'handles missing source' do + record_without_source = minimal_record.dup + record_without_source.delete('source') + normalized = NormalizeTimdexRecord.new(record_without_source, 'test').normalize + assert_equal 'Unknown source', normalized['source'] + end + + test 'extracts year from publication date' do + normalized = NormalizeTimdexRecord.new(full_record, 'test').normalize + assert_equal '2023', normalized['year'] + end + + test 'handles missing year' do + normalized = NormalizeTimdexRecord.new(minimal_record, 'test').normalize + assert_nil normalized['year'] + end + + test 'extracts year from fallback date when no publication date' do + record_with_coverage_date = minimal_record.dup + 
record_with_coverage_date['dates'] = [ + { 'kind' => 'Coverage', 'value' => '1995-2000' }, + { 'kind' => 'Creation', 'value' => 'Created in 1998' } + ] + normalized = NormalizeTimdexRecord.new(record_with_coverage_date, 'test').normalize + assert_equal '1995', normalized['year'] + end + + test 'normalizes format from content type' do + normalized = NormalizeTimdexRecord.new(full_record, 'test').normalize + assert_equal 'Dataset ; Geospatial data', normalized['format'] + end + + test 'handles missing format' do + record_without_format = minimal_record.dup + record_without_format.delete('contentType') + normalized = NormalizeTimdexRecord.new(record_without_format, 'test').normalize + assert_empty normalized['format'] + end + + test 'normalizes links from source link' do + normalized = NormalizeTimdexRecord.new(full_record, 'test').normalize + expected_links = [ + { + 'kind' => 'full record', + 'url' => 'https://example.com/source/record/123', + 'text' => 'View full record' + } + ] + assert_equal expected_links, normalized['links'] + end + + test 'handles missing links' do + normalized = NormalizeTimdexRecord.new(minimal_record, 'test').normalize + assert_empty normalized['links'] + end + + test 'normalizes citation' do + normalized = NormalizeTimdexRecord.new(full_record, 'test').normalize + assert_equal 'Smith, J. & Doe, J. (2023). Sample TIMDEX Record for Testing. 
Test Repository.', + normalized['citation'] + end + + test 'handles missing citation' do + normalized = NormalizeTimdexRecord.new(minimal_record, 'test').normalize + assert_nil normalized['citation'] + end + + test 'normalizes identifier' do + normalized = NormalizeTimdexRecord.new(full_record, 'test').normalize + assert_equal 'test-record-123', normalized['identifier'] + end + + test 'normalizes summary' do + normalized = NormalizeTimdexRecord.new(full_record, 'test').normalize + assert_equal 'This is a comprehensive test record with all possible fields populated for testing normalization.', + normalized['summary'] + end + + test 'handles missing summary' do + normalized = NormalizeTimdexRecord.new(minimal_record, 'test').normalize + assert_nil normalized['summary'] + end + + test 'extracts publisher from contributors' do + normalized = NormalizeTimdexRecord.new(full_record, 'test').normalize + assert_equal 'MIT Libraries', normalized['publisher'] + end + + test 'handles missing publisher' do + normalized = NormalizeTimdexRecord.new(minimal_record, 'test').normalize + assert_nil normalized['publisher'] + end + + test 'normalizes location' do + normalized = NormalizeTimdexRecord.new(full_record, 'test').normalize + assert_equal 'Cambridge, MA', normalized['location'] + end + + test 'handles missing location' do + normalized = NormalizeTimdexRecord.new(minimal_record, 'test').normalize + assert_nil normalized['location'] + end + + test 'joins multiple locations with semicolon' do + record_with_multiple_locations = full_record.dup + record_with_multiple_locations['locations'] = [ + { 'value' => 'Cambridge, MA' }, + { 'value' => 'Boston, MA' }, + { 'value' => 'New York, NY' } + ] + normalized = NormalizeTimdexRecord.new(record_with_multiple_locations, 'test').normalize + assert_equal 'Cambridge, MA; Boston, MA; New York, NY', normalized['location'] + end + + test 'normalizes subjects' do + normalized = NormalizeTimdexRecord.new(full_record, 'test').normalize + 
assert_equal ['Geographic Information Systems', 'Remote Sensing'], normalized['subjects'] + end + + test 'handles missing subjects' do + normalized = NormalizeTimdexRecord.new(minimal_record, 'test').normalize + assert_empty normalized['subjects'] + end + + # Test TIMDEX-specific fields + test 'includes TIMDEX-specific content_type field' do + normalized = NormalizeTimdexRecord.new(full_record, 'test').normalize + expected_content_type = [ + { 'value' => 'Dataset' }, + { 'value' => 'Geospatial data' } + ] + assert_equal expected_content_type, normalized['content_type'] + end + + test 'includes TIMDEX-specific dates field' do + normalized = NormalizeTimdexRecord.new(full_record, 'test').normalize + expected_dates = [ + { 'kind' => 'Publication date', 'value' => '2023-01-15' }, + { 'kind' => 'Coverage', 'value' => '2020-2023' } + ] + assert_equal expected_dates, normalized['dates'] + end + + test 'includes TIMDEX-specific contributors field' do + normalized = NormalizeTimdexRecord.new(full_record, 'test').normalize + expected_contributors = [ + { 'kind' => 'Creator', 'value' => 'Smith, Jane' }, + { 'kind' => 'Author', 'value' => 'Doe, John' }, + { 'kind' => 'Publisher', 'value' => 'MIT Libraries' } + ] + assert_equal expected_contributors, normalized['contributors'] + end + + test 'includes TIMDEX-specific highlight field' do + normalized = NormalizeTimdexRecord.new(full_record, 'test').normalize + expected_highlight = { + 'title' => ['Sample TIMDEX Record'], + 'summary' => ['comprehensive test record'] + } + assert_equal expected_highlight, normalized['highlight'] + end + + test 'includes TIMDEX-specific source_link field' do + normalized = NormalizeTimdexRecord.new(full_record, 'test').normalize + assert_equal 'https://example.com/source/record/123', normalized['source_link'] + end + + # Test that Primo-only fields are not included + test 'does not include Primo-only fields' do + normalized = NormalizeTimdexRecord.new(full_record, 'test').normalize + + 
assert_not_includes normalized.keys, 'availability' + assert_not_includes normalized.keys, 'numbering' + assert_not_includes normalized.keys, 'chapter_numbering' + assert_not_includes normalized.keys, 'thumbnail' + assert_not_includes normalized.keys, 'other_availability' + assert_not_includes normalized.keys, 'container' + end +end diff --git a/test/models/normalize_timdex_results_test.rb b/test/models/normalize_timdex_results_test.rb new file mode 100644 index 00000000..08c3fda9 --- /dev/null +++ b/test/models/normalize_timdex_results_test.rb @@ -0,0 +1,88 @@ +require 'test_helper' + +class NormalizeTimdexResultsTest < ActiveSupport::TestCase + def sample_timdex_response + [ + JSON.parse(File.read(Rails.root.join('test/fixtures/timdex/full_record.json'))), + JSON.parse(File.read(Rails.root.join('test/fixtures/timdex/minimal_record.json'))) + ] + end + + def empty_timdex_response + [] + end + + test 'normalizes TIMDEX response with records' do + normalizer = NormalizeTimdexResults.new(sample_timdex_response, 'test query') + results = normalizer.normalize + + assert_equal 2, results.count + + # Check first record (full record) + first_result = results.first + assert_equal 'Sample TIMDEX Record for Testing', first_result['title'] + assert_equal 'test-record-123', first_result['identifier'] + assert_equal 'Test Repository', first_result['source'] + assert_equal 'Dataset ; Geospatial data', first_result['format'] + assert_equal '2023', first_result['year'] + + # Check TIMDEX-specific fields are preserved + assert_includes first_result.keys, 'content_type' + assert_includes first_result.keys, 'dates' + assert_includes first_result.keys, 'contributors' + assert_includes first_result.keys, 'highlight' + assert_includes first_result.keys, 'source_link' + + # Check second record (minimal record) + second_result = results.second + assert_equal 'Minimal Test Record', second_result['title'] + assert_equal 'minimal-record-456', second_result['identifier'] + assert_equal 'Test 
Repository', second_result['source'] + end + + test 'handles empty TIMDEX response' do + normalizer = NormalizeTimdexResults.new(empty_timdex_response, 'test query') + results = normalizer.normalize + + assert_empty results + end + + test 'handles nil TIMDEX response' do + normalizer = NormalizeTimdexResults.new(nil, 'test query') + results = normalizer.normalize + + assert_empty results + end + + test 'handles response without data field' do + invalid_response = { 'errors' => ['Some error'] } + normalizer = NormalizeTimdexResults.new(invalid_response, 'test query') + results = normalizer.normalize + + assert_empty results + end + + test 'handles response without search field' do + invalid_response = { 'data' => { 'other' => 'data' } } + normalizer = NormalizeTimdexResults.new(invalid_response, 'test query') + results = normalizer.normalize + + assert_empty results + end + + test 'handles response without records field' do + invalid_response = { 'data' => { 'search' => { 'other' => 'data' } } } + normalizer = NormalizeTimdexResults.new(invalid_response, 'test query') + results = normalizer.normalize + + assert_empty results + end + + test 'preserves query in normalizer' do + query = 'test search query' + normalizer = NormalizeTimdexResults.new(sample_timdex_response, query) + + # Test that query is stored (this would be used by individual record normalization) + assert_equal query, normalizer.instance_variable_get(:@query) + end +end