Merge a484ddb into 5304eae
JPrevost committed Feb 22, 2024
2 parents 5304eae + a484ddb commit 6aca77e
Showing 65 changed files with 4,167 additions and 3,797 deletions.
2 changes: 1 addition & 1 deletion .env.test
@@ -1,4 +1,4 @@
EMAIL_FROM=fake@example.com
EMAIL_URL_HOST=localhost:3000
JWT_SECRET_KEY=3862fc949629030de4259b88f6e8f7c3702b2fabfc68d00d46fb7f9f70110690b526997ef4d77765ffa010d8aba440286af39947d0c85287174d99be2db14987
OPENSEARCH_INDEX=timdex-prod
OPENSEARCH_INDEX=all-current
51 changes: 51 additions & 0 deletions README.md
@@ -19,6 +19,57 @@ additional records with a standardized template.
- don't commit your .env or .env.development, but do commit .env.test after
confirming your test values are not actual secrets that need protecting

## Generating cassettes for tests

We use [VCR](https://github.com/vcr/vcr) to record transactions with OpenSearch so we do not need to load
data into OpenSearch during each test run.

We must take care not to record credentials for any secure OpenSearch clusters we use, _and_ we must make sure all cassettes appear to use the same
endpoint.

One option would be to require loading data locally and only generate cassettes from a localhost OpenSearch. This is secure, but it is not always convenient to ensure the local test data looks like production.

The other option is to use a deployed OpenSearch cluster and scrub the recorded data using VCR's [filter_sensitive_data](https://benoittgt.github.io/vcr/#/configuration/filter_sensitive_data) feature.

The scrubbing has been configured in `test/test_helper.rb`.
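
For illustration, the scrubbing configuration in `test/test_helper.rb` might look something like the sketch below; the placeholders and option values here are assumptions, not a copy of the repository's file. VCR's `filter_sensitive_data` takes a placeholder string and a block returning the value to replace in every recorded cassette:

```ruby
require 'vcr'

VCR.configure do |config|
  config.cassette_library_dir = 'test/vcr_cassettes' # assumed location
  config.hook_into :webmock

  # Replace the real cluster host and AWS credentials with placeholders so every
  # cassette looks like the same endpoint and contains no secrets.
  config.filter_sensitive_data('<OPENSEARCH_URL>') { ENV['OPENSEARCH_URL'] }
  config.filter_sensitive_data('<AWS_ACCESS_KEY_ID>') { ENV['AWS_OPENSEARCH_ACCESS_KEY_ID'] }
  config.filter_sensitive_data('<AWS_SECRET_ACCESS_KEY>') { ENV['AWS_OPENSEARCH_SECRET_ACCESS_KEY'] }
end
```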

The test recording process is as follows:

- If you want to use `localhost:9200`:
  - Load the data you want and ensure it is accessible via the `all-current` alias. No other changes are necessary.
- If you want to use an AWS OpenSearch instance:

> [!CAUTION]
> Use `.env` and _not_ `.env.test` to override these values, to ensure you do not inadvertently commit secrets!
- Set the following values in `.env` for whatever cluster you want to connect to (note: `.env` is preferred over `.env.test` because it is already in our `.gitignore` and works alongside `.env.test`):
  - `OPENSEARCH_URL`
  - `AWS_OPENSEARCH=true`
  - `AWS_OPENSEARCH_ACCESS_KEY_ID`
  - `AWS_OPENSEARCH_SECRET_ACCESS_KEY`
  - `AWS_REGION`
- Delete any cassette you want to regenerate (for new tests, you can skip this). If you are writing a GraphQL test, nest your cassette inside the `opensearch init` cassette, as shown below.

Example of nested cassettes:

```ruby
test 'graphql search' do
  VCR.use_cassette('opensearch init') do
    VCR.use_cassette('YOUR CASSETTE NAME') do
      # YOUR QUERY
      # YOUR ASSERTIONS
    end
  end
end
```
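
Filled in, such a test might look roughly like the sketch below, using the `places` aggregation added in this commit as the example query. The GraphQL endpoint path, the response shape, and the field names (`key`, `docCount`) are assumptions here, not documented values:

```ruby
test 'graphql search aggregates places' do
  VCR.use_cassette('opensearch init') do
    VCR.use_cassette('graphql search places aggregation') do
      # Hypothetical query and assertions; adjust names to match the actual schema.
      post '/graphql',
           params: { query: '{ search(searchterm: "data") { aggregations { places { key docCount } } } }' }

      json = JSON.parse(response.body)
      assert_not_nil json['data']['search']['aggregations']['places']
    end
  end
end
```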

- Run your test(s). You may see VCR errors because the `opensearch init` cassette does not contain the HTTP transaction you are requesting; however, the nested cassette for your test will be generated if it does not exist yet, and on future runs these errors will not recur.
- Manually confirm that the recorded headers do not contain sensitive information. The scrubbing process should work, but it is your responsibility to ensure you are not committing secrets to code repositories. If you aren't sure, ask.
- Remove or comment out the AWS credentials in `.env` before re-running your tests, or they will fail (i.e., this process can only generate cassettes; it cannot replay them with AWS credentials present, because the AWS details are scrubbed from the cassettes and VCR will not find a match).

> [!IMPORTANT]
> We re-use OpenSearch connections, which is handled by the nesting of cassettes (see above). If you have sporadically failing tests, ensure you are nesting your test-specific cassette inside the `opensearch init` cassette.

## Confirming functionality after updating dependencies

This application has good code coverage, so most issues are detected by just running tests normally:
2 changes: 2 additions & 0 deletions app/graphql/types/aggregations_type.rb
@@ -15,5 +15,7 @@ class AggregationsType < Types::BaseObject
field :source, [Types::AggregationCountType], null: true,
description: 'Total search results by source record system'
field :subjects, [Types::AggregationCountType], null: true, description: 'Total search results by subject term'
field :places, [Types::AggregationCountType], null: true,
description: 'Total search results by Place (which is a subject with type `Dublin Core; Spatial`)'
end
end
24 changes: 18 additions & 6 deletions app/graphql/types/query_type.rb
@@ -1,3 +1,5 @@
# rubocop:disable Metrics/ClassLength
# rubocop:disable Metrics/MethodLength
module Types
class QueryType < Types::BaseObject
# Add root-level fields here.
@@ -61,24 +63,30 @@ def record_id(id:, index:)
# applied filters
argument :content_type_filter, [String], required: false, default_value: nil,
description: 'Filter results by content type. Use the `contentType` ' \
'aggregation for a list of possible values'
'aggregation for a list of possible values. Multiple ' \
'values are ANDed.'
argument :contributors_filter, [String], required: false, default_value: nil,
description: 'Filter results by contributor. Use the `contributors` ' \
'aggregation for a list of possible values'
'aggregation for a list of possible values. Multiple ' \
'values are ANDed.'
argument :format_filter, [String], required: false, default_value: nil,
description: 'Filter results by format. Use the `format` aggregation for a ' \
'list of possible values'
'list of possible values. Multiple values are ANDed.'
argument :languages_filter, [String], required: false, default_value: nil,
description: 'Filter results by language. Use the `languages` ' \
'aggregation for a list of possible values'
'aggregation for a list of possible values. Multiple values '\
'are ANDed.'
argument :literary_form_filter, String, required: false, default_value: nil,
description: 'Filter results by fiction or nonfiction'
argument :places_filter, [String], required: false, default_value: nil,
description: 'Filter by places. Use the `places` aggregation ' \
'for a list of possible values. Multiple values are ANDed.'
argument :source_filter, [String], required: false, default_value: nil,
description: 'Filter by source record system. Use the `sources` aggregation ' \
'for a list of possible values'
'for a list of possible values. Multiple values are ORed.'
argument :subjects_filter, [String], required: false, default_value: nil,
description: 'Filter by subject terms. Use the `contentType` aggregation ' \
'for a list of possible values'
'for a list of possible values. Multiple values are ANDed.'
end

def search(searchterm:, citation:, contributors:, funding_information:, geodistance:, geobox:, identifiers:,
@@ -132,6 +140,7 @@ def construct_query(searchterm, citation, contributors, funding_information, geo
query[:contributors_filter] = filters[:contributors_filter]
query[:languages_filter] = filters[:languages_filter]
query[:literary_form_filter] = filters[:literary_form_filter]
query[:places_filter] = filters[:places_filter]
query = source_deprecation_handler(query, filters[:source_filter], source)
query[:subjects_filter] = filters[:subjects_filter]
query
@@ -150,6 +159,7 @@ def collapse_buckets(es_aggs)
contributors: es_aggs['contributors']['contributor_names']['buckets'],
source: es_aggs['source']['buckets'],
subjects: es_aggs['subjects']['subject_names']['buckets'],
places: es_aggs['places']['only_spatial']['place_names']['buckets'],
languages: es_aggs['languages']['buckets'],
literary_form: es_aggs['literary_form']['buckets'],
format: es_aggs['content_format']['buckets'],
@@ -158,3 +168,5 @@ def collapse_buckets(es_aggs)
end
end
end
# rubocop:enable Metrics/ClassLength
# rubocop:enable Metrics/MethodLength
31 changes: 31 additions & 0 deletions app/models/aggregations.rb
@@ -1,3 +1,5 @@
# rubocop:disable Metrics/ClassLength
# rubocop:disable Metrics/MethodLength
class Aggregations
# https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html
def self.all
@@ -7,6 +9,7 @@ def self.all
content_format:,
languages:,
literary_form:,
places:,
source:,
subjects:
}
@@ -81,4 +84,32 @@ def self.subjects
}
}
end

def self.places
{
nested: {
path: 'subjects'
},
aggs: {
only_spatial: {
filter: {
terms: {
'subjects.kind': [
'Dublin Core; Spatial'
]
}
},
aggs: {
place_names: {
terms: {
field: 'subjects.value.keyword'
}
}
}
}
}
}
end
end
# rubocop:enable Metrics/ClassLength
# rubocop:enable Metrics/MethodLength
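
For context, a `terms` aggregation nested inside a `filter` aggregation, as defined above, comes back from OpenSearch shaped roughly like the hash below; this is why `collapse_buckets` in `query_type.rb` reads `es_aggs['places']['only_spatial']['place_names']['buckets']`. The counts and keys are illustrative:

```ruby
# Abridged, illustrative shape of the OpenSearch aggregations response for the
# nested `places` aggregation defined above (counts and keys are made up).
es_aggs = {
  'places' => {
    'doc_count' => 120,
    'only_spatial' => {
      'doc_count' => 45,
      'place_names' => {
        'buckets' => [
          { 'key' => 'United States', 'doc_count' => 20 },
          { 'key' => 'Massachusetts', 'doc_count' => 12 }
        ]
      }
    }
  }
}

# The path collapse_buckets digs into:
es_aggs['places']['only_spatial']['place_names']['buckets'].map { |b| b['key'] }
# => ["United States", "Massachusetts"]
```
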
12 changes: 11 additions & 1 deletion app/models/opensearch.rb
@@ -1,3 +1,5 @@
# rubocop:disable Metrics/ClassLength
# rubocop:disable Metrics/MethodLength
class Opensearch
SIZE = 20
MAX_PAGE = 200
@@ -25,7 +27,6 @@ def build_query(from)
}

query_hash[:highlight] = highlight if @highlight

query_hash.to_json
end

@@ -207,6 +208,13 @@ def filters(params)
params[:literary_form_filter])
end

# places are really just a subset of subjects so the filter uses the subject field
if params[:places_filter].present?
params[:places_filter].each do |p|
f.push filter_field_by_value('subjects.value.keyword', p)
end
end

# source aggregation is "OR" and not "AND" so it does not use the filter_field_by_value method
f.push filter_sources(params[:source_filter]) if params[:source_filter]

@@ -283,3 +291,5 @@ def nested_field(field)
end
end
end
# rubocop:enable Metrics/ClassLength
# rubocop:enable Metrics/MethodLength
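
Because each selected place is pushed as its own clause onto the bool filter array (see the `places_filter` loop above), multiple `placesFilter` values are ANDed, matching the GraphQL argument description. The sketch below shows what those clauses could look like in the OpenSearch query DSL; the exact output of `filter_field_by_value` and `nested_field` is not shown in this diff, so this shape is an assumption:

```ruby
# Hypothetical sketch: two selected places become two separate clauses in the
# bool `filter` array, and OpenSearch combines bool filter clauses with AND.
places_clauses = ['Massachusetts', 'United States'].map do |place|
  {
    nested: {
      path: 'subjects',
      query: {
        term: { 'subjects.value.keyword': place }
      }
    }
  }
end

query = { query: { bool: { filter: places_clauses } } }
```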
