Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ group :test do
# Use system testing [https://guides.rubyonrails.org/testing.html#system-testing]
gem 'capybara'
gem 'climate_control'
gem 'mocha'
gem 'selenium-webdriver'
gem 'simplecov'
gem 'simplecov-lcov'
Expand Down
4 changes: 4 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,8 @@ GEM
matrix (0.4.2)
mini_mime (1.1.5)
minitest (5.25.5)
mocha (2.7.1)
ruby2_keywords (>= 0.0.5)
msgpack (1.8.0)
multi_json (1.15.0)
net-http (0.6.0)
Expand Down Expand Up @@ -398,6 +400,7 @@ GEM
rubocop (>= 1.75.0, < 2.0)
rubocop-ast (>= 1.44.0, < 2.0)
ruby-progressbar (1.13.0)
ruby2_keywords (0.0.5)
rubyzip (2.4.1)
sassc (2.4.0)
ffi (~> 1.9)
Expand Down Expand Up @@ -518,6 +521,7 @@ DEPENDENCIES
importmap-rails
jbuilder
mitlibraries-theme!
mocha
omniauth
omniauth-rails_csrf_protection
omniauth_openid_connect
Expand Down
30 changes: 24 additions & 6 deletions app/models/detector/ml_citation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,16 @@ class MlCitation
# For now the initialize method just needs to consult the external lambda.
#
# @param phrase String. Often a `Term.phrase`.
# @return Nothing intentional. Data is written to Hash `@detections` during processing.
# @return Nothing intentional. Data is written to Boolean `@detections` during processing.
def initialize(phrase)
return unless self.class.expected_env?

response = fetch(phrase)
@detections = false

features = extract_features(phrase)
return unless enough_nonzero_values?(features)

response = fetch(features)
@detections = response unless response == 'Error'
end

Expand Down Expand Up @@ -111,10 +116,10 @@ def define_lambda
# define_payload defines the Hash that will be sent to the lambda.
#
# @return Hash
def define_payload(phrase)
def define_payload(features)
{
action: 'predict',
features: extract_features(phrase),
features: features,
challenge_secret: self.class.lambda_secret
}
end
Expand All @@ -135,9 +140,9 @@ def extract_features(phrase)
# error handling with the response.
#
# @return Boolean or 'Error'
def fetch(phrase)
def fetch(features)
lambda = define_lambda
payload = define_payload(phrase)
payload = define_payload(features)

response = lambda.post(self.class.lambda_path, payload.to_json)

Expand All @@ -151,5 +156,18 @@ def fetch(phrase)
'Error'
end
end

# enough_nonzero_values? checks that a provided hash contains at least three values which are not zero.
#
# @note We chose 3 as our value here after analyzing the behavior of the citation detector across nearly a year of
# search traffic. For searches which had only one or two features that are not zero, we found no actual citations.
# To see the analyses, look at the "Filtering results" and "Surprising predictions" notebooks at
# https://github.com/MITLibraries/tacos-notebooks/tree/main/notebooks/explorations
#
# @param hash Hash. In this class it is the result of `extract_features`; only the values are inspected, and each
# is compared against Integer 0 (so 0.0 also counts as zero).
# @return Boolean. True when three or more values are not zero, false otherwise (including for an empty hash).
def enough_nonzero_values?(hash)
# each_value walks the values directly, avoiding the temporary Array that Hash#values would allocate.
hash.each_value.count { |value| value != 0 } >= 3
end
end
end
22 changes: 21 additions & 1 deletion test/models/detector/ml_citation_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,32 @@ class MlCitationTest < ActiveSupport::TestCase

assert_instance_of Detector::MlCitation, prediction

assert_nil(prediction.detections)
assert_equal(false, prediction.detections)
end
end
end
end

# Guards the pre-filter in MlCitation#initialize: when a phrase yields too few non-zero features,
# the detector must return before calling #fetch (and therefore before contacting the lambda).
test 'lookup skips fetching a prediction for search phrases with less than three features' do
# Mocha expectation, set before the object is built: the test fails if #fetch is called on any instance.
Detector::MlCitation.any_instance.expects(:fetch).never

with_enabled_mlcitation do
# This search phrase is expected to have only two non-zero feature values, which based on
# our analyses will never result in a predicted citation.
Detector::MlCitation.new('foobar (2025)')
end
end

# Counterpart to the skip test: a phrase that reaches the three-feature threshold must go on to call #fetch.
test 'lookup does not skip fetching a prediction for search phrases with three or more features' do
# Mocha expectation, set before the object is built: the test fails unless #fetch is called exactly once.
# NOTE(review): the expectation also stands in for #fetch, so no real request should be made here — confirm.
Detector::MlCitation.any_instance.expects(:fetch).once

with_enabled_mlcitation do
# This search phrase is expected to have three non-zero feature values, which is the minimum
# number we expect to have any hope of a citation.
Detector::MlCitation.new('foobar (2025) 1234-76')
end
end

# Record method
test 'record does relevant work' do
with_enabled_mlcitation do
Expand Down
3 changes: 2 additions & 1 deletion test/test_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
ENV['RAILS_ENV'] ||= 'test'
require_relative '../config/environment'
require 'rails/test_help'
require 'mocha/minitest'

VCR.configure do |config|
config.ignore_localhost = false
Expand Down Expand Up @@ -124,4 +125,4 @@ def with_enabled_mlcitation
}
ensure
ENV.replace(old_env)
end
end