Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/3367 missing oaipmh records #2191

Merged
merged 12 commits into from
Mar 17, 2023
3 changes: 2 additions & 1 deletion doajtest/fixtures/v2/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@
"subject": [
{"scheme": "LCC", "term": "Economic theory. Demography",
"code": "HB1-3840"},
{"scheme": "LCC", "term": "Social Sciences", "code": "H"}
{"scheme": "LCC", "term": "Social Sciences", "code": "H"},
{"scheme": "LCC", "term": "Veterinary medicine", "code": "SF600-1100"}
],
"title": "The Title",
"waiver": {
Expand Down
57 changes: 55 additions & 2 deletions doajtest/unit/test_oaipmh.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,11 +279,13 @@ def test_08_list_sets(self):
t = etree.fromstring(resp.data)
records = t.xpath('/oai:OAI-PMH/oai:ListSets', namespaces=self.oai_ns)
sets = records[0].getchildren()
assert len(sets) == 2
assert len(sets) == 3
set0 = sets[0].getchildren()
set1 = sets[1].getchildren()
set2 = sets[2].getchildren()
assert set0[1].text == 'LCC:Economic theory. Demography'
assert set1[1].text == 'LCC:Social Sciences'
assert set2[1].text == 'LCC:Veterinary medicine'

# check that we can retrieve a record with one of those sets
with self.app_test.test_client() as t_client:
Expand Down Expand Up @@ -343,4 +345,55 @@ def test_09_article(self):
assert len(r) == 1

# Check we have the correct journal
assert records[0].xpath('//dc:title', namespaces=self.oai_ns)[0].text == a_public.bibjson().title
assert records[0].xpath('//dc:title', namespaces=self.oai_ns)[0].text == a_public.bibjson().title

def test_10_subjects(self):
"""test if the OAI-PMH journal feed returns correct journals when set specified"""
journal_source = JournalFixtureFactory.make_journal_source(in_doaj=True)
j_public = models.Journal(**journal_source)
j_public.save(blocking=True)
public_id = j_public.id

with self.app_test.test_request_context():
# Check whether the journal is found for its specific set: Veterinary Medicine (TENDOlZldGVyaW5hcnkgbWVkaWNpbmU)
with self.app_test.test_client() as t_client:
resp = t_client.get(url_for('oaipmh.oaipmh', verb='ListRecords', metadataPrefix='oai_dc', set='TENDOlZldGVyaW5hcnkgbWVkaWNpbmU~'))
assert resp.status_code == 200

t = etree.fromstring(resp.data)
records = t.xpath('/oai:OAI-PMH/oai:ListRecords', namespaces=self.oai_ns)

# Check we only have one journal returned
assert len(records[0].xpath('//oai:record', namespaces=self.oai_ns)) == 1

# Check we have the correct journal
assert records[0].xpath('//dc:title', namespaces=self.oai_ns)[0].text == j_public.bibjson().title

#check we have expected subjects (Veterinary Medicine but not Agriculture)
subjects = records[0].xpath('//dc:subject', namespaces=self.oai_ns)
assert len(subjects) != 0

subjects_txt = list(map(lambda s: s.text, subjects))
assert "Veterinary medicine" in subjects_txt
assert not "Agriculture" in subjects_txt

# check we can still find the record in Agriculture set (parent of Veterinary Medicine)
with self.app_test.test_request_context():
# Check whether the journal is found for more general set: Agriculture (TENDOkFncmljdWx0dXJl)
with self.app_test.test_client() as t_client:
resp = t_client.get(url_for('oaipmh.oaipmh', verb='ListRecords', metadataPrefix='oai_dc', set='TENDOkFncmljdWx0dXJl~'))
assert resp.status_code == 200

t = etree.fromstring(resp.data)
records = t.xpath('/oai:OAI-PMH/oai:ListRecords', namespaces=self.oai_ns)

sets = records[0].getchildren()

assert len(sets) == 1
set0 = sets[0].getchildren()

# Check we only have one journal returned
assert len(records[0].xpath('//oai:record', namespaces=self.oai_ns)) == 1

# Check we have the correct journal
assert records[0].xpath('//dc:title', namespaces=self.oai_ns)[0].text == j_public.bibjson().title
2 changes: 2 additions & 0 deletions portality/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,5 @@

# Role
ROLE_ASSOCIATE_EDITOR = 'associate_editor'

SUBJECTS_SCHEMA = "LCC:"
7 changes: 3 additions & 4 deletions portality/models/oaipmh.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from copy import deepcopy
from portality.models import Journal, Article
from portality import constants

class OAIPMHRecord(object):
earliest = {
Expand Down Expand Up @@ -49,7 +50,6 @@ class OAIPMHRecord(object):
"size": 25
}

set_limit = {"term" : { "index.schema_subject.exact" : "<set name>" }}
range_limit = { "range" : { "last_updated" : {"gte" : "<from date>", "lte" : "<until date>"} } }
created_sort = [{"last_updated" : {"order" : "desc"}}, {"id.exact" : "desc"}]

Expand All @@ -71,9 +71,8 @@ def list_records(self, from_date=None, until_date=None, oai_set=None, list_size=
if start_after is not None or from_date is not None or until_date is not None or oai_set is not None:

if oai_set is not None:
s = deepcopy(self.set_limit)
s["term"]["index.schema_subject.exact"] = oai_set
q["query"]["bool"]["must"].append(s)
a = oai_set.replace(constants.SUBJECTS_SCHEMA,"")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

variable a never used. I think is forgot to remove this line.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@amdomanska can you confirm if this line should be used somewhere?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was a bug, that line is required, but the result of it was not used. I have fixed and pushed.

q["query"]["bool"]["should"] = {"match":{"index.classification": a}}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be on index.classification.exact ? Or does changing it from a term query to a match fix that?

Also, are we sure it should be a should query? Seems like is should be a must query like the one it is replacing?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I'm confident that this should be a term query and a must.

Can we please lay it out the way way as it was before, so use self.set_limit and just modify it to use index.classification.exact instead.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, you're right. The should and match is the only way to make it work without the exact filter. When exact is applied, then it is must and term that works.


if until_date is not None or from_date is not None or start_after is not None:
d = deepcopy(self.range_limit)
Expand Down