Skip to content

Commit

Permalink
Download PDFs from Wiley service
Browse files Browse the repository at this point in the history
* Add function to download PDF from Wiley author manuscript service
* Add corresponding unit test and fixtures

* https://mitlibraries.atlassian.net/browse/DLSPP-30
  • Loading branch information
ehanson8 committed Oct 14, 2021
1 parent ee2f5d3 commit 984deb4
Show file tree
Hide file tree
Showing 8 changed files with 43 additions and 4 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -132,4 +132,5 @@ dmypy.json
*.pdf
*.xlsx
.DS_Store
!tests/fixtures/test.csv
!tests/fixtures/*.csv
!tests/fixtures/*.pdf
2 changes: 1 addition & 1 deletion awd/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def __init__(self):
self.client = client("s3")

def put_file(self, file, bucket, key):
""""""
"""Put a file in a specified S3 bucket with a specified key."""
response = self.client.put_object(
Body=file,
Bucket=bucket,
Expand Down
11 changes: 11 additions & 0 deletions awd/wiley.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import requests

# Request headers sent with every Wiley download; the User-Agent value
# identifies the client as a desktop Chrome browser.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"
    ),
}


def get_wiley_pdf(url, doi, *, timeout=30):
    """Get PDF from Wiley server based on a DOI.

    Issues a GET request to ``url`` with ``doi`` appended verbatim (no
    separator is inserted, so ``url`` must already end with one) and sends the
    module-level ``headers`` with the request.

    Args:
        url: Base URL of the service, e.g. ``"http://example.com/doi/"``.
        doi: DOI string to append to ``url``.
        timeout: Seconds to wait for the server before giving up; forwarded to
            ``requests.get``. Without a timeout a stalled connection would
            block the caller indefinitely.

    Returns:
        The ``requests.Response`` for the request. The status code is not
        checked here, so callers must inspect it before using ``content``.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    return requests.get(f"{url}{doi}", headers=headers, timeout=timeout)
22 changes: 21 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,23 @@ def mocked_s3(aws_credentials):


@pytest.fixture()
def web_mock(crossref_work_record):
def web_mock(crossref_work_record, wiley_pdf):
with requests_mock.Mocker() as m:
request_headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"
}
m.get(
"http://example.com/doi/10.1002/term.3131",
text="Forbidden",
status_code=403,
)
m.get(
"http://example.com/doi/10.1002/term.3131",
content=wiley_pdf,
headers={"Content-Type": "application/pdf; charset=UTF-8"},
request_headers=request_headers,
)
m.get(
"http://example.com/works/10.1002/term.3131?mailto=dspace-lib@mit.edu",
json=crossref_work_record,
Expand All @@ -46,3 +61,8 @@ def crossref_value_dict():
@pytest.fixture()
def dspace_metadata():
    """Return the parsed contents of the DSpace metadata JSON fixture."""
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open("tests/fixtures/dspace_metadata.json", "r") as fixture_file:
        return json.load(fixture_file)


@pytest.fixture()
def wiley_pdf():
    """Return the raw bytes of the sample Wiley PDF fixture."""
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open("tests/fixtures/wiley.pdf", "rb") as fixture_file:
        return fixture_file.read()
File renamed without changes.
Binary file added tests/fixtures/wiley.pdf
Binary file not shown.
2 changes: 1 addition & 1 deletion tests/test_crossref.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


def test_get_dois_from_spreadsheet():
dois = crossref.get_dois_from_spreadsheet("tests/fixtures/test.csv")
dois = crossref.get_dois_from_spreadsheet("tests/fixtures/dois.csv")
for doi in dois:
assert doi == "10.1002/term.3131"

Expand Down
7 changes: 7 additions & 0 deletions tests/test_wiley.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from awd import wiley


def test_get_wiley_pdf(web_mock, wiley_pdf):
    """The body returned by get_wiley_pdf equals the PDF fixture bytes."""
    base_url = "http://example.com/doi/"
    pdf_response = wiley.get_wiley_pdf(base_url, "10.1002/term.3131")
    assert pdf_response.content == wiley_pdf

0 comments on commit 984deb4

Please sign in to comment.