Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ requests = "*"
structlog = "*"
attr = "*"
click = "*"
lxml = "*"

[requires]
python_version = "3.7"
Expand Down
64 changes: 48 additions & 16 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions dsaps/models.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import datetime
from functools import partial
import os
import operator
import requests
import time

import attr
from lxml import html
import structlog

op = operator.attrgetter('name')
Expand Down Expand Up @@ -123,6 +125,17 @@ class MetadataEntry(BaseRecord):
language = Field()


def build_file_list_remote(directory_url, file_extension):
    """Build a dict of files listed on a remote directory index page.

    Fetches ``directory_url`` over HTTP, parses the returned HTML and
    collects every link whose value ends with ``file_extension``.

    Args:
        directory_url: URL of the directory listing page. Each matching
            link is appended to it verbatim, so it is expected to end
            with a trailing slash.
        file_extension: Suffix a link must end with to be included.

    Returns:
        A dict mapping each matching link (as written in the page) to
        ``directory_url`` concatenated with that link.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    response = requests.get(directory_url)
    # Fail loudly on an error status rather than parsing the error page
    # as though it were the directory listing.
    response.raise_for_status()
    document = html.fromstring(response.content)
    file_list = {}
    # iterlinks() yields (element, attribute, link, pos) tuples.
    for _element, _attribute, link, _pos in document.iterlinks():
        if link.endswith(file_extension):
            file_list[link] = f'{directory_url}{link}'
    return file_list


def elapsed_time(start_time, label):
"""Calculate elapsed time."""
td = datetime.timedelta(seconds=time.time() - start_time)
Expand Down
20 changes: 20 additions & 0 deletions tests/test_models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os

import attr
import pytest
import requests_mock
Expand Down Expand Up @@ -60,6 +62,7 @@ def test_filtered_item_search(client):


def test__pop_inst(client):
"""Test _pop_inst function."""
class_type = models.Collection
rec_obj = {'name': 'Test title', 'type': 'collection', 'items': []}
rec_obj = client._pop_inst(class_type, rec_obj)
Expand All @@ -68,7 +71,24 @@ def test__pop_inst(client):


def test__build_uuid_list(client):
    """Check that _build_uuid_list returns the UUIDs of a record's children."""
    record = {'items': [{'uuid': '1234'}]}
    uuids = client._build_uuid_list(record, 'items')
    assert '1234' in uuids


def test_build_file_list_remote():
    """Check that build_file_list_remote picks up a PDF link from a listing."""
    # Minimal directory index page containing a single PDF link.
    listing_html = ''.join([
        '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"><html>',
        '<head><title>Index of /pdf</title></head><body><h1>Index of /',
        'pdf</h1><table><tr><th>Name</th><th>Last modified</th><th>',
        'Size</th></tr><tr><td><a href="999.pdf">999.pdf</a></td><td>',
        '2001-02-16 11:59 </td><td>107K</td></tr></table></body></html>',
    ])
    url = 'mock://test.com/pdfs/'
    with requests_mock.Mocker() as mocker:
        mocker.get(url, text=listing_html)
        found = models.build_file_list_remote(url, 'pdf')
    assert '999.pdf' in found