Skip to content

Commit 63ca607

Browse files
committed
add file list func
1 parent 45b0f26 commit 63ca607

File tree

4 files changed

+105
-16
lines changed

4 files changed

+105
-16
lines changed

Pipfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ requests = "*"
1212
structlog = "*"
1313
attr = "*"
1414
click = "*"
15+
lxml = "*"
1516

1617
[requires]
1718
python_version = "3.7"

Pipfile.lock

Lines changed: 48 additions & 16 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dsaps/models.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import datetime
22
from functools import partial
3+
import os
34
import operator
45
import requests
56
import time
67

78
import attr
9+
from lxml import html
810
import structlog
911

1012
op = operator.attrgetter('name')
@@ -123,6 +125,28 @@ class MetadataEntry(BaseRecord):
123125
language = Field()
124126

125127

128+
def build_file_list_local(directory, file_extension):
    """Map matching file names under a local directory to their paths.

    Walks ``directory`` recursively and keeps every file whose name ends
    with ``file_extension``. The result is keyed by bare file name, so
    when the same name occurs in several sub-directories the path visited
    last replaces the earlier ones.
    """
    return {
        name: os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory, topdown=True)
        for name in names
        if name.endswith(file_extension)
    }
137+
138+
139+
def build_file_list_remote(directory_url, file_extension):
    """Build list of files in a remote directory.

    Fetches the HTML page at ``directory_url`` and collects every link on
    it whose target ends with ``file_extension``.

    Args:
        directory_url: URL of the directory listing page. Link targets are
            appended to it verbatim, so it should end with ``/``.
        file_extension: Suffix a link target must end with to be kept.

    Returns:
        Dict mapping each matching link target to ``directory_url``
        followed by that target.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
    """
    response = requests.get(directory_url)
    # Fail loudly on an error response instead of scanning the error page
    # for links and reporting a misleading (usually empty) file list.
    response.raise_for_status()
    file_list = {}
    links = html.fromstring(response.content).iterlinks()
    # iterlinks() yields (element, attribute, link, pos) tuples.
    for _element, _attribute, link, _pos in links:
        if link.endswith(file_extension):
            file_list[link] = f'{directory_url}{link}'
    return file_list
148+
149+
126150
def elapsed_time(start_time, label):
127151
"""Calculate elapsed time."""
128152
td = datetime.timedelta(seconds=time.time() - start_time)

tests/test_models.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import os
2+
13
import attr
24
import pytest
35
import requests_mock
@@ -60,6 +62,7 @@ def test_filtered_item_search(client):
6062

6163

6264
def test__pop_inst(client):
65+
"""Test _pop_inst function."""
6366
class_type = models.Collection
6467
rec_obj = {'name': 'Test title', 'type': 'collection', 'items': []}
6568
rec_obj = client._pop_inst(class_type, rec_obj)
@@ -68,7 +71,36 @@ def test__pop_inst(client):
6871

6972

7073
def test__build_uuid_list(client):
    """Check that _build_uuid_list collects the uuid of each child."""
    record = {'items': [{'uuid': '1234'}]}
    uuids = client._build_uuid_list(record, 'items')
    assert '1234' in uuids
79+
80+
81+
def test_build_file_list_local():
    """Test build_file_list_local function."""
    # Local import keeps this block self-contained; the module only
    # imports ``os`` at the top.
    import tempfile

    file_extension = 'pdf'
    # A unique temporary directory avoids clashing with an existing
    # ``test_temp`` folder and is removed even if the call below raises.
    with tempfile.TemporaryDirectory() as directory:
        # Create an empty file and close the handle right away.
        with open(os.path.join(directory, '999.pdf'), 'w'):
            pass
        file_list = models.build_file_list_local(directory, file_extension)
    assert '999.pdf' in file_list
91+
92+
93+
def test_build_file_list_remote():
    """Check build_file_list_remote against a mocked directory index."""
    # Minimal Apache-style index page containing a single PDF link.
    page = (
        '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"><html>'
        '<head><title>Index of /pdf</title></head><body><h1>Index of /'
        'pdf</h1><table><tr><th>Name</th><th>Last modified</th><th>'
        'Size</th></tr><tr><td><a href="999.pdf">999.pdf</a></td><td>'
        '2001-02-16 11:59 </td><td>107K</td></tr></table></body></html>'
    )
    url = 'mock://test.com/pdfs/'
    with requests_mock.Mocker() as mocker:
        mocker.get(url, text=page)
        found = models.build_file_list_remote(url, 'pdf')
        assert '999.pdf' in found

0 commit comments

Comments
 (0)