From be4795ece97853764b7b8c3f359f127eeee90334 Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Mon, 16 Dec 2019 12:42:40 -0500 Subject: [PATCH] add file list func --- Pipfile | 1 + Pipfile.lock | 64 +++++++++++++++++++++++++++++++++----------- dsaps/models.py | 13 +++++++++ tests/test_models.py | 20 ++++++++++++++ 4 files changed, 82 insertions(+), 16 deletions(-) diff --git a/Pipfile b/Pipfile index d94a2cb..1da3184 100644 --- a/Pipfile +++ b/Pipfile @@ -12,6 +12,7 @@ requests = "*" structlog = "*" attr = "*" click = "*" +lxml = "*" [requires] python_version = "3.7" diff --git a/Pipfile.lock b/Pipfile.lock index ed5ef2a..2581cd3 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "b3b90945556e626398a7ce5d09116ed13635364ca3ddc5e01a9e5a6aced5f337" + "sha256": "f068635c34247a99e86662b9c499882fbb86089a20d7bd584193f44cae883b76" }, "pipfile-spec": 6, "requires": { @@ -26,10 +26,10 @@ }, "certifi": { "hashes": [ - "sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50", - "sha256:fd7c7c74727ddcf00e9acd26bba8da604ffec95bf1c2144e67aff7a8b50e6cef" + "sha256:017c25db2a153ce562900032d5bc68e9f191e44e9a0f762f373977de9df1fbb3", + "sha256:25b64c7da4cd7479594d035c08c2d809eb4aab3a26e5a990ea98cc450c320f1f" ], - "version": "==2019.9.11" + "version": "==2019.11.28" }, "chardet": { "hashes": [ @@ -53,6 +53,38 @@ ], "version": "==2.8" }, + "lxml": { + "hashes": [ + "sha256:00ac0d64949fef6b3693813fe636a2d56d97a5a49b5bbb86e4cc4cc50ebc9ea2", + "sha256:0571e607558665ed42e450d7bf0e2941d542c18e117b1ebbf0ba72f287ad841c", + "sha256:0e3f04a7615fdac0be5e18b2406529521d6dbdb0167d2a690ee328bef7807487", + "sha256:13cf89be53348d1c17b453867da68704802966c433b2bb4fa1f970daadd2ef70", + "sha256:217262fcf6a4c2e1c7cb1efa08bd9ebc432502abc6c255c4abab611e8be0d14d", + "sha256:223e544828f1955daaf4cefbb4853bc416b2ec3fd56d4f4204a8b17007c21250", + "sha256:277cb61fede2f95b9c61912fefb3d43fbd5f18bf18a14fae4911b67984486f5d", + 
"sha256:3213f753e8ae86c396e0e066866e64c6b04618e85c723b32ecb0909885211f74", + "sha256:4690984a4dee1033da0af6df0b7a6bde83f74e1c0c870623797cec77964de34d", + "sha256:4fcc472ef87f45c429d3b923b925704aa581f875d65bac80f8ab0c3296a63f78", + "sha256:61409bd745a265a742f2693e4600e4dbd45cc1daebe1d5fad6fcb22912d44145", + "sha256:678f1963f755c5d9f5f6968dded7b245dd1ece8cf53c1aa9d80e6734a8c7f41d", + "sha256:6c6d03549d4e2734133badb9ab1c05d9f0ef4bcd31d83e5d2b4747c85cfa21da", + "sha256:6e74d5f4d6ecd6942375c52ffcd35f4318a61a02328f6f1bd79fcb4ffedf969e", + "sha256:7b4fc7b1ecc987ca7aaf3f4f0e71bbfbd81aaabf87002558f5bc95da3a865bcd", + "sha256:7ed386a40e172ddf44c061ad74881d8622f791d9af0b6f5be20023029129bc85", + "sha256:8f54f0924d12c47a382c600c880770b5ebfc96c9fd94cf6f6bdc21caf6163ea7", + "sha256:ad9b81351fdc236bda538efa6879315448411a81186c836d4b80d6ca8217cdb9", + "sha256:bbd00e21ea17f7bcc58dccd13869d68441b32899e89cf6cfa90d624a9198ce85", + "sha256:c3c289762cc09735e2a8f8a49571d0e8b4f57ea831ea11558247b5bdea0ac4db", + "sha256:cf4650942de5e5685ad308e22bcafbccfe37c54aa7c0e30cd620c2ee5c93d336", + "sha256:cfcbc33c9c59c93776aa41ab02e55c288a042211708b72fdb518221cc803abc8", + "sha256:e301055deadfedbd80cf94f2f65ff23126b232b0d1fea28f332ce58137bcdb18", + "sha256:ebbfe24df7f7b5c6c7620702496b6419f6a9aa2fd7f005eb731cc80d7b4692b9", + "sha256:eff69ddbf3ad86375c344339371168640951c302450c5d3e9936e98d6459db06", + "sha256:f6ed60a62c5f1c44e789d2cf14009423cb1646b44a43e40a9cf6a21f077678a1" + ], + "index": "pypi", + "version": "==4.4.2" + }, "requests": { "hashes": [ "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4", @@ -94,10 +126,10 @@ }, "certifi": { "hashes": [ - "sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50", - "sha256:fd7c7c74727ddcf00e9acd26bba8da604ffec95bf1c2144e67aff7a8b50e6cef" + "sha256:017c25db2a153ce562900032d5bc68e9f191e44e9a0f762f373977de9df1fbb3", + "sha256:25b64c7da4cd7479594d035c08c2d809eb4aab3a26e5a990ea98cc450c320f1f" ], - "version": "==2019.9.11" + 
"version": "==2019.11.28" }, "chardet": { "hashes": [ @@ -115,18 +147,18 @@ }, "importlib-metadata": { "hashes": [ - "sha256:aa18d7378b00b40847790e7c27e11673d7fed219354109d0e7b9e5b25dc3ad26", - "sha256:d5f18a79777f3aa179c145737780282e27b508fc8fd688cb17c7a813e8bd39af" + "sha256:073a852570f92da5f744a3472af1b61e28e9f78ccf0c9117658dc32b15de7b45", + "sha256:d95141fbfa7ef2ec65cfd945e2af7e5a6ddbd7c8d9a25e66ff3be8e3daf9f60f" ], "markers": "python_version < '3.8'", - "version": "==0.23" + "version": "==1.3.0" }, "more-itertools": { "hashes": [ - "sha256:409cd48d4db7052af495b09dec721011634af3753ae1ef92d2b32f73a745f832", - "sha256:92b8c4b06dac4f0611c0729b2f2ede52b2e1bac1ab48f089c7ddc12e26bb60c4" + "sha256:b84b238cce0d9adad5ed87e745778d20a3f8487d0f0cb8b8a586816c7496458d", + "sha256:c833ef592a0324bcc6a60e48440da07645063c453880c9477ceb22490aec1564" ], - "version": "==7.2.0" + "version": "==8.0.2" }, "packaging": { "hashes": [ @@ -158,11 +190,11 @@ }, "pytest": { "hashes": [ - "sha256:63344a2e3bce2e4d522fd62b4fdebb647c019f1f9e4ca075debbd13219db4418", - "sha256:f67403f33b2b1d25a6756184077394167fe5e2f9d8bdaab30707d19ccec35427" + "sha256:6b571215b5a790f9b41f19f3531c53a45cf6bb8ef2988bc1ff9afb38270b25fa", + "sha256:e41d489ff43948babd0fad7ad5e49b8735d5d55e26628a58673c39ff61d95de4" ], "index": "pypi", - "version": "==5.3.1" + "version": "==5.3.2" }, "requests": { "hashes": [ diff --git a/dsaps/models.py b/dsaps/models.py index c6c8538..52b0146 100644 --- a/dsaps/models.py +++ b/dsaps/models.py @@ -1,10 +1,12 @@ import datetime from functools import partial +import os import operator import requests import time import attr +from lxml import html import structlog op = operator.attrgetter('name') @@ -123,6 +125,17 @@ class MetadataEntry(BaseRecord): language = Field() +def build_file_list_remote(directory_url, file_extension): + """Build list of files in remote directory.""" + file_list = {} + response = requests.get(directory_url) + links = html.fromstring(response.content).iterlinks() + 
for link in links: + if link[2].endswith(file_extension): + file_list[link[2]] = f'{directory_url}{link[2]}' + return file_list + + def elapsed_time(start_time, label): """Calculate elapsed time.""" td = datetime.timedelta(seconds=time.time() - start_time) diff --git a/tests/test_models.py b/tests/test_models.py index cb68397..c3e12ae 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,3 +1,5 @@ +import os + import attr import pytest import requests_mock @@ -60,6 +62,7 @@ def test_filtered_item_search(client): def test__pop_inst(client): + """Test _pop_inst function.""" class_type = models.Collection rec_obj = {'name': 'Test title', 'type': 'collection', 'items': []} rec_obj = client._pop_inst(class_type, rec_obj) @@ -68,7 +71,24 @@ def test__pop_inst(client): def test__build_uuid_list(client): + """Test _build_uuid_list function.""" rec_obj = {'items': [{'uuid': '1234'}]} children = 'items' child_list = client._build_uuid_list(rec_obj, children) assert '1234' in child_list + + +def test_build_file_list_remote(): + """Test build_file_list_remote function.""" + content = '' + content += 'Index of /pdf

Index of /' + content += 'pdf

NameLast modified' + content += 'Size
999.pdf' + content += '2001-02-16 11:59 107K
' + with requests_mock.Mocker() as m: + directory_url = 'mock://test.com/pdfs/' + file_extension = 'pdf' + m.get(directory_url, text=content) + file_list = models.build_file_list_remote(directory_url, + file_extension) + assert '999.pdf' in file_list