diff --git a/.gitignore b/.gitignore index b6e4761..7f6b817 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,5 @@ dmypy.json # Pyre type checker .pyre/ + +.DS_Store diff --git a/Dockerfile b/Dockerfile index 4baa9bd..bcf8bfa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,6 +9,8 @@ ENV PIP_NO_CACHE_DIR yes WORKDIR /app RUN pip install --no-cache-dir --upgrade pip pipenv +RUN apt-get update && apt-get upgrade -y && apt-get install -y git + COPY Pipfile* / RUN pipenv install --system --clear --deploy diff --git a/Makefile b/Makefile index 58ab82e..f84acba 100644 --- a/Makefile +++ b/Makefile @@ -39,7 +39,7 @@ isort: ## isort your imports, so you don't have to pipenv run isort . --diff test: ## runs pytest - pipenv run pytest + pipenv run pytest --cov=submitter coveralls: test pipenv run coveralls diff --git a/Pipfile b/Pipfile index c602c6f..39dc340 100644 --- a/Pipfile +++ b/Pipfile @@ -6,6 +6,8 @@ name = "pypi" [packages] click = "*" boto3 = "*" +smart-open = "*" +dspace-python-client = {ref = "0.1.0", git = "https://github.com/mitlibraries/dspace-python-client.git"} [dev-packages] flake8 = "*" @@ -15,6 +17,10 @@ bandit = "*" moto = {extras = ["s3", "sqs"], version = "*"} pytest = "*" coveralls = "*" +requests-mock = "*" +pytest-cov = "*" +pytest-env = "*" +freezegun = "*" [requires] python_version = "3.9" diff --git a/Pipfile.lock b/Pipfile.lock index 16b19ad..058b1ff 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "4886d0e7b0613ebefce99153e350cb6a32dcaf8fcb73d743599db3faad690916" + "sha256": "78e8e91859fd11a4d5055affeff153f7f9f97f23524067975296f745a531b8bc" }, "pipfile-spec": 6, "requires": { @@ -18,19 +18,19 @@ "default": { "boto3": { "hashes": [ - "sha256:a299d0c6b5a30dc2e823944286ec782aec415d83965a51f97fc9a779a04ff194", - "sha256:f4b17a2b6e04e5ec6f494e643d05b06dd60c88943f33d6f9650dd9e7f89a7022" + "sha256:63b9846c26e0905f4e9e39d6b59f152330c53a926d693439161c43dcf9779365", + "sha256:a9232185d8e7e2fd2b166c0ebee5d7b1f787fdb3093f33bbf5aa932c08f0ccac" ], "index": "pypi", - "version": "==1.18.32" + "version": "==1.18.42" }, "botocore": { "hashes": [ - "sha256:5803bf852304a301de41dccc3c0431053354144f3aefc7571dbe240a4288d3c5", - "sha256:95ff61534b2a423d0e70067c39615e4e70c119773d2180d7254bf4025c54396d" + "sha256:0952d1200968365b440045efe8e45bbae38cf603fee12bcfc3d7b5f963cbfa18", + "sha256:6de4fec4ee10987e4dea96f289553c2f45109fcaafcb74a5baee1221926e1306" ], "markers": "python_version >= '3.6'", - "version": "==1.21.32" + "version": "==1.21.42" }, "click": { "hashes": [ @@ -40,6 +40,10 @@ "index": "pypi", "version": "==8.0.1" }, + "dspace-python-client": { + "git": "https://github.com/mitlibraries/dspace-python-client.git", + "ref": "dd7a8ab508f1a0e7722b47851d6702d6a8ca9301" + }, "jmespath": { "hashes": [ "sha256:b85d0567b8666149a93172712e68920734333c0ce7e89b78b3e987f71e5ed4f9", @@ -51,6 +55,8 @@ "python-dateutil": { "hashes": [ "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", + "sha256:104a4ff9f1ece23d8a31582156ea3ae928afe7121fac9fed3e967a1e2d6cf6ed", + "sha256:1efd93a2e222eb7360b5396108fdfa04e9753637d24143b8026dfb48ffbc755b", "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", @@ -72,6 +78,14 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.16.0" }, + "smart-open": { + "hashes": [ + "sha256:71d14489da58b60ce12fc3ecb823facc59a8b23cd1b58edb97175640350d3a62", + "sha256:75abf758717a92a8f53aa96953f0c245c8cedf8e1e4184903db3659b419d4c17" + ], + "index": "pypi", + "version": "==5.2.1" + }, "urllib3": { "hashes": [ "sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4", @@ -100,7 +114,9 @@ "bandit": { "hashes": [ "sha256:216be4d044209fa06cf2a3e51b319769a51be8318140659719aa7a115c35ed07", - "sha256:8a4c7415254d75df8ff3c3b15cfe9042ecee628a1e40b44c15a98890fbfc2608" + "sha256:8a4c7415254d75df8ff3c3b15cfe9042ecee628a1e40b44c15a98890fbfc2608", + "sha256:8eb574cd365cd5b900cc21592a7f8e1bc2d856838f366d7351c004a8dce4c7ec", + "sha256:9cc799df25fdc3c555566bb60979552c5ff4a0ebfdec847e92925f5debe5f2b8" ], "index": "pypi", "version": "==1.7.0" @@ -115,19 +131,19 @@ }, "boto3": { "hashes": [ - "sha256:a299d0c6b5a30dc2e823944286ec782aec415d83965a51f97fc9a779a04ff194", - "sha256:f4b17a2b6e04e5ec6f494e643d05b06dd60c88943f33d6f9650dd9e7f89a7022" + "sha256:63b9846c26e0905f4e9e39d6b59f152330c53a926d693439161c43dcf9779365", + "sha256:a9232185d8e7e2fd2b166c0ebee5d7b1f787fdb3093f33bbf5aa932c08f0ccac" ], "index": "pypi", - "version": "==1.18.32" + "version": "==1.18.42" }, "botocore": { "hashes": [ - "sha256:5803bf852304a301de41dccc3c0431053354144f3aefc7571dbe240a4288d3c5", - "sha256:95ff61534b2a423d0e70067c39615e4e70c119773d2180d7254bf4025c54396d" + "sha256:0952d1200968365b440045efe8e45bbae38cf603fee12bcfc3d7b5f963cbfa18", + "sha256:6de4fec4ee10987e4dea96f289553c2f45109fcaafcb74a5baee1221926e1306" ], "markers": "python_version >= '3.6'", - "version": "==1.21.32" + "version": "==1.21.42" }, "certifi": { "hashes": [ @@ -188,11 +204,11 @@ }, "charset-normalizer": { "hashes": [ - "sha256:0c8911edd15d19223366a194a513099a302055a962bca2cec0f54b8b63175d8b", - "sha256:f23667ebe1084be45f6ae0538e4a5a865206544097e4e8bbcacf42cd02a348f3" + "sha256:7098e7e862f6370a2a8d1a6398cd359815c45d12626267652c3f13dec58e2367", + "sha256:fa471a601dfea0f492e4f4fca035cd82155e65dc45c9b83bf4322dfab63755dd" ], "markers": "python_version >= '3'", - "version": "==2.0.4" + "version": "==2.0.5" }, "click": { "hashes": [ @@ -210,6 +226,7 @@ "sha256:06191eb60f8d8a5bc046f3799f8a07a2d7aefb9504b0209aff0b47298333302a", "sha256:13034c4409db851670bc9acd836243aeee299949bd5673e11844befcb0149f03", "sha256:13c4ee887eca0f4c5a247b75398d4114c37882658300e153113dafb1d76de529", + "sha256:146ecef2215d4d828e18cc835b081d82aac994d4ccfd5bef0a0a5010a812f564", "sha256:184a47bbe0aa6400ed2d41d8e9ed868b8205046518c52464fde713ea06e3a74a", "sha256:18ba8bbede96a2c3dde7b868de9dcbd55670690af0988713f0603f037848418a", "sha256:1aa846f56c3d49205c952d8318e76ccc2ae23303351d9270ab220004c580cfe2", @@ -223,6 +240,7 @@ "sha256:30c77c1dc9f253283e34c27935fded5015f7d1abe83bc7821680ac444eaf7793", "sha256:3487286bc29a5aa4b93a072e9592f22254291ce96a9fbc5251f566b6b7343cdb", "sha256:372da284cfd642d8e08ef606917846fa2ee350f64994bebfbd3afb0040436905", + "sha256:3ef25667b6e598f01e2f6990483f0d1d9637d2a3afd3c619acb0d8634c3416d2", "sha256:41179b8a845742d1eb60449bdb2992196e211341818565abded11cfa90efb821", "sha256:44d654437b8ddd9eee7d1eaee28b7219bec228520ff809af170488fd2fed3e2b", "sha256:4a7697d8cb0f27399b0e393c0b90f0f1e40c82023ea4d45d22bce7032a5d7b81", @@ -251,13 +269,14 @@ "sha256:d1f9ce122f83b2305592c11d64f181b87153fc2c2bbd3bb4a3dde8303cfb1a6b", "sha256:d314ed732c25d29775e84a960c3c60808b682c08d86602ec2c3008e1202e3bb6", "sha256:d636598c8305e1f90b439dbf4f66437de4a5e3c31fdf47ad29542478c8508bbb", + "sha256:d9e6f4f41c3969fb6cc5aa47bf58649f2f0103b882f312198f7e62af537b7cfa", "sha256:deee1077aae10d8fa88cb02c845cfba9b62c55e1183f52f6ae6a2df6a2187160", "sha256:ebe78fe9a0e874362175b02371bdfbee64d8edc42a044253ddf4ee7d3c15212c", "sha256:f030f8873312a16414c0d8e1a1ddff2d3235655a2174e3648b4fa66b3f2f1079", "sha256:f0b278ce10936db1a37e6954e15a3730bea96a0997c26d7fee88e6c396c2086d", "sha256:f11642dddbb0253cc8853254301b51390ba0081750a8ac03f20ea8103f0c56b6" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4.0'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", "version": "==5.5" }, "coveralls": { @@ -293,7 +312,8 @@ }, "docopt": { "hashes": [ - "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491" + "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491", + "sha256:b2de412b0b73a5f16110cb1becdfbabceb3fd80811441183245938ff135ef9c1" ], "version": "==0.6.2" }, @@ -305,6 +325,14 @@ "index": "pypi", "version": "==3.9.2" }, + "freezegun": { + "hashes": [ + "sha256:177f9dd59861d871e27a484c3332f35a6e3f5d14626f2bf91be37891f18927f3", + "sha256:2ae695f7eb96c62529f03a038461afe3c692db3465e215355e1bb4b0ab408712" + ], + "index": "pypi", + "version": "==1.1.0" + }, "gitdb": { "hashes": [ "sha256:6c4cc71933456991da20917998acbe6cf4fb41eeaab7d6d67fbc05ecd4c865b0", @@ -332,6 +360,8 @@ "iniconfig": { "hashes": [ "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3", + "sha256:714dfcaf550420126ed7e66bb1cbc893752af388742828f294c21f847d78b88a", + "sha256:8cd395a2a89caee27b08d7879c1515fa9d93e0c477a5b38c9893787fc5e51896", "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32" ], "version": "==1.1.1" @@ -429,11 +459,11 @@ }, "more-itertools": { "hashes": [ - "sha256:2cf89ec599962f2ddc4d568a05defc40e0a587fbc10d5989713638864c36be4d", - "sha256:83f0308e05477c68f56ea3a888172c78ed5d5b3c282addb67508e7ba6c8f813a" + "sha256:70401259e46e216056367a0a6034ee3d3f95e0bf59d3aa6a4eb77837171ed996", + "sha256:8c746e0d09871661520da4f1241ba6b908dc903839733c8203b552cffaf173bd" ], "markers": "python_version >= '3.5'", - "version": "==8.8.0" + "version": "==8.9.0" }, "moto": { "extras": [ @@ -441,11 +471,11 @@ "sqs" ], "hashes": [ - "sha256:21c838b63f44e24b9b5015a2cdcc5be7c1e1004e58a69fb7cac71383bce34535", - "sha256:e86b0d92bc5f80802a8ae0f338a4fdac15dab82c54eb12db93b356b69407effc" + "sha256:461955aaccd257151591b1e36c9b2e7ddf7f42e17056f15ccbd64d0eb618742d", + "sha256:f5d131d0be71890809c94556930f865d25814e2d2e29d74fab749f963a11b518" ], "index": "pypi", - "version": "==2.2.6" + "version": "==2.2.7" }, "mypy-extensions": { "hashes": [ @@ -533,9 +563,26 @@ "index": "pypi", "version": "==6.2.5" }, + "pytest-cov": { + "hashes": [ + "sha256:261bb9e47e65bd099c89c3edf92972865210c36813f80ede5277dceb77a4a62a", + "sha256:261ceeb8c227b726249b376b8526b600f38667ee314f910353fa318caa01f4d7" + ], + "index": "pypi", + "version": "==2.12.1" + }, + "pytest-env": { + "hashes": [ + "sha256:7e94956aef7f2764f3c147d216ce066bf6c42948bb9e293169b1b1c880a580c2" + ], + "index": "pypi", + "version": "==0.6.2" + }, "python-dateutil": { "hashes": [ "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", + "sha256:104a4ff9f1ece23d8a31582156ea3ae928afe7121fac9fed3e967a1e2d6cf6ed", + "sha256:1efd93a2e222eb7360b5396108fdfa04e9753637d24143b8026dfb48ffbc755b", "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", @@ -580,7 +627,6 @@ "sha256:fdc842473cd33f45ff6bce46aea678a54e3d21f1b61a7750ce3c498eedfe25d6", "sha256:fe69978f3f768926cfa37b867e3843918e012cf83f680806599ddce33c2c68b0" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", "version": "==5.4.1" }, "regex": { @@ -637,13 +683,21 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", "version": "==2.26.0" }, + "requests-mock": { + "hashes": [ + "sha256:0a2d38a117c08bb78939ec163522976ad59a6b7fdd82b709e23bb98004a44970", + "sha256:8d72abe54546c1fc9696fa1516672f1031d72a55a1d66c85184f972a24ba0eba" + ], + "index": "pypi", + "version": "==1.9.3" + }, "responses": { "hashes": [ - "sha256:9476775d856d3c24ae660bbebe29fb6d789d4ad16acd723efbfb6ee20990b899", - "sha256:d8d0f655710c46fd3513b9202a7f0dcedd02ca0f8cf4976f27fa8ab5b81e656d" + "sha256:57bab4e9d4d65f31ea5caf9de62095032c4d81f591a8fac2f5858f7777b8567b", + "sha256:93f774a762ee0e27c0d9d7e06227aeda9ff9f5f69392f72bb6c6b73f8763563e" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", - "version": "==0.13.4" + "version": "==0.14.0" }, "s3transfer": { "hashes": [ diff --git a/README.md b/README.md index 92ec3e7..72d2963 100644 --- a/README.md +++ b/README.md @@ -37,4 +37,4 @@ make dist docker run submitter:latest -- ``` -note: the application requires being run in an environment with Roles based access to the AWS resources. +note: the application requires being run in an environment with Roles based access to the AWS resources. in addition, the environment must have WORKSPACE and SSM_PATH variables set according to stage and prod conventions. diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..f49badd --- /dev/null +++ b/conftest.py @@ -0,0 +1,259 @@ +import json +import os + +import boto3 +import pytest +import requests_mock +from dspace import DSpaceClient +from moto import mock_sqs + + +@pytest.fixture(scope="function") +def aws_credentials(): + """Mocked AWS Credentials for moto.""" + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_DEFAULT_REGION"] = "us-east-1" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["AWS_SECURITY_TOKEN"] = "testing" + os.environ["AWS_SESSION_TOKEN"] = "testing" + + +@pytest.fixture(scope="function") +def mocked_dspace(): + with requests_mock.Mocker() as m: + m.post( + "mock://dspace.edu/rest/login", + cookies={"JSESSIONID": "sessioncookie"}, + ) + m.get( + "mock://dspace.edu/rest/handle/0000/collection01", + json={"uuid": "collection01"}, + ) + m.post( + "mock://dspace.edu/rest/collections/collection01/items", + json=item_post_response, + ) + m.post( + "mock://dspace.edu/rest/items/item01/bitstreams", + json=bitstream_post_response, + ) + m.post( + "mock://dspace.edu/rest/collections/not-a-collection/items", status_code=404 + ) + m.delete("mock://dspace.edu/rest/bitstreams/bitstream01", status_code=200) + m.delete("mock://dspace.edu/rest/items/item01", status_code=200) + yield m + + +@pytest.fixture(scope="function") +def mocked_sqs(aws_credentials): + with mock_sqs(): + sqs = boto3.resource("sqs") + sqs.create_queue(QueueName="empty_input_queue") + sqs.create_queue(QueueName="empty_result_queue") + queue = sqs.create_queue(QueueName="input_queue_with_messages") + for i in range(11): + queue.send_message( + MessageAttributes=test_attributes, + MessageBody=json.dumps( + { + "SubmissionSystem": "DSpace@MIT", + "CollectionHandle": "0000/collection01", + "MetadataLocation": "tests/fixtures/test-item-metadata.json", + "Files": [ + { + "BitstreamName": "test-file-01.pdf", + "FileLocation": "tests/fixtures/test-file-01.pdf", + "BitstreamDescription": "A test bitstream", + } + ], + } + ), + ) + yield sqs + + +@pytest.fixture(scope="function") +def test_client(mocked_dspace): + client = DSpaceClient("mock://dspace.edu/rest/") + client.login("test", "test") + yield client + + +@pytest.fixture +def input_message_good(mocked_sqs): + queue = mocked_sqs.get_queue_by_name(QueueName="empty_input_queue") + queue.send_message( + MessageAttributes=test_attributes, + MessageBody=json.dumps( + { + "SubmissionSystem": "DSpace@MIT", + "CollectionHandle": "0000/collection01", + "MetadataLocation": "tests/fixtures/test-item-metadata.json", + "Files": [ + { + "BitstreamName": "test-file-01.pdf", + "FileLocation": "tests/fixtures/test-file-01.pdf", + "BitstreamDescription": "A test bitstream", + } + ], + } + ), + ) + message = queue.receive_messages(MessageAttributeNames=["All"])[0] + yield message + + +@pytest.fixture +def input_message_item_create_error(mocked_sqs): + queue = mocked_sqs.get_queue_by_name(QueueName="empty_input_queue") + queue.send_message( + MessageAttributes=test_attributes, + MessageBody=json.dumps( + { + "SubmissionSystem": "DSpace@MIT", + "CollectionHandle": "0000/collection01", + "MetadataLocation": "tests/fixtures/test-item-metadata-error.json", + "Files": [ + { + "BitstreamName": "test-file-01.pdf", + "FileLocation": "tests/fixtures/test-file-01.pdf", + "BitstreamDescription": "A test bitstream", + } + ], + } + ), + ) + message = queue.receive_messages(MessageAttributeNames=["All"])[0] + yield message + + +@pytest.fixture +def input_message_bitstream_create_error(mocked_sqs): + queue = mocked_sqs.get_queue_by_name(QueueName="empty_input_queue") + queue.send_message( + MessageAttributes=test_attributes, + MessageBody=json.dumps( + { + "SubmissionSystem": "DSpace@MIT", + "CollectionHandle": "0000/collection01", + "MetadataLocation": "tests/fixtures/test-item-metadata.json", + "Files": [ + { + "BitstreamName": "test-file-01.pdf", + "BitstreamDescription": "A test bitstream", + } + ], + } + ), + ) + message = queue.receive_messages(MessageAttributeNames=["All"])[0] + yield message + + +@pytest.fixture +def input_message_item_post_error(mocked_sqs): + queue = mocked_sqs.get_queue_by_name(QueueName="empty_input_queue") + queue.send_message( + MessageAttributes=test_attributes, + MessageBody=json.dumps( + { + "SubmissionSystem": "DSpace@MIT", + "CollectionHandle": "0000/not-a-collection", + "MetadataLocation": "tests/fixtures/test-item-metadata.json", + "Files": [ + { + "BitstreamName": "test-file-01.pdf", + "FileLocation": "tests/fixtures/test-file-01.pdf", + "BitstreamDescription": "A test bitstream", + } + ], + } + ), + ) + message = queue.receive_messages(MessageAttributeNames=["All"])[0] + yield message + + +@pytest.fixture +def input_message_bitstream_post_error(mocked_sqs): + queue = mocked_sqs.get_queue_by_name(QueueName="empty_input_queue") + queue.send_message( + MessageAttributes=test_attributes, + MessageBody=json.dumps( + { + "SubmissionSystem": "DSpace@MIT", + "CollectionHandle": "0000/collection01", + "MetadataLocation": "tests/fixtures/test-item-metadata.json", + "Files": [ + { + "BitstreamName": "test-file-01.pdf", + "FileLocation": "tests/fixtures/test-file-01.pdf", + "BitstreamDescription": "A test bitstream", + }, + { + "BitstreamName": "No file", + "FileLocation": "tests/fixtures/nothing-here", + "BitstreamDescription": "No file", + }, + ], + } + ), + ) + message = queue.receive_messages(MessageAttributeNames=["All"])[0] + yield message + + +item_post_response = { + "uuid": "item01", + "name": "Test Thesis", + "handle": "0000/item01", + "type": "item", + "link": "/rest/items/item01", + "expand": [ + "metadata", + "parentCollection", + "parentCollectionList", + "parentCommunityList", + "bitstreams", + "all", + ], + "lastModified": "2015-01-12 15:44:12.978", + "parentCollection": None, + "parentCollectionList": None, + "parentCommunityList": None, + "bitstreams": None, + "archived": "true", + "withdrawn": "false", +} + +bitstream_post_response = { + "uuid": "bitstream01", + "name": "test-file-01.pdf", + "handle": None, + "type": "bitstream", + "link": "/rest/bitstreams/bitstream01", + "expand": ["parent", "policies", "all"], + "bundleName": "ORIGINAL", + "description": "A test bitstream", + "format": "Adobe PDF", + "mimeType": "application/pdf", + "sizeBytes": 129112, + "parentObject": None, + "retrieveLink": "/bitstreams/bitstream01/retrieve", + "checkSum": { + "value": "62778292a3a6dccbe2662a2bfca3b86e", + "checkSumAlgorithm": "MD5", + }, + "sequenceId": 1, + "policies": None, +} + +test_attributes = { + "PackageID": {"DataType": "String", "StringValue": "etdtest01"}, + "SubmissionSource": {"DataType": "String", "StringValue": "etd"}, + "OutputQueue": { + "DataType": "String", + "StringValue": "empty_result_queue", + }, +} diff --git a/pyproject.toml b/pyproject.toml index b0471b7..27c83b9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,8 @@ [build-system] requires = ["setuptools", "wheel"] -build-backend = "setuptools.build_meta:__legacy__" \ No newline at end of file +build-backend = "setuptools.build_meta:__legacy__" + +[tool.pytest.ini_options] +env = [ + "WORKSPACE=test" +] diff --git a/setup.py b/setup.py index db5884c..1c49758 100644 --- a/setup.py +++ b/setup.py @@ -7,6 +7,12 @@ include_package_data=True, install_requires=[ "Click", + "boto3", + "smart-open", + ( + "dspace-python-client @ git+https://github.com/mitlibraries/" + "dspace-python-client@0.1.0#egg=dspace-python-client" + ), ], entry_points={"console_scripts": ["submitter=submitter.cli:main"]}, ) diff --git a/submitter/__init__.py b/submitter/__init__.py index 074be5d..f7bcfd4 100644 --- a/submitter/__init__.py +++ b/submitter/__init__.py @@ -1,6 +1,6 @@ """ DSpace Submission Service """ +import logging -INPUT_QUEUE = "queue1-stage" -OUTPUT_QUEUE = "queue2-stage" +logging.basicConfig(level=logging.INFO) diff --git a/submitter/cli.py b/submitter/cli.py index 0047ac2..80cb3a3 100644 --- a/submitter/cli.py +++ b/submitter/cli.py @@ -1,9 +1,13 @@ +import logging + import click -from submitter import INPUT_QUEUE, OUTPUT_QUEUE +from submitter import config from submitter.sample_data import sample_data from submitter.sqs import message_loop +logger = logging.getLogger(__name__) + @click.group() def main(): @@ -12,21 +16,22 @@ def main(): @main.command() @click.option( - "--input-queue", default=INPUT_QUEUE, help="queue name to use as input queue" -) -@click.option( - "--output-queue", default=OUTPUT_QUEUE, help="queue name to use as output queue" + "--queue", default=config.INPUT_QUEUE, help="Name of queue to process messages from" ) @click.option("--wait", default=20, help="seconds to wait for long polling. max 20") -def start(input_queue, output_queue, wait): - click.echo("Processing starting") - message_loop(input_queue, output_queue, wait) - click.echo("Processing complete") +def start(queue, wait): + logger.info("Starting processing messages from queue %s", queue) + message_loop(queue, wait) + logger.info("Completed processing messages from queue %s", queue) @main.command() -@click.option("--queue", default=INPUT_QUEUE, help="queue name to use as input queue") +@click.option( + "--queue", + default=config.INPUT_QUEUE, + help="Name of queue to load sample messages to", +) def sample_data_loader(queue): - click.echo("sample this!") + logger.info("sample this!") sample_data(queue) - click.echo("sample data (probably) loaded into input queue") + logger.info("sample data (probably) loaded into input queue") diff --git a/submitter/config.py b/submitter/config.py new file mode 100644 index 0000000..b12ae66 --- /dev/null +++ b/submitter/config.py @@ -0,0 +1,29 @@ +import logging +import os + +from submitter.ssm import SSM + +logger = logging.getLogger(__name__) + +env = os.getenv("WORKSPACE") +ssm_path = os.getenv("SSM_PATH") + +logger.info("Configuring dspace-submission-service for current env: %s", env) + +ssm = SSM() + +if env == "stage" or env == "prod": + DSPACE_API_URL = ssm.get_parameter_value(ssm_path + "dspace_api_url") + DSPACE_USER = ssm.get_parameter_value(ssm_path + "dspace_user") + DSPACE_PASSWORD = ssm.get_parameter_value(ssm_path + "dspace_password") + INPUT_QUEUE = ssm.get_parameter_value(ssm_path + "SQS_dss_input_queue") +elif env == "test": + DSPACE_API_URL = "mock://dspace.edu/rest/" + DSPACE_USER = "test" + DSPACE_PASSWORD = "test" # nosec + INPUT_QUEUE = "test_queue_with_messages" +else: + DSPACE_API_URL = os.getenv("DSPACE_API_URL") + DSPACE_USER = os.getenv("DSPACE_USER") + DSPACE_PASSWORD = os.getenv("DSPACE_PASSWORD") + INPUT_QUEUE = os.getenv("DSS_INPUT_QUEUE") diff --git a/submitter/sqs.py b/submitter/sqs.py index 7a10d41..c0aa350 100644 --- a/submitter/sqs.py +++ b/submitter/sqs.py @@ -1,76 +1,72 @@ +import json +import logging + import boto3 -import click +from dspace.client import DSpaceClient +from submitter import config +from submitter.submission import Submission -def message_loop(input_queue, output_queue, wait): - msgs = retrieve(input_queue, wait) +logger = logging.getLogger(__name__) - if len(msgs) > 0: - click.echo(len(msgs)) - process(msgs, output_queue) - message_loop(input_queue, output_queue, wait) - else: - click.echo("No messages received") +def message_loop(queue, wait): + logger.info("Message loop started") + msgs = retrieve_messages_from_queue(queue, wait) -def process(msgs, output_queue): - for message in msgs: - click.echo(message.message_attributes) - click.echo(message.body) - print("Do all the dspace submission stuff here") + if len(msgs) > 0: + process(msgs) + message_loop(queue, wait) + else: + logger.info("No messages available in queue %s", queue) - # faking it with always succeeding for now... creating of this status - # dict is likely better moved to our upcoming submission class but this - # was convenient for initial testing - status = { - "PackageSource": { - "DataType": "String", - "StringValue": message.message_attributes.get("PackageSource").get( - "StringValue" - ), - }, - "PackageID": { - "DataType": "String", - "StringValue": message.message_attributes.get("PackageID").get( - "StringValue" - ), - }, - "status": {"DataType": "String", "StringValue": "success"}, - "handle": { - "DataType": "String", - "StringValue": "https://example.com/handle/this", - }, - } - # write result to output - write(status, output_queue) +def process(msgs): + client = DSpaceClient(config.DSPACE_API_URL) + client.login(config.DSPACE_USER, config.DSPACE_PASSWORD) - # cleanup (probs better to confirm the write to the output was good - # before cleanup but for now yolo it) + for message in msgs: + message_id = message.message_attributes["PackageID"]["StringValue"] + message_source = message.message_attributes["SubmissionSource"]["StringValue"] + logger.info("Processing message %s from source %s", message_id, message_source) + try: + submission = Submission.from_message(message) + except Exception as e: + # TODO: handle and test submit message errors + raise e + submission.submit(client) + write_message_to_queue( + submission.result_attributes, + json.dumps(submission.result_message), + submission.result_queue, + ) + # TODO: probs better to confirm the write to the output was good + # before cleanup but for now yolo it message.delete() + logger.info("Deleted message %s from input queue", message_id) -def retrieve(input_queue, wait): +def retrieve_messages_from_queue(input_queue, wait): sqs = boto3.resource("sqs") queue = sqs.get_queue_by_name(QueueName=input_queue) - click.echo("Polling for messages") + logger.info("Polling queue %s for messages", input_queue) msgs = queue.receive_messages( MaxNumberOfMessages=10, WaitTimeSeconds=wait, MessageAttributeNames=["All"], AttributeNames=["All"], ) + logger.info("%d messages received", len(msgs)) return msgs -def write(status, output_queue): +def write_message_to_queue(attributes, body, output_queue): sqs = boto3.resource("sqs") queue = sqs.get_queue_by_name(QueueName=output_queue) - - # Send message to SQS result queue queue.send_message( - MessageAttributes=status, - MessageBody=("testing"), + MessageAttributes=attributes, + MessageBody=body, ) + logger.info("Wrote message to %s with message body: %s", output_queue, body) diff --git a/submitter/ssm.py b/submitter/ssm.py new file mode 100644 index 0000000..e860e8f --- /dev/null +++ b/submitter/ssm.py @@ -0,0 +1,17 @@ +from boto3 import client + + +class SSM: + """An SSM class that provides a generic boto3 SSM client with specific SSM + functionality necessary for dspace submission service""" + + def __init__(self): + self.client = client("ssm", region_name="us-east-1") + + def get_parameter_value(self, parameter_key): + """Get parameter value based on the specified key.""" + parameter_object = self.client.get_parameter( + Name=parameter_key, WithDecryption=True + ) + parameter_value = parameter_object["Parameter"]["Value"] + return parameter_value diff --git a/submitter/submission.py b/submitter/submission.py new file mode 100644 index 0000000..6007dcf --- /dev/null +++ b/submitter/submission.py @@ -0,0 +1,139 @@ +import json +import logging +import traceback +from datetime import datetime + +import dspace +import smart_open + +logger = logging.getLogger(__name__) + + +class Submission: + def __init__( + self, + destination, + collection_handle, + metadata_location, + files, + attributes, + result_queue, + ): + self.destination = destination + self.collection_handle = collection_handle + self.metadata_location = metadata_location + self.files = files + self.result_attributes = attributes + self.result_message = None + self.result_queue = result_queue + + @classmethod + def from_message(cls, message): + body = json.loads(message.body) + return cls( + destination=body["SubmissionSystem"], + collection_handle=body["CollectionHandle"], + metadata_location=body["MetadataLocation"], + files=body["Files"], + attributes={ + "PackageID": message.message_attributes["PackageID"], + "SubmissionSource": message.message_attributes["SubmissionSource"], + }, + result_queue=message.message_attributes["OutputQueue"]["StringValue"], + ) + + def get_metadata_entries_from_file(self): + with smart_open.open(self.metadata_location) as f: + metadata = json.load(f) + for entry in metadata["metadata"]: + yield entry + + def result_error_message(self, error, info): + time = datetime.now() + self.result_message = { + "ResultType": "error", + "ErrorTimestamp": time.strftime("%Y-%m-%d %H:%M:%S"), + "ErrorInfo": info, + "ExceptionMessage": str(error), + "ExceptionTraceback": traceback.format_exc(), + } + + def result_success_message(self, item): + self.result_message = { + "ResultType": "success", + "ItemHandle": item.handle, + "lastModified": item.lastModified, + "Bitstreams": [], + } + + for bitstream in item.bitstreams: + self.result_message["Bitstreams"].append( + { + "BitstreamName": bitstream.name, + "BitstreamUUID": bitstream.uuid, + "BitstreamChecksum": bitstream.checkSum, + } + ) + + def submit(self, client): + # Create item instance and add metadata + try: + item = dspace.item.Item() + for entry in self.get_metadata_entries_from_file(): + metadata_entry = dspace.item.MetadataEntry.from_dict(entry) + item.metadata.append(metadata_entry) + except Exception as e: + self.result_error_message( + e, "Error occurred while creating item metadata from file" + ) + return + + # Add bitstreams to item from files + logger.info("Adding bitstreams to item") + try: + for file in self.files: + bitstream = dspace.bitstream.Bitstream( + file_path=file["FileLocation"], + name=file["BitstreamName"], + description=file.get("BitstreamDescription"), + ) + item.bitstreams.append(bitstream) + except Exception as e: + self.result_error_message( + e, "Error occurred while adding bitstreams to item from files" + ) + return + + # Post item to DSpace + try: + item.post(client, collection_handle=self.collection_handle) + except Exception as e: + self.result_error_message(e, "Error occurred while posting item to DSpace") + return + logger.info("Posted item to Dspace with handle %s", item.handle) + + # Post all bitstreams to item + try: + for bitstream in item.bitstreams: + bitstream.post(client, item_uuid=item.uuid) + except Exception as e: + handle = item.handle + for bitstream in item.bitstreams: + if bitstream.uuid is not None: + bitstream.delete(client) + item.delete(client) + self.result_error_message( + e, + ( + f"Error occurred while posting bitstreams to item in DSpace. Item " + f"with handle {handle} and any successfully posted bitstreams " + f"have been deleted" + ), + ) + return + logger.info( + "Posted %d bitstreams to item with handle %s", + len(item.bitstreams), + item.handle, + ) + self.result_success_message(item) diff --git a/tests/fixtures/test-file-01.pdf b/tests/fixtures/test-file-01.pdf new file mode 100644 index 0000000..a3e5a49 Binary files /dev/null and b/tests/fixtures/test-file-01.pdf differ diff --git a/tests/fixtures/test-item-metadata-error.json b/tests/fixtures/test-item-metadata-error.json new file mode 100644 index 0000000..2fe5f4a --- /dev/null +++ b/tests/fixtures/test-item-metadata-error.json @@ -0,0 +1,7 @@ +{ + "metadata": [ + { + "key": "dc.title" + } + ] +} diff --git a/tests/fixtures/test-item-metadata.json b/tests/fixtures/test-item-metadata.json new file mode 100644 index 0000000..c6c0c9f --- /dev/null +++ b/tests/fixtures/test-item-metadata.json @@ -0,0 +1,12 @@ +{ + "metadata": [ + { + "key": "dc.title", + "value": "Test Thesis" + }, + { + "key": "dc.contributor.author", + "value": "Jane Q. Smith" + } + ] +} diff --git a/tests/test_cli.py b/tests/test_cli.py index 46856c2..eb532ac 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,74 +1,45 @@ -import os - -import boto3 from click.testing import CliRunner -from moto import mock_sqs from submitter.cli import main -os.environ["AWS_DEFAULT_REGION"] = "us-east-1" - -@mock_sqs -def test_cli_sample_data_loader(): - mocked_sqs = boto3.resource("sqs") - input_queue = "test-input" - queue = mocked_sqs.create_queue(QueueName=input_queue) +def test_cli_sample_data_loader(mocked_sqs): + queue = mocked_sqs.get_queue_by_name(QueueName="empty_input_queue") - # confirm queue starts empty sqs_messages = queue.receive_messages() assert len(sqs_messages) == 0 runner = CliRunner() - result = runner.invoke(main, ["sample-data-loader", "--queue", input_queue]) + result = runner.invoke(main, ["sample-data-loader", "--queue", "empty_input_queue"]) assert result.exit_code == 0 - # confirm messages now in queue sqs_messages = queue.receive_messages() assert len(sqs_messages) > 0 -@mock_sqs -def test_cli_start(): - mocked_sqs = boto3.resource("sqs") - input_queue = "test-input" - output_queue = "test-output" - queue = mocked_sqs.create_queue(QueueName=input_queue) - out = mocked_sqs.create_queue(QueueName=output_queue) +def test_cli_start(caplog, mocked_dspace, mocked_sqs): + # Required because pytest and CliRunner handle log capturing in incompatible ways + caplog.set_level(100000) + input_queue = mocked_sqs.get_queue_by_name(QueueName="input_queue_with_messages") + result_queue = mocked_sqs.get_queue_by_name(QueueName="empty_result_queue") - # confirm queue starts empty - sqs_messages = queue.receive_messages() - assert len(sqs_messages) == 0 + results = result_queue.receive_messages() + assert len(results) == 0 runner = CliRunner() - result = runner.invoke(main, ["sample-data-loader", "--queue", input_queue]) - assert result.exit_code == 0 - - # confirm messages now in queue - sqs_messages = queue.receive_messages() - assert len(sqs_messages) > 0 - - # confirm no messages in out queue before start - out_messages = out.receive_messages() - assert len(out_messages) == 0 - result = runner.invoke( main, [ "start", "--wait", 1, - "--input-queue", - input_queue, - "--output-queue", - output_queue, + "--queue", + "input_queue_with_messages", ], ) assert result.exit_code == 0 - # confirm queue is empty again - sqs_messages = queue.receive_messages() + sqs_messages = input_queue.receive_messages() assert len(sqs_messages) == 0 - - out_messages = out.receive_messages() + out_messages = result_queue.receive_messages() assert len(out_messages) > 0 diff --git a/tests/test_submission.py b/tests/test_submission.py new file mode 100644 index 0000000..e5735e8 --- /dev/null +++ b/tests/test_submission.py @@ -0,0 +1,131 @@ +import traceback + +from dspace import Bitstream, Item +from freezegun import freeze_time + +from submitter.submission import Submission + + +def test_init_submission_from_message(input_message_good): + submission = Submission.from_message(input_message_good) + assert submission.destination == "DSpace@MIT" + assert submission.collection_handle == "0000/collection01" + assert submission.metadata_location == "tests/fixtures/test-item-metadata.json" + assert submission.files == [ + { + "BitstreamName": "test-file-01.pdf", + "FileLocation": "tests/fixtures/test-file-01.pdf", + "BitstreamDescription": "A test bitstream", + } + ] + assert submission.result_attributes == { + "PackageID": {"DataType": "String", "StringValue": "etdtest01"}, + "SubmissionSource": {"DataType": "String", "StringValue": "etd"}, + } + assert submission.result_message is None + assert submission.result_queue == "empty_result_queue" + + +def test_get_metadata_entries_from_file(): + submission = Submission( + destination=None, + collection_handle=None, + metadata_location="tests/fixtures/test-item-metadata.json", + files=None, + attributes=None, + result_queue=None, + ) + metadata = submission.get_metadata_entries_from_file() + assert next(metadata) == {"key": "dc.title", "value": "Test Thesis"} + + +@freeze_time("2021-09-01 05:06:07") +def test_result_error_message(input_message_good): + submission = Submission.from_message(input_message_good) + error = KeyError() + submission.result_error_message(error, "A test error") + assert submission.result_message["ResultType"] == "error" + assert submission.result_message["ErrorTimestamp"] == "2021-09-01 05:06:07" + assert submission.result_message["ErrorInfo"] == "A test error" + assert submission.result_message["ExceptionMessage"] == str(error) + assert submission.result_message["ExceptionTraceback"] == traceback.format_exc() + + +def test_result_success_message(input_message_good): + item = Item() + item.handle = "0000/12345" + item.lastModified = "yesterday" + bitstream = Bitstream() + bitstream.name = "A test bitstream" + bitstream.uuid = "1234-5678-9000" + bitstream.checkSum = { + "value": "a4e0f4930dfaff904fa3c6c85b0b8ecc", + "checkSumAlgorithm": "MD5", + } + item.bitstreams = [bitstream] + submission = Submission.from_message(input_message_good) + submission.result_success_message(item) + assert submission.result_message["ResultType"] == "success" + assert submission.result_message["ItemHandle"] == item.handle + assert submission.result_message["lastModified"] == item.lastModified + assert submission.result_message["Bitstreams"] == [ + { + "BitstreamName": bitstream.name, + "BitstreamUUID": bitstream.uuid, + "BitstreamChecksum": bitstream.checkSum, + } + ] + + +def test_submit_success(mocked_dspace, test_client, input_message_good): + submission = Submission.from_message(input_message_good) + submission.submit(test_client) + assert submission.result_message["ResultType"] == "success" + + +def test_submit_create_item_error( + mocked_dspace, test_client, input_message_item_create_error +): + submission = Submission.from_message(input_message_item_create_error) + submission.submit(test_client) + assert submission.result_message["ResultType"] == "error" + assert ( + submission.result_message["ErrorInfo"] + == "Error occurred while creating item metadata from file" + ) + + +def test_submit_add_bitstreams_error( + mocked_dspace, test_client, input_message_bitstream_create_error +): + submission = Submission.from_message(input_message_bitstream_create_error) + submission.submit(test_client) + assert submission.result_message["ResultType"] == "error" + assert ( + submission.result_message["ErrorInfo"] + == "Error occurred while adding bitstreams to item from files" + ) + + +def test_submit_item_post_error( + mocked_dspace, test_client, input_message_item_post_error +): + submission = Submission.from_message(input_message_item_post_error) + submission.submit(test_client) + assert submission.result_message["ResultType"] == "error" + assert ( + submission.result_message["ErrorInfo"] + == "Error occurred while posting item to DSpace" + ) + + +def test_submit_bitstream_post_error( + mocked_dspace, test_client, input_message_bitstream_post_error +): + submission = Submission.from_message(input_message_bitstream_post_error) + submission.submit(test_client) + assert submission.result_message["ResultType"] == "error" + assert submission.result_message["ErrorInfo"] == ( + "Error occurred while posting bitstreams to item in DSpace. Item with handle " + "0000/item01 and any successfully posted bitstreams have been deleted" + )