diff --git a/.gitignore b/.gitignore
index cd37cec..92037cf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,6 +39,132 @@ $RECYCLE.BIN/
.Trashes
.VolumeIcon.icns
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other info into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
@@ -52,18 +178,9 @@ local/*
!data/.keep
.profile
*.csv
+!tests/fixtures/*.csv
*.json
+!config/*.json
+!tests/fixtures/*.json
createItemMetadataFromCSV_*
*.txt
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Rope project settings
-.ropeproject
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..4b1d45a
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,7 @@
+language: python
+python:
+ - "3.8"
+install:
+ - pipenv install --dev
+script:
+ - pipenv run pytest
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..ce30b6c
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,13 @@
+Copyright 2019 MIT Libraries
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..1aba38f
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+include LICENSE
diff --git a/Pipfile b/Pipfile
index 4639c37..c02ecb3 100644
--- a/Pipfile
+++ b/Pipfile
@@ -4,11 +4,18 @@ url = "https://pypi.org/simple"
verify_ssl = true
[dev-packages]
+pytest = "*"
+requests-mock = "*"
[packages]
requests = "*"
-click = "*"
+structlog = "*"
attrs = "*"
+click = "*"
+lxml = "*"
[requires]
-python_version = "3.7"
+python_version = "3.8"
+
+[scripts]
+dsaps = "python -c \"from dsaps.cli import main; main()\""
diff --git a/Pipfile.lock b/Pipfile.lock
index 1b7c2af..33f1610 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,11 +1,11 @@
{
"_meta": {
"hash": {
- "sha256": "01dc55cd69a2df69f74a7428d6c916635a02376ce9d212768bbb2065001068d1"
+ "sha256": "49000269c1d938e1cf8e815b9f7d86df95693fb5fe1262e610db0b28739889b2"
},
"pipfile-spec": 6,
"requires": {
- "python_version": "3.7"
+ "python_version": "3.8"
},
"sources": [
{
@@ -18,56 +18,228 @@
"default": {
"attrs": {
"hashes": [
- "sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79",
- "sha256:f0b870f674851ecbfbbbd364d6b5cbdff9dcedbc7f3f5e18a6891057f21fe399"
+ "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6",
+ "sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700"
],
"index": "pypi",
- "version": "==19.1.0"
+ "version": "==20.3.0"
},
"certifi": {
"hashes": [
- "sha256:046832c04d4e752f37383b628bc601a7ea7211496b4638f6514d0e5b9acc4939",
- "sha256:945e3ba63a0b9f577b1395204e13c3a231f9bc0223888be653286534e5873695"
+ "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c",
+ "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830"
],
- "version": "==2019.6.16"
+ "version": "==2020.12.5"
},
"chardet": {
"hashes": [
- "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
- "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
+ "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa",
+ "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"
],
- "version": "==3.0.4"
+ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
+ "version": "==4.0.0"
},
"click": {
"hashes": [
- "sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13",
- "sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7"
+ "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a",
+ "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"
],
"index": "pypi",
- "version": "==7.0"
+ "version": "==7.1.2"
},
"idna": {
"hashes": [
- "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407",
- "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c"
+ "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6",
+ "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"
],
- "version": "==2.8"
+ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+ "version": "==2.10"
+ },
+ "lxml": {
+ "hashes": [
+ "sha256:079f3ae844f38982d156efce585bc540c16a926d4436712cf4baee0cce487a3d",
+ "sha256:0fbcf5565ac01dff87cbfc0ff323515c823081c5777a9fc7703ff58388c258c3",
+ "sha256:122fba10466c7bd4178b07dba427aa516286b846b2cbd6f6169141917283aae2",
+ "sha256:1b7584d421d254ab86d4f0b13ec662a9014397678a7c4265a02a6d7c2b18a75f",
+ "sha256:26e761ab5b07adf5f555ee82fb4bfc35bf93750499c6c7614bd64d12aaa67927",
+ "sha256:289e9ca1a9287f08daaf796d96e06cb2bc2958891d7911ac7cae1c5f9e1e0ee3",
+ "sha256:2a9d50e69aac3ebee695424f7dbd7b8c6d6eb7de2a2eb6b0f6c7db6aa41e02b7",
+ "sha256:33bb934a044cf32157c12bfcfbb6649807da20aa92c062ef51903415c704704f",
+ "sha256:3439c71103ef0e904ea0a1901611863e51f50b5cd5e8654a151740fde5e1cade",
+ "sha256:39b78571b3b30645ac77b95f7c69d1bffc4cf8c3b157c435a34da72e78c82468",
+ "sha256:4289728b5e2000a4ad4ab8da6e1db2e093c63c08bdc0414799ee776a3f78da4b",
+ "sha256:4bff24dfeea62f2e56f5bab929b4428ae6caba2d1eea0c2d6eb618e30a71e6d4",
+ "sha256:542d454665a3e277f76954418124d67516c5f88e51a900365ed54a9806122b83",
+ "sha256:5a0a14e264069c03e46f926be0d8919f4105c1623d620e7ec0e612a2e9bf1c04",
+ "sha256:66e575c62792c3f9ca47cb8b6fab9e35bab91360c783d1606f758761810c9791",
+ "sha256:74f7d8d439b18fa4c385f3f5dfd11144bb87c1da034a466c5b5577d23a1d9b51",
+ "sha256:7610b8c31688f0b1be0ef882889817939490a36d0ee880ea562a4e1399c447a1",
+ "sha256:76fa7b1362d19f8fbd3e75fe2fb7c79359b0af8747e6f7141c338f0bee2f871a",
+ "sha256:7728e05c35412ba36d3e9795ae8995e3c86958179c9770e65558ec3fdfd3724f",
+ "sha256:8157dadbb09a34a6bd95a50690595e1fa0af1a99445e2744110e3dca7831c4ee",
+ "sha256:820628b7b3135403540202e60551e741f9b6d3304371712521be939470b454ec",
+ "sha256:884ab9b29feaca361f7f88d811b1eea9bfca36cf3da27768d28ad45c3ee6f969",
+ "sha256:89b8b22a5ff72d89d48d0e62abb14340d9e99fd637d046c27b8b257a01ffbe28",
+ "sha256:92e821e43ad382332eade6812e298dc9701c75fe289f2a2d39c7960b43d1e92a",
+ "sha256:b007cbb845b28db4fb8b6a5cdcbf65bacb16a8bd328b53cbc0698688a68e1caa",
+ "sha256:bc4313cbeb0e7a416a488d72f9680fffffc645f8a838bd2193809881c67dd106",
+ "sha256:bccbfc27563652de7dc9bdc595cb25e90b59c5f8e23e806ed0fd623755b6565d",
+ "sha256:c4f05c5a7c49d2fb70223d0d5bcfbe474cf928310ac9fa6a7c6dddc831d0b1d4",
+ "sha256:ce256aaa50f6cc9a649c51be3cd4ff142d67295bfc4f490c9134d0f9f6d58ef0",
+ "sha256:d2e35d7bf1c1ac8c538f88d26b396e73dd81440d59c1ef8522e1ea77b345ede4",
+ "sha256:df7c53783a46febb0e70f6b05df2ba104610f2fb0d27023409734a3ecbb78fb2",
+ "sha256:efac139c3f0bf4f0939f9375af4b02c5ad83a622de52d6dfa8e438e8e01d0eb0",
+ "sha256:efd7a09678fd8b53117f6bae4fa3825e0a22b03ef0a932e070c0bdbb3a35e654",
+ "sha256:f2380a6376dfa090227b663f9678150ef27543483055cc327555fb592c5967e2",
+ "sha256:f8380c03e45cf09f8557bdaa41e1fa7c81f3ae22828e1db470ab2a6c96d8bc23",
+ "sha256:f90ba11136bfdd25cae3951af8da2e95121c9b9b93727b1b896e3fa105b2f586"
+ ],
+ "index": "pypi",
+ "version": "==4.6.3"
},
"requests": {
"hashes": [
- "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4",
- "sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31"
+ "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804",
+ "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"
+ ],
+ "index": "pypi",
+ "version": "==2.25.1"
+ },
+ "structlog": {
+ "hashes": [
+ "sha256:62f06fc0ee32fb8580f0715eea66cb87271eb7efb0eaf9af6b639cba8981de47",
+ "sha256:d9d2d890532e8db83c6977a2a676fb1889922ff0c26ad4dc0ecac26f9fafbc57"
],
"index": "pypi",
- "version": "==2.22.0"
+ "version": "==21.1.0"
},
"urllib3": {
"hashes": [
- "sha256:b246607a25ac80bedac05c6f282e3cdaf3afb65420fd024ac94435cabe6e18d1",
- "sha256:dbe59173209418ae49d485b87d1681aefa36252ee85884c31346debd19463232"
+ "sha256:2f4da4594db7e1e110a944bb1b551fdf4e6c136ad42e4234131391e21eb5b0df",
+ "sha256:e7b021f7241115872f92f43c6508082facffbd1c048e3c6e2bb9c2a157e28937"
],
- "version": "==1.25.3"
+ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
+ "version": "==1.26.4"
}
},
- "develop": {}
+ "develop": {
+ "attrs": {
+ "hashes": [
+ "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6",
+ "sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700"
+ ],
+ "index": "pypi",
+ "version": "==20.3.0"
+ },
+ "certifi": {
+ "hashes": [
+ "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c",
+ "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830"
+ ],
+ "version": "==2020.12.5"
+ },
+ "chardet": {
+ "hashes": [
+ "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa",
+ "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"
+ ],
+ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
+ "version": "==4.0.0"
+ },
+ "idna": {
+ "hashes": [
+ "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6",
+ "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"
+ ],
+ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+ "version": "==2.10"
+ },
+ "iniconfig": {
+ "hashes": [
+ "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3",
+ "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"
+ ],
+ "version": "==1.1.1"
+ },
+ "packaging": {
+ "hashes": [
+ "sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5",
+ "sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a"
+ ],
+ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+ "version": "==20.9"
+ },
+ "pluggy": {
+ "hashes": [
+ "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0",
+ "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"
+ ],
+ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+ "version": "==0.13.1"
+ },
+ "py": {
+ "hashes": [
+ "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3",
+ "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a"
+ ],
+ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+ "version": "==1.10.0"
+ },
+ "pyparsing": {
+ "hashes": [
+ "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1",
+ "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"
+ ],
+ "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+ "version": "==2.4.7"
+ },
+ "pytest": {
+ "hashes": [
+ "sha256:671238a46e4df0f3498d1c3270e5deb9b32d25134c99b7d75370a68cfbe9b634",
+ "sha256:6ad9c7bdf517a808242b998ac20063c41532a570d088d77eec1ee12b0b5574bc"
+ ],
+ "index": "pypi",
+ "version": "==6.2.3"
+ },
+ "requests": {
+ "hashes": [
+ "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804",
+ "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"
+ ],
+ "index": "pypi",
+ "version": "==2.25.1"
+ },
+ "requests-mock": {
+ "hashes": [
+ "sha256:11215c6f4df72702aa357f205cf1e537cffd7392b3e787b58239bde5fb3db53b",
+ "sha256:e68f46844e4cee9d447150343c9ae875f99fa8037c6dcf5f15bf1fe9ab43d226"
+ ],
+ "index": "pypi",
+ "version": "==1.8.0"
+ },
+ "six": {
+ "hashes": [
+ "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
+ "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"
+ ],
+ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+ "version": "==1.15.0"
+ },
+ "toml": {
+ "hashes": [
+ "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b",
+ "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"
+ ],
+ "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+ "version": "==0.10.2"
+ },
+ "urllib3": {
+ "hashes": [
+ "sha256:2f4da4594db7e1e110a944bb1b551fdf4e6c136ad42e4234131391e21eb5b0df",
+ "sha256:e7b021f7241115872f92f43c6508082facffbd1c048e3c6e2bb9c2a157e28937"
+ ],
+ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
+ "version": "==1.26.4"
+ }
+ }
}
diff --git a/README.md b/README.md
index 1bdff78..be61a0e 100644
--- a/README.md
+++ b/README.md
@@ -1,173 +1,72 @@
-# dspace-api
+# dsaps
-This repository was created from the merger of https://github.com/ehanson8/dspace-editing and https://github.com/ehanson8/dspace-data-collection, both of which have been archived. All further development will occur in this repository.
+This command line application provides several ways of interacting with the [DSpace](https://github.com/DSpace/DSpace) API. It was written for DSpace 6.3 and has not been tested against other DSpace versions. Previously, this branch of the repository was a set of self-contained scripts that could be run independently; those scripts can be found as a [release](https://github.com/MITLibraries/dspace-api-python-scripts/releases/tag/v1.0).
-**Note**: Upgraded to Python 3 in 02/2019.
-
-**Note**: These scripts were updated in 05/2018 for the new authentication method used by DSpace 6.x
-
-All of these scripts require a secrets.py file in the same directory that must contain the following text:
+## Installation
+Clone the repository and install using [pipenv](https://github.com/pypa/pipenv):
```
- baseURL='https://dspace.myuni.edu'
- email='dspace_user@.myuni.edu'
- password='my_dspace_password'
- filePath = '/Users/dspace_user/dspace-data-collection/data/'
- verify = True or False (no quotes). Use False if using an SSH tunnel to connect to the DSpace API
- skipColl = A list of the 'uuid' of any collections that you wish the script to skip. (e.g. ['45794375-6640-4efe-848e-082e60bae375'])
+pipenv install
+```
+After installation, run the application with:
+```
+pipenv run dsaps
```
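+To install development dependencies and run the test suite (the same steps the Travis CI build runs):
+```
+pipenv install --dev
+pipenv run pytest
+```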
-The 'filePath' is directory into which output files will be written and 'handlePrefix' may or may not vary from your DSpace URL depending on your configuration. This secrets.py file will be ignored according to the repository's .gitignore file so that DSpace login details will not be inadvertently exposed through GitHub.
-
-If you are using both a development server and a production server, you can create a separate secrets.py file with a different name (e.g. secretsProd.py) and containing the production server information. When running each of these scripts, you will be prompted to enter the file name (e.g 'secretsProd' without '.py') of an alternate secrets file. If you skip the prompt or incorrectly type the file name, the scripts will default to the information in the secrets.py file. This ensures that you will only edit the production server if you really intend to.
-
-#### [addKeyValuePairOnHandleCSV.py](addKeyValuePairOnHandleCSV.py)
-Based on user input, adds key-value pairs from a specified CSV file of DSpace item handles and the value to be added to that item using the specified key. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated.
-
-#### [addKeyValuePairToCollection.py](addKeyValuePairToCollection.py)
-Based on user input, adds a specified key-value pair with a specified language value to every item in the collection with the specified handle.
-
-#### [addKeyValuePairToCommunity.py](addKeyValuePairToCommunity.py)
-Based on user input, adds a specified key-value pair with a specified language value to every item in every collection in the community with the specified handle.
-
-#### [addNewItemsToCollection.py](addNewItemsToCollection.py)
-Based on user input, adds new items to the specified collection. In the specified directory, the script creates items and associated metadata based on a 'metadataNewFiles.json' file in the directory. The script then posts files for the appropriate items, which is determined by having the file name (minus the file extension) in a 'dc.identifier.other' field in the item metadata record.
-
-#### [compareTwoKeysInCommunity.py](compareTwoKeysInCommunity.py)
-Based on user input, extracts the values of two specified keys from a specified community to a CSV file for comparison.
-
-#### [countInitialedNamesByCollection.py](countInitialedNamesByCollection.py)
-Based on [mjanowiecki's](https://github.com/mjanowiecki) [findInitialedNamesByCollection.py](https://github.com/mjanowiecki/dspace-data-collection/blob/master/findInitialedNamesByCollection.py), find values in name fields that appear to have first initials that could be expanded to full names and provides a count for each collection when the count is more than zero.
-
-#### [createItemMetadataFromCSV.py](createItemMetadataFromCSV.py)
-Based on user input, creates a JSON file of metadata that can be added to a DSpace item from the specified CSV file or from values directly specified in the script. The 'createMetadataElementCSV' function in the script is used to create a metadata element from the specified CSV file and has three variables:
-
-- 'key' - The Dublin Core property to be used for the element.
-- 'value' - The column in the specified CSV file that contains the data for the element.
-- 'language' - The desired language value for the element
-
-The 'createMetadataElementDirect' function in the script is used to create a metadata element without a CSV file (intended for metadata elements that will be constant across all items in a collection) and has three variables:
-
-- 'key' - The Dublin Core property to be used for the element.
-- 'value' - The actual value of the element.
-- 'language' - The desired language value for the element.
-
-#### [deleteBitstreamsFromItem.py](deleteBitstreamsFromItem.py)
-Based on user input, removes all bitstreams associated with an item with the specified handle.
-
-#### [deleteKeyFromCollection.py](deleteKeyFromCollection.py)
-Based on user input, removes all key-value pairs with the specified key for every item in the collection with the specified handle.
-
-#### [deleteKeyFromCommunity.py](deleteKeyFromCommunity.py)
-Based on user input, removes all key-value pairs with the specified key for every item in every collection in the community with the specified handle.
-
-#### [deleteKeyValuePairFromCollection.py](deleteKeyValuePairFromCollection.py)
-Based on user input, removes all key-value pairs with the specified key and value for every item in the collection with the specified handle.
-
-#### [editBitstreamsNames.py](editBitstreamsNames.py)
-Based on a specified CSV file of DSpace item handles and replacement file names, replaces the name of bitstreams attached to the specified items.
-
-#### [exportSelectedRecordMetadataToCSV.py](exportSelectedRecordMetadataToCSV.py)
-Based a CSV of item handles, extracts all metadata (except 'dc.description.provenance' values) from the selected items to a CSV file.
-
-#### [findBogusUris.py](findBogusUris.py)
-Extracts the item ID and the value of the key 'dc.identifier.uri' to a CSV file when the value does not begin with the handlePrefix specified in the secrets.py file.
-
-#### [findDuplicateKeys.py](findDuplicateKeys.py)
-Based on user input, extracts item IDs to a CSV file where there are multiple instances of the specified key in the item metadata.
-
-#### [generateCollectionLevelAbstract.py](generateCollectionLevelAbstract.py)
-Based on user input, creates an HTML collection-level abstract that contains hyperlinks to all of the items in each series, as found in the metadata CSV. This assumes that the series title is recorded in 'dc.relation.ispartof' or a similar property in the DSpace item records. The abstract is then posted to the collection in DSpace.
-
-#### [getCollectionMetadataJson.py](getCollectionMetadataJson.py)
-Based on user input, extracts all of the item metadata from the specified collection to a JSON file.
-
-#### [getCompleteAndUniqueValuesForAllKeys.py](getCompleteAndUniqueValuesForAllKeys.py)
-Creates a 'completeValueLists' folder and for all keys used in the repository, extracts all values for a particular key to a CSV with item IDs. It also creates a 'uniqueValueLists' folder, that writes a CSV file for each key with all unique values and a count of how many times the value appears.
-
-#### [getCompleteAndUniqueValuesForAllKeysInCommunity.py](getCompleteAndUniqueValuesForAllKeysInCommunity.py)
-Creates a 'completeValueLists' folder and for all keys used in the specified community, extracts all values for a particular key to a CSV with item IDs. It also creates a 'uniqueValueLists' folder, that writes a CSV file for each key with all unique values and a count of how many times the value appears.
-
-#### [getFacultyNamesFromETDs.py](getFacultyNamesFromETDs.py)
-Based on user input, extracts all values from 'dc.contributor.advisor' and 'dc.contributor.committeeMember' fields from items in collections in the specified community.
-
-#### [getGlobalLanguageValues.py](getGlobalLanguageValues.py)
-Extracts all unique language values used by metadata entries in the repository to a CSV file.
-
-#### [getHandlesAndBitstreamsFromCollection.py](getHandlesAndBitstreamsFromCollection.py)
-Based on user input, extracts all the handles and bitstreams associated with the items in the specified collection to a CSV file.
-
-#### [getLanguageValuesForKeys.py](getLanguageValuesForKeys.py)
-Extracts all unique pairs of keys and language values used by metadata entries in the repository to a CSV file.
-
-#### [getRecordsAndValuesForKey.py](getRecordsAndValuesForKey.py)
-Based on user input, extracts the ID and URI for all items in the repository with the specified key, as well as the value of the specified key, to a CSV file.
-
-#### [getRecordsAndValuesForKeyInCollection.py](getRecordsAndValuesForKeyInCollection.py)
-Based on user input, extracts the ID and URI for all items in the specified collection with the specified key, as well as the value of the specified key, to a CSV file.
-
-#### [getRecordsWithKeyAndValue.py](getRecordsWithKeyAndValue.py)
-Based on user input, extracts the ID and URI for all items in the repository with the specified key-value pair to a CSV file.
-
-#### [identifyItemsMissingKeyInCommunity.py](identifyItemsMissingKeyInCommunity.py)
-Based on user input, extracts the IDs of items from a specified community that do not have the specified key.
-
-#### [metadataCollectionsKeysMatrix.py](metadataCollectionsKeysMatrix.py)
-Creates a matrix containing a count of each time a key appears in each collection in the repository.
-
-#### [metadataOverview.py](metadataOverview.py)
-Produces several CSV files containing different information about the structure and metadata of the repository:
-
-|File Name |Description|
-|--------------------------|--------------------------------------------------------------------------|
-|collectionMetadataKeys.csv | A list of all keys used in each collection with collection name, ID, and handle.|
-|dspaceIDs.csv | A list of every item ID along with the IDs of the collection and community that contains that item.|
-|dspaceTypes.csv | A list of all unique values for the key 'dc.type.'|
-|keyCount.csv | A list of all unique keys used in the repository, as well as a count of how many times it appear.|
-|collectionStats.csv | A list of all collections in the repository with the collection name, ID, handle, and number of items.|
-
-#### [overwriteExistingMetadata.py](overwriteExistingMetadata.py)
-Based on a specified CSV file of DSpace item handles and file identifiers, replaces the metadata of the items with specified handles with the set of metadata elements associated with the corresponding file identifier in a JSON file of metadata entries named 'metadataOverwrite.json.'
-#### [postCollection.py](postCollection.py)
-Based on user input, creates a collection with a specified name within the specified community. In the specified directory, the script creates items and associated metadata based on a 'collectionMetadata.json' file in the directory. Based on the specified file extension, the script then posts each file in the directory with that extension as a bitstream for the appropriate item, which is determined by having the file name (minus the file extension) in a 'dc.identifier.other' field in the item metadata record.
+## Authentication
-#### [removeDuplicateKeyValuePairsFromItems.py](removeDuplicateKeyValuePairsFromItems.py)
-Finds all items with duplicate key-value pairs and removes the duplicates. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated.
+To authenticate, use the following parameters:
-#### [replaceKey.py](replaceKey.py)
-Based on user input, replaces one specified key with another specified key in all item metadata across the repository. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated.
+Option (short) | Option (long) | Description
+------ | ------ | -----------
+N/A | --url | The DSpace API URL (e.g. https://dspace.mit.edu/rest). Defaults to the DSPACE_URL environment variable if not specified.
+-e | --email | The email of the user for authentication.
+-p | --password | The password for authentication.
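+
+For example, the URL can be supplied through the environment instead of on the command line, in which case --url may be omitted from the invocations below (assuming the variable is exported in the shell that runs them):
+```
+export DSPACE_URL=https://dspace.mit.edu/rest
+```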
-#### [replaceKeyForCollection.py](replaceKeyForCollection.py)
-Based on user input, replaces one specified key with another specified key in all item metadata across the specified collection. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated.
+## Commands
-#### [replaceKeyForCommunity.py](replaceKeyForCommunity.py)
-Based on user input, replaces one specified key with another specified key in all item metadata across the specified community. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated.
+### additems
+Adds items to a specified collection from a metadata CSV, a field mapping file, and a directory of files. May be run in conjunction with the newcollection CLI command.
-#### [replaceKeyValuePairOnItemIdCSV.py](replaceKeyValuePairOnItemIdCSV.py)
-Based on user input, updates key-value pairs on the specified items from the specified CSV file with the columns: 'replacedKey,' 'replacementKey,' 'replacedValue,' 'replacementValue,' and 'itemID.' A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated.
+Option (short) | Option (long) | Description
+------ | ------ | -------
+-m | --metadata-csv | The path to the CSV file of metadata for the items.
+-f | --field-map | The path to the JSON field mapping file.
+-d | --content-directory | The full path to the content, either a directory of files or a URL for the storage location.
+-t | --file-type | The file type to be uploaded, if limited to one file type.
+-r | --ingest-report | Create an ingest report for updating other systems.
+-c | --collection-handle | The handle of the collection to which items are being added.
-#### [replaceKeyValuePairsFromCSV.py](replaceKeyValuePairsFromCSV.py)
-Based on user input, updates key-value pairs from the specified CSV file with the columns: 'replacedKey,' 'replacementKey,' 'replacedValue,' and 'replacementValue.' A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated.
-#### [replaceUnnecessarySpaces.py](replaceUnnecessarySpaces.py)
-Based on user input, removes double spaces, triple spaces, and spaces before commas in the values from the specified key in the specified community.
+#### Example Usage
+```
+pipenv run dsaps --url https://dspace.com/rest -e abc@def.com -p ******** additems -m coll_metadata.csv -f config/aspace_mapping.json -d /files/pdfs -t pdf -r -c 111.1/111111
+```
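+The field mapping file presumably maps CSV column names to DSpace metadata keys. A hypothetical sketch (the column and field names below are illustrative only, not the actual schema of config/aspace_mapping.json):
+```
+{
+  "title": "dc.title",
+  "author": "dc.contributor.author",
+  "file_identifier": "dc.identifier.other"
+}
+```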
-#### [replaceValueInCollection.py](replaceValueInCollection.py)
-Based on user input, replaces a specified value with another specified value in all item metadata across the specified collection. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated.
+### newcollection
+Posts a new collection to a specified community. Used in conjunction with the additems CLI command to populate the new collection with items.
-#### [replaceValueInCommunityFromCSV.py](replaceValueInCommunityFromCSV.py)
-Based on a user specified CSV, replaces specified values in the specified community with specified replacement values. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated.
+Option (short) | Option (long) | Description
+------ | ------ | -------
+-c | --community-handle | The handle of the community in which to create the collection.
+-n | --collection-name | The name of the collection to be created.
-#### [repositoryMetadataBackup.py](repositoryMetadataBackup.py)
-Creates a folder with a timestamp in the folder name and creates a JSON file for every collection in the repository with the metadata for all of the items in that collection.
+#### Example Usage
+```
+pipenv run dsaps --url https://dspace.com/rest -e abc@def.com -p ******** newcollection -c 222.2/222222 -n Test\ Collection additems -m coll_metadata.csv -f config/aspace_mapping.json -d /files/pdfs -t pdf -r
+```
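+Note that in this chained invocation additems omits the -c option; the items are presumably added to the newly created collection (an inference from the example, not documented behavior).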
-#### [repositoryMetadataRestore.py](repositoryMetadataRestore.py)
-Based on user input, restores the metadata from a specified backup folder that was created by the repositoryMetadataBackup.py script.
+### reconcile
+Reconciles the specified files and metadata, producing reports of files with no metadata, metadata with no files, and metadata matched to files, along with an updated version of the metadata CSV containing only the records that have matching files.
-#### [splitFieldIntoMultipleFields.py](splitFieldIntoMultipleFields.py)
-Based on a user specified CSV, replaces a single field with multiple values into multiple fields which each contain a single value.
-#### [updateLanguageTagsForKey.py](updateLanguageTagsForKey.py)
-Based on user input, updates the language value for the specified key to 'en_us' for all items with that key in the repository. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated.
+Option (short) | Option (long) | Description
+------ | ------ | -------
+-m | --metadata-csv | The path to the CSV file of metadata.
+-o | --output-directory | The path for the output files; include / at the end of the path.
+-d | --content-directory | The full path to the content, either a directory of files or a URL for the storage location.
+-t | --file-type | The file type to be uploaded.
-#### [updateLanguageTagsForKeyInCollection.py](updateLanguageTagsForKeyInCollection.py)
-Based on user input, updates the language value for the specified key to 'en_us' for all items with that key in the specified collection. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated.
+#### Example Usage
+```
+pipenv run dsaps --url https://dspace.com/rest -e abc@def.com -p ******** reconcile -m coll_metadata.csv -o /output -d /files/pdfs -t pdf
+```
diff --git a/addKeyValuePairOnHandleCSV.py b/addKeyValuePairOnHandleCSV.py
deleted file mode 100644
index bb7a024..0000000
--- a/addKeyValuePairOnHandleCSV.py
+++ /dev/null
@@ -1,84 +0,0 @@
-import json
-import requests
-import time
-import csv
-from datetime import datetime
-import urllib3
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header,
- verify=verify, params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-fileName = filePath + input('Enter fileName (including \'.csv\'): ')
-addedKey = input('Enter key: ')
-startTime = time.time()
-
-date = datetime.now().strftime('%Y-%m-%d %H.%M.%S')
-f = csv.writer(open(filePath + 'addKeyValuePair' + date + '.csv', 'w'))
-f.writerow(['itemID'] + ['addedKey'] + ['addedValue'] + ['delete'] + ['post'])
-
-with open(fileName) as csvfile:
- reader = csv.DictReader(csvfile)
- for row in reader:
- addedValue = row['value']
- handle = row['handle'].strip()
- addedMetadataElement = {}
- addedMetadataElement['key'] = addedKey
- addedMetadataElement['value'] = addedValue
- addedMetadataElement['language'] = 'en_us'
- endpoint = baseURL + '/rest/handle/' + handle
- item = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- itemID = item['uuid']
- itemMetadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header,
- cookies=cookies, verify=verify).json()
- itemMetadata.append(addedMetadataElement)
- itemMetadataProcessed = itemMetadata
-
- date = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
- provNote = '\'' + addedKey + ': ' + addedValue
- provNote += '\' was added through a batch process on '
- provNote += date + '.'
- provNoteElement = {}
- provNoteElement['key'] = 'dc.description.provenance'
- provNoteElement['value'] = provNote
- provNoteElement['language'] = 'en_US'
- itemMetadataProcessed.append(provNoteElement)
-
- itemMetadataProcessed = json.dumps(itemMetadataProcessed)
- delete = requests.delete(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header,
- cookies=cookies, verify=verify)
- print(delete)
- post = requests.put(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify, data=itemMetadataProcessed)
- print(post)
- f.writerow([itemID] + [addedMetadataElement['key']]
- + [addedMetadataElement['value']] + [delete] + [post])
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/addKeyValuePairToCollection.py b/addKeyValuePairToCollection.py
deleted file mode 100644
index d5a5367..0000000
--- a/addKeyValuePairToCollection.py
+++ /dev/null
@@ -1,135 +0,0 @@
-import json
-import requests
-import time
-import csv
-from datetime import datetime
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-k', '--key', help='the key to be added. optional - if '
- 'not provided, the script will ask for input')
-parser.add_argument('-v', '--value', help='the value to be added. optional - '
- 'if not provided, the script will ask for input')
-parser.add_argument('-l', '--language', help='the language tag to be added. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-i', '--handle', help='handle of the collection. '
- 'optional - if not provided, the script will ask for '
- 'input')
-args = parser.parse_args()
-
-if args.key:
- addedKey = args.key
-else:
- addedKey = input('Enter the key: ')
-if args.value:
- addedValue = args.value
-else:
- addedValue = input('Enter the value: ')
-if args.language:
- addedLanguage = args.language
-else:
- addedLanguage = input('Enter the language tag: ')
-if args.handle:
- handle = args.handle
-else:
- handle = input('Enter collection handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header,
- verify=verify, params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-itemList = []
-endpoint = baseURL + '/rest/handle/' + handle
-collection = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-collectionID = collection['uuid']
-offset = 0
-items = ''
-while items != []:
- items = requests.get(baseURL + '/rest/collections/' + str(collectionID)
- + '/items?limit=200&offset=' + str(offset),
- headers=header, cookies=cookies, verify=verify)
- while items.status_code != 200:
- time.sleep(5)
- items = requests.get(baseURL + '/rest/collections/' + str(collectionID)
- + '/items?limit=200&offset=' + str(offset),
- headers=header, cookies=cookies, verify=verify)
- items = items.json()
- for k in range(0, len(items)):
- itemID = items[k]['uuid']
- itemList.append(itemID)
- offset = offset + 200
-
-dsFunc.elapsedTime(startTime, 'Item list creation time')
-
-recordsEdited = 0
-f = csv.writer(open(filePath + 'addKeyValuePair'
- + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w'))
-f.writerow(['itemID'] + ['addedKey'] + ['addedValue'] + ['delete'] + ['post'])
-for number, itemID in enumerate(itemList):
- itemsRemaining = len(itemList) - number
- print('Items remaining: ', itemsRemaining, 'ItemID: ', itemID)
- metadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify).json()
- itemMetadataProcessed = []
- for l in range(0, len(metadata)):
- metadata[l].pop('schema', None)
- metadata[l].pop('element', None)
- metadata[l].pop('qualifier', None)
- itemMetadataProcessed.append(metadata[l])
- addedMetadataElement = {}
- addedMetadataElement['key'] = addedKey
- addedMetadataElement['value'] = addedValue
- addedMetadataElement['language'] = addedLanguage
- itemMetadataProcessed.append(addedMetadataElement)
- provNote = ('\'' + addedKey + ': ' + addedValue + '\' was added through a '
- + 'batch process on '
- + datetime.now().strftime('%Y-%m-%d %H:%M:%S') + '.')
- provNoteElement = {}
- provNoteElement['key'] = 'dc.description.provenance'
- provNoteElement['value'] = provNote
- provNoteElement['language'] = 'en_US'
- itemMetadataProcessed.append(provNoteElement)
- recordsEdited = recordsEdited + 1
- itemMetadataProcessed = json.dumps(itemMetadataProcessed)
- print('updated', itemID, recordsEdited)
- delete = requests.delete(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify)
- print(delete)
- post = requests.put(baseURL + '/rest/items/' + str(itemID) + '/metadata',
- headers=header, cookies=cookies, verify=verify,
- data=itemMetadataProcessed)
- print(post)
- f.writerow([itemID] + [addedKey] + [addedValue] + [delete] + [post])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/addKeyValuePairToCommunity.py b/addKeyValuePairToCommunity.py
deleted file mode 100644
index ec679bb..0000000
--- a/addKeyValuePairToCommunity.py
+++ /dev/null
@@ -1,151 +0,0 @@
-import json
-import requests
-import time
-import csv
-from datetime import datetime
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-k', '--key', help='the key to be added. optional - if '
- 'not provided, the script will ask for input')
-parser.add_argument('-v', '--value', help='the value to be added. optional - '
- 'if not provided, the script will ask for input')
-parser.add_argument('-l', '--language', help='the language tag to be added. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-i', '--handle', help='handle of the community. optional '
- '- if not provided, the script will ask for input')
-args = parser.parse_args()
-
-if args.key:
- addedKey = args.key
-else:
- addedKey = input('Enter the key: ')
-if args.value:
- addedValue = args.value
-else:
- addedValue = input('Enter the value: ')
-if args.language:
- addedLanguage = args.language
-else:
- addedLanguage = input('Enter the language tag: ')
-if args.handle:
- handle = args.handle
-else:
- handle = input('Enter community handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-itemList = []
-endpoint = baseURL + '/rest/handle/' + handle
-community = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-communityID = community['uuid']
-
-collections = requests.get(baseURL + '/rest/communities/' + str(communityID)
- + '/collections', headers=header, cookies=cookies,
- verify=verify).json()
-for j in range(0, len(collections)):
- collectionID = collections[j]['uuid']
- if collectionID not in skipColl:
- offset = 0
- items = ''
- while items != []:
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID)
- + '/items?limit=200&offset=' + str(offset),
- headers=header, cookies=cookies,
- verify=verify)
- while items.status_code != 200:
- time.sleep(5)
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID)
- + '/items?limit=200&offset='
- + str(offset), headers=header,
- cookies=cookies, verify=verify)
- items = items.json()
- for k in range(0, len(items)):
- itemID = items[k]['uuid']
- itemList.append(itemID)
- offset = offset + 200
-
-dsFunc.elapsedTime(startTime, 'Item list creation time')
-
-recordsEdited = 0
-f = csv.writer(open(filePath + 'addKeyValuePair'
- + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w'))
-f.writerow(['itemID'] + ['addedKey'] + ['addedValue'] + ['delete'] + ['post'])
-for number, itemID in enumerate(itemList):
- itemsRemaining = len(itemList) - number
- print('Items remaining: ', itemsRemaining, 'ItemID: ', itemID)
- metadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify).json()
- itemMetadataProcessed = []
- changeRecord = True
- for metadataElement in metadata:
- key = metadataElement['key']
- value = metadataElement['value']
- if key == addedKey and value == addedValue:
- changeRecord = False
- metadataElement.pop('schema', None)
- metadataElement.pop('element', None)
- metadataElement.pop('qualifier', None)
- itemMetadataProcessed.append(metadataElement)
- if changeRecord is True:
- addedMetadataElement = {}
- addedMetadataElement['key'] = addedKey
- addedMetadataElement['value'] = addedValue
- addedMetadataElement['language'] = addedLanguage
- itemMetadataProcessed.append(addedMetadataElement)
- provNote = '\'' + addedKey + ': ' + addedValue
- provNote += '\' was added through a batch process on '
- provNote += datetime.now().strftime('%Y-%m-%d %H:%M:%S') + '.'
- provNoteElement = {}
- provNoteElement['key'] = 'dc.description.provenance'
- provNoteElement['value'] = provNote
- provNoteElement['language'] = 'en_US'
- itemMetadataProcessed.append(provNoteElement)
- recordsEdited = recordsEdited + 1
- itemMetadataProcessed = json.dumps(itemMetadataProcessed)
- print('updated', itemID, recordsEdited)
- delete = requests.delete(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header,
- cookies=cookies, verify=verify)
- print(delete)
- post = requests.put(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify, data=itemMetadataProcessed)
- print(post)
- f.writerow([itemID] + [addedKey] + [addedValue] + [delete] + [post])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/addNewItemsToCollection.py b/addNewItemsToCollection.py
deleted file mode 100644
index 15a7e9a..0000000
--- a/addNewItemsToCollection.py
+++ /dev/null
@@ -1,199 +0,0 @@
-import json
-import requests
-import datetime
-import time
-import os
-import csv
-import urllib3
-import collections
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-d', '--directory', help='the directory of files to be '
- 'ingested. optional - if not provided, the script will '
- 'ask for input')
-parser.add_argument('-e', '--fileExtension', help='the extension of files to '
- 'be ingested. optional - if not provided, the script will '
- 'ask for input')
-parser.add_argument('-i', '--handle', help='handle of the object to retreive. '
- 'optional - if not provided, the script will ask for '
- 'input')
-args = parser.parse_args()
-
-if args.directory:
- directory = args.directory
-else:
- directory = input('Enter directory name: ')
-if args.fileExtension:
- fileExtension = args.fileExtension
-else:
- fileExtension = '.' + input('Enter file extension: ')
-if args.handle:
- handle = args.handle
-else:
- handle = input('Enter handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-
-# ccreate file list and export csv
-fileList = {}
-for root, dirs, files in os.walk(directory, topdown=True):
- for file in files:
- if file.endswith(fileExtension):
- fullFilePath = os.path.join(root, file).replace('\\', '/')
- fileList[file[:file.index('.')]] = fullFilePath
-
-dsFunc.elapsedTime(startTime, 'File list creation time')
-
-f = csv.writer(open(handle.replace('/', '-') + 'addedFilesList.csv', 'w'))
-f.writerow(['itemID'])
-
-for k, v in fileList.items():
- f.writerow([v[v.rindex('/') + 1:]])
-counter = len(fileList)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-headerFileUpload = {'accept': 'application/json'}
-
-# Get collection ID
-endpoint = baseURL + '/rest/handle/' + handle
-collection = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-collectionID = str(collection['uuid'])
-print(collectionID)
-
-# Post items
-collectionMetadata = json.load(open(directory + '/' + 'metadataNewFiles.json'))
-for itemMetadata in collectionMetadata:
- counter = counter - 1
- print('Items remaining: ', counter)
- fileExists = ''
- updatedItemMetadata = {}
- updatedItemMetadataList = []
- for element in itemMetadata['metadata']:
- if element['key'] == 'fileIdentifier':
- fileIdentifier = element['value']
- else:
- updatedItemMetadataList.append(element)
- updatedItemMetadata['metadata'] = updatedItemMetadataList
- updatedItemMetadata = json.dumps(updatedItemMetadata)
- for k in fileList:
- if fileIdentifier in k:
- fileExists = True
- if fileExists is True:
- print(fileIdentifier)
- post = requests.post(baseURL + '/rest/collections/' + collectionID
- + '/items', headers=header, cookies=cookies,
- verify=verify, data=updatedItemMetadata).json()
- print(json.dumps(post))
- itemID = post['link']
-
- # #Post bitstream - front and back
- # for k, v in fileList.items():
- # if k == fileIdentifier + '-Front':
- # bitstream = fileList[k]
- # fileName = bitstream[bitstream.rfind('/') + 1:]
- # data = open(bitstream, 'rb')
- # post = requests.post(baseURL + itemID + '/bitstreams?name='
- # + fileName, headers=headerFileUpload,
- # cookies=cookies, verify=verify,
- # data=data).json()
- # print(post)
- #
- # for k, v in fileList.items():
- # if k == fileIdentifier + '-Back':
- # bitstream = fileList[k]
- # fileName = bitstream[bitstream.rfind('/') + 1:]
- # data = open(bitstream, 'rb')
- # post = requests.post(baseURL + itemID + '/bitstreams?name='
- # + fileName, headers=headerFileUpload,
- # cookies=cookies, verify=verify,
- # data=data).json()
- # print(post)
-
- # Post bitstream - starts with file identifier
- orderedFileList = collections.OrderedDict(sorted(fileList.items()))
- for k, v in orderedFileList.items():
- if k.startswith(fileIdentifier):
- bitstream = orderedFileList[k]
- fileName = bitstream[bitstream.rfind('/') + 1:]
- print(fileName)
- data = open(bitstream, 'rb')
- post = requests.post(baseURL + itemID + '/bitstreams?name='
- + fileName, headers=headerFileUpload,
- cookies=cookies, verify=verify,
- data=data).json()
- print(post)
-
- # Create provenance notes
- provNote = {}
- provNote['key'] = 'dc.description.provenance'
- provNote['language'] = 'en_US'
- utc = datetime.datetime.utcnow()
- utcTime = utc.strftime('%Y-%m-%dT%H:%M:%SZ')
- bitstreams = requests.get(baseURL + itemID + '/bitstreams', headers=header,
- cookies=cookies, verify=verify).json()
- bitstreamCount = len(bitstreams)
- provNoteValue = 'Submitted by ' + userFullName + ' (' + email + ') on '
- provNoteValue = provNoteValue + utcTime + ' (GMT). No. of bitstreams: '
- provNoteValue = provNoteValue + str(bitstreamCount)
- for bitstream in bitstreams:
- fileName = bitstream['name']
- size = str(bitstream['sizeBytes'])
- checksum = bitstream['checkSum']['value']
- algorithm = bitstream['checkSum']['checkSumAlgorithm']
- provNoteValue = provNoteValue + ' ' + fileName + ': ' + size
- provNoteValue = provNoteValue + ' bytes, checkSum: ' + checksum
- provNoteValue = provNoteValue + ' (' + algorithm + ')'
- provNote['value'] = provNoteValue
-
- provNote2 = {}
- provNote2['key'] = 'dc.description.provenance'
- provNote2['language'] = 'en_US'
- provNote2Value = 'Made available in DSpace on ' + utcTime
- provNote2Value = provNote2Value + ' (GMT). No. of bitstreams: '
- provNote2Value = provNote2Value + str(bitstreamCount)
- for bitstream in bitstreams:
- fileName = bitstream['name']
- size = str(bitstream['sizeBytes'])
- checksum = bitstream['checkSum']['value']
- algorithm = bitstream['checkSum']['checkSumAlgorithm']
- provNote2Value = provNote2Value + ' ' + fileName + ': ' + size
- provNote2Value = provNote2Value + ' bytes, checkSum: ' + checksum
- provNote2Value = provNote2Value + ' (' + algorithm + ')'
- provNote2['value'] = provNote2Value
-
- # Post provenance notes
- provNote = json.dumps([provNote, provNote2])
- post = requests.put(baseURL + itemID + '/metadata', headers=header,
- cookies=cookies, verify=verify, data=provNote)
- print(post)
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/checkInventory.py b/checkInventory.py
deleted file mode 100644
index a2a8e2a..0000000
--- a/checkInventory.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import argparse
-import pandas as pd
-import os
-
-
-def main():
- """Define main function."""
- # begin: argument parsing
- parser = argparse.ArgumentParser()
-
- parser.add_argument('-i', '--inventory', required=True,
- help='csv file containing the inventory. the path, if '
- 'given, can be absolute or relative to this script')
-
- parser.add_argument('-d', '--dataDir',
- help='directory containing the data. if omitted, data '
- 'will be read from the directory containing the '
- 'inventory file')
-
- parser.add_argument('-f', '--field',
- help='field in the csv containing the fileNames. '
- 'default: name')
-
- parser.add_argument('-v', '--verbose', action='store_true',
- help='increase output verbosity')
-
- args = parser.parse_args()
-
- if not args.dataDir:
- (args.dataDir, null) = os.path.split(args.inventory)
-
- if not args.field:
- args.field = 'name'
-
- if args.verbose:
- print('verbosity turned on')
- print('reading inventory from {}'.format(args.inventory))
- print('fileNames read from field named {}'.format(args.field))
- print('searching for files in {}'.format(args.dataDir))
- # end: argument parsing
-
- inventory = pd.read_csv(args.inventory, usecols=[args.field])
- fileNames = inventory[args.field]
- foundfiles = 0
- missingfiles = 0
- for fileName in fileNames:
- if os.path.isfile(args.dataDir + '/' + fileName):
- if args.verbose:
- print('{} is not missing'.format(fileName))
- foundfiles += 1
- else:
- print('{} is missing'.format(fileName))
- missingfiles += 1
-
- print('{} files found and {} files \
- missing'.format(foundfiles, missingfiles))
-
-
-if __name__ == "__main__":
- main()
diff --git a/compareTwoKeysInCommunity.py b/compareTwoKeysInCommunity.py
deleted file mode 100644
index 7d68180..0000000
--- a/compareTwoKeysInCommunity.py
+++ /dev/null
@@ -1,129 +0,0 @@
-import requests
-import csv
-import time
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-1', '--key', help='the first key to be output. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-2', '--key2', help='the second key to be output. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-i', '--handle', help='handle of the community to '
- 'retreive. optional - if not provided, the script will '
- 'ask for input')
-args = parser.parse_args()
-
-if args.key:
- key = args.key
-else:
- key = input('Enter first key: ')
-if args.key2:
- key2 = args.key2
-else:
- key2 = input('Enter second key: ')
-if args.handle:
- handle = args.handle
-else:
- handle = input('Enter community handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-endpoint = baseURL + '/rest/handle/' + handle
-community = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-communityID = community['uuid']
-
-itemList = []
-endpoint = baseURL + '/rest/communities'
-collections = requests.get(baseURL + '/rest/communities/' + str(communityID)
- + '/collections', headers=header, cookies=cookies,
- verify=verify).json()
-for j in range(0, len(collections)):
- collectionID = collections[j]['uuid']
- print(collectionID)
- if collectionID not in skipColl:
- offset = 0
- items = ''
- while items != []:
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID)
- + '/items?limit=200&offset=' + str(offset),
- headers=header, cookies=cookies,
- verify=verify)
- while items.status_code != 200:
- time.sleep(5)
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID)
- + '/items?limit=200&offset='
- + str(offset), headers=header,
- cookies=cookies, verify=verify)
- items = items.json()
- for k in range(0, len(items)):
- itemID = items[k]['uuid']
- itemList.append(itemID)
- offset = offset + 200
- print(offset)
-
-dsFunc.elapsedTime(startTime, 'Item list creation time')
-
-valueList = []
-for number, itemID in enumerate(itemList):
- itemsRemaining = len(itemList) - number
- print('Items remaining: ', itemsRemaining, 'ItemID: ', itemID)
- metadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify).json()
- itemTuple = (itemID,)
- tupleValue1 = ''
- tupleValue2 = ''
- for l in range(0, len(metadata)):
- if metadata[l]['key'] == key:
- metadataValue = metadata[l]['value']
- tupleValue1 = metadataValue
- if metadata[l]['key'] == key2:
- metadataValue = metadata[l]['value']
- tupleValue2 = metadataValue
- itemTuple = itemTuple + (tupleValue1, tupleValue2)
- valueList.append(itemTuple)
- print(itemTuple)
-print(valueList)
-
-dsFunc.elapsedTime(startTime, 'Value list creation time')
-
-f = csv.writer(open(filePath + key + '-' + key2 + 'Values.csv', 'w'))
-f.writerow(['itemID'] + [key] + [key2])
-for i in range(0, len(valueList)):
- f.writerow([valueList[i][0]] + [valueList[i][1]] + [valueList[i][2]])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/config/aspace_mapping.json b/config/aspace_mapping.json
new file mode 100644
index 0000000..ea41e3b
--- /dev/null
+++ b/config/aspace_mapping.json
@@ -0,0 +1,37 @@
+{
+ "file_identifier": {
+ "csv_field_name": "file_identifier",
+ "language": null,
+ "delimiter": ""
+ },
+ "dc.title": {
+ "csv_field_name": "title",
+ "language": "en_US",
+ "delimiter": ""
+ },
+ "source_system_identifier": {
+ "csv_field_name": "uri",
+ "language": null,
+ "delimiter": ""
+ },
+ "dc.contributor.author": {
+ "csv_field_name": "author",
+ "language": null,
+ "delimiter": "|"
+ },
+ "dc.description": {
+ "csv_field_name": "description",
+ "language": "en_US",
+ "delimiter": ""
+ },
+ "dc.rights": {
+ "csv_field_name": "rights_statement",
+ "language": "en_US",
+ "delimiter": ""
+ },
+ "dc.rights.uri": {
+ "csv_field_name": "rights_uri",
+ "language": null,
+ "delimiter": ""
+ }
+}
\ No newline at end of file
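
Each top-level key in this mapping is a DSpace metadata field; its rules name the source CSV column, an optional language tag, and an optional delimiter for multi-valued fields (only dc.contributor.author is delimited here). As a quick sanity check, a minimal sketch that loads the mapping and prints each rule, assuming it is run from the repository root:

```python
import json

# Load the field mapping and show how each DSpace key is populated.
with open('config/aspace_mapping.json') as jsonfile:
    mapping = json.load(jsonfile)

for dspace_key, rules in mapping.items():
    print(f"{rules['csv_field_name']!r} -> {dspace_key} "
          f"(language={rules['language']}, delimiter={rules['delimiter']!r})")
```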
diff --git a/countInitialedNamesByCollection.py b/countInitialedNamesByCollection.py
deleted file mode 100644
index 64e79ca..0000000
--- a/countInitialedNamesByCollection.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import requests
-import csv
-import re
-import time
-import urllib3
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-collectionIds = []
-endpoint = baseURL + '/rest/communities'
-communities = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-for community in communities:
- communityID = community['uuid']
- collections = requests.get(baseURL + '/rest/communities/'
- + str(communityID) + '/collections',
- headers=header, cookies=cookies,
- verify=verify).json()
- for collection in collections:
- collectionID = collection['uuid']
- if collectionID not in skipColl:
- collectionIds.append(collectionID)
-
-names = []
-keys = ['dc.contributor.advisor', 'dc.contributor.author',
- 'dc.contributor.committeeMember', 'dc.contributor.editor',
- 'dc.contributor.illustrator', 'dc.contributor.other', 'dc.creator']
-
-f = csv.writer(open('initialCountInCollection.csv', 'w'))
-f.writerow(['collectionName'] + ['handle'] + ['initialCount'])
-
-for number, collectionID in enumerate(collectionIds):
- initialCount = 0
- collectionsRemaining = len(collectionIds) - number
- print(collectionID, 'Collections remaining: ', collectionsRemaining)
- collection = requests.get(baseURL + '/rest/collections/'
- + str(collectionID), headers=header,
- cookies=cookies, verify=verify).json()
- collectionName = collection['name']
- collectionHandle = collection['handle']
- collSels = '&collSel[]=' + collectionID
- offset = 0
- recordsEdited = 0
- items = ''
- regexCI = r'(\s|,|[A-Z]|([A-Z]\.))[A-Z](\s|$|\.|,)'
- regexMI = r'((\w{2,},\s)|(\w{2,},))\w[a-z] + '
- regexPR = r'\(|\)'
- while items != []:
- for key in keys:
- endpoint = baseURL + '/rest/filtered-items?query_field[]=' + key
- endpoint += '&query_op[]=exists&query_val[]=' + collSels
- endpoint += '&limit=100&offset=' + str(offset)
- print(endpoint)
- response = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- items = response['items']
- for item in items:
- itemLink = item['link']
- metadata = requests.get(baseURL + itemLink + '/metadata',
- headers=header, cookies=cookies,
- verify=verify).json()
- for metadata_element in metadata:
- if metadata_element['key'] == key:
- indvdl_nm = metadata_element['value']
- for metadata_element in metadata:
- if metadata_element['key'] == 'dc.identifier.uri':
- uri = metadata_element['value']
- contains_initials = re.search(regexCI,
- indvdl_nm)
- contains_middleinitial = re.search(regexMI,
- indvdl_nm)
- contains_parentheses = re.search(regexPR,
- indvdl_nm)
- if contains_middleinitial:
- continue
- elif contains_parentheses:
- continue
- elif contains_initials:
- initialCount += 1
- else:
- continue
- offset = offset + 200
- print(offset)
- if initialCount > 0:
- f.writerow([collectionName] + [baseURL + '/' + collectionHandle]
- + [str(initialCount).zfill(6)])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/createItemMetadataFromCSV.py b/createItemMetadataFromCSV.py
deleted file mode 100644
index 8d6b7bd..0000000
--- a/createItemMetadataFromCSV.py
+++ /dev/null
@@ -1,88 +0,0 @@
-# -*- coding: utf-8 -*-
-import json
-import csv
-
-
-def createMetadataElementCSV(key, valueSource, language):
- """Create metadata element."""
- value = row[valueSource]
- if value != '':
- if language != '':
- metadataElement = {'key': key, 'language': language,
- 'value': value}
- metadata.append(metadataElement)
- else:
- metadataElement = {'key': key, 'value': value}
- metadata.append(metadataElement)
- else:
- pass
-
-
-def createMetadataElementCSVSplitField(key, valueSource, language):
- """Create multiple metadata elements from one field."""
- if row[valueSource] != '':
- if '|' in row[valueSource]:
- values = row[valueSource].split('|')
- for value in values:
- if language != '':
- metadataElement = {'key': key, 'language': language,
- 'value': value}
- metadata.append(metadataElement)
- else:
- metadataElement = {'key': key, 'value': value}
- metadata.append(metadataElement)
- else:
- value = row[valueSource]
- if language != '':
- metadataElement = {'key': key, 'language': language,
- 'value': value}
- metadata.append(metadataElement)
- else:
- metadataElement = {'key': key, 'value': value}
- metadata.append(metadataElement)
- else:
- pass
-
-
-def createMetadataElementDirect(key, value, language):
- """Create metadata element with specified value."""
- if language != '':
- metadataElement = {'key': key, 'language': language, 'value': value}
- metadata.append(metadataElement)
- else:
- metadataElement = {'key': key, 'value': value}
- metadata.append(metadataElement)
-
-
-fileName = input('Enter fileName (including \'.csv\'): ')
-
-with open(fileName) as csvfile:
- reader = csv.DictReader(csvfile)
- counter = 0
- metadataGroup = []
- for row in reader:
- metadata = []
- createMetadataElementCSV('fileIdentifier', '????', '')
- createMetadataElementCSV('dc.contributor.author', '????', '')
- createMetadataElementCSV('dc.contributor.other', '????', '')
- createMetadataElementCSV('dc.date.issued', '????', '')
- createMetadataElementCSV('dc.description.abstract', '????', 'en_US')
- createMetadataElementCSV('dc.format.extent', '????', '')
- createMetadataElementDirect('dc.format.mimetype', '????', 'en_US')
- createMetadataElementDirect('dc.identifier.other', '????', '')
- createMetadataElementDirect('dc.language.iso', '????', 'en_US')
- createMetadataElementDirect('dc.publisher', '????', 'en_US')
- createMetadataElementDirect('dc.relation', 'Access other ?????.', '')
- createMetadataElementCSV('dc.relation.ispartof', '????', 'en_US')
- createMetadataElementDirect('dc.rights', '????', 'en_US')
- createMetadataElementDirect('dc.subject', '????', 'en_US')
- createMetadataElementCSV('dc.title', '????', 'en_US')
- createMetadataElementDirect('dc.type', '????', 'en_US')
-
- item = {'metadata': metadata}
- metadataGroup.append(item)
- counter = counter + 1
- print(counter)
-
-f = open('metadata.json', 'w')
-json.dump(metadataGroup, f)
diff --git a/deleteBitstreamsFromItem.py b/deleteBitstreamsFromItem.py
deleted file mode 100644
index 7649ea1..0000000
--- a/deleteBitstreamsFromItem.py
+++ /dev/null
@@ -1,73 +0,0 @@
-import requests
-import time
-import csv
-from datetime import datetime
-import urllib3
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-itemHandle = input('Enter item handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-bitstreamList = []
-endpoint = baseURL + '/rest/handle/' + itemHandle
-item = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-itemID = item['uuid']
-print('itemID = %s' % itemID)
-bitstreams = ''
-url = baseURL + '/rest/items/' + str(itemID) + '/bitstreams?expand=bitstreams'
-bitstreams = requests.get(url, headers=header, cookies=cookies, verify=verify)
-while bitstreams.status_code != 200:
- time.sleep(5)
- bitstreams = requests.get(url, headers=header, cookies=cookies,
- verify=verify)
-bitstreams = bitstreams.json()
-print('found %d bitstreams' % len(bitstreams))
-for k in range(0, len(bitstreams)):
- bitstreamID = bitstreams[k]['uuid']
- bitstreamList.append(bitstreamID)
-
-dsFunc.elapsedTime(startTime, 'Bitstream list creation time')
-print(bitstreamList)
-
-f = csv.writer(open(filePath + 'deletedBitstreams'
- + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w'))
-f.writerow(['bitstreamID'] + ['delete'])
-for number, bitstreamID in enumerate(bitstreamList):
- bitstreamsRemaining = len(bitstreamList) - number
- print('Bitstreams remaining: ', bitstreamsRemaining, 'bitstreamID: ',
- bitstreamID)
- delete = requests.delete(baseURL + '/rest/bitstreams/' + str(bitstreamID),
- headers=header, cookies=cookies, verify=verify)
- print(delete)
- f.writerow([bitstreamID] + [delete])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/deleteKeyFromCollection.py b/deleteKeyFromCollection.py
deleted file mode 100644
index 685bbc6..0000000
--- a/deleteKeyFromCollection.py
+++ /dev/null
@@ -1,120 +0,0 @@
-import json
-import requests
-import time
-import csv
-from datetime import datetime
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-k', '--deletedKey', help='the key to be deleted. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-i', '--handle', help='handle of the collection to '
- 'retreive. optional - if not provided, the script will '
- 'ask for input')
-args = parser.parse_args()
-
-if args.deletedKey:
- deletedKey = args.deletedKey
-else:
- deletedKey = input('Enter the key to be deleted: ')
-
-if args.handle:
- handle = args.handle
-else:
- handle = input('Enter collection handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-endpoint = baseURL + '/rest/handle/' + handle
-collection = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-collectionID = collection['uuid']
-collSels = '&collSel[]=' + collectionID
-
-f = csv.writer(open(filePath + 'deletedValues'
- + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w'))
-f.writerow(['handle'] + ['deletedValue'] + ['delete'] + ['post'])
-offset = 0
-recordsEdited = 0
-items = ''
-itemLinks = []
-while items != []:
- endpoint = baseURL + '/rest/filtered-items?query_field[]=' + deletedKey
- endpoint += '&query_op[]=exists&query_val[]=' + collSels
- endpoint += '&limit=200&offset=' + str(offset)
- print(endpoint)
- response = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- items = response['items']
- for item in items:
- itemMetadataProcessed = []
- itemLink = item['link']
- itemLinks.append(itemLink)
- offset = offset + 200
- print(offset)
-for itemLink in itemLinks:
- itemMetadataProcessed = []
- print(itemLink)
- metadata = requests.get(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify).json()
- for l in range(0, len(metadata)):
- metadata[l].pop('schema', None)
- metadata[l].pop('element', None)
- metadata[l].pop('qualifier', None)
- languageValue = metadata[l]['language']
- if metadata[l]['key'] == deletedKey:
- provNote = '\'' + deletedKey
- provNote += '\' was deleted through a batch process on '
- provNote += datetime.now().strftime('%Y-%m-%d %H:%M:%S')
- provNote += '.'
- provNoteElement = {}
- provNoteElement['key'] = 'dc.description.provenance'
- provNoteElement['value'] = provNote
- provNoteElement['language'] = 'en_US'
- itemMetadataProcessed.append(provNoteElement)
- else:
- itemMetadataProcessed.append(metadata[l])
- recordsEdited = recordsEdited + 1
- itemMetadataProcessed = json.dumps(itemMetadataProcessed)
- print('updated', itemLink, recordsEdited)
- delete = requests.delete(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify)
- print(delete)
- post = requests.put(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify,
- data=itemMetadataProcessed)
- print(post)
- f.writerow([itemLink] + [deletedKey] + [delete] + [post])
-
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/deleteKeyFromCommunity.py b/deleteKeyFromCommunity.py
deleted file mode 100644
index a57c777..0000000
--- a/deleteKeyFromCommunity.py
+++ /dev/null
@@ -1,126 +0,0 @@
-import json
-import requests
-import time
-import csv
-from datetime import datetime
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-k', '--deletedKey', help='the key to be deleted. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-i', '--handle', help='handle of the community to '
- 'retreive. optional - if not provided, the script will '
- 'ask for input')
-args = parser.parse_args()
-
-if args.deletedKey:
- deletedKey = args.deletedKey
-else:
- deletedKey = input('Enter the key to be deleted: ')
-
-if args.handle:
- handle = args.handle
-else:
- handle = input('Enter collection handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-endpoint = baseURL + '/rest/handle/' + handle
-community = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-communityID = community['uuid']
-collections = requests.get(baseURL + '/rest/communities/' + str(communityID)
- + '/collections', headers=header, cookies=cookies,
- verify=verify).json()
-collSels = ''
-for j in range(0, len(collections)):
- collectionID = collections[j]['uuid']
- collSel = '&collSel[]=' + collectionID
- collSels = collSels + collSel
-
-f = csv.writer(open(filePath + 'deletedValues'
- + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w'))
-f.writerow(['handle'] + ['deletedValue'] + ['delete'] + ['post'])
-offset = 0
-recordsEdited = 0
-items = ''
-itemLinks = []
-while items != []:
- endpoint = baseURL + '/rest/filtered-items?query_field[]=' + deletedKey
- endpoint += '&query_op[]=exists&query_val[]=' + collSels
- endpoint += '&limit=200&offset=' + str(offset)
- print(endpoint)
- response = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- items = response['items']
- for item in items:
- itemMetadataProcessed = []
- itemLink = item['link']
- itemLinks.append(itemLink)
- offset = offset + 200
- print(offset)
-for itemLink in itemLinks:
- itemMetadataProcessed = []
- print(itemLink)
- metadata = requests.get(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify).json()
- for l in range(0, len(metadata)):
- metadata[l].pop('schema', None)
- metadata[l].pop('element', None)
- metadata[l].pop('qualifier', None)
- languageValue = metadata[l]['language']
- if metadata[l]['key'] == deletedKey:
- provNote = '\'' + deletedKey
- provNote += '\' was deleted through a batch process on '
- provNote += datetime.now().strftime('%Y-%m-%d %H:%M:%S')
- provNote += '.'
- provNoteElement = {}
- provNoteElement['key'] = 'dc.description.provenance'
- provNoteElement['value'] = provNote
- provNoteElement['language'] = 'en_US'
- itemMetadataProcessed.append(provNoteElement)
- else:
- itemMetadataProcessed.append(metadata[l])
- recordsEdited = recordsEdited + 1
- itemMetadataProcessed = json.dumps(itemMetadataProcessed)
- print('updated', itemLink, recordsEdited)
- delete = requests.delete(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify)
- print(delete)
- post = requests.put(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify,
- data=itemMetadataProcessed)
- print(post)
- f.writerow([itemLink] + [deletedKey] + [delete] + [post])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/deleteKeyValuePairFromCollection.py b/deleteKeyValuePairFromCollection.py
deleted file mode 100644
index a27a851..0000000
--- a/deleteKeyValuePairFromCollection.py
+++ /dev/null
@@ -1,130 +0,0 @@
-import json
-import requests
-import time
-import csv
-from datetime import datetime
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-k', '--deletedKey', help='the key to be deleted. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-v', '--deletedValue', help='the value to be deleted. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-i', '--handle', help='handle of the community to '
- 'retreive. optional - if not provided, the script will '
- 'ask for input')
-args = parser.parse_args()
-
-if args.deletedKey:
- deletedKey = args.deletedKey
-else:
- deletedKey = input('Enter the key to be deleted: ')
-if args.deletedValue:
- deletedValue = args.deletedValue
-else:
- deletedValue = input('Enter the value to be deleted: ')
-if args.handle:
- handle = args.handle
-else:
- handle = input('Enter collection handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-endpoint = baseURL + '/rest/handle/' + handle
-collection = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-collectionID = collection['uuid']
-collSels = '&collSel[]=' + collectionID
-
-f = csv.writer(open(filePath + 'deletedKey'
- + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w'))
-f.writerow(['itemID'] + ['deletedKey'] + ['deletedValue'] + ['delete']
- + ['post'])
-recordsEdited = 0
-offset = 0
-items = ''
-itemLinks = []
-while items != []:
- endpoint = baseURL + '/rest/filtered-items?query_field[]=' + deletedKey
- endpoint += '&query_op[]=exists&query_val[]=' + collSels
- endpoint += '&limit=200&offset=' + str(offset)
- print(endpoint)
- response = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- items = response['items']
- for item in items:
- itemMetadataProcessed = []
- itemLink = item['link']
- itemLinks.append(itemLink)
- offset = offset + 200
- print(offset)
-for itemLink in itemLinks:
- itemMetadataProcessed = []
- print(itemLink)
- metadata = requests.get(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify).json()
- for l in range(0, len(metadata)):
- metadata[l].pop('schema', None)
- metadata[l].pop('element', None)
- metadata[l].pop('qualifier', None)
- key = metadata[l]['key']
- value = metadata[l]['value']
- if key == deletedKey and value == deletedValue:
- provNote = '\'' + deletedKey + ':' + deletedValue
- provNote += '\' was deleted through a batch process on '
- provNote += datetime.now().strftime('%Y-%m-%d %H:%M:%S')
- provNote += '.'
- provNoteElement = {}
- provNoteElement['key'] = 'dc.description.provenance'
- provNoteElement['value'] = provNote
- provNoteElement['language'] = 'en_US'
- itemMetadataProcessed.append(provNoteElement)
- else:
- itemMetadataProcessed.append(metadata[l])
- if itemMetadataProcessed != metadata:
- recordsEdited = recordsEdited + 1
- itemMetadataProcessed = json.dumps(itemMetadataProcessed)
- print('updated', itemLink, recordsEdited)
- delete = requests.delete(baseURL + itemLink + '/metadata',
- headers=header, cookies=cookies,
- verify=verify)
- print(delete)
- post = requests.put(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify,
- data=itemMetadataProcessed)
- print(post)
- f.writerow([itemLink] + [deletedKey] + [deletedValue] + [delete]
- + [post])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/dsFunc.py b/dsFunc.py
deleted file mode 100644
index f3f7cac..0000000
--- a/dsFunc.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import datetime
-import time
-import requests
-
-
-def auth(email, password, baseURL, verify):
- """Authenticate the user to the DSpace API."""
- data = {'email': email, 'password': password}
- header = {'content-type': 'application/json',
- 'accept': 'application/json'}
- session = requests.post(baseURL + '/rest/login', headers=header,
- verify=verify,
- params=data).cookies['JSESSIONID']
- cookies = {'JSESSIONID': session}
- return(cookies, header)
-
-
-def authConfirm(cookies, baseURL, header, verify):
- """Confirm user was successfully authenticated to the DSpace API."""
- status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
- uName = status['fullname']
- authEmail = status['email']
- print('authenticated', uName, authEmail)
- return(uName, authEmail)
-
-
-def elapsedTime(startTime, label):
- """Generate elapsed time."""
- td = datetime.timedelta(seconds=time.time() - startTime)
- print(label + ': {}'.format(td))
- return td
-
-
-def instSelect(instance):
- """Select secrets.py file for the appropriate DSpace instance."""
- if instance != '':
- try:
- secrets = __import__(instance)
- print('Editing ' + secrets.baseURL)
- except ImportError:
- secrets = __import__('secrets')
- print('Editing ' + secrets.baseURL)
- else:
- secrets = __import__('secrets')
- print('Editing ' + secrets.baseURL)
-
- return secrets
diff --git a/data/.keep b/dsaps/__init__.py
similarity index 100%
rename from data/.keep
rename to dsaps/__init__.py
diff --git a/dsaps/cli.py b/dsaps/cli.py
new file mode 100644
index 0000000..087fb6c
--- /dev/null
+++ b/dsaps/cli.py
@@ -0,0 +1,162 @@
+import csv
+import datetime
+import json
+import logging
+import os
+import time
+
+import click
+import structlog
+
+from dsaps.models import Client, Collection
+from dsaps import helpers
+
+logger = structlog.get_logger()
+
+
+def validate_path(ctx, param, value):
+ """Validates th formatting of The submitted path"""
+ if value[-1] == '/':
+ return value
+ else:
+ raise click.BadParameter('Include / at the end of the path.')
+
+
+@click.group(chain=True)
+@click.option('--url', envvar='DSPACE_URL', required=True)
+@click.option('-e', '--email', envvar='TEST_EMAIL', required=True,
+ help='The email of the user for authentication.')
+@click.option('-p', '--password', envvar='TEST_PASS', required=True,
+ hide_input=True, help='The password for authentication.')
+@click.pass_context
+def main(ctx, url, email, password):
+ ctx.obj = {}
+ if os.path.isdir('logs') is False:
+ os.mkdir('logs')
+ dt = datetime.datetime.utcnow().isoformat(timespec='seconds')
+ log_suffix = f'{dt}.log'
+ structlog.configure(processors=[
+ structlog.stdlib.filter_by_level,
+ structlog.stdlib.add_log_level,
+ structlog.stdlib.PositionalArgumentsFormatter(),
+ structlog.processors.TimeStamper(fmt="iso"),
+ structlog.processors.JSONRenderer()
+ ],
+ context_class=dict,
+ logger_factory=structlog.stdlib.LoggerFactory())
+ logging.basicConfig(format="%(message)s",
+ handlers=[logging.FileHandler(f'logs/log-{log_suffix}',
+ 'w')],
+ level=logging.INFO)
+ logger.info('Application start')
+ client = Client(url)
+ client.authenticate(email, password)
+ start_time = time.time()
+ ctx.obj['client'] = client
+ ctx.obj['start_time'] = start_time
+ ctx.obj['log_suffix'] = log_suffix
+
+
+@main.command()
+@click.option('-m', '--metadata-csv', required=True,
+ type=click.Path(exists=True, file_okay=True, dir_okay=False),
+ help='The path to the CSV file of metadata for the items.')
+@click.option('-f', '--field-map', required=True,
+ type=click.Path(exists=True, file_okay=True, dir_okay=False),
+ help='The path to JSON field mapping file.')
+@click.option('-d', '--content-directory', required=True,
+ type=click.Path(exists=True, dir_okay=True, file_okay=False),
+ help='The full path to the content, either a directory of files '
+ 'or a URL for the storage location.')
+@click.option('-t', '--file-type',
+ help='The file type to be uploaded, if limited to one file '
+ 'type.', default='*')
+@click.option('-r', '--ingest-report', is_flag=True,
+ help='Create ingest report for updating other systems.')
+@click.option('-c', '--collection-handle',
+ help='The handle of the collection to which items are being '
+ 'added.', default=None)
+@click.pass_context
+def additems(ctx, metadata_csv, field_map, content_directory, file_type,
+ ingest_report, collection_handle):
+ """Adds items to a specified collection from a metadata CSV, a field
+ mapping file, and a directory of files. May be run in conjunction with the
+ newcollection CLI command."""
+ client = ctx.obj['client']
+ start_time = ctx.obj['start_time']
+ if 'collection_uuid' not in ctx.obj and collection_handle is None:
+ raise click.UsageError('collection_handle option must be used or '
+ 'additems must be run after newcollection '
+ 'command.')
+ elif 'collection_uuid' in ctx.obj:
+ collection_uuid = ctx.obj['collection_uuid']
+ else:
+ collection_uuid = client.get_uuid_from_handle(collection_handle)
+ with open(metadata_csv, 'r') as csvfile, open(field_map, 'r') as jsonfile:
+ metadata = csv.DictReader(csvfile)
+ mapping = json.load(jsonfile)
+ collection = Collection.from_csv(metadata, mapping)
+ for item in collection.items:
+ item.bitstreams_from_directory(content_directory, file_type)
+ collection.uuid = collection_uuid
+ items = list(collection.post_items(client))  # exhaust the generator so every item posts
+ if ingest_report:
+ report_name = metadata_csv.replace('.csv', '-ingest.csv')
+ helpers.create_ingest_report(items, report_name)
+ elapsed_time = datetime.timedelta(seconds=time.time() - start_time)
+ logger.info(f'Total runtime: {elapsed_time}')
+
+
+@main.command()
+@click.option('-c', '--community-handle', required=True,
+ help='The handle of the community in which to create the '
+ 'collection.')
+@click.option('-n', '--collection-name', required=True,
+ help='The name of the collection to be created.')
+@click.pass_context
+def newcollection(ctx, community_handle, collection_name):
+ """Posts a new collection to a specified community. Used in conjunction
+ with the additems CLI command to populate the new collection with
+ items."""
+ client = ctx.obj['client']
+ collection_uuid = client.post_coll_to_comm(community_handle,
+ collection_name)
+ ctx.obj['collection_uuid'] = collection_uuid
+
+
+@main.command()
+@click.option('-m', '--metadata-csv', required=True,
+ type=click.Path(exists=True, file_okay=True, dir_okay=False),
+ help='The path of the CSV file of metadata.')
+@click.option('-o', '--output-directory',
+ type=click.Path(exists=True, file_okay=False),
+ default=f'{os.getcwd()}/', callback=validate_path,
+ help='The path of the output files; include / at the end of the '
+ 'path.')
+@click.option('-d', '--content-directory', required=True,
+ help='The full path to the content, either a directory of files '
+ 'or a URL for the storage location.')
+@click.option('-t', '--file-type',
+ help='The file type to be uploaded, if limited to one file '
+ 'type.', default='*')
+def reconcile(metadata_csv, output_directory, content_directory, file_type):
+ """Runs a reconciliation of the specified files and metadata that produces
+ reports of files with no metadata, metadata with no files, metadata
+ matched to files, and an updated version of the metadata CSV with only
+ the records that have matching files."""
+ file_ids = helpers.create_file_list(content_directory, file_type)
+ metadata_ids = helpers.create_metadata_id_list(metadata_csv)
+ metadata_matches = helpers.match_metadata_to_files(file_ids, metadata_ids)
+ file_matches = helpers.match_files_to_metadata(file_ids, metadata_ids)
+ no_files = set(metadata_ids) - set(metadata_matches)
+ no_metadata = set(file_ids) - set(file_matches)
+ helpers.create_csv_from_list(no_metadata, f'{output_directory}no_metadata')
+ helpers.create_csv_from_list(no_files, f'{output_directory}no_files')
+ helpers.create_csv_from_list(metadata_matches,
+ f'{output_directory}metadata_matches')
+ helpers.update_metadata_csv(metadata_csv, output_directory,
+ metadata_matches)
+
+
+if __name__ == '__main__':
+ main()
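
A typical run chains the commands, e.g. newcollection followed by additems, with --url, -e, and -p drawn from the DSPACE_URL, TEST_EMAIL, and TEST_PASS environment variables. As a small standalone check of the path handling above, a sketch exercising the validate_path callback directly (ctx and param are unused by the callback, so None stands in):

```python
import click

from dsaps.cli import validate_path

# A trailing slash passes through unchanged.
assert validate_path(None, None, 'output/') == 'output/'

# A missing trailing slash raises click.BadParameter.
try:
    validate_path(None, None, 'output')
except click.BadParameter as err:
    print(err)  # Include / at the end of the path.
```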
diff --git a/dsaps/helpers.py b/dsaps/helpers.py
new file mode 100644
index 0000000..c8f4fbd
--- /dev/null
+++ b/dsaps/helpers.py
@@ -0,0 +1,68 @@
+import csv
+import glob
+import os
+
+
+def create_csv_from_list(list_name, output):
+ """Creates CSV file from list content."""
+ with open(f'{output}.csv', 'w') as csvfile:
+ writer = csv.writer(csvfile)
+ writer.writerow(['id'])
+ for item in list_name:
+ writer.writerow([item])
+
+
+def create_file_list(file_path, file_type):
+ """Creates a list of file names."""
+ files = glob.glob(f'{file_path}/**/*.{file_type}', recursive=True)
+ file_list = [os.path.basename(file) for file in files]
+ return file_list
+
+
+def create_ingest_report(items, file_name):
+ """Creates ingest report of other systems' identifiers with a newly created
+ DSpace handle."""
+ with open(f'{file_name}', 'w') as writecsv:
+ writer = csv.writer(writecsv)
+ writer.writerow(['uri', 'link'])
+ for item in items:
+ writer.writerow([item.source_system_identifier]
+ + [f'https://hdl.handle.net/{item.handle}'])
+
+
+def create_metadata_id_list(metadata_csv):
+ """Creates a list of IDs from a metadata CSV"""
+ metadata_ids = []
+ with open(metadata_csv) as csvfile:
+ reader = csv.DictReader(csvfile)
+ metadata_ids = [row['file_identifier'] for row in reader
+ if row['file_identifier'] != '']
+ return metadata_ids
+
+
+def match_files_to_metadata(file_list, metadata_ids):
+ """Creates a list of files matched to metadata records."""
+ file_matches = [file_id for metadata_id in metadata_ids
+ for file_id in file_list
+ if file_id.startswith(metadata_id)]
+ return file_matches
+
+
+def match_metadata_to_files(file_list, metadata_ids):
+ """Creates a list of metadata records matched to files."""
+ metadata_matches = [metadata_id for f in file_list for metadata_id in
+ metadata_ids if f.startswith(metadata_id)]
+ return metadata_matches
+
+
+def update_metadata_csv(metadata_csv, output_directory, metadata_matches):
+ """Creates an updated CSV of metadata records with matching files."""
+ with open(metadata_csv) as csvfile:
+ reader = csv.DictReader(csvfile)
+ upd_md_file_name = f'updated-{os.path.basename(metadata_csv)}'
+ with open(f'{output_directory}{upd_md_file_name}', 'w') as updated_csv:
+ writer = csv.DictWriter(updated_csv, fieldnames=reader.fieldnames)
+ writer.writeheader()
+ for row in reader:
+ if row['file_identifier'] in metadata_matches:
+ writer.writerow(row)
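
The reconcile command's reports reduce to set differences over these two match lists. A self-contained sketch with invented identifiers, showing which metadata records lack files and which files lack metadata:

```python
from dsaps import helpers

# Hypothetical bitstream file names and metadata file_identifier values.
file_ids = ['001.pdf', '002.pdf', '004.pdf']
metadata_ids = ['001', '002', '003']

metadata_matches = helpers.match_metadata_to_files(file_ids, metadata_ids)
file_matches = helpers.match_files_to_metadata(file_ids, metadata_ids)

print(set(metadata_ids) - set(metadata_matches))  # {'003'} -> no_files report
print(set(file_ids) - set(file_matches))          # {'004.pdf'} -> no_metadata report
```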
diff --git a/dsaps/models.py b/dsaps/models.py
new file mode 100644
index 0000000..a95f088
--- /dev/null
+++ b/dsaps/models.py
@@ -0,0 +1,237 @@
+from functools import partial
+import glob
+import operator
+import os
+
+import attr
+import requests
+import structlog
+
+Field = partial(attr.ib, default=None)
+Group = partial(attr.ib, default=attr.Factory(list))  # fresh list per instance
+
+logger = structlog.get_logger()
+op = operator.attrgetter('name')
+
+
+class Client:
+ def __init__(self, url):
+ header = {'content-type': 'application/json',
+ 'accept': 'application/json'}
+ self.url = url.rstrip('/')
+ self.cookies = None
+ self.header = header
+ logger.info('Initializing client')
+
+ def authenticate(self, email, password):
+ """Authenticate user to DSpace API."""
+ header = self.header
+ data = {'email': email, 'password': password}
+ session = requests.post(f'{self.url}/login', headers=header,
+ params=data).cookies['JSESSIONID']
+ cookies = {'JSESSIONID': session}
+ status = requests.get(f'{self.url}/status', headers=header,
+ cookies=cookies).json()
+ self.user_full_name = status['fullname']
+ self.cookies = cookies
+ self.header = header
+ logger.info(f'Authenticated to {self.url} as {self.user_full_name}')
+
+ def filtered_item_search(self, key, string, query_type,
+ selected_collections=''):
+ """Performs a search against the filtered items endpoint."""
+ offset = 0
+ items = ''
+ item_links = []
+ while items != []:
+ endpoint = f'{self.url}/filtered-items?'
+ params = {'query_field[]': key, 'query_op[]': query_type,
+ 'query_val[]': string, 'collSel[]': selected_collections,
+ 'limit': 200, 'offset': offset}
+ logger.info(params)
+ response = requests.get(endpoint, headers=self.header,
+ params=params, cookies=self.cookies)
+ logger.info(f'Response url: {response.url}')
+ response = response.json()
+ items = response['items']
+ for item in items:
+ item_links.append(item['link'])
+ offset = offset + 200
+ return item_links
+
+ def get_uuid_from_handle(self, handle):
+ """Retrieves UUID for an object based on its handle."""
+ hdl_endpoint = f'{self.url}/handle/{handle}'
+ rec_obj = requests.get(hdl_endpoint, headers=self.header,
+ cookies=self.cookies).json()
+ return rec_obj['uuid']
+
+ def get_record(self, uuid, rec_type):
+ """Retrieve an individual record of a particular type."""
+ url = f'{self.url}/{rec_type}/{uuid}?expand=all'
+ record = requests.get(url, headers=self.header,
+ cookies=self.cookies).json()
+ if rec_type == 'items':
+ rec_obj = self._pop_inst(Item, record)
+ elif rec_type == 'communities':
+ rec_obj = self._pop_inst(Community, record)
+ elif rec_type == 'collections':
+ rec_obj = self._pop_inst(Collection, record)
+ else:
+ logger.info('Invalid record type.')
+ exit()
+ return rec_obj
+
+ def post_bitstream(self, item_uuid, bitstream):
+ """Posts a bitstream to a specified item and returns the bitstream
+ ID."""
+ endpoint = (f'{self.url}/items/{item_uuid}'
+ f'/bitstreams?name={bitstream.name}')
+ header_upload = {'accept': 'application/json'}
+ with open(bitstream.file_path, 'rb') as data:
+ response = requests.post(endpoint, headers=header_upload,
+ cookies=self.cookies, data=data).json()
+ bitstream_uuid = response['uuid']
+ return bitstream_uuid
+
+ def post_coll_to_comm(self, comm_handle, coll_name):
+ """Posts a collection to a specified community."""
+ hdl_endpoint = f'{self.url}/handle/{comm_handle}'
+ community = requests.get(hdl_endpoint, headers=self.header,
+ cookies=self.cookies).json()
+ comm_uuid = community['uuid']
+ uuid_endpoint = f'{self.url}/communities/{comm_uuid}/collections'
+ coll_uuid = requests.post(uuid_endpoint, headers=self.header,
+ cookies=self.cookies,
+ json={'name': coll_name}).json()
+ coll_uuid = coll_uuid['uuid']
+ logger.info(f'Collection posted: {coll_uuid}')
+ return coll_uuid
+
+ def post_item_to_collection(self, collection_uuid, item):
+ """Posts item to a specified collection and returns the item ID."""
+ endpoint = f'{self.url}/collections/{collection_uuid}/items'
+ post_response = requests.post(
+ endpoint, headers=self.header, cookies=self.cookies,
+ json={'metadata': attr.asdict(item)['metadata']}).json()
+ item_uuid = post_response['uuid']
+ item_handle = post_response['handle']
+ return item_uuid, item_handle
+
+ def _pop_inst(self, class_type, rec_obj):
+ """Populate class instance with data from record."""
+ fields = [op(field) for field in attr.fields(class_type)]
+ kwargs = {k: v for k, v in rec_obj.items() if k in fields}
+ kwargs['objtype'] = rec_obj['type']
+ if class_type == Community:
+ kwargs['collections'] = self._build_uuid_list(rec_obj, 'collections')
+ elif class_type == Collection:
+ kwargs['items'] = self._build_uuid_list(rec_obj, 'items')
+ rec_obj = class_type(**kwargs)
+ return rec_obj
+
+ def _build_uuid_list(self, rec_obj, children):
+ """Builds a list of the uuids for an object's children."""
+ child_list = []
+ for child in rec_obj[children]:
+ child_list.append(child['uuid'])
+ return child_list
+
+
+@attr.s
+class BaseRecord:
+ uuid = Field()
+ name = Field()
+ handle = Field()
+ link = Field()
+ objtype = Field()
+
+
+@attr.s
+class Collection(BaseRecord):
+ items = Group()
+
+ def post_items(self, client):
+ for item in self.items:
+ item_uuid, item_handle = client.post_item_to_collection(self.uuid,
+ item)
+ item.uuid = item_uuid
+ item.handle = item_handle
+ logger.info(f'Item posted: {item_uuid}')
+ for bitstream in item.bitstreams:
+ bitstream_uuid = client.post_bitstream(item_uuid, bitstream)
+ bitstream.uuid = bitstream_uuid
+ logger.info(f'Bitstream posted: {bitstream_uuid}')
+ yield item
+
+ @classmethod
+ def from_csv(cls, csv_reader, field_map):
+ items = [
+ Item.from_row(row, field_map) for row in csv_reader
+ ]
+ return cls(items=items)
+
+
+@attr.s
+class Community(BaseRecord):
+ collections = Field()
+
+
+@attr.s
+class Item(BaseRecord):
+ metadata = Group()
+ bitstreams = Group()
+ file_identifier = Field()
+ source_system_identifier = Field()
+
+ def bitstreams_from_directory(self, directory, file_type='*'):
+ files = glob.iglob(
+ f'{directory}/**/{self.file_identifier}*.{file_type}',
+ recursive=True
+ )
+ self.bitstreams = [
+ Bitstream(name=os.path.basename(f),
+ file_path=f) for f in files
+ ]
+ self.bitstreams.sort(key=lambda x: x.name)
+
+ @classmethod
+ def from_row(cls, row, field_map):
+ metadata = []
+ for f in field_map:
+ field = row[field_map[f]['csv_field_name']]
+ if f == 'file_identifier':
+ file_identifier = field
+ continue # file_identifier is not included in DSpace metadata
+ if f == 'source_system_identifier':
+ source_system_identifier = field
+ continue # source_system_identifier is not included in DSpace
+ # metadata
+ delimiter = field_map[f]['delimiter']
+ language = field_map[f]['language']
+ if delimiter:
+ metadata.extend([
+ MetadataEntry(key=f, value=v, language=language)
+ for v in field.split(delimiter)
+ ])
+ else:
+ metadata.append(
+ MetadataEntry(key=f, value=field, language=language)
+ )
+ return cls(metadata=metadata, file_identifier=file_identifier,
+ source_system_identifier=source_system_identifier)
+
+
+@attr.s
+class Bitstream():
+ name = Field()
+ file_path = Field()
+
+
+@attr.s
+class MetadataEntry():
+ key = Field()
+ value = Field()
+ language = Field()
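
To see how a CSV row becomes Item metadata, a minimal sketch of Item.from_row with a trimmed mapping in the shape of config/aspace_mapping.json and invented values; both identifier fields are included because from_row expects them when constructing the Item:

```python
from dsaps.models import Item

# A trimmed mapping in the shape of config/aspace_mapping.json.
mapping = {
    'file_identifier': {'csv_field_name': 'file_identifier',
                        'language': None, 'delimiter': ''},
    'source_system_identifier': {'csv_field_name': 'uri',
                                 'language': None, 'delimiter': ''},
    'dc.title': {'csv_field_name': 'title',
                 'language': 'en_US', 'delimiter': ''},
    'dc.contributor.author': {'csv_field_name': 'author',
                              'language': None, 'delimiter': '|'},
}

# One hypothetical CSV row; the author field is pipe-delimited.
row = {'file_identifier': '001', 'uri': '/repositories/2/resources/42',
       'title': 'Sample Title', 'author': 'Smith, Jan|Doe, Ada'}

item = Item.from_row(row, mapping)
print(item.file_identifier)                      # 001
print([(m.key, m.value) for m in item.metadata])
# [('dc.title', 'Sample Title'), ('dc.contributor.author', 'Smith, Jan'),
#  ('dc.contributor.author', 'Doe, Ada')]
```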
diff --git a/editBitstreamsNames.py b/editBitstreamsNames.py
deleted file mode 100644
index ed20863..0000000
--- a/editBitstreamsNames.py
+++ /dev/null
@@ -1,104 +0,0 @@
-import json
-import requests
-import time
-import urllib3
-import csv
-from datetime import datetime
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-f', '--fileName', help='the name of the CSV with the '
- 'bitstream name changes. optional - if not provided, the '
- 'script will ask for input')
-args = parser.parse_args()
-if args.uri:
- fileName = args.fileName
-else:
- fileName = input('Enter file name: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header,
- verify=verify, params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-f = csv.writer(open(filePath + 'editBitstreamName'
- + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w'))
-f.writerow(['itemID'] + ['oldBitstreamName'] + ['newBitstreamName'] + ['post'])
-with open(fileName) as csvfile:
- reader = csv.DictReader(csvfile)
- for row in reader:
- oldValue = row['oldFileId']
- newValue = row['newFileId']
- handle = row['handle']
- endpoint = baseURL + '/rest/handle/' + handle
- item = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- itemID = str(item['uuid'])
- bitstreams = requests.get(baseURL + '/rest/items/' + itemID
- + '/bitstreams', headers=header,
- cookies=cookies, verify=verify).json()
- for bitstream in bitstreams:
- oldBitstreamName = bitstream['name']
- bitstreamID = bitstream['link']
- updatedBitstream = json.dumps(bitstream)
- print(json.dumps(bitstream))
- updatedBitstream = updatedBitstream.replace(oldValue, newValue)
- post = requests.put(baseURL + bitstreamID, headers=header,
- cookies=cookies, verify=verify,
- data=updatedBitstream)
- print(post)
- f.writerow([itemID] + [oldValue] + [newValue] + [post])
- updatedItemMetadataList = []
- metadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header,
- cookies=cookies, verify=verify).json()
- for l in range(0, len(metadata)):
- metadata[l].pop('schema', None)
- metadata[l].pop('element', None)
- metadata[l].pop('qualifier', None)
- updatedItemMetadataList.append(metadata[l])
- provNote = 'Bitstream name changed from ' + oldValue + ' to '
- provNote += newValue + ' through a batch process on '
- provNote += datetime.now().strftime('%Y-%m-%d %H:%M:%S')
- provNote += '.'
- provNoteElement = {}
- provNoteElement['key'] = 'dc.description.provenance'
- provNoteElement['value'] = provNote
- provNoteElement['language'] = 'en_US'
- updatedItemMetadataList.append(provNoteElement)
- updatedItemMetadata = json.dumps(updatedItemMetadataList)
- delete = requests.delete(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header,
- cookies=cookies, verify=verify)
- print(delete)
- post = requests.put(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify, data=updatedItemMetadata)
- print(post)
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/exportCollectionMetadataToCSV.py b/exportCollectionMetadataToCSV.py
deleted file mode 100644
index 8709c3a..0000000
--- a/exportCollectionMetadataToCSV.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import requests
-import time
-import csv
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-i', '--handle', help='handle of the collection to '
- 'retreive. optional - if not provided, the script will '
- 'ask for input')
-args = parser.parse_args()
-
-if args.handle:
- handle = args.handle
-else:
- handle = input('Enter collection handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-# authentication
-startTime = time.time()
-cookies, header = dsFunc.auth(email, password, baseURL, verify)
-
-uName, authEmail = dsFunc.authConfirm(cookies, baseURL, header, verify)
-
-endpoint = baseURL + '/rest/handle/' + handle
-collection = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-collectionID = collection['uuid']
-
-itemList = {}
-offset = 0
-items = ''
-while items != []:
- items = requests.get(baseURL + '/rest/collections/' + str(collectionID)
- + '/items?limit=200&offset=' + str(offset),
- headers=header, cookies=cookies, verify=verify)
- while items.status_code != 200:
- time.sleep(5)
- items = requests.get(baseURL + '/rest/collections/' + str(collectionID)
- + '/items?limit=200&offset=' + str(offset),
- headers=header, cookies=cookies, verify=verify)
- items = items.json()
- for k in range(0, len(items)):
- itemID = items[k]['uuid']
- itemHandle = items[k]['handle']
- itemList[itemID] = itemHandle
- offset = offset + 200
- print(offset)
-
-keyList = []
-for itemID in itemList:
- print(baseURL + '/rest/items/' + str(itemID) + '/metadata')
- metadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify).json()
- for metadataElement in metadata:
- key = metadataElement['key']
- if key not in keyList and key != 'dc.description.provenance':
- keyList.append(key)
- print(itemID, key)
-
-keyListHeader = ['itemID']
-keyListHeader = keyListHeader + keyList
-print(keyListHeader)
-f = csv.writer(open(filePath + handle.replace('/', '-') + 'Metadata.csv', 'w'))
-f.writerow(keyListHeader)
-
-itemRows = []
-for itemID in itemList:
- itemRow = dict.fromkeys(keyListHeader, '')
- itemRow['itemID'] = itemID
- print(itemID)
- metadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify).json()
- for metadataElement in metadata:
- for key in keyListHeader:
- if metadataElement['key'] == key:
- try:
- value = metadataElement['value'] + '|'
- except ValueError:
- value = '' + '|'
- try:
- itemRow[key] = itemRow[key] + value
- except ValueError:
- itemRow[key] = value
- itemList = []
- for key in keyListHeader:
- itemList.append(itemRow[key][:len(itemRow[key]) - 1])
- f.writerow(itemList)
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/exportSelectedRecordMetadataToCSV.py b/exportSelectedRecordMetadataToCSV.py
deleted file mode 100644
index bedf21c..0000000
--- a/exportSelectedRecordMetadataToCSV.py
+++ /dev/null
@@ -1,97 +0,0 @@
-import requests
-import time
-import csv
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-f', '--fileName', help='the CSV file of record handles. '
- 'optional - if not provided, the script will ask for '
- 'input')
-args = parser.parse_args()
-
-if args.fileName:
- fileName = filePath + args.fileName
-else:
- fileName = filePath + input('Enter the CSV file of record handles '
- '(including \'.csv\'): ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-# authentication
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-
-handles = []
-with open(fileName) as csvfile:
- reader = csv.DictReader(csvfile)
- for row in reader:
- handles.append(row['handle'])
-
-itemList = []
-for handle in handles:
- endpoint = baseURL + '/rest/handle/' + handle
- item = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- itemID = item['uuid']
- itemList.append(itemID)
-
-keyList = []
-for itemID in itemList:
- metadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify).json()
- for metadataElement in metadata:
- key = metadataElement['key']
- if key not in keyList and key != 'dc.description.provenance':
- keyList.append(key)
- print(itemID, key)
-
-keyListHeader = ['itemID']
-keyListHeader = keyListHeader + keyList
-print(keyListHeader)
-f = csv.writer(open(filePath + 'selectedRecordMetadata.csv', 'w'))
-f.writerow(keyListHeader)
-
-itemRows = []
-for itemID in itemList:
- itemRow = dict.fromkeys(keyListHeader, '')
- itemRow['itemID'] = itemID
- print(itemRow)
- metadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify).json()
- for metadataElement in metadata:
- for key in keyListHeader:
- if metadataElement['key'] == key:
- value = metadataElement['value'] + '|'
- try:
- itemRow[key] = itemRow[key] + value
- except ValueError:
- itemRow[key] = value
- print(itemRow)
- for key in keyListHeader:
- itemList.append(itemRow[key][:len(itemRow[key]) - 1])
- f.writerow(itemList)
diff --git a/fileListMetadataReconcile.py b/fileListMetadataReconcile.py
deleted file mode 100644
index 7a648f6..0000000
--- a/fileListMetadataReconcile.py
+++ /dev/null
@@ -1,112 +0,0 @@
-# -*- coding: utf-8 -*-
-import csv
-import time
-import os
-import argparse
-import dsFunc
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-d', '--directory', help='the directory of the files. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-f', '--fileNameCSV', help='the metadata CSV file. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-e', '--fileExtension', help='the file extension. '
- 'optional - if not provided, the script will ask for '
- 'input')
-args = parser.parse_args()
-
-if args.directory:
- directory = args.directory
-else:
- directory = input('Enter directory (C:/Test/): ')
-if args.fileNameCSV:
- fileNameCSV = args.fileNameCSV
-else:
- fileNameCSV = input('Enter metadata CSV file: ')
-if args.fileExtension:
- fileExtension = args.fileExtension
-else:
- fileExtension = input('Enter file extension: ')
-
-startTime = time.time()
-fileIdentifierList = []
-for root, dirs, files in os.walk(directory, topdown=True):
- for file in files:
- if file.endswith(fileExtension):
-            # str.replace returns a new string; assign it back before
-            # recording the identifier
-            file = file.replace('.' + fileExtension, '')
-            fileIdentifierList.append(file)
-
-dsFunc.elapsedTime(startTime, 'File list creation time')
-
-f = csv.writer(open('collectionfileList.csv', 'w'))
-f.writerow(['fileName'])
-
-for file in fileIdentifierList:
- f.writerow([file])
-
-metadataIdentifierList = []
-f = csv.writer(open('metadataFileList.csv', 'w'))
-f.writerow(['metadataItemID'])
-with open(fileNameCSV) as csvfile:
- reader = csv.DictReader(csvfile)
- for row in reader:
- value = row['fileIdentifier']
- f.writerow([value])
- metadataIdentifierList.append(value)
-
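-# a file "matches" when its name starts with a metadata identifier; the
-# unmatched remainders on each side are reported in the CSVs below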
-fileMatches = []
-for fileID in fileIdentifierList:
- for metadataID in metadataIdentifierList:
- if fileID.startswith(metadataID):
- fileMatches.append(fileID)
-
-f = csv.writer(open('filesNotInMetadata.csv', 'w'))
-f.writerow(['fileItemID'])
-filesNotInMetadata = set(fileIdentifierList) - set(fileMatches)
-for file in filesNotInMetadata:
- f.writerow([file])
-
-metadataMatches = []
-for metadataID in metadataIdentifierList:
- for fileID in fileIdentifierList:
- if fileID.startswith(metadataID):
- metadataMatches.append(metadataID)
-
-metadataWithNoFiles = set(metadataIdentifierList) - set(metadataMatches)
-
-with open(fileNameCSV) as csvfile:
- f = csv.writer(open('metadataWithNoFiles.csv', 'w'))
- reader = csv.DictReader(csvfile)
-    # DictReader exposes the header via fieldnames; calling next(reader)
-    # would consume (and silently skip) the first data row
-    headerRow = reader.fieldnames
-    f.writerow(headerRow)
- for row in reader:
- csvRow = []
- for metadata in metadataWithNoFiles:
- if metadata == row['fileIdentifier']:
- for value in headerRow:
- csvRow.append(row[value])
- f.writerow(csvRow)
-
-with open(fileNameCSV) as csvfile:
- f = csv.writer(open('metadataWithFiles.csv', 'w'))
- reader = csv.DictReader(csvfile)
-    headerRow = reader.fieldnames
-    f.writerow(headerRow)
- for row in reader:
- csvRow = []
- for metadata in metadataMatches:
- if metadata == row['fileIdentifier']:
- for value in headerRow:
- csvRow.append(row[value])
- f.writerow(csvRow)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/findBogusUris.py b/findBogusUris.py
deleted file mode 100644
index a8c5eb3..0000000
--- a/findBogusUris.py
+++ /dev/null
@@ -1,74 +0,0 @@
-import requests
-import csv
-import time
-import urllib3
-import dsFunc
-import argparse
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-p', '--handlePrefix', help='Enter the handle prefix')
-args = parser.parse_args()
-
-if args.handlePrefix:
- handlePrefix = args.handlePrefix
-else:
- handlePrefix = input('Enter the handle prefix: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header,
- verify=verify, params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-f = csv.writer(open(filePath + 'bogusUris.csv', 'w'))
-f.writerow(['itemID'] + ['uri'])
-offset = 0
-recordsEdited = 0
-items = ''
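-# page through the filtered-items report 200 records at a time until an
-# empty page is returned, flagging any dc.identifier.uri that does not
-# start with the expected handle prefix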
-while items != []:
- endpoint = baseURL + '/rest/filtered-items?query_field[]='
- endpoint += 'dc.identifier.uri&query_op[]=doesnt_contain'
- endpoint += '&query_val[]=' + handlePrefix
- endpoint += '&limit=200&offset=' + str(offset)
- print(endpoint)
- response = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- items = response['items']
- for item in items:
- itemMetadataProcessed = []
- itemLink = item['link']
- metadata = requests.get(baseURL + itemLink + '/metadata',
- headers=header, cookies=cookies,
- verify=verify).json()
- for l in range(0, len(metadata)):
- if metadata[l]['key'] == 'dc.identifier.uri':
- uri = str(metadata[l]['value'])
- if uri.startswith(handlePrefix) is False:
- f.writerow([itemLink] + [uri])
- offset = offset + 200
- print(offset)
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/findDuplicateKeys.py b/findDuplicateKeys.py
deleted file mode 100644
index 4350889..0000000
--- a/findDuplicateKeys.py
+++ /dev/null
@@ -1,75 +0,0 @@
-import json
-import requests
-import time
-import csv
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-k', '--key', help='the key to be searched. optional - '
- 'if not provided, the script will ask for input')
-args = parser.parse_args()
-
-if args.key:
- key = args.key
-else:
- key = input('Enter the key: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-searchString = "\"" + key + "\""
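-# quote the key so that, once a record's metadata is serialized with
-# json.dumps, differing find()/rfind() offsets reveal a duplicated key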
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header,
- verify=verify, params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-f = csv.writer(open(filePath + 'recordsWithDuplicate-' + key + '.csv', 'w'))
-f.writerow(['itemID'])
-offset = 0
-recordsEdited = 0
-items = ''
-while items != []:
- endpoint = baseURL + '/rest/filtered-items?query_field[]=' + key
- endpoint += '&query_op[]=exists&query_val[]=&limit=200&offset='
- endpoint += str(offset)
- print(endpoint)
- response = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- items = response['items']
- for item in items:
- itemMetadataProcessed = []
- itemLink = item['link']
- metadata = requests.get(baseURL + itemLink + '/metadata',
- headers=header, cookies=cookies,
- verify=verify).json()
- metadata = json.dumps(metadata)
- if metadata.find(searchString) != metadata.rfind(searchString):
- f.writerow([itemLink])
- offset = offset + 200
- print(offset)
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/generateCollectionLevelAbstract.py b/generateCollectionLevelAbstract.py
deleted file mode 100644
index 37262a0..0000000
--- a/generateCollectionLevelAbstract.py
+++ /dev/null
@@ -1,93 +0,0 @@
-import json
-import requests
-import csv
-import argparse
-import urllib3
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-f', '--fileNameCSV', help='the metadata CSV file. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-i', '--handle', help='handle of the collection. '
- 'optional - if not provided, the script will ask for '
- 'input')
-args = parser.parse_args()
-
-if args.fileNameCSV:
- fileNameCSV = args.fileNameCSV
-else:
- fileNameCSV = input('Enter the metadata CSV file (including \'.csv\'): ')
-if args.handle:
- handle = args.handle
-else:
- handle = input('Enter collection handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-endpoint = baseURL + '/rest/handle/' + handle
-collection = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-collectionID = collection['uuid']
-print(collection)
-
-# Enter abstract text here
-abstractText = ''
-
-seriesTitles = []
-
-with open(fileNameCSV) as csvfile:
- reader = csv.DictReader(csvfile)
- for row in reader:
- seriesTitle = row['Series title']
- if seriesTitle not in seriesTitles:
- seriesTitles.append(seriesTitle)
-
-seriesLinks = ''
-
-for seriesTitle in seriesTitles:
- handleEdited = handle.replace('/', '%2F')
- editedSeriesTitle = seriesTitle.replace(' ', ' + ')
-    # the HTML markup that originally wrapped each series title and link
-    # (built from handleEdited and editedSeriesTitle) is missing from
-    # this source; empty string literals hold its place
-    seriesLink = ''
-    seriesLinks += seriesTitle + ''
-    seriesLinks += seriesLink
-
-abstractText = '' + abstractText + ''
-seriesLinks = ''
-introductoryText = abstractText + seriesLinks
-
-collection['introductoryText'] = introductoryText
-collection = json.dumps(collection)
-print(collection)
-post = requests.put(baseURL + '/rest/collections/' + collectionID,
- headers=header, cookies=cookies, verify=verify,
- data=collection)
-print(post)
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
diff --git a/getBitstreams.py b/getBitstreams.py
deleted file mode 100644
index cbdf467..0000000
--- a/getBitstreams.py
+++ /dev/null
@@ -1,273 +0,0 @@
-import requests
-import time
-import csv
-import urllib3
-import argparse
-import os
-import re
-from six.moves import input
-import dsFunc
-
-
-def main():
- """Define function."""
- # NOTE: this is the secrets file, not a module
- import secrets
-
- # define defaults
- default_response_timeout = 1
- default_limit = 100
-
- # define globals for requests, so we needn't pass too many arguments to our
- # functions
- global header
- global cookies
-
- # begin: argument parsing
- parser = argparse.ArgumentParser()
-
- parser.add_argument('-v', '--verbose', action='store_true',
- help='increase output verbosity')
-
- parser.add_argument('-i', '--handle',
-                        help='handle of the object to retrieve. optional - if '
- 'not provided, the script will ask for input')
-
- # bitstream formats:
- # REM: set number of args
- # ' + ' == 1 or more.
- # '*' == 0 or more.
- # '?' == 0 or 1.
- # An int is an explicit number of arguments to accept.
- parser.add_argument('-f', '--formats', nargs='*',
-                        help='optional list of bitstream formats. will return '
-                        'all formats if not provided')
-
- parser.add_argument('-b', '--bundles', nargs='*',
- help='optional list of bundles (e.g. ORIGINAL or '
- 'LICENSE). will return all bundles if not provided')
-
- parser.add_argument('-dl', '--download', action='store_true',
-                        help='download bitstreams (rather than just '
-                        'retrieving metadata about them). default: false')
-
- parser.add_argument('-rt', '--rtimeout', type=int,
- help='response timeout - number of seconds to wait '
- 'for a response. not a timeout for a download or run '
- 'of the entire script. default: '
- + str(default_response_timeout))
-
- parser.add_argument('-l', '--limit', type=int,
- help='limit to the number of objects to return in a '
- 'given request. default: ' + str(default_limit))
-
- parser.add_argument('-u', '--baseURL',
- help='url of the dspace instance. can be read from '
- 'the secrets file')
-
- parser.add_argument('-e', '--email',
- help='email of an authorized dspace user. can be '
- 'read from the secrets file')
-
- parser.add_argument('-p', '--password',
- help='password of an authorized dspace user. can be '
- 'read from the secrets file')
-
- parser.add_argument('-d', '--filePath',
- help='directory into which output files will be '
- 'written. can be read from the secrets file')
-
- parser.add_argument('-s', '--verify',
- help='ssl verification enabled (boolean) OR the path '
- 'to a CA_BUNDLE file or directory with certificates '
- 'of trusted CAs. use false if using an ssh tunnel to '
- 'connect to the dspace api. can be read from the '
-                        'secrets file')
-
- args = parser.parse_args()
-
- inst = input('To edit production server, enter the name of the secrets '
- 'file: ')
-
- secrets = dsFunc.instSelect(inst)
-
- baseURL = secrets.baseURL
- email = secrets.email
- password = secrets.password
- filePath = secrets.filePath
- verify = secrets.verify
- skipColl = secrets.skipColl
-
- if not args.rtimeout:
- args.rtimeout = default_response_timeout
-
- if not args.limit:
- args.limit = default_limit
-
- if not args.baseURL:
- args.baseURL = secrets.baseURL
-
- if not args.email:
- args.email = secrets.email
-
- if not args.password:
- args.password = secrets.password
-
- if not args.filePath:
- args.filePath = secrets.filePath
-
- if not args.verify:
- args.verify = secrets.verify
-
- if args.handle:
- handle = args.handle
- else:
- handle = input('Enter handle: ')
-
- if args.verbose:
- print('verbosity turned on')
-
- if args.handle:
-        print('retrieving object with handle {}'.format(args.handle))
-
- if args.formats:
- print('filtering results to the following bitstream '
- 'formats: {}'.format(args.formats))
- else:
- print('returning bitstreams of any format')
-
- if args.bundles:
- print('filtering results to the following bundles: '
- '{}'.format(args.bundles))
- else:
- print('returning bitstreams from any bundle')
-
- if args.download:
- print('downloading bitstreams')
-
- if args.rtimeout:
- print('response_timeout set to {}'.format(args.rtimeout))
-
- # end: argument parsing
-
- urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
- startTime = time.time()
- data = {'email': args.email, 'password': args.password}
- header = {'content-type': 'application/json', 'accept': 'application/json'}
- session = requests.post(args.baseURL + '/rest/login', headers=header,
- verify=args.verify, params=data,
- timeout=args.rtimeout).cookies['JSESSIONID']
- cookies = {'JSESSIONID': session}
- print('authenticated')
-
- # NOTE: expanding items (of collections) and bitstreams (of items) to get
- # the count
- endpoint = args.baseURL + '/rest/handle/' + handle
- endpoint += '?expand=items,bitstreams'
- dsObject = requests.get(endpoint, headers=header, cookies=cookies,
- verify=args.verify, timeout=args.rtimeout)
- dsObject.raise_for_status() # ensure we notice bad responses
- dsObject = dsObject.json()
- if args.verbose:
- print(dsObject)
- dsObjectID = dsObject['uuid']
- # TODO: extend
- if dsObject['type'] == 'collection':
- if args.verbose:
- print(dsObject['type'])
-
- itemCount = len(dsObject['items'])
- print('{} items'.format(itemCount))
- for collItem in dsObject['items']:
- endpoint = args.baseURL + collItem['link'] + '?expand=bitstreams'
- item = requests.get(endpoint, headers=header, cookies=cookies,
- verify=args.verify, timeout=args.rtimeout)
- item.raise_for_status() # ensure we notice bad responses
- item = item.json()
- processItem(item, args)
-
- elif dsObject['type'] == 'item':
- processItem(dsObject, args)
-
- else:
- print('object is of an invalid type for this script ({}). please '
- 'enter the handle of an item or a '
- 'collection.'.format(dsObject['type']))
-
- logout = requests.post(args.baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=args.verify,
- timeout=args.rtimeout)
-
- elapsedTime = time.time() - startTime
- m, s = divmod(elapsedTime, 60)
- h, m = divmod(m, 60)
- print('Total script run time: {:01.0f}:{:02.0f}:{:02.0f}'.format(h, m, s))
-
-
-def processItem(dsObject, args):
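-    """Write a bitstream CSV for one item; download files if requested."""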
- if args.verbose:
- print(dsObject['type'])
-
- itemHandle = dsObject['handle']
- handleID = re.sub(r'.*\/', '', itemHandle)
- itemPath = args.filePath + '/' + handleID + '/'
- if not os.path.exists(itemPath):
- os.makedirs(itemPath)
-
- f = csv.writer(open(itemPath + handleID + '_bitstreams.csv', 'w'))
- f.writerow(['sequenceId'] + ['name'] + ['format'] + ['bundleName'])
-
- itemID = dsObject['uuid']
- bitstreamCount = len(dsObject['bitstreams'])
- dlBitstreams = []
- offset = 0
- limit = args.limit
- bitstreams = ''
- # while bitstreams != []:
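-    # paginate by decrementing the known bitstream count rather than
-    # probing for an empty page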
- while bitstreamCount > 0:
-        # don't retrieve more bitstreams than we have left
- if limit > bitstreamCount:
- limit = bitstreamCount
- print('bitstreamCount: {0} offset: {1} '
- 'limit: {2}'.format(bitstreamCount, offset, limit))
- bitstreams = requests.get(args.baseURL + '/rest/items/' + str(itemID)
- + '/bitstreams?limit=' + str(limit)
- + '&offset=' + str(offset), headers=header,
- cookies=cookies, verify=args.verify,
- timeout=args.rtimeout)
- bitstreams.raise_for_status() # ensure we notice bad responses
- bitstreams = bitstreams.json()
- for bitstream in bitstreams:
-            # keep the bitstream when no format/bundle filter was given
-            # or when it matches the requested formats and bundles
-            if ((not args.formats or bitstream['format'] in args.formats)
-                    and (not args.bundles
-                         or bitstream['bundleName'] in args.bundles)):
- if args.verbose:
- print(bitstream)
- sequenceId = str(bitstream['sequenceId'])
- fileName = bitstream['name']
- fileFormat = bitstream['format']
- bundleName = bitstream['bundleName']
- f.writerow([sequenceId] + [fileName] + [fileFormat]
- + [bundleName])
-
- if args.download:
- dlBitstreams.append(bitstream)
- offset += limit
- bitstreamCount -= limit
-
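-    # download each selected bitstream unless a file with that name is
-    # already present in the item's directory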
- for dlBitstream in dlBitstreams:
- if not os.path.isfile(itemPath + dlBitstream['name']):
- response = requests.get(args.baseURL
- + str(dlBitstream['retrieveLink']),
- headers=header, cookies=cookies,
- verify=args.verify, timeout=args.rtimeout)
- response.raise_for_status() # ensure we notice bad responses
- file = open(itemPath + dlBitstream['name'], 'wb')
- file.write(response.content)
- file.close()
-
-
-if __name__ == "__main__":
- main()
diff --git a/getCollectionMetadataJson.py b/getCollectionMetadataJson.py
deleted file mode 100644
index 24b1ace..0000000
--- a/getCollectionMetadataJson.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import json
-import requests
-import time
-import urllib3
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-handle = input('Enter handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header,
- verify=verify, params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-endpoint = baseURL + '/rest/handle/' + handle
-collection = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-collectionID = collection['uuid']
-# the collection JSON fetched above already includes the title
-collectionTitle = collection['name']
-endpoint = baseURL + '/rest/collections/' + str(collectionID) + '/items'
-output = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-
-itemList = []
-for i in range(0, len(output)):
- name = output[i]['name']
- itemID = output[i]['uuid']
- itemList.append(itemID)
-
-f = open(filePath + handle.replace('/', '-') + '.json', 'w')
-metadataGroup = []
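-# accumulate each item's metadata and dump the collection as a single
-# JSON array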
-for itemID in itemList:
- metadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify).json()
- metadataGroup.append(metadata)
-json.dump(metadataGroup, f)
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/getCompleteAndUniqueValuesForAllKeys.py b/getCompleteAndUniqueValuesForAllKeys.py
deleted file mode 100644
index 9ea91ea..0000000
--- a/getCompleteAndUniqueValuesForAllKeys.py
+++ /dev/null
@@ -1,119 +0,0 @@
-import requests
-import csv
-import time
-import os.path
-from collections import Counter
-from datetime import datetime
-import urllib3
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-date = datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '/'
-filePathComplete = filePath + 'completeValueLists' + date
-filePathUnique = filePath + 'uniqueValueLists' + date
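-# output directories are timestamped so repeated runs do not overwrite
-# earlier exports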
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header,
- verify=verify, params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-collectionIds = []
-endpoint = baseURL + '/rest/communities'
-communities = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-for i in range(0, len(communities)):
- communityID = communities[i]['uuid']
- collections = requests.get(baseURL + '/rest/communities/'
- + str(communityID) + '/collections',
- headers=header, cookies=cookies,
- verify=verify).json()
- for j in range(0, len(collections)):
- collectionID = collections[j]['uuid']
- if collectionID not in skipColl:
- collectionIds.append(collectionID)
-
-os.mkdir(filePathComplete)
-os.mkdir(filePathUnique)
-
-for number, collectionID in enumerate(collectionIds):
- collectionsRemaining = len(collectionIds) - number
- print(collectionID, 'Collections remaining: ', collectionsRemaining)
- collSels = '&collSel[]=' + collectionID
- offset = 0
- recordsEdited = 0
- items = ''
- while items != []:
- setTime = time.time()
-        # parenthesize the concatenation; a '+' starting its own line
-        # would otherwise be a separate no-op expression
-        endpoint = (baseURL + '/rest/filtered-items?query_field[]=*'
-                    + '&query_op[]=exists&query_val[]=' + collSels
-                    + '&expand=metadata&limit=20&offset=' + str(offset))
- response = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- items = response['items']
- for item in items:
- metadata = item['metadata']
- for i in range(0, len(metadata)):
- if metadata[i]['key'] != 'dc.description.provenance':
- key = metadata[i]['key']
-                    # a metadata element may lack a 'value' entry
-                    try:
-                        value = metadata[i]['value']
-                    except KeyError:
-                        value = ''
-                    # use a separate index so the outer loop variable is
-                    # not shadowed
-                    for j in range(0, len(metadata)):
-                        if metadata[j]['key'] == 'dc.identifier.uri':
-                            uri = metadata[j]['value']
- if os.path.isfile(filePathComplete + key
- + 'ValuesComplete.csv') is False:
- f = csv.writer(open(filePathComplete + key
- + 'ValuesComplete.csv', 'w'))
- f.writerow(['handle'] + ['value'])
- f.writerow([uri] + [value])
- else:
- f = csv.writer(open(filePathComplete + key
- + 'ValuesComplete.csv', 'a'))
- f.writerow([uri] + [value])
- offset = offset + 20
- print(offset)
-
- dsFunc.elapsedTime(setTime, 'Set run time')
-
- dsFunc.elapsedTime(startTime, 'Collection run time')
-
-dsFunc.elapsedTime(startTime, 'Complete value list creation time')
-# derive unique value lists (with occurrence counts) from the complete
-# value lists written above
-for fileName in os.listdir(filePathComplete):
- reader = csv.DictReader(open(filePathComplete + fileName))
- fileName = fileName.replace('Complete', 'Unique')
- valueList = []
- for row in reader:
- valueList.append(row['value'])
- valueListCount = Counter(valueList)
- f = csv.writer(open(filePathUnique + fileName, 'w'))
- f.writerow(['value'] + ['count'])
- for key, value in valueListCount.items():
- f.writerow([key] + [str(value).zfill(6)])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/getCompleteAndUniqueValuesForAllKeysInCommunity.py b/getCompleteAndUniqueValuesForAllKeysInCommunity.py
deleted file mode 100644
index 80d37cb..0000000
--- a/getCompleteAndUniqueValuesForAllKeysInCommunity.py
+++ /dev/null
@@ -1,120 +0,0 @@
-import requests
-import csv
-import time
-import os.path
-from collections import Counter
-from datetime import datetime
-import urllib3
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-handle = input('Enter community handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-itemList = []
-endpoint = baseURL + '/rest/handle/' + handle
-community = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-communityName = community['name'].replace(' ', '')
-communityID = community['uuid']
-
-date = datetime.now().strftime('%Y-%m-%d %H.%M.%S')
-filePathComplete = filePath + 'completeValueLists' + communityName + date + '/'
-filePathUnique = filePath + 'uniqueValueLists' + communityName + date + '/'
-
-collections = requests.get(baseURL + '/rest/communities/' + str(communityID)
- + '/collections', headers=header, cookies=cookies,
- verify=verify).json()
-for j in range(0, len(collections)):
- collectionID = collections[j]['uuid']
- if collectionID not in skipColl:
- offset = 0
- items = ''
- while items != []:
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID)
- + '/items?limit=100&offset=' + str(offset),
- headers=header, cookies=cookies,
- verify=verify)
- while items.status_code != 200:
- time.sleep(5)
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID)
- + '/items?limit=100&offset='
- + str(offset), headers=header,
- cookies=cookies, verify=verify)
- items = items.json()
- for k in range(0, len(items)):
- itemID = items[k]['uuid']
- itemList.append(itemID)
- offset = offset + 100
-
-dsFunc.elapsedTime(startTime, 'Item list creation time')
-
-os.mkdir(filePathComplete)
-os.mkdir(filePathUnique)
-for number, itemID in enumerate(itemList):
- itemsRemaining = len(itemList) - number
- print('Items remaining: ', itemsRemaining, 'ItemID: ', itemID)
- metadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify).json()
- for l in range(0, len(metadata)):
- if metadata[l]['key'] != 'dc.description.provenance':
- key = metadata[l]['key']
- try:
- value = metadata[l]['value']
-            except KeyError:
- value = ''
- fileName = filePathComplete + key + 'ValuesComplete.csv'
- if os.path.isfile(fileName) is False:
- f = csv.writer(open(fileName, 'w'))
- f.writerow(['itemID'] + ['value'])
- f.writerow([itemID] + [value])
- else:
- f = csv.writer(open(filePathComplete + key
- + 'ValuesComplete.csv', 'a'))
- f.writerow([itemID] + [value])
-
-dsFunc.elapsedTime(startTime, 'Complete value list creation time')
-
-for fileName in os.listdir(filePathComplete):
- reader = csv.DictReader(open(filePathComplete + fileName))
- fileName = fileName.replace('Complete', 'Unique')
- valueList = []
- for row in reader:
- valueList.append(row['value'])
- valueListCount = Counter(valueList)
- f = csv.writer(open(filePathUnique + fileName, 'w'))
- f.writerow(['value'] + ['count'])
- for key, value in valueListCount.items():
- f.writerow([key] + [str(value).zfill(6)])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/getFacultyNamesFromETDs.py b/getFacultyNamesFromETDs.py
deleted file mode 100644
index f1dd4de..0000000
--- a/getFacultyNamesFromETDs.py
+++ /dev/null
@@ -1,98 +0,0 @@
-import requests
-import time
-import csv
-from datetime import datetime
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-i', '--handle', help='handle of the community to '
-                    'retrieve. optional - if not provided, the script will '
- 'ask for input')
-args = parser.parse_args()
-
-if args.handle:
- handle = args.handle
-else:
- handle = input('Enter community handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-endpoint = baseURL + '/rest/handle/' + handle
-community = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-communityID = community['uuid']
-collections = requests.get(baseURL + '/rest/communities/' + str(communityID)
- + '/collections', headers=header, cookies=cookies,
- verify=verify).json()
-collSels = ''
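-# append one &collSel[] parameter per collection so the filtered-items
-# query is scoped to this community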
-for j in range(0, len(collections)):
- collectionID = collections[j]['uuid']
- collSel = '&collSel[]=' + collectionID
- collSels = collSels + collSel
-
-date = datetime.now().strftime('%Y-%m-%d %H.%M.%S')
-
-f = csv.writer(open(filePath + 'EtdFacultyNames' + date + '.csv', 'w'))
-f.writerow(['name'])
-
-nameFields = ['dc.contributor.advisor', 'dc.contributor.committeeMember']
-
-facultyNames = []
-
-offset = 0
-recordsEdited = 0
-items = ''
-while items != []:
- endpoint = baseURL + '/rest/filtered-items?&query_val[]=' + collSels
- endpoint += '&limit=200&offset=' + str(offset)
- print(endpoint)
- response = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- items = response['items']
- for item in items:
- itemMetadataProcessed = []
- itemLink = item['link']
- metadata = requests.get(baseURL + itemLink + '/metadata',
- headers=header, cookies=cookies,
- verify=verify).json()
- for metadataElement in metadata:
- if metadataElement['key'] in nameFields:
- facultyName = metadataElement['value']
- if facultyName not in facultyNames:
- facultyNames.append(facultyName)
- offset = offset + 200
- print(offset)
-
-for facultyName in facultyNames:
- f.writerow([facultyName])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/getGlobalLanguageValues.py b/getGlobalLanguageValues.py
deleted file mode 100644
index 0abfc7c..0000000
--- a/getGlobalLanguageValues.py
+++ /dev/null
@@ -1,89 +0,0 @@
-import requests
-import csv
-import time
-import urllib3
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-itemList = []
-endpoint = baseURL + '/rest/communities'
-communities = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-for i in range(0, len(communities)):
- communityID = communities[i]['uuid']
- collections = requests.get(baseURL + '/rest/communities/'
- + str(communityID) + '/collections',
- headers=header, cookies=cookies,
- verify=verify).json()
- for j in range(0, len(collections)):
- collectionID = collections[j]['uuid']
- if collectionID not in skipColl:
- offset = 0
- items = ''
- while items != []:
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID)
- + '/items?limit=100&offset='
- + str(offset), headers=header,
- cookies=cookies, verify=verify)
- while items.status_code != 200:
- time.sleep(5)
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID)
- + '/items?limit=100&offset='
- + str(offset), headers=header,
- cookies=cookies, verify=verify)
- items = items.json()
- for k in range(0, len(items)):
- itemID = items[k]['uuid']
- itemList.append(itemID)
- offset = offset + 100
-
-dsFunc.elapsedTime(startTime, 'Item list creation time')
-
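-# collect every distinct language code used by any metadata element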
-valueList = []
-for number, itemID in enumerate(itemList):
- itemsRemaining = len(itemList) - number
- print('Items remaining: ', itemsRemaining, 'ItemID: ', itemID)
- metadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify).json()
- for l in range(0, len(metadata)):
- metadataValue = metadata[l]['language']
- if metadataValue not in valueList:
- valueList.append(metadataValue)
-
-f = csv.writer(open(filePath + 'globalLanguageValues.csv', 'w'))
-f.writerow(['language'])
-for m in range(0, len(valueList)):
- f.writerow([valueList[m]])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/getHandlesAndBitstreamsFromCollection.py b/getHandlesAndBitstreamsFromCollection.py
deleted file mode 100644
index e6d7b26..0000000
--- a/getHandlesAndBitstreamsFromCollection.py
+++ /dev/null
@@ -1,95 +0,0 @@
-import requests
-import time
-import csv
-import urllib3
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-handle = input('Enter handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-endpoint = baseURL + '/rest/handle/' + handle
-collection = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-collectionID = collection['uuid']
-collectionTitle = collection['name']
-itemList = {}
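-# map each item's REST link to its handle; both appear in the
-# per-bitstream rows written below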
-offset = 0
-items = ''
-while items != []:
- items = requests.get(baseURL + '/rest/collections/' + str(collectionID)
- + '/items?limit=200&offset=' + str(offset),
- headers=header, cookies=cookies, verify=verify)
- while items.status_code != 200:
- time.sleep(5)
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID) + '/items?limit=200&offset='
- + str(offset), headers=header, cookies=cookies,
- verify=verify)
- items = items.json()
- for k in range(0, len(items)):
- itemID = items[k]['uuid']
- itemID = '/rest/items/' + itemID
- itemHandle = items[k]['handle']
- itemList[itemID] = itemHandle
- offset = offset + 200
- print(offset)
-
-handle = handle.replace('/', '-')
-f = csv.writer(open(filePath + handle + 'handlesAndBitstreams.csv', 'w'))
-f.writerow(['bitstream'] + ['handle'] + ['title'] + ['date'] + ['description'])
-
-for k, v in itemList.items():
- itemID = k
- itemHandle = v
- print(itemID)
- metadata = requests.get(baseURL + itemID + '/metadata', headers=header,
- cookies=cookies, verify=verify).json()
- title = ''
- date = ''
- description = ''
- for i in range(0, len(metadata)):
- if metadata[i]['key'] == 'dc.title':
- title = metadata[i]['value']
- if metadata[i]['key'] == 'dc.date.issued':
- date = metadata[i]['value']
- if metadata[i]['key'] == 'dc.description.abstract':
- description = metadata[i]['value']
-
- bitstreams = requests.get(baseURL + itemID + '/bitstreams', headers=header,
- cookies=cookies, verify=verify).json()
- for bitstream in bitstreams:
- fileName = bitstream['name']
-        fileName = fileName.replace('.jpg', '')
- f.writerow([fileName] + [itemHandle] + [title] + [date]
- + [description])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/getLanguageValuesForKeys.py b/getLanguageValuesForKeys.py
deleted file mode 100644
index a3ba664..0000000
--- a/getLanguageValuesForKeys.py
+++ /dev/null
@@ -1,93 +0,0 @@
-import requests
-import csv
-import time
-import urllib3
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header,
- verify=verify, params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-itemList = []
-endpoint = baseURL + '/rest/communities'
-communities = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-for i in range(0, len(communities)):
- communityID = communities[i]['uuid']
- collections = requests.get(baseURL + '/rest/communities/'
- + str(communityID) + '/collections',
- headers=header, cookies=cookies,
- verify=verify).json()
- for j in range(0, len(collections)):
- collectionID = collections[j]['uuid']
- if collectionID not in skipColl:
- offset = 0
- items = ''
- while items != []:
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID)
- + '/items?limit=200&offset='
- + str(offset), headers=header,
- cookies=cookies, verify=verify)
- while items.status_code != 200:
- time.sleep(5)
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID)
- + '/items?limit=200&offset='
- + str(offset), headers=header,
- cookies=cookies, verify=verify)
- items = items.json()
- for k in range(0, len(items)):
- itemID = items[k]['uuid']
- itemList.append(itemID)
- offset = offset + 200
-
-dsFunc.elapsedTime(startTime, 'Item list creation time')
-
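-# record each distinct (key, language) pairing found in the repository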
-valueList = []
-for number, itemID in enumerate(itemList):
- itemsRemaining = len(itemList) - number
- print('Items remaining: ', itemsRemaining, 'ItemID: ', itemID)
- metadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify).json()
- for l in range(0, len(metadata)):
- metadataKeyLanguagePair = {}
- metadataKey = metadata[l]['key']
- metadataLanguage = metadata[l]['language']
- metadataKeyLanguagePair[metadataKey] = metadataLanguage
- if metadataKeyLanguagePair not in valueList:
- valueList.append(metadataKeyLanguagePair)
-
-f = csv.writer(open(filePath + 'keyLanguageValues.csv', 'w'))
-f.writerow(['key'] + ['language'])
-for m in range(0, len(valueList)):
-    for k, v in valueList[m].items():
- f.writerow([k] + [v])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/getRecordsAndValuesForKey.py b/getRecordsAndValuesForKey.py
deleted file mode 100644
index af79ccd..0000000
--- a/getRecordsAndValuesForKey.py
+++ /dev/null
@@ -1,77 +0,0 @@
-import requests
-import csv
-import time
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-k', '--key', help='the key to be searched. optional - '
- 'if not provided, the script will ask for input')
-args = parser.parse_args()
-
-if args.key:
- key = args.key
-else:
- key = input('Enter the key: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-f = csv.writer(open(filePath + 'recordsWith' + key + '.csv', 'w'))
-f.writerow(['itemID'] + ['uri'] + [key])
-offset = 0
-recordsEdited = 0
-items = ''
-itemLinks = []
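-# first pass: page through the filtered-items results and gather item
-# links; the second pass below fetches each item's metadata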
-while items != []:
- endpoint = baseURL + '/rest/filtered-items?query_field[]=' + key
- endpoint += '&query_op[]=exists&query_val[]=&limit=200&offset='
- endpoint += str(offset)
- print(endpoint)
- response = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- items = response['items']
- for item in items:
- itemMetadataProcessed = []
- itemLink = item['link']
- itemLinks.append(itemLink)
- offset = offset + 200
- print(offset)
-for itemLink in itemLinks:
- metadata = requests.get(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify).json()
- for l in range(0, len(metadata)):
- if metadata[l]['key'] == key:
- metadataValue = metadata[l]['value']
- for l in range(0, len(metadata)):
- if metadata[l]['key'] == 'dc.identifier.uri':
- uri = metadata[l]['value']
- f.writerow([itemLink] + [uri] + [metadataValue])
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/getRecordsAndValuesForKeyInCollection.py b/getRecordsAndValuesForKeyInCollection.py
deleted file mode 100644
index 3f2b005..0000000
--- a/getRecordsAndValuesForKeyInCollection.py
+++ /dev/null
@@ -1,92 +0,0 @@
-import requests
-import csv
-import time
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-k', '--key', help='the key to be searched. optional - '
- 'if not provided, the script will ask for input')
-parser.add_argument('-i', '--handle', help='handle of the collection to '
-                    'retrieve. optional - if not provided, the script will '
- 'ask for input')
-args = parser.parse_args()
-
-if args.key:
- key = args.key
-else:
- key = input('Enter the key: ')
-if args.handle:
- handle = args.handle
-else:
- handle = input('Enter collection handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-endpoint = baseURL + '/rest/handle/' + handle
-collection = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-collectionID = collection['uuid']
-collSels = '&collSel[]=' + collectionID
-
-f = csv.writer(open(filePath + 'recordsWith' + key + handle.replace('/', '-')
- + '.csv', 'w'))
-f.writerow(['itemID'] + ['uri'] + [key])
-offset = 0
-recordsEdited = 0
-items = ''
-itemLinks = []
-while items != []:
- endpoint = baseURL + '/rest/filtered-items?query_field[]=' + key
- endpoint += '&query_op[]=exists&query_val[]=' + collSels
- endpoint += '&limit=200&offset=' + str(offset)
- print(endpoint)
- response = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- items = response['items']
- for item in items:
- itemMetadataProcessed = []
- itemLink = item['link']
- itemLinks.append(itemLink)
- offset = offset + 200
- print(offset)
-for itemLink in itemLinks:
- metadata = requests.get(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify).json()
- for l in range(0, len(metadata)):
- if metadata[l]['key'] == key:
- metadataValue = metadata[l]['value']
- for l in range(0, len(metadata)):
- if metadata[l]['key'] == 'dc.identifier.uri':
- uri = metadata[l]['value']
- f.writerow([itemLink] + [uri] + [metadataValue])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/getRecordsWithKeyAndValue.py b/getRecordsWithKeyAndValue.py
deleted file mode 100644
index 335e404..0000000
--- a/getRecordsWithKeyAndValue.py
+++ /dev/null
@@ -1,84 +0,0 @@
-import requests
-import csv
-import time
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-k', '--key', help='the key to be searched. optional - '
- 'if not provided, the script will ask for input')
-parser.add_argument('-v', '--value', help='the value to be searched. optional '
- '- if not provided, the script will ask for input')
-args = parser.parse_args()
-
-if args.key:
- key = args.key
-else:
- key = input('Enter the key: ')
-if args.value:
- value = args.value
-else:
- value = input('Enter the value: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-f = csv.writer(open(filePath + 'Key=' + key + ' Value=' + value + '.csv', 'w'))
-f.writerow(['itemID'] + ['uri'] + ['key'] + ['value'])
-offset = 0
-recordsEdited = 0
-items = ''
-itemLinks = []
-while items != []:
- endpoint = baseURL + '/rest/filtered-items?query_field[]=' + key
- endpoint += '&query_op[]=equals&query_val[]=' + value
- endpoint += '&limit=200&offset=' + str(offset)
- print(endpoint)
- response = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- items = response['items']
- for item in items:
- itemMetadataProcessed = []
- itemLink = item['link']
- itemLinks.append(itemLink)
- offset = offset + 200
- print(offset)
-for itemLink in itemLinks:
- metadata = requests.get(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify).json()
- for i in range(0, len(metadata)):
- if metadata[i]['key'] == key and metadata[i]['value'] == value:
- metadataValue = metadata[i]['value']
- for i in range(0, len(metadata)):
- if metadata[i]['key'] == 'dc.identifier.uri':
- uri = metadata[i]['value']
- f.writerow([itemLink] + [uri] + [key] + [metadataValue])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/identifyItemsMissingKeyInCommunity.py b/identifyItemsMissingKeyInCommunity.py
deleted file mode 100644
index ffa7d0a..0000000
--- a/identifyItemsMissingKeyInCommunity.py
+++ /dev/null
@@ -1,97 +0,0 @@
-import requests
-import time
-import csv
-from datetime import datetime
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-k', '--key', help='the key to be searched. optional - '
- 'if not provided, the script will ask for input')
-parser.add_argument('-i', '--handle', help='handle of the community to '
-                    'retrieve. optional - if not provided, the script will '
- 'ask for input')
-args = parser.parse_args()
-
-if args.key:
- key = args.key
-else:
- key = input('Enter the key to be searched: ')
-
-if args.handle:
- handle = args.handle
-else:
-    handle = input('Enter community handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header,
- verify=verify, params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-endpoint = baseURL + '/rest/handle/' + handle
-community = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-communityID = community['uuid']
-collections = requests.get(baseURL + '/rest/communities/' + str(communityID)
- + '/collections', headers=header, cookies=cookies,
- verify=verify).json()
-collSels = ''
-for j in range(0, len(collections)):
- collectionID = collections[j]['uuid']
- collSel = '&collSel[]=' + collectionID
- collSels = collSels + collSel
-
-date = datetime.now().strftime('%Y-%m-%d %H.%M.%S')
-f = csv.writer(open(filePath + 'recordsMissing' + key + date + '.csv', 'w'))
-f.writerow(['itemID'] + ['key'])
-offset = 0
-recordsEdited = 0
-items = ''
-itemLinks = []
-while items != []:
- endpoint = baseURL + '/rest/filtered-items?query_field[]=' + key
- endpoint += '&query_op[]=doesnt_exist&query_val[]=' + collSels
- endpoint += '&limit=200&offset=' + str(offset)
- print(endpoint)
- response = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- items = response['items']
-    for item in items:
-        itemLink = item['link']
-        itemLinks.append(itemLink)
-    offset = offset + 200
-    print(offset)
-for itemLink in itemLinks:
-    # reset the processed-key list for each item
-    itemMetadataProcessed = []
-    metadata = requests.get(baseURL + itemLink + '/metadata', headers=header,
-                            cookies=cookies, verify=verify).json()
-    for metadataElement in metadata:
-        itemMetadataProcessed.append(metadataElement['key'])
- if key not in itemMetadataProcessed:
- f.writerow([itemLink])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/metadataCollectionsKeysMatrix.py b/metadataCollectionsKeysMatrix.py
deleted file mode 100644
index 7d1bc45..0000000
--- a/metadataCollectionsKeysMatrix.py
+++ /dev/null
@@ -1,154 +0,0 @@
-import requests
-import time
-import csv
-import urllib3
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-# authentication
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-endpoint = baseURL + '/rest/communities'
-communities = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-
-# create list of all item IDs (reusing the communities list fetched above)
-itemList = []
-for i in range(0, len(communities)):
- communityID = communities[i]['uuid']
- collections = requests.get(baseURL + '/rest/communities/'
- + str(communityID) + '/collections',
- headers=header, cookies=cookies,
- verify=verify).json()
- for j in range(0, len(collections)):
- collectionID = collections[j]['uuid']
- print(collectionID)
- if collectionID not in skipColl:
- offset = 0
- items = ''
- while items != []:
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID)
- + '/items?limit=200&offset='
- + str(offset), headers=header,
- cookies=cookies, verify=verify)
- while items.status_code != 200:
- time.sleep(5)
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID)
- + '/items?limit=200&offset='
- + str(offset), headers=header,
- cookies=cookies, verify=verify)
- items = items.json()
- for k in range(0, len(items)):
- itemID = items[k]['uuid']
- itemList.append(itemID)
- offset = offset + 200
- print(offset)
-
-dsFunc.elapsedTime(startTime, 'Item list creation time')
-
-# retrieve metadata from all items
-keyList = []
-for itemID in itemList:
- print(itemID)
- metadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify).json()
- for i in range(0, len(metadata)):
- key = metadata[i]['key']
- if key not in keyList:
- keyList.append(key)
-
-keyListHeader = ['collectionNameColumn']
-keyList.sort()
-keyListHeader = keyListHeader + keyList
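-# the matrix has one row per collection and one column per metadata key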
-f = csv.writer(open(filePath + 'collectionsKeysMatrix.csv', 'w'))
-f.writerow(keyListHeader)
-
-for i in range(0, len(communities)):
- communityID = communities[i]['uuid']
- communityName = communities[i]['name']
- collections = requests.get(baseURL + '/rest/communities/'
- + str(communityID) + '/collections',
- headers=header, cookies=cookies,
- verify=verify).json()
- for j in range(0, len(collections)):
- collectionID = collections[j]['uuid']
-        if collectionID in skipColl:
- print('Collection skipped')
- else:
- collectionItemList = []
- collectionName = collections[j]['name']
- fullName = communityName + ' - ' + collectionName
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID) + '/items?limit=5000',
- headers=header, cookies=cookies,
- verify=verify)
- while items.status_code != 200:
- time.sleep(5)
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID) + '/items?limit=5000',
- headers=header, cookies=cookies,
- verify=verify)
- items = items.json()
- for i in range(0, len(items)):
- itemID = items[i]['uuid']
- collectionItemList.append(itemID)
-
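-            # count how many items in this collection carry each key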
- collectionKeyCount = {}
- for key in keyList:
- collectionKeyCount[key] = 0
- for itemID in collectionItemList:
- print(itemID)
- metadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header,
- cookies=cookies, verify=verify).json()
- for i in range(0, len(metadata)):
- itemKey = metadata[i]['key']
- for key in keyList:
- if itemKey == key:
- collectionKeyCount[key] += 1
-
- collectionKeyCountList = []
- for k, v in collectionKeyCount.items():
- collectionKeyCountList.append(k + ' ' + str(v))
- collectionKeyCountList.sort()
- updatedCollKeyCountList = []
- for entry in collectionKeyCountList:
- count = entry[entry.index(' ') + 1:]
- updatedCollKeyCountList.append(count)
- fullName = [fullName]
- updatedCollKeyCountList = fullName + updatedCollKeyCountList
- f.writerow(updatedCollKeyCountList)
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/metadataOverview.py b/metadataOverview.py
deleted file mode 100644
index 9da9783..0000000
--- a/metadataOverview.py
+++ /dev/null
@@ -1,151 +0,0 @@
-import requests
-import time
-import csv
-from collections import Counter
-import urllib3
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-# authentication
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-f = csv.writer(open(filePath + 'collectionStats.csv', 'w'))
-f.writerow(['Name'] + ['collectionID'] + ['collectionHandle']
- + ['numberOfItems'])
-
-itemList = []
-endpoint = baseURL + '/rest/communities'
-communities = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-for i in range(0, len(communities)):
- communityID = communities[i]['uuid']
- communityName = communities[i]['name']
- collections = requests.get(baseURL + '/rest/communities/'
- + str(communityID) + '/collections',
- headers=header, cookies=cookies,
- verify=verify).json()
- for j in range(0, len(collections)):
-        collectionID = collections[j]['uuid']
- numberItems = collections[j]['numberItems']
- collectionName = collections[j]['name']
- collectionHandle = collections[j]['handle']
- fullName = communityName + ' - ' + collectionName
- print(collectionID)
-        if collectionID not in skipColl:
-            # write the stats row once per collection, not once per page
-            f.writerow([fullName] + [collectionID] + [collectionHandle]
-                       + [str(numberItems).zfill(6)])
-            offset = 0
-            items = ''
-            while items != []:
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID)
- + '/items?limit=200&offset='
- + str(offset), headers=header,
- cookies=cookies, verify=verify)
- while items.status_code != 200:
- time.sleep(5)
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID)
- + '/items?limit=200&offset='
- + str(offset), headers=header,
- cookies=cookies, verify=verify)
- items = items.json()
- for k in range(0, len(items)):
- itemID = items[k]['uuid']
-                    concat = (str(communityID) + ':' + str(collectionID)
-                              + '|' + str(itemID))
- itemList.append(concat)
- offset = offset + 200
- print(offset)
-
-dsFunc.elapsedTime(startTime, 'Item list creation time')
-
-# retrieve metadata from all items
-keyList = []
-dcTypeList = []
-keyCount = []
-f = csv.writer(open(filePath + 'dspaceIDs.csv', 'w'))
-f.writerow(['communityID'] + ['collectionID'] + ['itemID'])
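-# each itemList entry is formatted as 'communityID:collectionID|itemID'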
-for concat in itemList:
- communityID = concat[:concat.find(':')]
- collectionID = concat[concat.find(':') + 1:concat.find('|')]
- itemID = concat[concat.find('|') + 1:]
- f.writerow([communityID] + [collectionID] + [itemID])
- concat = concat[:concat.find('|')]
- print(itemID)
- metadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify).json()
- for i in range(0, len(metadata)):
- key = metadata[i]['key']
- keyCount.append(key)
- keyConcat = concat + '|' + metadata[i]['key']
- if keyConcat not in keyList:
- keyList.append(keyConcat)
- if metadata[i]['key'] == 'dc.type':
- dcType = metadata[i]['value']
- if dcType not in dcTypeList:
- dcTypeList.append(dcType)
-
-print('writing types')
-f = csv.writer(open(filePath + 'dspaceTypes.csv', 'w'))
-f.writerow(['type'])
-for dcType in dcTypeList:
- f.writerow([dcType])
-
-print('writing global key counts')
-f = csv.writer(open(filePath + 'keyCount.csv', 'w'))
-f.writerow(['key'] + ['count'])
-countDict = Counter(keyCount)
-for key, value in countDict.items():
- f.writerow([key] + [str(value).zfill(6)])
-
-print('writing collection metadata keys')
-f = csv.writer(open(filePath + 'collectionMetadataKeys.csv', 'w'))
-f.writerow(['fullName'] + ['collectionID'] + ['collectionHandle'] + ['key'])
-for concat in keyList:
- communityID = concat[:concat.find(':')]
- collectionID = concat[concat.find(':') + 1:concat.find('|')]
- key = concat[concat.rfind('|') + 1:]
- additionalDataCommunity = requests.get(baseURL + '/rest/communities/'
- + str(communityID), headers=header,
- cookies=cookies,
- verify=verify).json()
- communityName = additionalDataCommunity['name']
- additionalDataCollection = requests.get(baseURL + '/rest/collections/'
- + str(collectionID),
- headers=header, cookies=cookies,
- verify=verify).json()
- collectionName = additionalDataCollection['name']
- collectionHandle = additionalDataCollection['handle']
- fullName = communityName + ' - ' + collectionName
- f.writerow([fullName] + [collectionID] + [collectionHandle] + [key])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/overwriteExistingMetadata.py b/overwriteExistingMetadata.py
deleted file mode 100644
index ab15143..0000000
--- a/overwriteExistingMetadata.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import json
-import requests
-import time
-import csv
-from datetime import datetime
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-f', '--fileName', help='the name of the CSV with '
- 'handles and file identifiers. optional - if not '
- 'provided, the script will ask for input')
-args = parser.parse_args()
-if args.fileName:
- fileName = args.fileName
-else:
- fileName = input('Enter file name: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-handleIdDict = {}
-with open(fileName) as csvfile:
- reader = csv.DictReader(csvfile)
- for row in reader:
- fileIdentifier = row['fileId']
- handle = row['handle']
- handleIdDict[fileIdentifier] = handle
-print(handleIdDict)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-collectionMetadata = json.load(open('metadataOverwrite.json'))
-
-f = csv.writer(open(filePath + 'metadataOverwrite'
- + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w'))
-f.writerow(['itemID'] + ['delete'] + ['post'])
-
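-# for each fileIdentifier/handle pair, rebuild the item's metadata and
-# replace it on the server with a delete followed by a put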
-for k, v in handleIdDict.items():
- for itemMetadata in collectionMetadata:
- updatedItemMetadata = {}
- updatedItemMetadataList = []
- for element in itemMetadata['metadata']:
- if element['key'] == 'fileIdentifier':
- fileIdentifier = element['value']
- else:
- updatedItemMetadataList.append(element)
- uriElement = {}
- uriElement['key'] = 'dc.identifier.uri'
- uriElement['value'] = 'http://jhir.library.jhu.edu/handle/' + v
- updatedItemMetadataList.append(uriElement)
- provNote = ('Item metadata updated through a batch process on '
- + datetime.now().strftime('%Y-%m-%d %H:%M:%S') + '.')
- provNoteElement = {}
- provNoteElement['key'] = 'dc.description.provenance'
- provNoteElement['value'] = provNote
- provNoteElement['language'] = 'en_US'
- updatedItemMetadataList.append(provNoteElement)
-
- if fileIdentifier == k:
- print(fileIdentifier)
- endpoint = baseURL + '/rest/handle/' + v
- item = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- itemID = item['uuid']
- metadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header,
- cookies=cookies, verify=verify).json()
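-            # drop the derived schema/element/qualifier keys and carry over
-            # the existing provenance and date fields before re-posting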
- for l in range(0, len(metadata)):
- metadata[l].pop('schema', None)
- metadata[l].pop('element', None)
- metadata[l].pop('qualifier', None)
- if metadata[l]['key'] == 'dc.description.provenance':
- updatedItemMetadataList.append(metadata[l])
- if metadata[l]['key'] == 'dc.date.available':
- updatedItemMetadataList.append(metadata[l])
- if metadata[l]['key'] == 'dc.date.accessioned':
- updatedItemMetadataList.append(metadata[l])
- updatedItemMetadata = json.dumps(updatedItemMetadataList)
- delete = requests.delete(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header,
- cookies=cookies, verify=verify)
- print(delete)
- post = requests.put(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify, data=updatedItemMetadata)
- print(post)
- f.writerow([itemID] + [delete] + [post])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/postCollection.py b/postCollection.py
deleted file mode 100644
index 845e865..0000000
--- a/postCollection.py
+++ /dev/null
@@ -1,215 +0,0 @@
-import json
-import requests
-import datetime
-import time
-import os
-import csv
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-d', '--directory', help='the directory of the files. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-e', '--fileExtension', help='the file extension. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-i', '--communityHandle', help='handle of the community. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-n', '--collectionName', help='the name of the '
- 'collection. optional - if not provided, the script will '
- 'ask for input')
-args = parser.parse_args()
-
-if args.directory:
- directory = args.directory
-else:
- directory = input('Enter directory (C:/Test/): ')
-if args.fileExtension:
- fileExtension = args.fileExtension
-else:
- fileExtension = input('Enter file extension: ')
-if args.communityHandle:
- communityHandle = args.communityHandle
-else:
- communityHandle = input('Enter community handle: ')
-if args.collectionName:
- collectionName = args.collectionName
-else:
- collectionName = input('Enter collection name: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-headerFileUpload = {'accept': 'application/json'}
-
-# create file list and export csv
-fileList = {}
-for root, dirs, files in os.walk(directory, topdown=True):
- print('building file list')
- for file in files:
- if file.endswith(fileExtension):
- fullFilePath = os.path.join(root, file).replace('\\', '/')
- fileList[file[:file.index('.')]] = fullFilePath
-
-dsFunc.elapsedTime(startTime, 'File list creation time')
-
-f = csv.writer(open(collectionName.replace(' ', '') + 'fileList.csv', 'w'))
-f.writerow(['fileName'])
-
-for k, v in fileList.items():
- f.writerow([v[v.rindex('/') + 1:]])
-
-f2 = open('fileListDict.txt', 'w')
-f2.write(json.dumps(fileList))
-
-# Use this section of code if 'fileListDict.txt' has already been generated and
-# comment out lines 64-83. This is useful if uploading a very large collection
-# as generating the file list will take some time.
-# f3=open('fileListDict.txt', 'r')
-# fileList = json.load(f3)
-
-# Get community ID
-endpoint = baseURL + '/rest/handle/' + communityHandle
-community = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-communityID = str(community['uuid'])
-
-# Post collection
-collection = json.dumps({'name': collectionName})
-post = requests.post(baseURL + '/rest/communities/' + communityID
- + '/collections', headers=header, cookies=cookies,
- verify=verify, data=collection).json()
-collectionID = post['link']
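-# the REST response's 'link' is the collection's path, so it can be used
-# directly when posting items below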
-
-# Post items
-collectionMetadata = json.load(open(directory + '/metadata.json'))
-for itemMetadata in collectionMetadata:
- fileExists = ''
- updatedItemMetadata = {}
- updatedItemMetadataList = []
- for element in itemMetadata['metadata']:
- if element['key'] == 'fileIdentifier':
- fileIdentifier = element['value']
- else:
- updatedItemMetadataList.append(element)
- updatedItemMetadata['metadata'] = updatedItemMetadataList
- updatedItemMetadata = json.dumps(updatedItemMetadata)
- for k in fileList:
- if fileIdentifier in k:
- fileExists = True
- if fileExists is True:
- print(fileIdentifier)
- post = requests.post(baseURL + collectionID + '/items', headers=header,
- cookies=cookies, verify=verify,
- data=updatedItemMetadata).json()
- print(json.dumps(post))
- itemID = post['link']
-
- # #Post bitstream - front and back. Deprecated method
- # for k, v in fileList.items():
- # if k == fileIdentifier + '-Front':
- # bitstream = fileList[k]
- # fileName = bitstream[bitstream.rfind('/') + 1:]
- # data = open(bitstream, 'rb')
- # post = requests.post(baseURL + itemID + '/bitstreams?name='
- # + fileName, headers=headerFileUpload,
- # verify=verify, data=data).json()
- # print(post)
- #
- # for k, v in fileList.items():
- # if k == fileIdentifier + '-Back':
- # bitstream = fileList[k]
- # fileName = bitstream[bitstream.rfind('/') + 1:]
- # data = open(bitstream, 'rb')
- # post = requests.post(baseURL + itemID + '/bitstreams?name='
- # + fileName, headers=headerFileUpload,
- # verify=verify, data=data).json()
- # print(post)
-
- # Post bitstream - starts with file identifier
- for k, v in fileList.items():
- if k.startswith(fileIdentifier):
- bitstream = fileList[k]
- fileName = bitstream[bitstream.rfind('/') + 1:]
- data = open(bitstream, 'rb')
- post = requests.post(baseURL + itemID + '/bitstreams?name='
- + fileName, headers=headerFileUpload,
- cookies=cookies, verify=verify,
- data=data).json()
- print(json.dumps(post))
-
- # Create provenance notes
- provNote = {}
- provNote['key'] = 'dc.description.provenance'
- provNote['language'] = 'en_US'
- utc = datetime.datetime.utcnow()
- utcTime = utc.strftime('%Y-%m-%dT%H:%M:%SZ')
- bitstreams = requests.get(baseURL + itemID + '/bitstreams',
- headers=header, cookies=cookies,
- verify=verify).json()
- bitstreamCount = len(bitstreams)
- provNoteValue = ('Submitted by ' + userFullName + ' (' + email + ')'
- + ' on ' + utcTime + ' (GMT). No. of bitstreams: '
- + str(bitstreamCount))
- for bitstream in bitstreams:
- fileName = bitstream['name']
- size = str(bitstream['sizeBytes'])
- checksum = bitstream['checkSum']['value']
- algorithm = bitstream['checkSum']['checkSumAlgorithm']
- provNoteValue = (provNoteValue + ' ' + fileName + ': ' + size
- + ' bytes, checkSum: ' + checksum + ' ('
- + algorithm + ')')
- provNote['value'] = provNoteValue
-
- provNote2 = {}
- provNote2['key'] = 'dc.description.provenance'
- provNote2['language'] = 'en_US'
-
- provNote2Value = ('Made available in DSpace on ' + utcTime
- + ' (GMT). No. of bitstreams: '
- + str(bitstreamCount))
- for bitstream in bitstreams:
- fileName = bitstream['name']
- size = str(bitstream['sizeBytes'])
- checksum = bitstream['checkSum']['value']
- algorithm = bitstream['checkSum']['checkSumAlgorithm']
- provNote2Value = (provNote2Value + ' ' + fileName + ': ' + size
- + ' bytes, checkSum: ' + checksum + ' ('
- + algorithm + ')')
- provNote2['value'] = provNote2Value
-
- # Post provenance notes
- provNote = json.dumps([provNote, provNote2])
- post = requests.put(baseURL + itemID + '/metadata', headers=header,
- cookies=cookies, verify=verify, data=provNote)
- print(post)
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/removeDuplicateKeyValuePairsFromItems.py b/removeDuplicateKeyValuePairsFromItems.py
deleted file mode 100644
index c1eda9f..0000000
--- a/removeDuplicateKeyValuePairsFromItems.py
+++ /dev/null
@@ -1,124 +0,0 @@
-import json
-import requests
-import time
-import csv
-from datetime import datetime
-import urllib3
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-itemList = []
-endpoint = baseURL + '/rest/communities'
-communities = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-for i in range(0, len(communities)):
- communityID = communities[i]['uuid']
- collections = requests.get(baseURL + '/rest/communities/'
- + str(communityID) + '/collections',
- headers=header, cookies=cookies,
- verify=verify).json()
- for j in range(0, len(collections)):
- collectionID = collections[j]['uuid']
- if collectionID not in skipColl:
- offset = 0
- items = ''
- while items != []:
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID)
- + '/items?limit=200&offset='
- + str(offset), headers=header,
- cookies=cookies, verify=verify)
- while items.status_code != 200:
- time.sleep(5)
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID)
- + '/items?limit=200&offset='
- + str(offset), headers=header,
- cookies=cookies, verify=verify)
- items = items.json()
- for k in range(0, len(items)):
- itemID = items[k]['uuid']
- itemList.append(itemID)
- offset = offset + 200
-
-dsFunc.elapsedTime(startTime, 'Item list creation time')
-
-f = csv.writer(open(filePath + 'DuplicateKeysRemoved'
- + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w'))
-f.writerow(['itemID'] + ['key:value'])
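-# the first occurrence of each key/value pair is kept; later duplicates are
-# logged to the CSV and dropped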
-for number, itemID in enumerate(itemList):
- itemMetadataProcessed = []
- keyValueList = []
- itemsRemaining = len(itemList) - number
- print('Items remaining: ', itemsRemaining, 'ItemID: ', itemID)
- metadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify).json()
- changeRecord = False
- for metadataElement in metadata:
- metadataElement.pop('schema', None)
- metadataElement.pop('element', None)
- metadataElement.pop('qualifier', None)
- key = metadataElement['key']
- try:
- value = metadataElement['value']
-        except KeyError:
- value = ''
- if key != 'dc.description.provenance':
- keyValue = {'key': key, 'value': value}
- if keyValue not in keyValueList:
- itemMetadataProcessed.append(metadataElement)
- keyValueList.append(keyValue)
- else:
- f.writerow([itemID] + [keyValue])
- currTime = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
- provNote = 'A duplicate element, \'' + key + ': ' + value
-            provNote += ',\' was removed through a batch process '
- provNote += 'on ' + currTime + '.'
- provNoteElement = {}
- provNoteElement['key'] = 'dc.description.provenance'
- provNoteElement['value'] = provNote
- provNoteElement['language'] = 'en_US'
- itemMetadataProcessed.append(provNoteElement)
- changeRecord = True
- if changeRecord is True:
- itemMetadataProcessed = json.dumps(itemMetadataProcessed)
- print(itemID)
- delete = requests.delete(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header,
- cookies=cookies, verify=verify)
- print(delete)
- post = requests.put(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify, data=itemMetadataProcessed)
- print(post)
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/replaceKey.py b/replaceKey.py
deleted file mode 100644
index 01ffc7e..0000000
--- a/replaceKey.py
+++ /dev/null
@@ -1,120 +0,0 @@
-import json
-import requests
-import time
-import csv
-from datetime import datetime
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-1', '--replacedKey', help='the key to be replaced. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-2', '--replacementKey', help='the replacement key. '
- 'optional - if not provided, the script will ask for '
- 'input')
-args = parser.parse_args()
-
-if args.replacedKey:
- replacedKey = args.replacedKey
-else:
- replacedKey = input('Enter the key to be replaced: ')
-if args.replacementKey:
- replacementKey = args.replacementKey
-else:
- replacementKey = input('Enter the replacement key: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-date = datetime.now().strftime('%Y-%m-%d %H.%M.%S')
-f = csv.writer(open(filePath + 'replaceKey' + date + '.csv', 'w'))
-f.writerow(['itemID'] + ['replacedKey'] + ['replacedValue'] + ['delete']
- + ['post'])
-offset = 0
-recordsEdited = 0
-items = ''
-itemLinks = []
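-# the filtered-items endpoint returns every item where the replaced key
-# exists, 200 links per page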
-while items != []:
- endpoint = baseURL + '/rest/filtered-items?query_field[]='
- endpoint += replacedKey
- endpoint += '&query_op[]=exists&query_val[]=&limit=200&offset='
- endpoint += str(offset)
- print(endpoint)
- response = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- items = response['items']
- for item in items:
- itemMetadataProcessed = []
- itemLink = item['link']
- itemLinks.append(itemLink)
- offset = offset + 200
- print(offset)
-for itemLink in itemLinks:
- itemMetadataProcessed = []
- print(itemLink)
- metadata = requests.get(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify).json()
- for l in range(0, len(metadata)):
- metadata[l].pop('schema', None)
- metadata[l].pop('element', None)
- metadata[l].pop('qualifier', None)
- if metadata[l]['key'] == replacedKey:
- replacedElement = metadata[l]
- updatedMetadataElement = {}
- updatedMetadataElement['key'] = replacementKey
- updatedMetadataElement['value'] = replacedElement['value']
- updatedMetadataElement['language'] = replacedElement['language']
- print(updatedMetadataElement)
- itemMetadataProcessed.append(updatedMetadataElement)
- provNote = '\'' + replacedKey + '\' was replaced by \''
- provNote += replacementKey
- date = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
- provNote += '\' through a batch process on ' + date + '.'
- provNoteElement = {}
- provNoteElement['key'] = 'dc.description.provenance'
- provNoteElement['value'] = provNote
- provNoteElement['language'] = 'en_US'
- itemMetadataProcessed.append(provNoteElement)
- else:
- if metadata[l] not in itemMetadataProcessed:
- itemMetadataProcessed.append(metadata[l])
- itemMetadataProcessed = json.dumps(itemMetadataProcessed)
- delete = requests.delete(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify)
- print(delete)
- post = requests.put(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify,
- data=itemMetadataProcessed)
- print(post)
- f.writerow([itemLink] + [replacedElement['key']]
- + [replacedElement['value']] + [delete] + [post])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/replaceKeyForCollection.py b/replaceKeyForCollection.py
deleted file mode 100644
index f91e58c..0000000
--- a/replaceKeyForCollection.py
+++ /dev/null
@@ -1,132 +0,0 @@
-import json
-import requests
-import time
-import csv
-from datetime import datetime
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-1', '--replacedKey', help='the key to be replaced. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-2', '--replacementKey', help='the replacement key. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-i', '--handle', help='handle of the collection to '
-                    'retrieve. optional - if not provided, the script will '
- 'ask for input')
-args = parser.parse_args()
-
-if args.replacedKey:
- replacedKey = args.replacedKey
-else:
- replacedKey = input('Enter the key to be replaced: ')
-if args.replacementKey:
- replacementKey = args.replacementKey
-else:
- replacementKey = input('Enter the replacement key: ')
-if args.handle:
- handle = args.handle
-else:
- handle = input('Enter collection handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-endpoint = baseURL + '/rest/handle/' + handle
-collection = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-collectionID = collection['uuid']
-collSels = '&collSel[]=' + collectionID
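-# collSel[] limits the filtered-items query to this collection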
-date = datetime.now().strftime('%Y-%m-%d %H.%M.%S')
-f = csv.writer(open(filePath + 'replaceKey' + date + '.csv', 'w'))
-f.writerow(['itemID'] + ['replacedKey'] + ['replacedValue'] + ['delete']
- + ['post'])
-offset = 0
-recordsEdited = 0
-items = ''
-itemLinks = []
-while items != []:
- endpoint = baseURL + '/rest/filtered-items?query_field[]='
- endpoint += replacedKey + '&query_op[]=exists&query_val[]='
- endpoint += collSels + '&limit=200&offset=' + str(offset)
- print(endpoint)
- response = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- items = response['items']
- for item in items:
- itemMetadataProcessed = []
- itemLink = item['link']
- itemLinks.append(itemLink)
- offset = offset + 200
- print(offset)
-for itemLink in itemLinks:
- itemMetadataProcessed = []
- print(itemLink)
- metadata = requests.get(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify).json()
- for l in range(0, len(metadata)):
- metadata[l].pop('schema', None)
- metadata[l].pop('element', None)
- metadata[l].pop('qualifier', None)
- if metadata[l]['key'] == replacedKey:
- replacedElement = metadata[l]
- updatedMetadataElement = {}
- updatedMetadataElement['key'] = replacementKey
- updatedMetadataElement['value'] = replacedElement['value']
- updatedMetadataElement['language'] = replacedElement['language']
- print(updatedMetadataElement)
- itemMetadataProcessed.append(updatedMetadataElement)
- provNote = '\'' + replacedKey + '\' was replaced by \''
- provNote += replacementKey
- provNote += '\' through a batch process on '
- date = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + '.'
- provNote += date
- provNoteElement = {}
- provNoteElement['key'] = 'dc.description.provenance'
- provNoteElement['value'] = provNote
- provNoteElement['language'] = 'en_US'
- itemMetadataProcessed.append(provNoteElement)
- else:
- if metadata[l] not in itemMetadataProcessed:
- itemMetadataProcessed.append(metadata[l])
- itemMetadataProcessed = json.dumps(itemMetadataProcessed)
- delete = requests.delete(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify)
- print(delete)
- post = requests.put(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify,
- data=itemMetadataProcessed)
- print(post)
- f.writerow([itemLink] + [replacedElement['key']]
- + [replacedElement['value']] + [delete] + [post])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/replaceKeyForCommunity.py b/replaceKeyForCommunity.py
deleted file mode 100644
index e98957b..0000000
--- a/replaceKeyForCommunity.py
+++ /dev/null
@@ -1,139 +0,0 @@
-import json
-import requests
-import time
-import csv
-from datetime import datetime
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-1', '--replacedKey', help='the key to be replaced. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-2', '--replacementKey', help='the replacement key. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-i', '--handle', help='handle of the community to '
-                    'retrieve. optional - if not provided, the script will '
- 'ask for input')
-args = parser.parse_args()
-
-if args.replacedKey:
- replacedKey = args.replacedKey
-else:
- replacedKey = input('Enter the key to be replaced: ')
-if args.replacementKey:
- replacementKey = args.replacementKey
-else:
- replacementKey = input('Enter the replacement key: ')
-if args.handle:
- handle = args.handle
-else:
- handle = input('Enter community handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-endpoint = baseURL + '/rest/handle/' + handle
-community = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-communityID = community['uuid']
-collections = requests.get(baseURL + '/rest/communities/' + str(communityID)
- + '/collections', headers=header, cookies=cookies,
- verify=verify).json()
-collSels = ''
-for j in range(0, len(collections)):
- collectionID = collections[j]['uuid']
- collSel = '&collSel[]=' + collectionID
- collSels = collSels + collSel
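-# one collSel[] parameter per collection restricts the query to all of the
-# community's collections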
-date = datetime.now().strftime('%Y-%m-%d %H.%M.%S')
-f = csv.writer(open(filePath + 'replaceKey' + date + '.csv', 'w'))
-f.writerow(['itemID'] + ['replacedKey'] + ['replacedValue'] + ['delete']
- + ['post'])
-offset = 0
-recordsEdited = 0
-items = ''
-itemLinks = []
-while items != []:
- endpoint = baseURL + '/rest/filtered-items?query_field[]=' + replacedKey
- endpoint += '&query_op[]=exists&query_val[]=' + collSels
- endpoint += '&limit=200&offset=' + str(offset)
- print(endpoint)
- response = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- items = response['items']
- for item in items:
- itemMetadataProcessed = []
- itemLink = item['link']
- itemLinks.append(itemLink)
- offset = offset + 200
- print(offset)
-for itemLink in itemLinks:
- itemMetadataProcessed = []
- print(itemLink)
- metadata = requests.get(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify).json()
- for l in range(0, len(metadata)):
- metadata[l].pop('schema', None)
- metadata[l].pop('element', None)
- metadata[l].pop('qualifier', None)
- if metadata[l]['key'] == replacedKey:
- replacedElement = metadata[l]
- updatedMetadataElement = {}
- updatedMetadataElement['key'] = replacementKey
- updatedMetadataElement['value'] = replacedElement['value']
- updatedMetadataElement['language'] = replacedElement['language']
- print(updatedMetadataElement)
- itemMetadataProcessed.append(updatedMetadataElement)
- provNote = '\'' + replacedKey + '\' was replaced by \''
- provNote += replacementKey
- provNote += '\' through a batch process on '
- date = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
- provNote += date + '.'
- provNoteElement = {}
- provNoteElement['key'] = 'dc.description.provenance'
- provNoteElement['value'] = provNote
- provNoteElement['language'] = 'en_US'
- itemMetadataProcessed.append(provNoteElement)
- else:
- if metadata[l] not in itemMetadataProcessed:
- itemMetadataProcessed.append(metadata[l])
- itemMetadataProcessed = json.dumps(itemMetadataProcessed)
- delete = requests.delete(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify)
- print(delete)
- post = requests.put(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify,
- data=itemMetadataProcessed)
- print(post)
- f.writerow([itemLink] + [replacedElement['key']]
- + [replacedElement['value']] + [delete] + [post])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/replaceKeyValuePairOnItemIdCSV.py b/replaceKeyValuePairOnItemIdCSV.py
deleted file mode 100644
index f9da491..0000000
--- a/replaceKeyValuePairOnItemIdCSV.py
+++ /dev/null
@@ -1,92 +0,0 @@
-import json
-import requests
-import time
-import csv
-from datetime import datetime
-import urllib3
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-fileName = filePath + input('Enter fileName (including \'.csv\'): ')
-replacedKey = input('Enter key: ')
-replacementKey = replacedKey
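-# the key itself is unchanged; only values matching replacedValue are swapped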
-
-f = csv.writer(open(filePath + 'replacedKeyValuePair'
- + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w'))
-f.writerow(['itemID'] + ['replacedKey'] + ['replacedValue']
- + ['replacementValue'] + ['delete'] + ['post'])
-
-with open(fileName) as csvfile:
- reader = csv.DictReader(csvfile)
- for row in reader:
- itemMetadataProcessed = []
- itemID = row['itemID']
- replacedValue = row['replacedValue']
- replacementValue = row['replacementValue']
- itemMetadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header,
- cookies=cookies, verify=verify).json()
- for element in itemMetadata:
- languageValue = element['language']
-            keyMatches = element['key'] == replacedKey
-            value = element['value']
-            if keyMatches and value == replacedValue:
- updatedMetadataElement = {}
- updatedMetadataElement['key'] = replacementKey
- updatedMetadataElement['value'] = replacementValue
- updatedMetadataElement['language'] = languageValue
- itemMetadataProcessed.append(updatedMetadataElement)
-
- provNote = '\'' + replacedKey + ': ' + replacedValue
- provNote += '\' was replaced by \'' + replacementKey
- provNote += ': ' + replacementValue
- provNote += '\' through a batch process on '
- currTime = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
- provNote += currTime + '.'
- provNoteElement = {}
- provNoteElement['key'] = 'dc.description.provenance'
- provNoteElement['value'] = provNote
- provNoteElement['language'] = 'en_US'
- itemMetadataProcessed.append(provNoteElement)
- else:
- itemMetadataProcessed.append(element)
- print(itemMetadata)
- itemMetadataProcessed = json.dumps(itemMetadataProcessed)
-
- delete = requests.delete(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header,
- cookies=cookies, verify=verify)
- print(delete)
- post = requests.put(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify, data=itemMetadataProcessed)
- print(post)
- f.writerow([itemID] + [replacedKey] + [replacedValue]
- + [replacementValue] + [delete] + [post])
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/replaceKeyValuePairsFromCSV.py b/replaceKeyValuePairsFromCSV.py
deleted file mode 100644
index fd7618b..0000000
--- a/replaceKeyValuePairsFromCSV.py
+++ /dev/null
@@ -1,129 +0,0 @@
-import json
-import requests
-import time
-import csv
-from datetime import datetime
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-f', '--fileName', help='the CSV file of changes. '
- 'optional - if not provided, the script will ask for '
- 'input')
-args = parser.parse_args()
-
-if args.fileName:
- fileName = filePath + args.fileName
-else:
- fileName = filePath + input('Enter the CSV of changes '
- '(including \'.csv\'): ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-f = csv.writer(open(filePath + 'searchAndReplace'
- + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w'))
-f.writerow(['itemID'] + ['replacedKey'] + ['replacedValue'] + ['delete']
- + ['post'])
-with open(fileName) as csvfile:
- reader = csv.DictReader(csvfile)
- for row in reader:
- replacedKey = row['replacedKey']
- replacementKey = row['replacementKey']
- replacedValue = row['replacedValue']
- replacementValue = row['replacementValue']
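-        # each CSV row drives one search-and-replace pass over the repository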
- offset = 0
- recordsEdited = 0
- items = ''
- itemLinks = []
- while items != []:
- endpoint = baseURL + '/rest/filtered-items?query_field[]='
- endpoint += replacedKey
- endpoint += '&query_op[]=equals&query_val[]='
- endpoint += replacedValue + '&limit=200&offset='
- endpoint += str(offset)
- print(endpoint)
- response = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- items = response['items']
- for item in items:
- itemMetadataProcessed = []
- itemLink = item['link']
- itemLinks.append(itemLink)
- offset = offset + 200
- print(offset)
- for itemLink in itemLinks:
- itemMetadataProcessed = []
- print(itemLink)
- metadata = requests.get(baseURL + itemLink + '/metadata',
- headers=header, cookies=cookies,
- verify=verify).json()
- for l in range(0, len(metadata)):
- metadata[l].pop('schema', None)
- metadata[l].pop('element', None)
- metadata[l].pop('qualifier', None)
- languageValue = metadata[l]['language']
- key = metadata[l]['key']
- value = metadata[l]['value']
- if key == replacedKey and value == replacedValue:
- replacedElement = metadata[l]
- updatedMetadataElement = {}
- updatedMetadataElement['key'] = replacementKey
- updatedMetadataElement['value'] = replacementValue
- updatedMetadataElement['language'] = languageValue
- itemMetadataProcessed.append(updatedMetadataElement)
- provNote = '\'' + replacedKey + ': ' + replacedValue
- provNote += '\' was replaced by \''
- provNote += replacementKey + ': '
- provNote += replacementValue
- provNote += '\' through a batch process on '
- currTime = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
- provNote += currTime + '.'
- provNoteElement = {}
- provNoteElement['key'] = 'dc.description.provenance'
- provNoteElement['value'] = provNote
- provNoteElement['language'] = 'en_US'
- itemMetadataProcessed.append(provNoteElement)
- else:
- if metadata[l] not in itemMetadataProcessed:
- itemMetadataProcessed.append(metadata[l])
- itemMetadataProcessed = json.dumps(itemMetadataProcessed)
- delete = requests.delete(baseURL + itemLink + '/metadata',
- headers=header, cookies=cookies,
- verify=verify)
- print(delete)
- post = requests.put(baseURL + itemLink + '/metadata',
- headers=header, cookies=cookies, verify=verify,
- data=itemMetadataProcessed)
- print(post)
- f.writerow([itemLink] + [replacedElement['key']]
- + [replacedElement['value']] + [delete] + [post])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/replaceUnnecessarySpaces.py b/replaceUnnecessarySpaces.py
deleted file mode 100644
index fd4865c..0000000
--- a/replaceUnnecessarySpaces.py
+++ /dev/null
@@ -1,115 +0,0 @@
-import json
-import requests
-import csv
-import time
-from datetime import datetime
-import urllib3
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-communityHandle = input('Enter community handle: ')
-key = input('Enter key: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-itemList = []
-endpoint = baseURL + '/rest/communities'
-communities = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-for i in range(0, len(communities)):
- communityID = communities[i]['uuid']
- collections = requests.get(baseURL + '/rest/communities/'
- + str(communityID) + '/collections',
- headers=header, cookies=cookies,
- verify=verify).json()
- for j in range(0, len(collections)):
- collectionID = collections[j]['uuid']
- if collectionID not in skipColl:
- offset = 0
- items = ''
- while items != []:
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID)
- + '/items?limit=200&offset='
- + str(offset), headers=header,
- cookies=cookies, verify=verify)
- while items.status_code != 200:
- time.sleep(5)
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID)
- + '/items?limit=200&offset='
- + str(offset), headers=header,
- cookies=cookies, verify=verify)
- items = items.json()
- for k in range(0, len(items)):
- itemID = items[k]['uuid']
- itemList.append(itemID)
- offset = offset + 200
-
-dsFunc.elapsedTime(startTime, 'Item list creation time')
-
-f = csv.writer(open(filePath + 'removeUnnecessarySpaces'
- + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w'))
-f.writerow(['itemID'] + ['replacedKey'] + ['replacedValue'] + ['delete']
- + ['post'])
-for itemID in itemList:
- itemMetadataProcessed = []
- metadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify).json()
- for i in range(0, len(metadata)):
- if metadata[i]['key'] == key:
- metadataItem = json.dumps(metadata[i])
-            if '  ' in metadataItem or ' ,' in metadataItem:
-                # collapse runs of spaces and spaces before commas in the
-                # serialized JSON, then load it back into a dict
-                metadataItem = metadataItem.replace('  ', ' ')
-                metadataItem = metadataItem.replace('  ', ' ')
-                metadataItem = metadataItem.replace(' ,', ',')
-                uptdMetadataElement = json.loads(metadataItem)
-                itemMetadataProcessed.append(uptdMetadataElement)
- f.writerow([itemID] + [metadata[i]['key']]
- + [metadata[i]['value']])
- else:
- itemMetadataProcessed.append(metadata[i])
- else:
- itemMetadataProcessed.append(metadata[i])
- if json.dumps(itemMetadataProcessed) != json.dumps(metadata):
- itemMetadataProcessed = json.dumps(itemMetadataProcessed)
- print('updated', itemID)
- delete = requests.delete(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header,
- cookies=cookies, verify=verify)
- print(delete)
- post = requests.put(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify, data=itemMetadataProcessed)
- print(post)
- else:
- print('not updated', itemID)
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/replaceValueInCollection.py b/replaceValueInCollection.py
deleted file mode 100644
index 2a37fd7..0000000
--- a/replaceValueInCollection.py
+++ /dev/null
@@ -1,145 +0,0 @@
-import json
-import requests
-import csv
-import time
-import urllib3
-import argparse
-from datetime import datetime
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-k', '--key', help='the key to be searched. optional - '
- 'if not provided, the script will ask for input')
-parser.add_argument('-1', '--replacedValue', help='the value to be replaced. '
-                    'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-2', '--replacementValue', help='the replacement value. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-i', '--handle', help='handle of the collection to '
-                    'retrieve. optional - if not provided, the script will '
- 'ask for input')
-args = parser.parse_args()
-
-if args.key:
- key = args.key
-else:
- key = input('Enter the key: ')
-if args.replacedValue:
- replacedValue = args.replacedValue
-else:
- replacedValue = input('Enter the value to be replaced: ')
-if args.replacementValue:
- replacementValue = args.replacementValue
-else:
- replacementValue = input('Enter the replacement value: ')
-if args.handle:
- handle = args.handle
-else:
- handle = input('Enter collection handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-endpoint = baseURL + '/rest/handle/' + handle
-collection = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-collectionID = collection['uuid']
-collSels = '&collSel[]=' + collectionID
-
-f = csv.writer(open(filePath + 'replacedValues'
- + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w'))
-f.writerow(['handle'] + ['replacedValue'] + ['replacementValue'] + ['delete']
-           + ['post'])
-offset = 0
-recordsEdited = 0
-items = ''
-itemLinks = []
-while items != []:
- endpoint = baseURL + '/rest/filtered-items?query_field[]=' + key
- endpoint += '&query_op[]=equals&query_val[]=' + replacedValue
- endpoint += collSels + '&limit=200&offset=' + str(offset)
- print(endpoint)
- replacedKey = key
- replacementKey = key
- response = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- items = response['items']
- for item in items:
- itemMetadataProcessed = []
- itemLink = item['link']
- itemLinks.append(itemLink)
- offset = offset + 200
- print(offset)
-for itemLink in itemLinks:
- itemMetadataProcessed = []
- print(itemLink)
- metadata = requests.get(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify).json()
- for l in range(0, len(metadata)):
- metadata[l].pop('schema', None)
- metadata[l].pop('element', None)
- metadata[l].pop('qualifier', None)
- languageValue = metadata[l]['language']
- key = metadata[l]['key']
- value = metadata[l]['value']
-        if key == replacedKey and value == replacedValue:
- replacedElement = metadata[l]
- updatedMetadataElement = {}
- updatedMetadataElement['key'] = replacementKey
- updatedMetadataElement['value'] = replacementValue
- updatedMetadataElement['language'] = languageValue
- itemMetadataProcessed.append(updatedMetadataElement)
- provNote = '\'' + replacedKey + ': ' + replacedValue
- provNote += '\' was replaced by \'' + replacementKey
- provNote += ': ' + replacementValue
- provNote += '\' through a batch process on '
- provNote += datetime.now().strftime('%Y-%m-%d %H:%M:%S')
- provNote += '.'
- provNoteElement = {}
- provNoteElement['key'] = 'dc.description.provenance'
- provNoteElement['value'] = provNote
- provNoteElement['language'] = 'en_US'
- itemMetadataProcessed.append(provNoteElement)
- recordsEdited = recordsEdited + 1
- else:
- if metadata[l] not in itemMetadataProcessed:
- itemMetadataProcessed.append(metadata[l])
- itemMetadataProcessed = json.dumps(itemMetadataProcessed)
- print('updated', itemLink, recordsEdited)
- delete = requests.delete(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify)
- print(delete)
- post = requests.put(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify,
- data=itemMetadataProcessed)
- print(post)
- f.writerow([itemLink] + [updatedMetadataElement['key']]
- + [updatedMetadataElement['value']] + [delete] + [post])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/replaceValueInCommunityFromCSV.py b/replaceValueInCommunityFromCSV.py
deleted file mode 100644
index 15b81b4..0000000
--- a/replaceValueInCommunityFromCSV.py
+++ /dev/null
@@ -1,157 +0,0 @@
-# -*- coding: utf-8 -*-
-import json
-import requests
-import csv
-import time
-import urllib3
-import argparse
-from datetime import datetime
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-i', '--handle', help='handle of the community. optional '
- '- if not provided, the script will ask for input')
-parser.add_argument('-f', '--fileName', help='the CSV file of changes. '
- 'optional - if not provided, the script will ask for '
- 'input')
-args = parser.parse_args()
-
-if args.fileName:
- fileName = args.fileName
-else:
- fileName = input('Enter the CSV of changes (including \'.csv\'): ')
-if args.handle:
- handle = args.handle
-else:
- handle = input('Enter community handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-endpoint = baseURL + '/rest/handle/' + handle
-community = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-communityID = community['uuid']
-collections = requests.get(baseURL + '/rest/communities/' + str(communityID)
- + '/collections', headers=header, cookies=cookies,
- verify=verify).json()
-collSels = ''
-for j in range(0, len(collections)):
- collectionID = collections[j]['uuid']
- collSel = '&collSel[]=' + collectionID
- collSels = collSels + collSel
-
-counter = 0
-f = csv.writer(open(filePath + 'replacedValues'
- + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w'))
-f.writerow(['handle'] + ['replacedValue'] + ['replacementValue'])
-with open(fileName) as csvfile:
- reader = csv.DictReader(csvfile)
- rowCount = len(list(reader))
-with open(fileName) as csvfile:
- reader = csv.DictReader(csvfile)
- for row in reader:
- rowCount -= 1
- replacedValue = row['replacedValue']
- replacementValue = row['replacementValue']
- print('Rows remaining: ', rowCount)
- print(replacedValue, ' -- ', replacementValue)
- if replacedValue != replacementValue:
- print(replacedValue)
- offset = 0
- recordsEdited = 0
- items = ''
- itemLinks = []
- while items != []:
- endpoint = baseURL + '/rest/filtered-items?'
- endpoint += 'query_field[]=*&query_op[]=equals'
- endpoint += '&query_val[]=' + replacedValue
- endpoint += collSels + '&limit=200&offset='
- endpoint += str(offset)
- print(endpoint)
- response = requests.get(endpoint, headers=header,
- cookies=cookies, verify=verify)
- print(response)
- response = response.json()
- items = response['items']
- print(len(items), ' search results')
- for item in items:
- itemLink = item['link']
- itemLinks.append(itemLink)
- offset = offset + 200
- print(offset)
- for itemLink in itemLinks:
- itemMetadataProcessed = []
- metadata = requests.get(baseURL + itemLink + '/metadata',
- headers=header, cookies=cookies,
- verify=verify).json()
- counter += 1
- print(counter)
- for l in range(0, len(metadata)):
- metadata[l].pop('schema', None)
- metadata[l].pop('element', None)
- metadata[l].pop('qualifier', None)
- languageValue = metadata[l]['language']
- if metadata[l]['value'] == replacedValue:
- key = metadata[l]['key']
- replacedElement = metadata[l]
- updatedMetadataElement = {}
- updatedMetadataElement['key'] = metadata[l]['key']
- updatedMetadataElement['value'] = replacementValue
- updatedMetadataElement['language'] = languageValue
- itemMetadataProcessed.append(updatedMetadataElement)
- provNote = '\'' + key + ': ' + replacedValue
- provNote += '\' was replaced by \'' + key
- provNote += ': ' + replacementValue
- provNote += '\' through a batch process on '
- currTime = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
- provNote += currTime + '.'
- provNoteElement = {}
- provNoteElement['key'] = 'dc.description.provenance'
- provNoteElement['value'] = provNote
- provNoteElement['language'] = 'en_US'
- itemMetadataProcessed.append(provNoteElement)
- recordsEdited = recordsEdited + 1
- else:
- if metadata[l] not in itemMetadataProcessed:
- itemMetadataProcessed.append(metadata[l])
- itemMetadataProcessed = json.dumps(itemMetadataProcessed)
- print('updated', itemLink, recordsEdited)
- delete = requests.delete(baseURL + itemLink + '/metadata',
- headers=header, cookies=cookies,
- verify=verify)
- print(delete)
- post = requests.put(baseURL + itemLink + '/metadata',
- headers=header, cookies=cookies,
- verify=verify, data=itemMetadataProcessed)
- print(post)
- f.writerow([itemLink] + [replacedValue] + [replacementValue]
- + [delete] + [post])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/repositoryMetadataBackup.py b/repositoryMetadataBackup.py
deleted file mode 100644
index 0e338c4..0000000
--- a/repositoryMetadataBackup.py
+++ /dev/null
@@ -1,97 +0,0 @@
-import json
-import requests
-import time
-from datetime import datetime
-import urllib3
-import os
-import dsFunc
-import argparse
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-p', '--handlePrefix', help='Enter the handle prefix')
-args = parser.parse_args()
-
-if args.handlePrefix:
- handlePrefix = args.handlePrefix
-else:
- handlePrefix = input('Enter the handle prefix: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-date = datetime.now().strftime('%Y-%m-%d %H.%M.%S')
-endpoint = baseURL + '/rest/communities'
-communities = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-backupDirectory = filePath + 'backup' + date + '/'
-os.makedirs(backupDirectory)
-for i in range(0, len(communities)):
- communityID = communities[i]['uuid']
- collections = requests.get(baseURL + '/rest/communities/'
- + str(communityID) + '/collections',
- headers=header, cookies=cookies,
- verify=verify).json()
- for j in range(0, len(collections)):
- collectionID = collections[j]['uuid']
- if collectionID not in skipColl:
- collectionHandle = collections[j]['handle']
- collectionHandle = collectionHandle.replace(handlePrefix, '')
- collectionHandle = collectionHandle.replace('/', '-')
- print('collectionID: ', collectionID)
- itemList = []
- offset = 0
- items = ''
- while items != []:
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID)
- + '/items?limit=1000&offset='
- + str(offset), headers=header,
- cookies=cookies, verify=verify)
- while items.status_code != 200:
- time.sleep(5)
- items = requests.get(baseURL + '/rest/collections/'
- + str(collectionID)
- + '/items?limit=1000&offset='
- + str(offset), headers=header,
- cookies=cookies, verify=verify)
- items = items.json()
- for k in range(0, len(items)):
- itemID = items[k]['uuid']
- itemList.append(itemID)
- offset = offset + 1000
- f = open(backupDirectory + collectionHandle + '.json', 'w')
- collectionMetadata = []
- for itemID in itemList:
- metadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header,
- cookies=cookies, verify=verify).json()
- collectionMetadata.append(metadata)
- json.dump(collectionMetadata, f)
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/repositoryMetadataRestore.py b/repositoryMetadataRestore.py
deleted file mode 100644
index eef640a..0000000
--- a/repositoryMetadataRestore.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import json
-import requests
-import os
-import time
-import dsFunc
-import argparse
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-p', '--handlePrefix', help='Enter the handle prefix')
-args = parser.parse_args()
-
-if args.handlePrefix:
- handlePrefix = args.handlePrefix
-else:
- handlePrefix = input('Enter the handle prefix: ')
-
-requests.packages.urllib3.disable_warnings()
-
-directory = filePath + input('Enter directory name: ')
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-for fileName in os.listdir(directory):
- print(fileName)
- metadataGroup = json.load(open(directory + '/' + fileName))
- for i in range(0, len(metadataGroup)):
- metadata = metadataGroup[i]
- itemMetadata = json.dumps(metadata)
- for j in range(0, len(metadata)):
- key = metadata[j]['key']
- value = metadata[j]['value']
- if key == 'dc.identifier.uri' and value.startswith(handlePrefix):
- handle = metadata[j]['value'].replace(handlePrefix, '')
- print(handle)
- endpoint = baseURL + '/rest/handle/' + handle
- item = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- itemID = item['uuid']
- print(fileName, itemID)
- delete = requests.delete(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header,
- cookies=cookies, verify=verify)
- print(delete)
- post = requests.put(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header,
- cookies=cookies, verify=verify,
- data=itemMetadata)
- print(post)
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..33de900
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,23 @@
+from setuptools import setup, find_packages
+
+setup(
+ name='dsaps',
+ version='1.0.0',
+ description='',
+ packages=find_packages(exclude=['tests']),
+ author='Eric Hanson',
+ author_email='ehanson@mit.edu',
+ install_requires=[
+ 'requests',
+ 'structlog',
+ 'attrs',
+ 'click',
+ 'lxml',
+ ],
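+ # Console-script entry point: installing the package exposes a 'dsaps' command.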
+ entry_points={
+ 'console_scripts': [
+ 'dsaps=dsaps.cli:main',
+ ]
+ },
+ python_requires='>=3.8',
+)
diff --git a/splitFieldIntoMultipleFields.py b/splitFieldIntoMultipleFields.py
deleted file mode 100644
index 4492e3f..0000000
--- a/splitFieldIntoMultipleFields.py
+++ /dev/null
@@ -1,141 +0,0 @@
-# -*- coding: utf-8 -*-
-import json
-import requests
-import csv
-import time
-import urllib3
-from datetime import datetime
-import ast
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-r', '--replacedKey', help='the key to be replaced. '
- 'optional - if not provided, the script will ask for '
- 'input')
-parser.add_argument('-f', '--fileName', help='the CSV file of changes. '
- 'optional - if not provided, the script will ask for '
- 'input')
-args = parser.parse_args()
-
-if args.replacedKey:
- replacedKey = args.replacedKey
-else:
- replacedKey = input('Enter the key to be replaced: ')
-if args.fileName:
- fileName = filePath + args.fileName
-else:
- fileName = filePath + input('Enter the file name of the CSV of changes '
- '(including \'.csv\'): ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-recordsEdited = 0
-elementsEdited = 0
-f = csv.writer(open(filePath + 'splitFieldIntoMultipleFields'
- + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w'))
-f.writerow(['itemID'] + ['replacedKey'] + ['replacementValueList']
- + ['delete'] + ['post'])
-replacedElement = ''
-with open(fileName) as csvfile:
- reader = csv.DictReader(csvfile)
- for row in reader:
- replacedValue = row['value']
- print(replacedValue)
- replacementValueList = ast.literal_eval(row['structuredList'])
- offset = 0
- items = ''
- itemLinks = []
- while items != []:
- endpoint = baseURL + '/rest/filtered-items?query_field[]='
- endpoint += replacedKey
- endpoint += '&query_op[]=equals&query_val[]='
- endpoint += replacedValue + '&limit=200&offset='
- endpoint += str(offset)
- response = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- items = response['items']
- for item in items:
- itemLink = item['link']
- itemLinks.append(itemLink)
- offset = offset + 200
- print(offset)
- for itemLink in itemLinks:
- itemMetadataProcessed = []
- print(itemLink)
- metadata = requests.get(baseURL + itemLink + '/metadata',
- headers=header, cookies=cookies,
- verify=verify).json()
- for l in range(0, len(metadata)):
- metadata[l].pop('schema', None)
- metadata[l].pop('element', None)
- metadata[l].pop('qualifier', None)
- languageValue = metadata[l]['language']
- key = metadata[l]['key']
- value = metadata[l]['value']
- if key == replacedKey and value == replacedValue:
- print('match')
- replacedElement = metadata[l]
- for replacementValue in replacementValueList:
- updatedMetadataElement = {}
- updatedMetadataElement['key'] = replacedKey
- updatedMetadataElement['value'] = replacementValue
- updatedMetadataElement['language'] = languageValue
- itemMetadataProcessed.append(updatedMetadataElement)
- currTime = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
- provNote = '\'' + replacedKey + ': ' + replacedValue
- provNote += '\' split into \'' + replacedKey
- provNote += ': ' + replacementValue
- provNote += '\' through a batch process on '
- provNote += currTime + '.'
- provNoteElement = {}
- provNoteElement['key'] = 'dc.description.provenance'
- provNoteElement['value'] = provNote
- provNoteElement['language'] = 'en_US'
- itemMetadataProcessed.append(provNoteElement)
- elementsEdited = elementsEdited + 1
- else:
- if metadata[l] not in itemMetadataProcessed:
- itemMetadataProcessed.append(metadata[l])
- recordsEdited = recordsEdited + 1
- itemMetadataProcessed = json.dumps(itemMetadataProcessed)
- print('updated', itemLink, recordsEdited, elementsEdited)
- delete = requests.delete(baseURL + itemLink + '/metadata',
- headers=header, cookies=cookies,
- verify=verify)
- print(delete)
- post = requests.put(baseURL + itemLink + '/metadata',
- headers=header, cookies=cookies, verify=verify,
- data=itemMetadataProcessed)
- print(post)
- f.writerow([itemLink] + [replacedKey] + [replacementValueList]
- + [delete] + [post])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/tests.py b/tests.py
deleted file mode 100644
index b576d59..0000000
--- a/tests.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import dsFunc
-import time
-import unittest
-
-
-class dsFuncTests(unittest.TestCase):
- """Test dsFunc.py functions."""
-
- def testElapsedTime(self):
- """Test elapsed time function."""
- startTime = time.time()
- sleepTime = 5
- time.sleep(sleepTime)
- td = dsFunc.elapsedTime(startTime, 'Elapsed run time')
- self.assertTrue(sleepTime <= int(td.seconds) <= sleepTime + 1)
-
- def testInstSelect(self):
- """Test instance select function."""
- instArray = ['secretsProd', '', 'secrets', '#$%#%##@']
- for inst in instArray:
- if inst == 'secretsProd':
- secrets = dsFunc.instSelect(inst)
- self.assertTrue(secrets.__name__ == inst)
- elif inst == 'secrets':
- secrets = dsFunc.instSelect(inst)
- self.assertTrue(secrets.__name__ == inst)
- else:
- secrets = dsFunc.instSelect(inst)
- self.assertTrue(secrets.__name__ == 'secrets')
-
- def testAuth(self):
- """Return email to confirm acceptance of credentials."""
- instArray = ['secretsProd', '', 'secrets', '#$%#%##@']
- for inst in instArray:
- secrets = dsFunc.instSelect(inst)
- email = secrets.email
- baseURL = secrets.baseURL
- password = secrets.password
- verify = secrets.verify
- cookies, header = dsFunc.auth(email, password, baseURL, verify)
-
- uName, authEmail = dsFunc.authConfirm(cookies, baseURL, header,
- verify)
- self.assertIn(email, authEmail)
-
-
-if __name__ == '__main__':
- unittest.main(warnings='ignore')
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..1b53bde
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,94 @@
+import csv
+import json
+
+from click.testing import CliRunner
+import pytest
+import requests_mock
+
+from dsaps import models
+
+
+@pytest.fixture()
+def client():
+ client = models.Client('mock://example.com/')
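+ # Pre-set auth attributes so tests can call API methods without authenticate().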
+ client.header = {}
+ client.cookies = {}
+ client.user_full_name = ''
+ return client
+
+
+@pytest.fixture()
+def input_dir(tmp_path):
+ input_dir = tmp_path / 'files'
+ input_dir.mkdir()
+ input_2nd_lvl = input_dir / 'more_files'
+ input_2nd_lvl.mkdir()
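+ # Empty sample files: two 'test_*' PDFs (one nested), a 'best' PDF, and a non-PDF 'test' JPG.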
+ with open(f'{input_dir}/test_01.pdf', 'w'):
+ pass
+ with open(f'{input_2nd_lvl}/test_02.pdf', 'w'):
+ pass
+ with open(f'{input_dir}/best_01.pdf', 'w'):
+ pass
+ with open(f'{input_dir}/test_01.jpg', 'w'):
+ pass
+ return f'{input_dir}/'
+
+
+@pytest.fixture()
+def aspace_delimited_csv():
+ with open('tests/fixtures/aspace_metadata_delimited.csv') as f:
+ reader = csv.DictReader(f)
+ yield reader
+
+
+@pytest.fixture()
+def aspace_mapping():
+ with open('config/aspace_mapping.json') as f:
+ mapping = json.load(f)
+ yield mapping
+
+
+@pytest.fixture()
+def output_dir(tmp_path):
+ output_dir = tmp_path / 'output'
+ output_dir.mkdir()
+ return f'{output_dir}/'
+
+
+@pytest.fixture()
+def runner():
+ return CliRunner()
+
+
+@pytest.fixture(autouse=True)
+def web_mock():
+ with requests_mock.Mocker() as m:
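+ # Canned responses for every DSpace REST endpoint the test suite touches.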
+ cookies = {'JSESSIONID': '11111111'}
+ m.post('mock://example.com/login', cookies=cookies)
+ user_json = {'fullname': 'User Name'}
+ m.get('mock://example.com/status', json=user_json)
+ rec_json = {'metadata': {'title': 'Sample title'}, 'type': 'item'}
+ m.get('mock://example.com/items/123?expand=all', json=rec_json)
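+ # requests_mock returns listed responses in order: the first call yields
+ # one item, the second an empty page that ends the pagination loop.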
+ results_json1 = {'items': [{'link': '1234'}]}
+ results_json2 = {'items': []}
+ m.get('mock://example.com/filtered-items?', [{'json': results_json1},
+ {'json': results_json2}])
+ rec_json = {'uuid': 'a1b2'}
+ m.get('mock://example.com/handle/111.1111', json=rec_json)
+ coll_json = {'uuid': 'c3d4'}
+ m.post('mock://example.com/communities/a1b2/collections',
+ json=coll_json)
+ item_json = {'uuid': 'e5f6', 'handle': '222.2222'}
+ m.post('mock://example.com/collections/c3d4/items', json=item_json)
+ b_json_1 = {'uuid': 'g7h8'}
+ url_1 = 'mock://example.com/items/e5f6/bitstreams?name=test_01.pdf'
+ m.post(url_1, json=b_json_1)
+ b_json_2 = {'uuid': 'i9j0'}
+ url_2 = 'mock://example.com/items/e5f6/bitstreams?name=test_02.pdf'
+ m.post(url_2, json=b_json_2)
+ m.get('mock://remoteserver.com/files/test_01.pdf', content=b'Sample')
+ coll_json = {'uuid': 'k1l2'}
+ m.get('mock://example.com/handle/333.3333', json=coll_json)
+ item_json_2 = {'uuid': 'e5f6', 'handle': '222.2222'}
+ m.post('mock://example.com/collections/k1l2/items', json=item_json_2)
+ yield m
diff --git a/tests/fixtures/aspace_metadata_delimited.csv b/tests/fixtures/aspace_metadata_delimited.csv
new file mode 100644
index 0000000..5a0bab2
--- /dev/null
+++ b/tests/fixtures/aspace_metadata_delimited.csv
@@ -0,0 +1,3 @@
+uri,title,file_identifier,author,description,rights_statement,rights_uri
+/repo/0/ao/456,Tast Item,tast,"Smith, John|Smith, Jane","More info at /repo/0/ao/456","Totally Free","http://free.gov"
+/repo/0/ao/123,Test Item,test,"Smith, Jane","More info at /repo/0/ao/123","Totally Free","http://free.gov"
\ No newline at end of file
diff --git a/tests/fixtures/aspace_metadata_no_delimiter.csv b/tests/fixtures/aspace_metadata_no_delimiter.csv
new file mode 100644
index 0000000..d131dde
--- /dev/null
+++ b/tests/fixtures/aspace_metadata_no_delimiter.csv
@@ -0,0 +1,2 @@
+uri,title,file_identifier,author,description,rights_statement,rights_uri
+/repo/0/ao/123,Test Item,test,"Smith, Jane","More info at /repo/0/ao/123","Totally Free","http://free.gov"
diff --git a/tests/fixtures/metadata_num_col.csv b/tests/fixtures/metadata_num_col.csv
new file mode 100644
index 0000000..0c064d0
--- /dev/null
+++ b/tests/fixtures/metadata_num_col.csv
@@ -0,0 +1,2 @@
+dc.relation.isversionof,file_identifier,dc.title,dc.contributor.author_1,dc.contributor.author_2
+/repo/0/ao/456,tast,Tast Item,"Smith, John","Smith, Jane"
diff --git a/tests/test_cli.py b/tests/test_cli.py
new file mode 100644
index 0000000..6fc9846
--- /dev/null
+++ b/tests/test_cli.py
@@ -0,0 +1,59 @@
+from dsaps.cli import main
+
+
+def test_additems(runner, input_dir):
+ """Test adding items to a collection."""
+ result = runner.invoke(main,
+ ['--url', 'mock://example.com/',
+ '--email', 'test@test.mock',
+ '--password', '1234',
+ 'additems',
+ '--metadata-csv',
+ 'tests/fixtures/aspace_metadata_delimited.csv',
+ '--field-map', 'config/aspace_mapping.json',
+ '--content-directory', input_dir,
+ '--file-type', 'pdf',
+ '--collection-handle', '333.3333'])
+ assert result.exit_code == 0
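+ # Subcommands chain in a single invocation: create the collection, then add items to it.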
+ result = runner.invoke(main,
+ ['--url', 'mock://example.com/',
+ '--email', 'test@test.mock',
+ '--password', '1234',
+ 'newcollection',
+ '--community-handle', '111.1111',
+ '--collection-name', 'Test Collection',
+ 'additems',
+ '--metadata-csv',
+ 'tests/fixtures/aspace_metadata_delimited.csv',
+ '--field-map', 'config/aspace_mapping.json',
+ '--content-directory', input_dir,
+ '--file-type', 'pdf'])
+ assert result.exit_code == 0
+
+
+def test_newcollection(runner, input_dir):
+ """Test newcoll command."""
+ result = runner.invoke(main,
+ ['--url', 'mock://example.com/',
+ '--email', 'test@test.mock',
+ '--password', '1234',
+ 'newcollection',
+ '--community-handle', '111.1111',
+ '--collection-name', 'Test Collection'])
+ assert result.exit_code == 0
+
+
+def test_reconcile(runner, input_dir, output_dir):
+ """Test reconcile command."""
+ result = runner.invoke(main,
+ ['--url', 'mock://example.com/',
+ '--email', 'test@test.mock',
+ '--password', '1234',
+ 'reconcile',
+ '--metadata-csv',
+ 'tests/fixtures/aspace_metadata_delimited.csv',
+ '--output-directory', output_dir,
+ '--content-directory', input_dir,
+ '--file-type', 'pdf'
+ ])
+ assert result.exit_code == 0
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
new file mode 100644
index 0000000..78e0729
--- /dev/null
+++ b/tests/test_helpers.py
@@ -0,0 +1,75 @@
+import csv
+
+from dsaps import helpers
+from dsaps.models import Item
+
+
+def test_create_csv_from_list(output_dir):
+ """Test create_csv_from_list function."""
+ list_name = ['123']
+ helpers.create_csv_from_list(list_name, f'{output_dir}output')
+ with open(f'{output_dir}output.csv') as csvfile:
+ reader = csv.DictReader(csvfile)
+ for row in reader:
+ assert row['id'] == '123'
+
+
+def test_create_file_list(input_dir):
+ """Test create_file_list function."""
+ file_list = helpers.create_file_list(input_dir, 'pdf')
+ for file_id in ['test_02.pdf', 'test_01.pdf', 'best_01.pdf']:
+ assert file_id in file_list
+
+
+def test_create_ingest_report(runner, output_dir):
+ """Test create_ingest_report function."""
+ file_name = 'ingest_report.csv'
+ items = [
+ Item(source_system_identifier='/repo/0/ao/123',
+ handle='111.1111')
+ ]
+ helpers.create_ingest_report(items, f'{output_dir}{file_name}')
+ with open(f'{output_dir}{file_name}') as csvfile:
+ reader = csv.DictReader(csvfile)
+ for row in reader:
+ assert row['uri'] == '/repo/0/ao/123'
+ assert row['link'] == 'https://hdl.handle.net/111.1111'
+
+
+def test_create_metadata_id_list(input_dir):
+ """Test create_metadata_id_list function."""
+ metadata_path = 'tests/fixtures/aspace_metadata_delimited.csv'
+ metadata_ids = helpers.create_metadata_id_list(metadata_path)
+ assert 'test' in metadata_ids
+ assert 'tast' in metadata_ids
+
+
+def test_match_files_to_metadata():
+ """Test match_files_to_metadata function."""
+ file_list = ['test_01.pdf']
+ metadata_ids = ['test', 'tast']
+ file_matches = helpers.match_files_to_metadata(file_list, metadata_ids)
+ assert len(file_matches) == 1
+ assert 'test_01.pdf' in file_matches
+
+
+def test_match_metadata_to_files():
+ """Test match_metadata_to_files function."""
+ file_list = ['test_01.pdf', 'tast_01.pdf']
+ metadata_ids = ['test']
+ file_matches = helpers.match_metadata_to_files(file_list, metadata_ids)
+ assert len(file_matches) == 1
+ assert 'test' in file_matches
+
+
+def test_update_metadata_csv(input_dir, output_dir):
+ """Test update_metadata_csv function."""
+ metadata_matches = ['test']
+ helpers.update_metadata_csv('tests/fixtures/aspace_metadata_delimited.csv',
+ output_dir, metadata_matches)
+ with open(f'{output_dir}updated-aspace_metadata_delimited.csv') as csvfile:
+ reader = csv.DictReader(csvfile)
+ for row in reader:
+ assert row['uri'] == '/repo/0/ao/123'
+ assert row['title'] == 'Test Item'
+ assert row['file_identifier'] == 'test'
diff --git a/tests/test_models.py b/tests/test_models.py
new file mode 100644
index 0000000..bd3eef4
--- /dev/null
+++ b/tests/test_models.py
@@ -0,0 +1,137 @@
+import attr
+
+from dsaps import models
+
+
+def test_authenticate(client):
+ """Test authenticate method."""
+ email = 'test@test.mock'
+ password = '1234'
+ client.authenticate(email, password)
+ assert client.user_full_name == 'User Name'
+ assert client.cookies == {'JSESSIONID': '11111111'}
+
+
+def test_filtered_item_search(client):
+ """Test filtered_item_search method."""
+ key = 'dc.title'
+ string = 'test'
+ query_type = 'contains'
+ item_links = client.filtered_item_search(key, string, query_type,
+ selected_collections='')
+ assert '1234' in item_links
+
+
+def test_get_uuid_from_handle(client):
+ """Test get_uuid_from_handle method."""
+ id = client.get_uuid_from_handle('111.1111')
+ assert id == 'a1b2'
+
+
+def test_get_record(client):
+ """Test get_record method."""
+ rec_obj = client.get_record('123', 'items')
+ assert attr.asdict(rec_obj)['metadata'] == {'title': 'Sample title'}
+
+
+def test_post_bitstream(client, input_dir):
+ """Test post_bitstream method."""
+ item_uuid = 'e5f6'
+ bitstream = models.Bitstream(name='test_01.pdf',
+ file_path=f'{input_dir}test_01.pdf')
+ bit_uuid = client.post_bitstream(item_uuid, bitstream)
+ assert bit_uuid == 'g7h8'
+
+
+def test_post_coll_to_comm(client):
+ """Test post_coll_to_comm method."""
+ comm_handle = '111.1111'
+ coll_name = 'Test Collection'
+ coll_uuid = client.post_coll_to_comm(comm_handle, coll_name)
+ assert coll_uuid == 'c3d4'
+
+
+def test_post_item_to_collection(client, input_dir):
+ """Test post_item_to_collection method."""
+ item = models.Item()
+ item.bitstreams = [
+ models.Bitstream(name='test_01.pdf',
+ file_path=f'{input_dir}test_01.pdf')
+ ]
+ item.metadata = [
+ models.MetadataEntry(key='file_identifier', value='test'),
+ models.MetadataEntry(key='dc.title',
+ value='Monitoring Works: Getting Teachers',
+ language='en_US'),
+ models.MetadataEntry(key='dc.relation.isversionof',
+ value='repo/0/ao/123')
+ ]
+ coll_uuid = 'c3d4'
+ item_uuid, item_handle = client.post_item_to_collection(coll_uuid, item)
+ assert item_uuid == 'e5f6'
+ assert item_handle == '222.2222'
+
+
+def test__pop_inst(client):
+ """Test _pop_inst method."""
+ class_type = models.Collection
+ rec_obj = {'name': 'Test title', 'type': 'collection', 'items': []}
+ rec_obj = client._pop_inst(class_type, rec_obj)
+ assert type(rec_obj) == class_type
+ assert rec_obj.name == 'Test title'
+
+
+def test__build_uuid_list(client):
+ """Test _build_uuid_list method."""
+ rec_obj = {'items': [{'uuid': '1234'}]}
+ children = 'items'
+ child_list = client._build_uuid_list(rec_obj, children)
+ assert '1234' in child_list
+
+
+def test_collection_from_csv(aspace_delimited_csv, aspace_mapping):
+ collection = models.Collection.from_csv(
+ aspace_delimited_csv, aspace_mapping
+ )
+ assert 2 == len(collection.items)
+
+
+def test_collection_post_items(client, input_dir, aspace_delimited_csv,
+ aspace_mapping):
+ collection = models.Collection.from_csv(
+ aspace_delimited_csv, aspace_mapping
+ )
+ collection.uuid = 'c3d4'
+ items = collection.post_items(client)
+ for item in items:
+ assert item.handle == '222.2222'
+ assert item.uuid == 'e5f6'
+
+
+def test_item_bitstreams_from_directory(input_dir):
+ item = models.Item(file_identifier='test')
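+ # Without a file-type filter, all three files matching the 'test' identifier
+ # are attached, sorted by name.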
+ item.bitstreams_from_directory(input_dir)
+ assert 3 == len(item.bitstreams)
+ assert item.bitstreams[0].name == 'test_01.jpg'
+ assert item.bitstreams[1].name == 'test_01.pdf'
+ assert item.bitstreams[2].name == 'test_02.pdf'
+ item.bitstreams_from_directory(input_dir, 'pdf')
+ assert 2 == len(item.bitstreams)
+ assert item.bitstreams[0].name == 'test_01.pdf'
+ assert item.bitstreams[1].name == 'test_02.pdf'
+
+
+def test_item_from_row(aspace_delimited_csv, aspace_mapping):
+ row = next(aspace_delimited_csv)
+ item = models.Item.from_row(row, aspace_mapping)
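+ # The pipe-delimited author field expands into repeated dc.contributor.author entries.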
+ assert attr.asdict(item)['metadata'] == [
+ {'key': 'dc.title', 'value': 'Tast Item', 'language': 'en_US'},
+ {'key': 'dc.contributor.author', 'value': 'Smith, John',
+ 'language': None},
+ {'key': 'dc.contributor.author', 'value': 'Smith, Jane',
+ 'language': None},
+ {'key': 'dc.description', 'value': 'More info at /repo/0/ao/456',
+ 'language': 'en_US'},
+ {'key': 'dc.rights', 'value': 'Totally Free', 'language': 'en_US'},
+ {'key': 'dc.rights.uri', 'value': 'http://free.gov', 'language': None}
+ ]
diff --git a/updateLanguageTagsForKey.py b/updateLanguageTagsForKey.py
deleted file mode 100644
index c8453c0..0000000
--- a/updateLanguageTagsForKey.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import json
-import requests
-import time
-import csv
-from datetime import datetime
-import urllib3
-import argparse
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-k', '--key', help='the key to be updated. optional - if '
- 'not provided, the script will ask for input')
-args = parser.parse_args()
-
-if args.key:
- key = args.key
-else:
- key = input('Enter the key to be updated: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-f = csv.writer(open(filePath + 'languageTagUpdate' + key
- + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w'))
-f.writerow(['itemID'] + ['key'])
-offset = 0
-recordsEdited = 0
-items = ''
-itemLinks = []
-while items != []:
- endpoint = baseURL + '/rest/filtered-items?query_field[]=' + key
- endpoint += '&query_op[]=exists&query_val[]=&limit=200&offset='
- endpoint += str(offset)
- print(endpoint)
- response = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
- items = response['items']
- for item in items:
- itemMetadataProcessed = []
- itemLink = item['link']
- itemLinks.append(itemLink)
- offset = offset + 200
- print(offset)
-for itemLink in itemLinks:
- itemMetadataProcessed = []
- print(itemLink)
- metadata = requests.get(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify).json()
- for l in range(0, len(metadata)):
- metadata[l].pop('schema', None)
- metadata[l].pop('element', None)
- metadata[l].pop('qualifier', None)
- if metadata[l]['key'] == key and metadata[l]['language'] is None:
- updatedMetadataElement = {}
- updatedMetadataElement['key'] = metadata[l]['key']
- updatedMetadataElement['value'] = metadata[l]['value']
- updatedMetadataElement['language'] = 'en_US'
- itemMetadataProcessed.append(updatedMetadataElement)
- provNote = 'The language tag for \'' + metadata[l]['key'] + ': '
- provNote += metadata[l]['value']
- provNote += '\' was changed from \'null\' to \'en_US\' '
- provNote += 'through a batch process on '
- provNote += datetime.now().strftime('%Y-%m-%d %H:%M:%S')
- provNote += '.'
- provNoteElement = {}
- provNoteElement['key'] = 'dc.description.provenance'
- provNoteElement['value'] = provNote
- provNoteElement['language'] = 'en_US'
- itemMetadataProcessed.append(provNoteElement)
- else:
- itemMetadataProcessed.append(metadata[l])
- itemMetadataProcessed = json.dumps(itemMetadataProcessed)
- delete = requests.delete(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify)
- print(delete)
- post = requests.put(baseURL + itemLink + '/metadata', headers=header,
- cookies=cookies, verify=verify,
- data=itemMetadataProcessed)
- print(post)
- f.writerow([itemLink] + [key])
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')
diff --git a/updateLanguageTagsForKeyInCollection.py b/updateLanguageTagsForKeyInCollection.py
deleted file mode 100644
index afebe8f..0000000
--- a/updateLanguageTagsForKeyInCollection.py
+++ /dev/null
@@ -1,110 +0,0 @@
-import json
-import requests
-import time
-import csv
-from datetime import datetime
-import urllib3
-import dsFunc
-
-inst = input('To edit production server, enter the name of the secrets file: ')
-secrets = dsFunc.instSelect(inst)
-
-baseURL = secrets.baseURL
-email = secrets.email
-password = secrets.password
-filePath = secrets.filePath
-verify = secrets.verify
-skipColl = secrets.skipColl
-
-key = input('Enter key: ')
-collectionHandle = input('Enter collection handle: ')
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-startTime = time.time()
-data = {'email': email, 'password': password}
-header = {'content-type': 'application/json', 'accept': 'application/json'}
-session = requests.post(baseURL + '/rest/login', headers=header, verify=verify,
- params=data).cookies['JSESSIONID']
-cookies = {'JSESSIONID': session}
-
-
-status = requests.get(baseURL + '/rest/status', headers=header,
- cookies=cookies, verify=verify).json()
-userFullName = status['fullname']
-print('authenticated', userFullName)
-
-itemList = []
-endpoint = baseURL + '/rest/handle/' + collectionHandle
-collection = requests.get(endpoint, headers=header, cookies=cookies,
- verify=verify).json()
-collectionID = collection['uuid']
-offset = 0
-items = ''
-while items != []:
- items = requests.get(baseURL + '/rest/collections/' + str(collectionID)
- + '/items?limit=200&offset=' + str(offset),
- headers=header, cookies=cookies, verify=verify)
- while items.status_code != 200:
- time.sleep(5)
- items = requests.get(baseURL + '/rest/collections/' + str(collectionID)
- + '/items?limit=200&offset=' + str(offset),
- headers=header, cookies=cookies, verify=verify)
- items = items.json()
- for k in range(0, len(items)):
- itemID = items[k]['uuid']
- itemList.append(itemID)
- offset = offset + 200
-
-dsFunc.elapsedTime(startTime, 'Item list creation time')
-
-f = csv.writer(open(filePath + 'languageTagUpdate' + key
- + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w'))
-f.writerow(['itemID'] + ['key'])
-for number, itemID in enumerate(itemList):
- itemMetadataProcessed = []
- itemsRemaining = len(itemList) - number
- print('Items remaining: ', itemsRemaining, 'ItemID: ', itemID)
- metadata = requests.get(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify).json()
- for l in range(0, len(metadata)):
- if metadata[l]['key'] == key and metadata[l]['language'] == '':
- updatedMetadataElement = {}
- updatedMetadataElement['key'] = metadata[l]['key']
- updatedMetadataElement['value'] = metadata[l]['value']
- updatedMetadataElement['language'] = 'en_US'
- itemMetadataProcessed.append(updatedMetadataElement)
- provNote = 'The language tag for \'' + metadata[l]['key'] + ': '
- provNote += metadata[l]['value']
- provNote += '\' was changed from \'null\' to \'en_US\' '
- provNote += 'through a batch process on '
- provNote += datetime.now().strftime('%Y-%m-%d %H:%M:%S')
- provNote += '.'
- provNoteElement = {}
- provNoteElement['key'] = 'dc.description.provenance'
- provNoteElement['value'] = provNote
- provNoteElement['language'] = 'en_US'
- itemMetadataProcessed.append(provNoteElement)
- else:
- itemMetadataProcessed.append(metadata[l])
- if 'The language tag for \'' + key in json.dumps(itemMetadataProcessed):
- itemMetadataProcessed = json.dumps(itemMetadataProcessed)
- print('updated', itemID)
- delete = requests.delete(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header,
- cookies=cookies, verify=verify)
- print(delete)
- post = requests.put(baseURL + '/rest/items/' + str(itemID)
- + '/metadata', headers=header, cookies=cookies,
- verify=verify, data=itemMetadataProcessed)
- print(post)
- f.writerow([itemID] + [key])
- else:
- print('not updated', itemID)
-
-logout = requests.post(baseURL + '/rest/logout', headers=header,
- cookies=cookies, verify=verify)
-
-# print script run time
-dsFunc.elapsedTime(startTime, 'Script run time')