From dcaec831801c72b6eb0e16bfb93cbf15e643d4d2 Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Wed, 13 Nov 2019 18:06:22 -0500 Subject: [PATCH 01/22] clean up --- Pipfile | 14 - Pipfile.lock | 73 ----- README.md | 173 ----------- addKeyValuePairOnHandleCSV.py | 84 ------ addKeyValuePairToCollection.py | 135 --------- addKeyValuePairToCommunity.py | 151 ---------- addNewItemsToCollection.py | 199 ------------- checkInventory.py | 60 ---- compareTwoKeysInCommunity.py | 129 --------- countInitialedNamesByCollection.py | 116 -------- createItemMetadataFromCSV.py | 88 ------ data/.keep | 0 deleteBitstreamsFromItem.py | 73 ----- deleteKeyFromCollection.py | 120 -------- deleteKeyFromCommunity.py | 126 -------- deleteKeyValuePairFromCollection.py | 130 --------- dsFunc.py | 48 --- editBitstreamsNames.py | 104 ------- exportCollectionMetadataToCSV.py | 108 ------- exportSelectedRecordMetadataToCSV.py | 97 ------- fileListMetadataReconcile.py | 112 ------- findBogusUris.py | 74 ----- findDuplicateKeys.py | 75 ----- generateCollectionLevelAbstract.py | 93 ------ getBitstreams.py | 273 ------------------ getCollectionMetadataJson.py | 63 ---- getCompleteAndUniqueValuesForAllKeys.py | 119 -------- ...eteAndUniqueValuesForAllKeysInCommunity.py | 120 -------- getFacultyNamesFromETDs.py | 98 ------- getGlobalLanguageValues.py | 89 ------ getHandlesAndBitstreamsFromCollection.py | 95 ------ getLanguageValuesForKeys.py | 93 ------ getRecordsAndValuesForKey.py | 77 ----- getRecordsAndValuesForKeyInCollection.py | 92 ------ getRecordsWithKeyAndValue.py | 84 ------ identifyItemsMissingKeyInCommunity.py | 97 ------- metadataCollectionsKeysMatrix.py | 154 ---------- metadataOverview.py | 151 ---------- overwriteExistingMetadata.py | 116 -------- postCollection.py | 215 -------------- pull-request-template.md | 5 - removeDuplicateKeyValuePairsFromItems.py | 124 -------- replaceKey.py | 120 -------- replaceKeyForCollection.py | 132 --------- replaceKeyForCommunity.py | 139 --------- replaceKeyValuePairOnItemIdCSV.py | 92 ------ replaceKeyValuePairsFromCSV.py | 129 --------- replaceUnnecessarySpaces.py | 115 -------- replaceValueInCollection.py | 145 ---------- replaceValueInCommunityFromCSV.py | 157 ---------- repositoryMetadataBackup.py | 97 ------- repositoryMetadataRestore.py | 76 ----- splitFieldIntoMultipleFields.py | 141 --------- tests.py | 48 --- updateLanguageTagsForKey.py | 108 ------- updateLanguageTagsForKeyInCollection.py | 110 ------- 56 files changed, 6056 deletions(-) delete mode 100644 Pipfile delete mode 100644 Pipfile.lock delete mode 100644 README.md delete mode 100644 addKeyValuePairOnHandleCSV.py delete mode 100644 addKeyValuePairToCollection.py delete mode 100644 addKeyValuePairToCommunity.py delete mode 100644 addNewItemsToCollection.py delete mode 100644 checkInventory.py delete mode 100644 compareTwoKeysInCommunity.py delete mode 100644 countInitialedNamesByCollection.py delete mode 100644 createItemMetadataFromCSV.py delete mode 100644 data/.keep delete mode 100644 deleteBitstreamsFromItem.py delete mode 100644 deleteKeyFromCollection.py delete mode 100644 deleteKeyFromCommunity.py delete mode 100644 deleteKeyValuePairFromCollection.py delete mode 100644 dsFunc.py delete mode 100644 editBitstreamsNames.py delete mode 100644 exportCollectionMetadataToCSV.py delete mode 100644 exportSelectedRecordMetadataToCSV.py delete mode 100644 fileListMetadataReconcile.py delete mode 100644 findBogusUris.py delete mode 100644 findDuplicateKeys.py delete mode 100644 generateCollectionLevelAbstract.py 
delete mode 100644 getBitstreams.py delete mode 100644 getCollectionMetadataJson.py delete mode 100644 getCompleteAndUniqueValuesForAllKeys.py delete mode 100644 getCompleteAndUniqueValuesForAllKeysInCommunity.py delete mode 100644 getFacultyNamesFromETDs.py delete mode 100644 getGlobalLanguageValues.py delete mode 100644 getHandlesAndBitstreamsFromCollection.py delete mode 100644 getLanguageValuesForKeys.py delete mode 100644 getRecordsAndValuesForKey.py delete mode 100644 getRecordsAndValuesForKeyInCollection.py delete mode 100644 getRecordsWithKeyAndValue.py delete mode 100644 identifyItemsMissingKeyInCommunity.py delete mode 100644 metadataCollectionsKeysMatrix.py delete mode 100644 metadataOverview.py delete mode 100644 overwriteExistingMetadata.py delete mode 100644 postCollection.py delete mode 100644 pull-request-template.md delete mode 100644 removeDuplicateKeyValuePairsFromItems.py delete mode 100644 replaceKey.py delete mode 100644 replaceKeyForCollection.py delete mode 100644 replaceKeyForCommunity.py delete mode 100644 replaceKeyValuePairOnItemIdCSV.py delete mode 100644 replaceKeyValuePairsFromCSV.py delete mode 100644 replaceUnnecessarySpaces.py delete mode 100644 replaceValueInCollection.py delete mode 100644 replaceValueInCommunityFromCSV.py delete mode 100644 repositoryMetadataBackup.py delete mode 100644 repositoryMetadataRestore.py delete mode 100644 splitFieldIntoMultipleFields.py delete mode 100644 tests.py delete mode 100644 updateLanguageTagsForKey.py delete mode 100644 updateLanguageTagsForKeyInCollection.py diff --git a/Pipfile b/Pipfile deleted file mode 100644 index 4639c37..0000000 --- a/Pipfile +++ /dev/null @@ -1,14 +0,0 @@ -[[source]] -name = "pypi" -url = "https://pypi.org/simple" -verify_ssl = true - -[dev-packages] - -[packages] -requests = "*" -click = "*" -attrs = "*" - -[requires] -python_version = "3.7" diff --git a/Pipfile.lock b/Pipfile.lock deleted file mode 100644 index 1b7c2af..0000000 --- a/Pipfile.lock +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_meta": { - "hash": { - "sha256": "01dc55cd69a2df69f74a7428d6c916635a02376ce9d212768bbb2065001068d1" - }, - "pipfile-spec": 6, - "requires": { - "python_version": "3.7" - }, - "sources": [ - { - "name": "pypi", - "url": "https://pypi.org/simple", - "verify_ssl": true - } - ] - }, - "default": { - "attrs": { - "hashes": [ - "sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79", - "sha256:f0b870f674851ecbfbbbd364d6b5cbdff9dcedbc7f3f5e18a6891057f21fe399" - ], - "index": "pypi", - "version": "==19.1.0" - }, - "certifi": { - "hashes": [ - "sha256:046832c04d4e752f37383b628bc601a7ea7211496b4638f6514d0e5b9acc4939", - "sha256:945e3ba63a0b9f577b1395204e13c3a231f9bc0223888be653286534e5873695" - ], - "version": "==2019.6.16" - }, - "chardet": { - "hashes": [ - "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", - "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" - ], - "version": "==3.0.4" - }, - "click": { - "hashes": [ - "sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13", - "sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7" - ], - "index": "pypi", - "version": "==7.0" - }, - "idna": { - "hashes": [ - "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", - "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c" - ], - "version": "==2.8" - }, - "requests": { - "hashes": [ - "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4", - 
"sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31" - ], - "index": "pypi", - "version": "==2.22.0" - }, - "urllib3": { - "hashes": [ - "sha256:b246607a25ac80bedac05c6f282e3cdaf3afb65420fd024ac94435cabe6e18d1", - "sha256:dbe59173209418ae49d485b87d1681aefa36252ee85884c31346debd19463232" - ], - "version": "==1.25.3" - } - }, - "develop": {} -} diff --git a/README.md b/README.md deleted file mode 100644 index 1bdff78..0000000 --- a/README.md +++ /dev/null @@ -1,173 +0,0 @@ -# dspace-api - -This repository was created from the merger of https://github.com/ehanson8/dspace-editing and https://github.com/ehanson8/dspace-data-collection, both of which have been archived. All further development will occur in this repository. - -**Note**: Upgraded to Python 3 in 02/2019. - -**Note**: These scripts were updated in 05/2018 for the new authentication method used by DSpace 6.x - -All of these scripts require a secrets.py file in the same directory that must contain the following text: -``` - baseURL='https://dspace.myuni.edu' - email='dspace_user@.myuni.edu' - password='my_dspace_password' - filePath = '/Users/dspace_user/dspace-data-collection/data/' - verify = True or False (no quotes). Use False if using an SSH tunnel to connect to the DSpace API - skipColl = A list of the 'uuid' of any collections that you wish the script to skip. (e.g. ['45794375-6640-4efe-848e-082e60bae375']) -``` -The 'filePath' is directory into which output files will be written and 'handlePrefix' may or may not vary from your DSpace URL depending on your configuration. This secrets.py file will be ignored according to the repository's .gitignore file so that DSpace login details will not be inadvertently exposed through GitHub. - -If you are using both a development server and a production server, you can create a separate secrets.py file with a different name (e.g. secretsProd.py) and containing the production server information. When running each of these scripts, you will be prompted to enter the file name (e.g 'secretsProd' without '.py') of an alternate secrets file. If you skip the prompt or incorrectly type the file name, the scripts will default to the information in the secrets.py file. This ensures that you will only edit the production server if you really intend to. - -#### [addKeyValuePairOnHandleCSV.py](addKeyValuePairOnHandleCSV.py) -Based on user input, adds key-value pairs from a specified CSV file of DSpace item handles and the value to be added to that item using the specified key. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated. - -#### [addKeyValuePairToCollection.py](addKeyValuePairToCollection.py) -Based on user input, adds a specified key-value pair with a specified language value to every item in the collection with the specified handle. - -#### [addKeyValuePairToCommunity.py](addKeyValuePairToCommunity.py) -Based on user input, adds a specified key-value pair with a specified language value to every item in every collection in the community with the specified handle. - -#### [addNewItemsToCollection.py](addNewItemsToCollection.py) -Based on user input, adds new items to the specified collection. In the specified directory, the script creates items and associated metadata based on a 'metadataNewFiles.json' file in the directory. 
The script then posts files for the appropriate items, which is determined by having the file name (minus the file extension) in a 'dc.identifier.other' field in the item metadata record. - -#### [compareTwoKeysInCommunity.py](compareTwoKeysInCommunity.py) -Based on user input, extracts the values of two specified keys from a specified community to a CSV file for comparison. - -#### [countInitialedNamesByCollection.py](countInitialedNamesByCollection.py) -Based on [mjanowiecki's](https://github.com/mjanowiecki) [findInitialedNamesByCollection.py](https://github.com/mjanowiecki/dspace-data-collection/blob/master/findInitialedNamesByCollection.py), find values in name fields that appear to have first initials that could be expanded to full names and provides a count for each collection when the count is more than zero. - -#### [createItemMetadataFromCSV.py](createItemMetadataFromCSV.py) -Based on user input, creates a JSON file of metadata that can be added to a DSpace item from the specified CSV file or from values directly specified in the script. The 'createMetadataElementCSV' function in the script is used to create a metadata element from the specified CSV file and has three variables: - -- 'key' - The Dublin Core property to be used for the element. -- 'value' - The column in the specified CSV file that contains the data for the element. -- 'language' - The desired language value for the element - -The 'createMetadataElementDirect' function in the script is used to create a metadata element without a CSV file (intended for metadata elements that will be constant across all items in a collection) and has three variables: - -- 'key' - The Dublin Core property to be used for the element. -- 'value' - The actual value of the element. -- 'language' - The desired language value for the element. - -#### [deleteBitstreamsFromItem.py](deleteBitstreamsFromItem.py) -Based on user input, removes all bitstreams associated with an item with the specified handle. - -#### [deleteKeyFromCollection.py](deleteKeyFromCollection.py) -Based on user input, removes all key-value pairs with the specified key for every item in the collection with the specified handle. - -#### [deleteKeyFromCommunity.py](deleteKeyFromCommunity.py) -Based on user input, removes all key-value pairs with the specified key for every item in every collection in the community with the specified handle. - -#### [deleteKeyValuePairFromCollection.py](deleteKeyValuePairFromCollection.py) -Based on user input, removes all key-value pairs with the specified key and value for every item in the collection with the specified handle. - -#### [editBitstreamsNames.py](editBitstreamsNames.py) -Based on a specified CSV file of DSpace item handles and replacement file names, replaces the name of bitstreams attached to the specified items. - -#### [exportSelectedRecordMetadataToCSV.py](exportSelectedRecordMetadataToCSV.py) -Based a CSV of item handles, extracts all metadata (except 'dc.description.provenance' values) from the selected items to a CSV file. - -#### [findBogusUris.py](findBogusUris.py) -Extracts the item ID and the value of the key 'dc.identifier.uri' to a CSV file when the value does not begin with the handlePrefix specified in the secrets.py file. - -#### [findDuplicateKeys.py](findDuplicateKeys.py) -Based on user input, extracts item IDs to a CSV file where there are multiple instances of the specified key in the item metadata. 
- -#### [generateCollectionLevelAbstract.py](generateCollectionLevelAbstract.py) -Based on user input, creates an HTML collection-level abstract that contains hyperlinks to all of the items in each series, as found in the metadata CSV. This assumes that the series title is recorded in 'dc.relation.ispartof' or a similar property in the DSpace item records. The abstract is then posted to the collection in DSpace. - -#### [getCollectionMetadataJson.py](getCollectionMetadataJson.py) -Based on user input, extracts all of the item metadata from the specified collection to a JSON file. - -#### [getCompleteAndUniqueValuesForAllKeys.py](getCompleteAndUniqueValuesForAllKeys.py) -Creates a 'completeValueLists' folder and for all keys used in the repository, extracts all values for a particular key to a CSV with item IDs. It also creates a 'uniqueValueLists' folder, that writes a CSV file for each key with all unique values and a count of how many times the value appears. - -#### [getCompleteAndUniqueValuesForAllKeysInCommunity.py](getCompleteAndUniqueValuesForAllKeysInCommunity.py) -Creates a 'completeValueLists' folder and for all keys used in the specified community, extracts all values for a particular key to a CSV with item IDs. It also creates a 'uniqueValueLists' folder, that writes a CSV file for each key with all unique values and a count of how many times the value appears. - -#### [getFacultyNamesFromETDs.py](getFacultyNamesFromETDs.py) -Based on user input, extracts all values from 'dc.contributor.advisor' and 'dc.contributor.committeeMember' fields from items in collections in the specified community. - -#### [getGlobalLanguageValues.py](getGlobalLanguageValues.py) -Extracts all unique language values used by metadata entries in the repository to a CSV file. - -#### [getHandlesAndBitstreamsFromCollection.py](getHandlesAndBitstreamsFromCollection.py) -Based on user input, extracts all the handles and bitstreams associated with the items in the specified collection to a CSV file. - -#### [getLanguageValuesForKeys.py](getLanguageValuesForKeys.py) -Extracts all unique pairs of keys and language values used by metadata entries in the repository to a CSV file. - -#### [getRecordsAndValuesForKey.py](getRecordsAndValuesForKey.py) -Based on user input, extracts the ID and URI for all items in the repository with the specified key, as well as the value of the specified key, to a CSV file. - -#### [getRecordsAndValuesForKeyInCollection.py](getRecordsAndValuesForKeyInCollection.py) -Based on user input, extracts the ID and URI for all items in the specified collection with the specified key, as well as the value of the specified key, to a CSV file. - -#### [getRecordsWithKeyAndValue.py](getRecordsWithKeyAndValue.py) -Based on user input, extracts the ID and URI for all items in the repository with the specified key-value pair to a CSV file. - -#### [identifyItemsMissingKeyInCommunity.py](identifyItemsMissingKeyInCommunity.py) -Based on user input, extracts the IDs of items from a specified community that do not have the specified key. - -#### [metadataCollectionsKeysMatrix.py](metadataCollectionsKeysMatrix.py) -Creates a matrix containing a count of each time a key appears in each collection in the repository. 
- -#### [metadataOverview.py](metadataOverview.py) -Produces several CSV files containing different information about the structure and metadata of the repository: - -|File Name |Description| -|--------------------------|--------------------------------------------------------------------------| -|collectionMetadataKeys.csv | A list of all keys used in each collection with collection name, ID, and handle.| -|dspaceIDs.csv | A list of every item ID along with the IDs of the collection and community that contains that item.| -|dspaceTypes.csv | A list of all unique values for the key 'dc.type.'| -|keyCount.csv | A list of all unique keys used in the repository, as well as a count of how many times it appear.| -|collectionStats.csv | A list of all collections in the repository with the collection name, ID, handle, and number of items.| - -#### [overwriteExistingMetadata.py](overwriteExistingMetadata.py) -Based on a specified CSV file of DSpace item handles and file identifiers, replaces the metadata of the items with specified handles with the set of metadata elements associated with the corresponding file identifier in a JSON file of metadata entries named 'metadataOverwrite.json.' - -#### [postCollection.py](postCollection.py) -Based on user input, creates a collection with a specified name within the specified community. In the specified directory, the script creates items and associated metadata based on a 'collectionMetadata.json' file in the directory. Based on the specified file extension, the script then posts each file in the directory with that extension as a bitstream for the appropriate item, which is determined by having the file name (minus the file extension) in a 'dc.identifier.other' field in the item metadata record. - -#### [removeDuplicateKeyValuePairsFromItems.py](removeDuplicateKeyValuePairsFromItems.py) -Finds all items with duplicate key-value pairs and removes the duplicates. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated. - -#### [replaceKey.py](replaceKey.py) -Based on user input, replaces one specified key with another specified key in all item metadata across the repository. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated. - -#### [replaceKeyForCollection.py](replaceKeyForCollection.py) -Based on user input, replaces one specified key with another specified key in all item metadata across the specified collection. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated. - -#### [replaceKeyForCommunity.py](replaceKeyForCommunity.py) -Based on user input, replaces one specified key with another specified key in all item metadata across the specified community. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated. - -#### [replaceKeyValuePairOnItemIdCSV.py](replaceKeyValuePairOnItemIdCSV.py) -Based on user input, updates key-value pairs on the specified items from the specified CSV file with the columns: 'replacedKey,' 'replacementKey,' 'replacedValue,' 'replacementValue,' and 'itemID.' 
A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated. - -#### [replaceKeyValuePairsFromCSV.py](replaceKeyValuePairsFromCSV.py) -Based on user input, updates key-value pairs from the specified CSV file with the columns: 'replacedKey,' 'replacementKey,' 'replacedValue,' and 'replacementValue.' A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated. - -#### [replaceUnnecessarySpaces.py](replaceUnnecessarySpaces.py) -Based on user input, removes double spaces, triple spaces, and spaces before commas in the values from the specified key in the specified community. - -#### [replaceValueInCollection.py](replaceValueInCollection.py) -Based on user input, replaces a specified value with another specified value in all item metadata across the specified collection. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated. - -#### [replaceValueInCommunityFromCSV.py](replaceValueInCommunityFromCSV.py) -Based on a user specified CSV, replaces specified values in the specified community with specified replacement values. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated. - -#### [repositoryMetadataBackup.py](repositoryMetadataBackup.py) -Creates a folder with a timestamp in the folder name and creates a JSON file for every collection in the repository with the metadata for all of the items in that collection. - -#### [repositoryMetadataRestore.py](repositoryMetadataRestore.py) -Based on user input, restores the metadata from a specified backup folder that was created by the repositoryMetadataBackup.py script. - -#### [splitFieldIntoMultipleFields.py](splitFieldIntoMultipleFields.py) -Based on a user specified CSV, replaces a single field with multiple values into multiple fields which each contain a single value. - -#### [updateLanguageTagsForKey.py](updateLanguageTagsForKey.py) -Based on user input, updates the language value for the specified key to 'en_us' for all items with that key in the repository. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated. - -#### [updateLanguageTagsForKeyInCollection.py](updateLanguageTagsForKeyInCollection.py) -Based on user input, updates the language value for the specified key to 'en_us' for all items with that key in the specified collection. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated. 
diff --git a/addKeyValuePairOnHandleCSV.py b/addKeyValuePairOnHandleCSV.py deleted file mode 100644 index bb7a024..0000000 --- a/addKeyValuePairOnHandleCSV.py +++ /dev/null @@ -1,84 +0,0 @@ -import json -import requests -import time -import csv -from datetime import datetime -import urllib3 -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, - verify=verify, params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -fileName = filePath + input('Enter fileName (including \'.csv\'): ') -addedKey = input('Enter key: ') -startTime = time.time() - -date = datetime.now().strftime('%Y-%m-%d %H.%M.%S') -f = csv.writer(open(filePath + 'addKeyValuePair' + date + '.csv', 'w')) -f.writerow(['itemID'] + ['addedKey'] + ['addedValue'] + ['delete'] + ['post']) - -with open(fileName) as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - addedValue = row['value'] - handle = row['handle'].strip() - addedMetadataElement = {} - addedMetadataElement['key'] = addedKey - addedMetadataElement['value'] = addedValue - addedMetadataElement['language'] = 'en_us' - endpoint = baseURL + '/rest/handle/' + handle - item = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - itemID = item['uuid'] - itemMetadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, - cookies=cookies, verify=verify).json() - itemMetadata.append(addedMetadataElement) - itemMetadataProcessed = itemMetadata - - date = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - provNote = '\'' + addedKey + ': ' + addedValue - provNote += '\' was added through a batch process on ' - provNote += date + '.' 
- provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = provNote - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - delete = requests.delete(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, - cookies=cookies, verify=verify) - print(delete) - post = requests.put(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify, data=itemMetadataProcessed) - print(post) - f.writerow([itemID] + [addedMetadataElement['key']] - + [addedMetadataElement['value']] + [delete] + [post]) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/addKeyValuePairToCollection.py b/addKeyValuePairToCollection.py deleted file mode 100644 index d5a5367..0000000 --- a/addKeyValuePairToCollection.py +++ /dev/null @@ -1,135 +0,0 @@ -import json -import requests -import time -import csv -from datetime import datetime -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-k', '--key', help='the key to be added. optional - if ' - 'not provided, the script will ask for input') -parser.add_argument('-v', '--value', help='the value to be added. optional - ' - 'if not provided, the script will ask for input') -parser.add_argument('-l', '--language', help='the language tag to be added. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-i', '--handle', help='handle of the collection. 
' - 'optional - if not provided, the script will ask for ' - 'input') -args = parser.parse_args() - -if args.key: - addedKey = args.key -else: - addedKey = input('Enter the key: ') -if args.value: - addedValue = args.value -else: - addedValue = input('Enter the value: ') -if args.language: - addedLanguage = args.language -else: - addedLanguage = input('Enter the language tag: ') -if args.handle: - handle = args.handle -else: - handle = input('Enter collection handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, - verify=verify, params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -itemList = [] -endpoint = baseURL + '/rest/handle/' + handle -collection = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -collectionID = collection['uuid'] -offset = 0 -items = '' -while items != []: - items = requests.get(baseURL + '/rest/collections/' + str(collectionID) - + '/items?limit=200&offset=' + str(offset), - headers=header, cookies=cookies, verify=verify) - while items.status_code != 200: - time.sleep(5) - items = requests.get(baseURL + '/rest/collections/' + str(collectionID) - + '/items?limit=200&offset=' + str(offset), - headers=header, cookies=cookies, verify=verify) - items = items.json() - for k in range(0, len(items)): - itemID = items[k]['uuid'] - itemList.append(itemID) - offset = offset + 200 - -dsFunc.elapsedTime(startTime, 'Item list creation time') - -recordsEdited = 0 -f = csv.writer(open(filePath + 'addKeyValuePair' - + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w')) -f.writerow(['itemID'] + ['addedKey'] + ['addedValue'] + ['delete'] + ['post']) -for number, itemID in enumerate(itemList): - itemsRemaining = len(itemList) - number - print('Items remaining: ', itemsRemaining, 'ItemID: ', itemID) - metadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify).json() - itemMetadataProcessed = [] - for l in range(0, len(metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - itemMetadataProcessed.append(metadata[l]) - addedMetadataElement = {} - addedMetadataElement['key'] = addedKey - addedMetadataElement['value'] = addedValue - addedMetadataElement['language'] = addedLanguage - itemMetadataProcessed.append(addedMetadataElement) - provNote = ('\'' + addedKey + ': ' + addedValue + '\' was added through a ' - + 'batch process on ' - + datetime.now().strftime('%Y-%m-%d %H:%M:%S') + '.') - provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = provNote - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - recordsEdited = recordsEdited + 1 - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - print('updated', itemID, recordsEdited) - delete = requests.delete(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify) - print(delete) - post = requests.put(baseURL + '/rest/items/' + str(itemID) + '/metadata', - headers=header, cookies=cookies, 
verify=verify, - data=itemMetadataProcessed) - print(post) - f.writerow([itemID] + [addedKey] + [addedValue] + [delete] + [post]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/addKeyValuePairToCommunity.py b/addKeyValuePairToCommunity.py deleted file mode 100644 index ec679bb..0000000 --- a/addKeyValuePairToCommunity.py +++ /dev/null @@ -1,151 +0,0 @@ -import json -import requests -import time -import csv -from datetime import datetime -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-k', '--key', help='the key to be added. optional - if ' - 'not provided, the script will ask for input') -parser.add_argument('-v', '--value', help='the value to be added. optional - ' - 'if not provided, the script will ask for input') -parser.add_argument('-l', '--language', help='the language tag to be added. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-i', '--handle', help='handle of the community. optional ' - '- if not provided, the script will ask for input') -args = parser.parse_args() - -if args.key: - addedKey = args.key -else: - addedKey = input('Enter the key: ') -if args.value: - addedValue = args.value -else: - addedValue = input('Enter the value: ') -if args.language: - addedLanguage = args.language -else: - addedLanguage = input('Enter the language tag: ') -if args.handle: - handle = args.handle -else: - handle = input('Enter community handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -itemList = [] -endpoint = baseURL + '/rest/handle/' + handle -community = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -communityID = community['uuid'] - -collections = requests.get(baseURL + '/rest/communities/' + str(communityID) - + '/collections', headers=header, cookies=cookies, - verify=verify).json() -for j in range(0, len(collections)): - collectionID = collections[j]['uuid'] - if collectionID not in skipColl: - offset = 0 - items = '' - while items != []: - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) - + '/items?limit=200&offset=' + str(offset), - headers=header, cookies=cookies, - verify=verify) - while items.status_code != 200: - time.sleep(5) - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) - + '/items?limit=200&offset=' - + str(offset), headers=header, - cookies=cookies, verify=verify) - items = items.json() - for k in range(0, len(items)): - itemID = items[k]['uuid'] - itemList.append(itemID) - offset = offset + 200 - -dsFunc.elapsedTime(startTime, 'Item list creation 
time') - -recordsEdited = 0 -f = csv.writer(open(filePath + 'addKeyValuePair' - + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w')) -f.writerow(['itemID'] + ['addedKey'] + ['addedValue'] + ['delete'] + ['post']) -for number, itemID in enumerate(itemList): - itemsRemaining = len(itemList) - number - print('Items remaining: ', itemsRemaining, 'ItemID: ', itemID) - metadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify).json() - itemMetadataProcessed = [] - changeRecord = True - for metadataElement in metadata: - key = metadataElement['key'] - value = metadataElement['value'] - if key == addedKey and value == addedValue: - changeRecord = False - metadataElement.pop('schema', None) - metadataElement.pop('element', None) - metadataElement.pop('qualifier', None) - itemMetadataProcessed.append(metadataElement) - if changeRecord is True: - addedMetadataElement = {} - addedMetadataElement['key'] = addedKey - addedMetadataElement['value'] = addedValue - addedMetadataElement['language'] = addedLanguage - itemMetadataProcessed.append(addedMetadataElement) - provNote = '\'' + addedKey + ': ' + addedValue - provNote += '\' was added through a batch process on ' - provNote += datetime.now().strftime('%Y-%m-%d %H:%M:%S') + '.' - provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = provNote - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - recordsEdited = recordsEdited + 1 - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - print('updated', itemID, recordsEdited) - delete = requests.delete(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, - cookies=cookies, verify=verify) - print(delete) - post = requests.put(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify, data=itemMetadataProcessed) - print(post) - f.writerow([itemID] + [addedKey] + [addedValue] + [delete] + [post]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/addNewItemsToCollection.py b/addNewItemsToCollection.py deleted file mode 100644 index 15a7e9a..0000000 --- a/addNewItemsToCollection.py +++ /dev/null @@ -1,199 +0,0 @@ -import json -import requests -import datetime -import time -import os -import csv -import urllib3 -import collections -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-d', '--directory', help='the directory of files to be ' - 'ingested. optional - if not provided, the script will ' - 'ask for input') -parser.add_argument('-e', '--fileExtension', help='the extension of files to ' - 'be ingested. optional - if not provided, the script will ' - 'ask for input') -parser.add_argument('-i', '--handle', help='handle of the object to retreive. 
' - 'optional - if not provided, the script will ask for ' - 'input') -args = parser.parse_args() - -if args.directory: - directory = args.directory -else: - directory = input('Enter directory name: ') -if args.fileExtension: - fileExtension = args.fileExtension -else: - fileExtension = '.' + input('Enter file extension: ') -if args.handle: - handle = args.handle -else: - handle = input('Enter handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() - -# ccreate file list and export csv -fileList = {} -for root, dirs, files in os.walk(directory, topdown=True): - for file in files: - if file.endswith(fileExtension): - fullFilePath = os.path.join(root, file).replace('\\', '/') - fileList[file[:file.index('.')]] = fullFilePath - -dsFunc.elapsedTime(startTime, 'File list creation time') - -f = csv.writer(open(handle.replace('/', '-') + 'addedFilesList.csv', 'w')) -f.writerow(['itemID']) - -for k, v in fileList.items(): - f.writerow([v[v.rindex('/') + 1:]]) -counter = len(fileList) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) -headerFileUpload = {'accept': 'application/json'} - -# Get collection ID -endpoint = baseURL + '/rest/handle/' + handle -collection = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -collectionID = str(collection['uuid']) -print(collectionID) - -# Post items -collectionMetadata = json.load(open(directory + '/' + 'metadataNewFiles.json')) -for itemMetadata in collectionMetadata: - counter = counter - 1 - print('Items remaining: ', counter) - fileExists = '' - updatedItemMetadata = {} - updatedItemMetadataList = [] - for element in itemMetadata['metadata']: - if element['key'] == 'fileIdentifier': - fileIdentifier = element['value'] - else: - updatedItemMetadataList.append(element) - updatedItemMetadata['metadata'] = updatedItemMetadataList - updatedItemMetadata = json.dumps(updatedItemMetadata) - for k in fileList: - if fileIdentifier in k: - fileExists = True - if fileExists is True: - print(fileIdentifier) - post = requests.post(baseURL + '/rest/collections/' + collectionID - + '/items', headers=header, cookies=cookies, - verify=verify, data=updatedItemMetadata).json() - print(json.dumps(post)) - itemID = post['link'] - - # #Post bitstream - front and back - # for k, v in fileList.items(): - # if k == fileIdentifier + '-Front': - # bitstream = fileList[k] - # fileName = bitstream[bitstream.rfind('/') + 1:] - # data = open(bitstream, 'rb') - # post = requests.post(baseURL + itemID + '/bitstreams?name=' - # + fileName, headers=headerFileUpload, - # cookies=cookies, verify=verify, - # data=data).json() - # print(post) - # - # for k, v in fileList.items(): - # if k == fileIdentifier + '-Back': - # bitstream = fileList[k] - # fileName = bitstream[bitstream.rfind('/') + 1:] - # data = open(bitstream, 'rb') - # post = requests.post(baseURL + itemID + '/bitstreams?name=' - # + fileName, headers=headerFileUpload, - # cookies=cookies, verify=verify, - # data=data).json() - # print(post) - - # Post bitstream - starts with file identifier - orderedFileList = 
collections.OrderedDict(sorted(fileList.items())) - for k, v in orderedFileList.items(): - if k.startswith(fileIdentifier): - bitstream = orderedFileList[k] - fileName = bitstream[bitstream.rfind('/') + 1:] - print(fileName) - data = open(bitstream, 'rb') - post = requests.post(baseURL + itemID + '/bitstreams?name=' - + fileName, headers=headerFileUpload, - cookies=cookies, verify=verify, - data=data).json() - print(post) - - # Create provenance notes - provNote = {} - provNote['key'] = 'dc.description.provenance' - provNote['language'] = 'en_US' - utc = datetime.datetime.utcnow() - utcTime = utc.strftime('%Y-%m-%dT%H:%M:%SZ') - bitstreams = requests.get(baseURL + itemID + '/bitstreams', headers=header, - cookies=cookies, verify=verify).json() - bitstreamCount = len(bitstreams) - provNoteValue = 'Submitted by ' + userFullName + ' (' + email + ') on ' - provNoteValue = provNoteValue + utcTime + ' (GMT). No. of bitstreams: ' - provNoteValue = provNoteValue + str(bitstreamCount) - for bitstream in bitstreams: - fileName = bitstream['name'] - size = str(bitstream['sizeBytes']) - checksum = bitstream['checkSum']['value'] - algorithm = bitstream['checkSum']['checkSumAlgorithm'] - provNoteValue = provNoteValue + ' ' + fileName + ': ' + size - provNoteValue = provNoteValue + ' bytes, checkSum: ' + checksum - provNoteValue = provNoteValue + ' (' + algorithm + ')' - provNote['value'] = provNoteValue - - provNote2 = {} - provNote2['key'] = 'dc.description.provenance' - provNote2['language'] = 'en_US' - provNote2Value = 'Made available in DSpace on ' + utcTime - provNote2Value = provNote2Value + ' (GMT). No. of bitstreams: ' - provNote2Value = provNote2Value + str(bitstreamCount) - for bitstream in bitstreams: - fileName = bitstream['name'] - size = str(bitstream['sizeBytes']) - checksum = bitstream['checkSum']['value'] - algorithm = bitstream['checkSum']['checkSumAlgorithm'] - provNote2Value = provNote2Value + ' ' + fileName + ': ' + size - provNote2Value = provNote2Value + ' bytes, checkSum: ' + checksum - provNote2Value = provNote2Value + ' (' + algorithm + ')' - provNote2['value'] = provNote2Value - - # Post provenance notes - provNote = json.dumps([provNote, provNote2]) - post = requests.put(baseURL + itemID + '/metadata', headers=header, - cookies=cookies, verify=verify, data=provNote) - print(post) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/checkInventory.py b/checkInventory.py deleted file mode 100644 index a2a8e2a..0000000 --- a/checkInventory.py +++ /dev/null @@ -1,60 +0,0 @@ -import argparse -import pandas as pd -import os - - -def main(): - """Define main function.""" - # begin: argument parsing - parser = argparse.ArgumentParser() - - parser.add_argument('-i', '--inventory', required=True, - help='csv file containing the inventory. the path, if ' - 'given, can be absolute or relative to this script') - - parser.add_argument('-d', '--dataDir', - help='directory containing the data. if omitted, data ' - 'will be read from the directory containing the ' - 'inventory file') - - parser.add_argument('-f', '--field', - help='field in the csv containing the fileNames. 
' - 'default: name') - - parser.add_argument('-v', '--verbose', action='store_true', - help='increase output verbosity') - - args = parser.parse_args() - - if not args.dataDir: - (args.dataDir, null) = os.path.split(args.inventory) - - if not args.field: - args.field = 'name' - - if args.verbose: - print('verbosity turned on') - print('reading inventory from {}'.format(args.inventory)) - print('fileNames read from field named {}'.format(args.field)) - print('searching for files in {}'.format(args.dataDir)) - # end: argument parsing - - inventory = pd.read_csv(args.inventory, usecols=[args.field]) - fileNames = inventory[args.field] - foundfiles = 0 - missingfiles = 0 - for fileName in fileNames: - if os.path.isfile(args.dataDir + '/' + fileName): - if args.verbose: - print('{} is not missing'.format(fileName)) - foundfiles += 1 - else: - print('{} is missing'.format(fileName)) - missingfiles += 1 - - print('{} files found and {} files \ - missing'.format(foundfiles, missingfiles)) - - -if __name__ == "__main__": - main() diff --git a/compareTwoKeysInCommunity.py b/compareTwoKeysInCommunity.py deleted file mode 100644 index 7d68180..0000000 --- a/compareTwoKeysInCommunity.py +++ /dev/null @@ -1,129 +0,0 @@ -import requests -import csv -import time -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-1', '--key', help='the first key to be output. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-2', '--key2', help='the second key to be output. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-i', '--handle', help='handle of the community to ' - 'retreive. 
optional - if not provided, the script will ' - 'ask for input') -args = parser.parse_args() - -if args.key: - key = args.key -else: - key = input('Enter first key: ') -if args.key2: - key2 = args.key2 -else: - key2 = input('Enter second key: ') -if args.handle: - handle = args.handle -else: - handle = input('Enter community handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -endpoint = baseURL + '/rest/handle/' + handle -community = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -communityID = community['uuid'] - -itemList = [] -endpoint = baseURL + '/rest/communities' -collections = requests.get(baseURL + '/rest/communities/' + str(communityID) - + '/collections', headers=header, cookies=cookies, - verify=verify).json() -for j in range(0, len(collections)): - collectionID = collections[j]['uuid'] - print(collectionID) - if collectionID not in skipColl: - offset = 0 - items = '' - while items != []: - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) - + '/items?limit=200&offset=' + str(offset), - headers=header, cookies=cookies, - verify=verify) - while items.status_code != 200: - time.sleep(5) - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) - + '/items?limit=200&offset=' - + str(offset), headers=header, - cookies=cookies, verify=verify) - items = items.json() - for k in range(0, len(items)): - itemID = items[k]['uuid'] - itemList.append(itemID) - offset = offset + 200 - print(offset) - -dsFunc.elapsedTime(startTime, 'Item list creation time') - -valueList = [] -for number, itemID in enumerate(itemList): - itemsRemaining = len(itemList) - number - print('Items remaining: ', itemsRemaining, 'ItemID: ', itemID) - metadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify).json() - itemTuple = (itemID,) - tupleValue1 = '' - tupleValue2 = '' - for l in range(0, len(metadata)): - if metadata[l]['key'] == key: - metadataValue = metadata[l]['value'] - tupleValue1 = metadataValue - if metadata[l]['key'] == key2: - metadataValue = metadata[l]['value'] - tupleValue2 = metadataValue - itemTuple = itemTuple + (tupleValue1, tupleValue2) - valueList.append(itemTuple) - print(itemTuple) -print(valueList) - -dsFunc.elapsedTime(startTime, 'Value list creation time') - -f = csv.writer(open(filePath + key + '-' + key2 + 'Values.csv', 'w')) -f.writerow(['itemID'] + [key] + [key2]) -for i in range(0, len(valueList)): - f.writerow([valueList[i][0]] + [valueList[i][1]] + [valueList[i][2]]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/countInitialedNamesByCollection.py b/countInitialedNamesByCollection.py deleted file mode 100644 index 64e79ca..0000000 --- a/countInitialedNamesByCollection.py +++ /dev/null @@ -1,116 +0,0 @@ -import requests -import csv -import re -import time -import urllib3 -import 
dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -collectionIds = [] -endpoint = baseURL + '/rest/communities' -communities = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -for community in communities: - communityID = community['uuid'] - collections = requests.get(baseURL + '/rest/communities/' - + str(communityID) + '/collections', - headers=header, cookies=cookies, - verify=verify).json() - for collection in collections: - collectionID = collection['uuid'] - if collectionID not in skipColl: - collectionIds.append(collectionID) - -names = [] -keys = ['dc.contributor.advisor', 'dc.contributor.author', - 'dc.contributor.committeeMember', 'dc.contributor.editor', - 'dc.contributor.illustrator', 'dc.contributor.other', 'dc.creator'] - -f = csv.writer(open('initialCountInCollection.csv', 'w')) -f.writerow(['collectionName'] + ['handle'] + ['initialCount']) - -for number, collectionID in enumerate(collectionIds): - initialCount = 0 - collectionsRemaining = len(collectionIds) - number - print(collectionID, 'Collections remaining: ', collectionsRemaining) - collection = requests.get(baseURL + '/rest/collections/' - + str(collectionID), headers=header, - cookies=cookies, verify=verify).json() - collectionName = collection['name'] - collectionHandle = collection['handle'] - collSels = '&collSel[]=' + collectionID - offset = 0 - recordsEdited = 0 - items = '' - regexCI = r'(\s|,|[A-Z]|([A-Z]\.))[A-Z](\s|$|\.|,)' - regexMI = r'((\w{2,},\s)|(\w{2,},))\w[a-z] + ' - regexPR = r'\(|\)' - while items != []: - for key in keys: - endpoint = baseURL + '/rest/filtered-items?query_field[]=' + key - endpoint += '&query_op[]=exists&query_val[]=' + collSels - endpoint += '&limit=100&offset=' + str(offset) - print(endpoint) - response = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - items = response['items'] - for item in items: - itemLink = item['link'] - metadata = requests.get(baseURL + itemLink + '/metadata', - headers=header, cookies=cookies, - verify=verify).json() - for metadata_element in metadata: - if metadata_element['key'] == key: - indvdl_nm = metadata_element['value'] - for metadata_element in metadata: - if metadata_element['key'] == 'dc.identifier.uri': - uri = metadata_element['value'] - contains_initials = re.search(regexCI, - indvdl_nm) - contains_middleinitial = re.search(regexMI, - indvdl_nm) - contains_parentheses = re.search(regexPR, - indvdl_nm) - if contains_middleinitial: - continue - elif contains_parentheses: - continue - elif contains_initials: - initialCount += 1 - else: - continue - offset = offset + 200 - print(offset) - if initialCount > 0: - f.writerow([collectionName] + [baseURL + '/' + collectionHandle] - + 
[str(initialCount).zfill(6)]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/createItemMetadataFromCSV.py b/createItemMetadataFromCSV.py deleted file mode 100644 index 8d6b7bd..0000000 --- a/createItemMetadataFromCSV.py +++ /dev/null @@ -1,88 +0,0 @@ -# -*- coding: utf-8 -*- -import json -import csv - - -def createMetadataElementCSV(key, valueSource, language): - """Create metadata element.""" - value = row[valueSource] - if value != '': - if language != '': - metadataElement = {'key': key, 'language': language, - 'value': value} - metadata.append(metadataElement) - else: - metadataElement = {'key': key, 'value': value} - metadata.append(metadataElement) - else: - pass - - -def createMetadataElementCSVSplitField(key, valueSource, language): - """Create multiple metadata elements from one field.""" - if row[valueSource] != '': - if '|' in row[valueSource]: - values = row[valueSource].split('|') - for value in values: - if language != '': - metadataElement = {'key': key, 'language': language, - 'value': value} - metadata.append(metadataElement) - else: - metadataElement = {'key': key, 'value': value} - metadata.append(metadataElement) - else: - value = row[valueSource] - if language != '': - metadataElement = {'key': key, 'language': language, - 'value': value} - metadata.append(metadataElement) - else: - metadataElement = {'key': key, 'value': value} - metadata.append(metadataElement) - else: - pass - - -def createMetadataElementDirect(key, value, language): - """Create metadata element with specified value.""" - if language != '': - metadataElement = {'key': key, 'language': language, 'value': value} - metadata.append(metadataElement) - else: - metadataElement = {'key': key, 'value': value} - metadata.append(metadataElement) - - -fileName = input('Enter fileName (including \'.csv\'): ') - -with open(fileName) as csvfile: - reader = csv.DictReader(csvfile) - counter = 0 - metadataGroup = [] - for row in reader: - metadata = [] - createMetadataElementCSV('fileIdentifier', '????', '') - createMetadataElementCSV('dc.contributor.author', '????', '') - createMetadataElementCSV('dc.contributor.other', '????', '') - createMetadataElementCSV('dc.date.issued', '????', '') - createMetadataElementCSV('dc.description.abstract', '????', 'en_US') - createMetadataElementCSV('dc.format.extent', '????', '') - createMetadataElementDirect('dc.format.mimetype', '????', 'en_US') - createMetadataElementDirect('dc.identifier.other', '????', '') - createMetadataElementDirect('dc.language.iso', '????', 'en_US') - createMetadataElementDirect('dc.publisher', '????', 'en_US') - createMetadataElementDirect('dc.relation', 'Access other ?????.', '') - createMetadataElementCSV('dc.relation.ispartof', '????', 'en_US') - createMetadataElementDirect('dc.rights', '????', 'en_US') - createMetadataElementDirect('dc.subject', '????', 'en_US') - createMetadataElementCSV('dc.title', '????', 'en_US') - createMetadataElementDirect('dc.type', '????', 'en_US') - - item = {'metadata': metadata} - metadataGroup.append(item) - counter = counter + 1 - print(counter) - -f = open('metadata.json', 'w') -json.dump(metadataGroup, f) diff --git a/data/.keep b/data/.keep deleted file mode 100644 index e69de29..0000000 diff --git a/deleteBitstreamsFromItem.py b/deleteBitstreamsFromItem.py deleted file mode 100644 index 7649ea1..0000000 --- a/deleteBitstreamsFromItem.py +++ /dev/null @@ -1,73 +0,0 @@ 
-import requests -import time -import csv -from datetime import datetime -import urllib3 -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -itemHandle = input('Enter item handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -bitstreamList = [] -endpoint = baseURL + '/rest/handle/' + itemHandle -item = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -itemID = item['uuid'] -print('itemID = %s' % itemID) -bitstreams = '' -url = baseURL + '/rest/items/' + str(itemID) + '/bitstreams?expand=bitstreams' -bitstreams = requests.get(url, headers=header, cookies=cookies, verify=verify) -while bitstreams.status_code != 200: - time.sleep(5) - bitstreams = requests.get(url, headers=header, cookies=cookies, - verify=verify) -bitstreams = bitstreams.json() -print('found %d bitstreams' % len(bitstreams)) -for k in range(0, len(bitstreams)): - bitstreamID = bitstreams[k]['uuid'] - bitstreamList.append(bitstreamID) - -dsFunc.elapsedTime(startTime, 'Bitstream list creation time') -print(bitstreamList) - -f = csv.writer(open(filePath + 'deletedBitstreams' - + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w')) -f.writerow(['bitstreamID'] + ['delete']) -for number, bitstreamID in enumerate(bitstreamList): - bitstreamsRemaining = len(bitstreamList) - number - print('Bitstreams remaining: ', bitstreamsRemaining, 'bitstreamID: ', - bitstreamID) - delete = requests.delete(baseURL + '/rest/bitstreams/' + str(bitstreamID), - headers=header, cookies=cookies, verify=verify) - print(delete) - f.writerow([bitstreamID] + [delete]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/deleteKeyFromCollection.py b/deleteKeyFromCollection.py deleted file mode 100644 index 685bbc6..0000000 --- a/deleteKeyFromCollection.py +++ /dev/null @@ -1,120 +0,0 @@ -import json -import requests -import time -import csv -from datetime import datetime -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-k', '--deletedKey', help='the key to be deleted. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-i', '--handle', help='handle of the collection to ' - 'retreive. 
optional - if not provided, the script will ' - 'ask for input') -args = parser.parse_args() - -if args.deletedKey: - deletedKey = args.deletedKey -else: - deletedKey = input('Enter the key to be deleted: ') - -if args.handle: - handle = args.handle -else: - handle = input('Enter collection handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -endpoint = baseURL + '/rest/handle/' + handle -collection = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -collectionID = collection['uuid'] -collSels = '&collSel[]=' + collectionID - -f = csv.writer(open(filePath + 'deletedValues' - + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w')) -f.writerow(['handle'] + ['deletedValue'] + ['delete'] + ['post']) -offset = 0 -recordsEdited = 0 -items = '' -itemLinks = [] -while items != []: - endpoint = baseURL + '/rest/filtered-items?query_field[]=' + deletedKey - endpoint += '&query_op[]=exists&query_val[]=' + collSels - endpoint += '&limit=200&offset=' + str(offset) - print(endpoint) - response = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - items = response['items'] - for item in items: - itemMetadataProcessed = [] - itemLink = item['link'] - itemLinks.append(itemLink) - offset = offset + 200 - print(offset) -for itemLink in itemLinks: - itemMetadataProcessed = [] - print(itemLink) - metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify).json() - for l in range(0, len(metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - languageValue = metadata[l]['language'] - if metadata[l]['key'] == deletedKey: - provNote = '\'' + deletedKey - provNote += '\' was deleted through a batch process on ' - provNote += datetime.now().strftime('%Y-%m-%d %H:%M:%S') - provNote += '.' 
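For reference, the deletion scripts in this patch all page through DSpace's /rest/filtered-items endpoint the same way before touching any metadata. A minimal sketch of that paging pattern, assuming the baseURL, header, cookies, and verify values the surrounding script sets up (the function name is illustrative, not part of the deleted code):

import requests


def gather_item_links(baseURL, key, collSels, header, cookies, verify):
    """Collect item links for every item that has the metadata key `key`.

    Mirrors the limit/offset paging used by the surrounding script, which
    requests 200 items per call and stops when an empty page comes back.
    """
    itemLinks = []
    offset = 0
    items = None
    while items != []:
        endpoint = (baseURL + '/rest/filtered-items?query_field[]=' + key
                    + '&query_op[]=exists&query_val[]=' + collSels
                    + '&limit=200&offset=' + str(offset))
        response = requests.get(endpoint, headers=header, cookies=cookies,
                                verify=verify).json()
        items = response['items']
        for item in items:
            itemLinks.append(item['link'])
        offset += 200
    return itemLinks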
- provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = provNote - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - else: - itemMetadataProcessed.append(metadata[l]) - recordsEdited = recordsEdited + 1 - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - print('updated', itemLink, recordsEdited) - delete = requests.delete(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify) - print(delete) - post = requests.put(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify, - data=itemMetadataProcessed) - print(post) - f.writerow([itemLink] + [deletedKey] + [delete] + [post]) - - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/deleteKeyFromCommunity.py b/deleteKeyFromCommunity.py deleted file mode 100644 index a57c777..0000000 --- a/deleteKeyFromCommunity.py +++ /dev/null @@ -1,126 +0,0 @@ -import json -import requests -import time -import csv -from datetime import datetime -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-k', '--deletedKey', help='the key to be deleted. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-i', '--handle', help='handle of the community to ' - 'retreive. optional - if not provided, the script will ' - 'ask for input') -args = parser.parse_args() - -if args.deletedKey: - deletedKey = args.deletedKey -else: - deletedKey = input('Enter the key to be deleted: ') - -if args.handle: - handle = args.handle -else: - handle = input('Enter collection handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -endpoint = baseURL + '/rest/handle/' + handle -community = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -communityID = community['uuid'] -collections = requests.get(baseURL + '/rest/communities/' + str(communityID) - + '/collections', headers=header, cookies=cookies, - verify=verify).json() -collSels = '' -for j in range(0, len(collections)): - collectionID = collections[j]['uuid'] - collSel = '&collSel[]=' + collectionID - collSels = collSels + collSel - -f = csv.writer(open(filePath + 'deletedValues' - + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w')) -f.writerow(['handle'] + ['deletedValue'] + ['delete'] + ['post']) -offset = 0 -recordsEdited = 0 -items = '' -itemLinks = [] -while items != []: - endpoint = baseURL + '/rest/filtered-items?query_field[]=' + deletedKey - endpoint += '&query_op[]=exists&query_val[]=' + collSels - endpoint += 
'&limit=200&offset=' + str(offset) - print(endpoint) - response = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - items = response['items'] - for item in items: - itemMetadataProcessed = [] - itemLink = item['link'] - itemLinks.append(itemLink) - offset = offset + 200 - print(offset) -for itemLink in itemLinks: - itemMetadataProcessed = [] - print(itemLink) - metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify).json() - for l in range(0, len(metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - languageValue = metadata[l]['language'] - if metadata[l]['key'] == deletedKey: - provNote = '\'' + deletedKey - provNote += '\' was deleted through a batch process on ' - provNote += datetime.now().strftime('%Y-%m-%d %H:%M:%S') - provNote += '.' - provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = provNote - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - else: - itemMetadataProcessed.append(metadata[l]) - recordsEdited = recordsEdited + 1 - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - print('updated', itemLink, recordsEdited) - delete = requests.delete(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify) - print(delete) - post = requests.put(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify, - data=itemMetadataProcessed) - print(post) - f.writerow([itemLink] + [deletedKey] + [delete] + [post]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/deleteKeyValuePairFromCollection.py b/deleteKeyValuePairFromCollection.py deleted file mode 100644 index a27a851..0000000 --- a/deleteKeyValuePairFromCollection.py +++ /dev/null @@ -1,130 +0,0 @@ -import json -import requests -import time -import csv -from datetime import datetime -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-k', '--deletedKey', help='the key to be deleted. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-v', '--deletedValue', help='the value to be deleted. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-i', '--handle', help='handle of the community to ' - 'retreive. 
optional - if not provided, the script will ' - 'ask for input') -args = parser.parse_args() - -if args.deletedKey: - deletedKey = args.deletedKey -else: - deletedKey = input('Enter the key to be deleted: ') -if args.deletedValue: - deletedValue = args.deletedValue -else: - deletedValue = input('Enter the value to be deleted: ') -if args.handle: - handle = args.handle -else: - handle = input('Enter collection handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -endpoint = baseURL + '/rest/handle/' + handle -collection = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -collectionID = collection['uuid'] -collSels = '&collSel[]=' + collectionID - -f = csv.writer(open(filePath + 'deletedKey' - + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w')) -f.writerow(['itemID'] + ['deletedKey'] + ['deletedValue'] + ['delete'] - + ['post']) -recordsEdited = 0 -offset = 0 -items = '' -itemLinks = [] -while items != []: - endpoint = baseURL + '/rest/filtered-items?query_field[]=' + deletedKey - endpoint += '&query_op[]=exists&query_val[]=' + collSels - endpoint += '&limit=200&offset=' + str(offset) - print(endpoint) - response = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - items = response['items'] - for item in items: - itemMetadataProcessed = [] - itemLink = item['link'] - itemLinks.append(itemLink) - offset = offset + 200 - print(offset) -for itemLink in itemLinks: - itemMetadataProcessed = [] - print(itemLink) - metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify).json() - for l in range(0, len(metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - key = metadata[l]['key'] - value = metadata[l]['value'] - if key == deletedKey and value == deletedValue: - provNote = '\'' + deletedKey + ':' + deletedValue - provNote += '\' was deleted through a batch process on ' - provNote += datetime.now().strftime('%Y-%m-%d %H:%M:%S') - provNote += '.' 
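The update step repeated across these delete*/replace* scripts swaps an item's full metadata list out and back in: the kept elements plus a dc.description.provenance note are serialized, the old metadata is deleted, and the edited list is PUT back. A hedged, self-contained sketch of that step (helper name and arguments are illustrative, not the deleted code itself):

import json
from datetime import datetime

import requests


def put_edited_metadata(baseURL, itemLink, keptElements, note,
                        header, cookies, verify):
    """Replace an item's metadata with keptElements plus a provenance note.

    Follows the delete-then-PUT pattern used by the scripts in this patch.
    """
    provNoteElement = {'key': 'dc.description.provenance',
                       'value': note + ' through a batch process on '
                       + datetime.now().strftime('%Y-%m-%d %H:%M:%S') + '.',
                       'language': 'en_US'}
    payload = json.dumps(keptElements + [provNoteElement])
    requests.delete(baseURL + itemLink + '/metadata', headers=header,
                    cookies=cookies, verify=verify)
    return requests.put(baseURL + itemLink + '/metadata', headers=header,
                        cookies=cookies, verify=verify, data=payload)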
- provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = provNote - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - else: - itemMetadataProcessed.append(metadata[l]) - if itemMetadataProcessed != metadata: - recordsEdited = recordsEdited + 1 - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - print('updated', itemLink, recordsEdited) - delete = requests.delete(baseURL + itemLink + '/metadata', - headers=header, cookies=cookies, - verify=verify) - print(delete) - post = requests.put(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify, - data=itemMetadataProcessed) - print(post) - f.writerow([itemLink] + [deletedKey] + [deletedValue] + [delete] - + [post]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/dsFunc.py b/dsFunc.py deleted file mode 100644 index f3f7cac..0000000 --- a/dsFunc.py +++ /dev/null @@ -1,48 +0,0 @@ -import datetime -import time -import requests - - -def auth(email, password, baseURL, verify): - """Authenticate the user to the DSpace API.""" - data = {'email': email, 'password': password} - header = {'content-type': 'application/json', - 'accept': 'application/json'} - session = requests.post(baseURL + '/rest/login', headers=header, - verify=verify, - params=data).cookies['JSESSIONID'] - cookies = {'JSESSIONID': session} - return(cookies, header) - - -def authConfirm(cookies, baseURL, header, verify): - """Confirm user was successfully authenticated to the DSpace API.""" - status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() - uName = status['fullname'] - authEmail = status['email'] - print('authenticated', uName, authEmail) - return(uName, authEmail) - - -def elapsedTime(startTime, label): - """Generate elapsed time.""" - td = datetime.timedelta(seconds=time.time() - startTime) - print(label + ': {}'.format(td)) - return td - - -def instSelect(instance): - """Select secrets.py file for the appropriate DSpace instance.""" - if instance != '': - try: - secrets = __import__(instance) - print('Editing ' + secrets.baseURL) - except ImportError: - secrets = __import__('secrets') - print('Editing ' + secrets.baseURL) - else: - secrets = __import__('secrets') - print('Editing ' + secrets.baseURL) - - return secrets diff --git a/editBitstreamsNames.py b/editBitstreamsNames.py deleted file mode 100644 index ed20863..0000000 --- a/editBitstreamsNames.py +++ /dev/null @@ -1,104 +0,0 @@ -import json -import requests -import time -import urllib3 -import csv -from datetime import datetime -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-f', '--fileName', help='the name of the CSV with the ' - 'bitstream name changes. 
optional - if not provided, the ' - 'script will ask for input') -args = parser.parse_args() -if args.uri: - fileName = args.fileName -else: - fileName = input('Enter file name: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, - verify=verify, params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -f = csv.writer(open(filePath + 'editBitstreamName' - + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w')) -f.writerow(['itemID'] + ['oldBitstreamName'] + ['newBitstreamName'] + ['post']) -with open(fileName) as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - oldValue = row['oldFileId'] - newValue = row['newFileId'] - handle = row['handle'] - endpoint = baseURL + '/rest/handle/' + handle - item = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - itemID = str(item['uuid']) - bitstreams = requests.get(baseURL + '/rest/items/' + itemID - + '/bitstreams', headers=header, - cookies=cookies, verify=verify).json() - for bitstream in bitstreams: - oldBitstreamName = bitstream['name'] - bitstreamID = bitstream['link'] - updatedBitstream = json.dumps(bitstream) - print(json.dumps(bitstream)) - updatedBitstream = updatedBitstream.replace(oldValue, newValue) - post = requests.put(baseURL + bitstreamID, headers=header, - cookies=cookies, verify=verify, - data=updatedBitstream) - print(post) - f.writerow([itemID] + [oldValue] + [newValue] + [post]) - updatedItemMetadataList = [] - metadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, - cookies=cookies, verify=verify).json() - for l in range(0, len(metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - updatedItemMetadataList.append(metadata[l]) - provNote = 'Bitstream name changed from ' + oldValue + ' to ' - provNote += newValue + ' through a batch process on ' - provNote += datetime.now().strftime('%Y-%m-%d %H:%M:%S') - provNote += '.' 
- provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = provNote - provNoteElement['language'] = 'en_US' - updatedItemMetadataList.append(provNoteElement) - updatedItemMetadata = json.dumps(updatedItemMetadataList) - delete = requests.delete(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, - cookies=cookies, verify=verify) - print(delete) - post = requests.put(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify, data=updatedItemMetadata) - print(post) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/exportCollectionMetadataToCSV.py b/exportCollectionMetadataToCSV.py deleted file mode 100644 index 8709c3a..0000000 --- a/exportCollectionMetadataToCSV.py +++ /dev/null @@ -1,108 +0,0 @@ -import requests -import time -import csv -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-i', '--handle', help='handle of the collection to ' - 'retreive. optional - if not provided, the script will ' - 'ask for input') -args = parser.parse_args() - -if args.handle: - handle = args.handle -else: - handle = input('Enter collection handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -# authentication -startTime = time.time() -cookies, header = dsFunc.auth(email, password, baseURL, verify) - -uName, authEmail = dsFunc.authConfirm(cookies, baseURL, header, verify) - -endpoint = baseURL + '/rest/handle/' + handle -collection = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -collectionID = collection['uuid'] - -itemList = {} -offset = 0 -items = '' -while items != []: - items = requests.get(baseURL + '/rest/collections/' + str(collectionID) - + '/items?limit=200&offset=' + str(offset), - headers=header, cookies=cookies, verify=verify) - while items.status_code != 200: - time.sleep(5) - items = requests.get(baseURL + '/rest/collections/' + str(collectionID) - + '/items?limit=200&offset=' + str(offset), - headers=header, cookies=cookies, verify=verify) - items = items.json() - for k in range(0, len(items)): - itemID = items[k]['uuid'] - itemHandle = items[k]['handle'] - itemList[itemID] = itemHandle - offset = offset + 200 - print(offset) - -keyList = [] -for itemID in itemList: - print(baseURL + '/rest/items/' + str(itemID) + '/metadata') - metadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify).json() - for metadataElement in metadata: - key = metadataElement['key'] - if key not in keyList and key != 'dc.description.provenance': - keyList.append(key) - print(itemID, key) - -keyListHeader = ['itemID'] -keyListHeader = keyListHeader + keyList -print(keyListHeader) -f = csv.writer(open(filePath + handle.replace('/', '-') + 'Metadata.csv', 'w')) -f.writerow(keyListHeader) - -itemRows = [] -for itemID in itemList: - itemRow = dict.fromkeys(keyListHeader, '') - itemRow['itemID'] = itemID - print(itemID) - metadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', 
headers=header, cookies=cookies, - verify=verify).json() - for metadataElement in metadata: - for key in keyListHeader: - if metadataElement['key'] == key: - try: - value = metadataElement['value'] + '|' - except ValueError: - value = '' + '|' - try: - itemRow[key] = itemRow[key] + value - except ValueError: - itemRow[key] = value - itemList = [] - for key in keyListHeader: - itemList.append(itemRow[key][:len(itemRow[key]) - 1]) - f.writerow(itemList) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/exportSelectedRecordMetadataToCSV.py b/exportSelectedRecordMetadataToCSV.py deleted file mode 100644 index bedf21c..0000000 --- a/exportSelectedRecordMetadataToCSV.py +++ /dev/null @@ -1,97 +0,0 @@ -import requests -import time -import csv -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-f', '--fileName', help='the CSV file of record handles. ' - 'optional - if not provided, the script will ask for ' - 'input') -args = parser.parse_args() - -if args.fileName: - fileName = filePath + args.fileName -else: - fileName = filePath + input('Enter the CSV file of record handles ' - '(including \'.csv\'): ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -# authentication -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - - -handles = [] -with open(fileName) as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - handles.append(row['handle']) - -itemList = [] -for handle in handles: - endpoint = baseURL + '/rest/handle/' + handle - item = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - itemID = item['uuid'] - itemList.append(itemID) - -keyList = [] -for itemID in itemList: - metadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify).json() - for metadataElement in metadata: - key = metadataElement['key'] - if key not in keyList and key != 'dc.description.provenance': - keyList.append(key) - print(itemID, key) - -keyListHeader = ['itemID'] -keyListHeader = keyListHeader + keyList -print(keyListHeader) -f = csv.writer(open(filePath + 'selectedRecordMetadata.csv', 'w')) -f.writerow(keyListHeader) - -itemRows = [] -for itemID in itemList: - itemRow = dict.fromkeys(keyListHeader, '') - itemRow['itemID'] = itemID - print(itemRow) - metadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify).json() - for metadataElement in metadata: - for key in keyListHeader: - if metadataElement['key'] == key: - value = metadataElement['value'] + '|' - try: - itemRow[key] = itemRow[key] + value - except 
ValueError: - itemRow[key] = value - print(itemRow) - for key in keyListHeader: - itemList.append(itemRow[key][:len(itemRow[key]) - 1]) - f.writerow(itemList) diff --git a/fileListMetadataReconcile.py b/fileListMetadataReconcile.py deleted file mode 100644 index 7a648f6..0000000 --- a/fileListMetadataReconcile.py +++ /dev/null @@ -1,112 +0,0 @@ -# -*- coding: utf-8 -*- -import csv -import time -import os -import argparse -import dsFunc - -parser = argparse.ArgumentParser() -parser.add_argument('-d', '--directory', help='the directory of the files. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-f', '--fileNameCSV', help='the metadata CSV file. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-e', '--fileExtension', help='the file extension. ' - 'optional - if not provided, the script will ask for ' - 'input') -args = parser.parse_args() - -if args.directory: - directory = args.directory -else: - directory = input('Enter directory (C:/Test/): ') -if args.fileNameCSV: - fileNameCSV = args.fileNameCSV -else: - fileNameCSV = input('Enter metadata CSV file: ') -if args.fileExtension: - fileExtension = args.fileExtension -else: - fileExtension = input('Enter file extension: ') - -startTime = time.time() -fileIdentifierList = [] -for root, dirs, files in os.walk(directory, topdown=True): - for file in files: - if file.endswith(fileExtension): - file.replace('.' + fileExtension, '') - fileIdentifierList.append(file) - -dsFunc.elapsedTime(startTime, 'File list creation time') - -f = csv.writer(open('collectionfileList.csv', 'w')) -f.writerow(['fileName']) - -for file in fileIdentifierList: - f.writerow([file]) - -metadataIdentifierList = [] -f = csv.writer(open('metadataFileList.csv', 'w')) -f.writerow(['metadataItemID']) -with open(fileNameCSV) as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - value = row['fileIdentifier'] - f.writerow([value]) - metadataIdentifierList.append(value) - -fileMatches = [] -for fileID in fileIdentifierList: - for metadataID in metadataIdentifierList: - if fileID.startswith(metadataID): - fileMatches.append(fileID) - -f = csv.writer(open('filesNotInMetadata.csv', 'w')) -f.writerow(['fileItemID']) -filesNotInMetadata = set(fileIdentifierList) - set(fileMatches) -for file in filesNotInMetadata: - f.writerow([file]) - -metadataMatches = [] -for metadataID in metadataIdentifierList: - for fileID in fileIdentifierList: - if fileID.startswith(metadataID): - metadataMatches.append(metadataID) - -metadataWithNoFiles = set(metadataIdentifierList) - set(metadataMatches) - -with open(fileNameCSV) as csvfile: - f = csv.writer(open('metadataWithNoFiles.csv', 'w')) - reader = csv.DictReader(csvfile) - header = next(reader) - headerRow = [] - for k, v in header.iteritems(): - headerRow.append(k) - f.writerow(headerRow) - for row in reader: - csvRow = [] - for metadata in metadataWithNoFiles: - if metadata == row['fileIdentifier']: - for value in headerRow: - csvRow.append(row[value]) - f.writerow(csvRow) - -with open(fileNameCSV) as csvfile: - f = csv.writer(open('metadataWithFiles.csv', 'w')) - reader = csv.DictReader(csvfile) - header = next(reader) - headerRow = [] - for k, v in header.iteritems(): - headerRow.append(k) - f.writerow(headerRow) - for row in reader: - csvRow = [] - for metadata in metadataMatches: - if metadata == row['fileIdentifier']: - for value in headerRow: - csvRow.append(row[value]) - f.writerow(csvRow) - -# print script run time 
-dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/findBogusUris.py b/findBogusUris.py deleted file mode 100644 index a8c5eb3..0000000 --- a/findBogusUris.py +++ /dev/null @@ -1,74 +0,0 @@ -import requests -import csv -import time -import urllib3 -import dsFunc -import argparse - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-p', '--handlePrefix', help='Enter the handle prefix') -args = parser.parse_args() - -if args.handlePrefix: - handlePrefix = args.handlePrefix -else: - handlePrefix = input('Enter the handle prefix: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, - verify=verify, params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -f = csv.writer(open(filePath + 'bogusUris.csv', 'w')) -f.writerow(['itemID'] + ['uri']) -offset = 0 -recordsEdited = 0 -items = '' -while items != []: - endpoint = baseURL + '/rest/filtered-items?query_field[]=' - endpoint += 'dc.identifier.uri&query_op[]=doesnt_contain' - endpoint += '&query_val[]=' + handlePrefix - endpoint += '&limit=200&offset=' + str(offset) - print(endpoint) - response = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - items = response['items'] - for item in items: - itemMetadataProcessed = [] - itemLink = item['link'] - metadata = requests.get(baseURL + itemLink + '/metadata', - headers=header, cookies=cookies, - verify=verify).json() - for l in range(0, len(metadata)): - if metadata[l]['key'] == 'dc.identifier.uri': - uri = str(metadata[l]['value']) - if uri.startswith(handlePrefix) is False: - f.writerow([itemLink] + [uri]) - offset = offset + 200 - print(offset) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/findDuplicateKeys.py b/findDuplicateKeys.py deleted file mode 100644 index 4350889..0000000 --- a/findDuplicateKeys.py +++ /dev/null @@ -1,75 +0,0 @@ -import json -import requests -import time -import csv -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-k', '--key', help='the key to be searched. 
optional - ' - 'if not provided, the script will ask for input') -args = parser.parse_args() - -if args.key: - key = args.key -else: - key = input('Enter the key: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -searchString = "\"" + key + "\"" - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, - verify=verify, params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -f = csv.writer(open(filePath + 'recordsWithDuplicate-' + key + '.csv', 'w')) -f.writerow(['itemID']) -offset = 0 -recordsEdited = 0 -items = '' -while items != []: - endpoint = baseURL + '/rest/filtered-items?query_field[]=' + key - endpoint += '&query_op[]=exists&query_val[]=&limit=200&offset=' - endpoint += str(offset) - print(endpoint) - response = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - items = response['items'] - for item in items: - itemMetadataProcessed = [] - itemLink = item['link'] - metadata = requests.get(baseURL + itemLink + '/metadata', - headers=header, cookies=cookies, - verify=verify).json() - metadata = json.dumps(metadata) - if metadata.find(searchString) != metadata.rfind(searchString): - f.writerow([itemLink]) - offset = offset + 200 - print(offset) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/generateCollectionLevelAbstract.py b/generateCollectionLevelAbstract.py deleted file mode 100644 index 37262a0..0000000 --- a/generateCollectionLevelAbstract.py +++ /dev/null @@ -1,93 +0,0 @@ -import json -import requests -import csv -import argparse -import urllib3 -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-f', '--fileNameCSV', help='the metadata CSV file. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-i', '--handle', help='handle of the collection. 
' - 'optional - if not provided, the script will ask for ' - 'input') -args = parser.parse_args() - -if args.fileNameCSV: - fileNameCSV = args.fileNameCSV -else: - fileNameCSV = input('Enter the metadata CSV file (including \'.csv\'): ') -if args.handle: - handle = args.handle -else: - handle = input('Enter collection handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -endpoint = baseURL + '/rest/handle/' + handle -collection = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -collectionID = collection['uuid'] -print(collection) - -# Enter abstract text here -abstractText = '' - -seriesTitles = [] - -with open(fileNameCSV) as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - seriesTitle = row['Series title'] - if seriesTitle not in seriesTitles: - seriesTitles.append(seriesTitle) - -seriesLinks = '' - -for seriesTitle in seriesTitles: - handleEdited = handle.replace('/', '%2F') - editedSeriesTitle = seriesTitle.replace(' ', ' + ') - seriesLink = '
  • ' - seriesLinks += seriesTitle + '
  • ' - seriesLinks += seriesLink - -abstractText = '

    ' + abstractText + '

    ' -seriesLinks = '' -introductoryText = abstractText + seriesLinks - -collection['introductoryText'] = introductoryText -collection = json.dumps(collection) -print(collection) -post = requests.put(baseURL + '/rest/collections/' + collectionID, - headers=header, cookies=cookies, verify=verify, - data=collection) -print(post) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) diff --git a/getBitstreams.py b/getBitstreams.py deleted file mode 100644 index cbdf467..0000000 --- a/getBitstreams.py +++ /dev/null @@ -1,273 +0,0 @@ -import requests -import time -import csv -import urllib3 -import argparse -import os -import re -from six.moves import input -import dsFunc - - -def main(): - """Define function.""" - # NOTE: this is the secrets file, not a module - import secrets - - # define defaults - default_response_timeout = 1 - default_limit = 100 - - # define globals for requests, so we needn't pass too many arguments to our - # functions - global header - global cookies - - # begin: argument parsing - parser = argparse.ArgumentParser() - - parser.add_argument('-v', '--verbose', action='store_true', - help='increase output verbosity') - - parser.add_argument('-i', '--handle', - help='handle of the object to retreive. optional - if ' - 'not provided, the script will ask for input') - - # bitstream formats: - # REM: set number of args - # ' + ' == 1 or more. - # '*' == 0 or more. - # '?' == 0 or 1. - # An int is an explicit number of arguments to accept. - parser.add_argument('-f', '--formats', nargs='*', - help='optional list of bitstream formats. will return '' - 'all formats if not provided') - - parser.add_argument('-b', '--bundles', nargs='*', - help='optional list of bundles (e.g. ORIGINAL or ' - 'LICENSE). will return all bundles if not provided') - - parser.add_argument('-dl', '--download', action='store_true', - help='download bitstreams (rather than just retreive ' - 'metadata about them). default: false') - - parser.add_argument('-rt', '--rtimeout', type=int, - help='response timeout - number of seconds to wait ' - 'for a response. not a timeout for a download or run ' - 'of the entire script. default: ' - + str(default_response_timeout)) - - parser.add_argument('-l', '--limit', type=int, - help='limit to the number of objects to return in a ' - 'given request. default: ' + str(default_limit)) - - parser.add_argument('-u', '--baseURL', - help='url of the dspace instance. can be read from ' - 'the secrets file') - - parser.add_argument('-e', '--email', - help='email of an authorized dspace user. can be ' - 'read from the secrets file') - - parser.add_argument('-p', '--password', - help='password of an authorized dspace user. can be ' - 'read from the secrets file') - - parser.add_argument('-d', '--filePath', - help='directory into which output files will be ' - 'written. can be read from the secrets file') - - parser.add_argument('-s', '--verify', - help='ssl verification enabled (boolean) OR the path ' - 'to a CA_BUNDLE file or directory with certificates ' - 'of trusted CAs. use false if using an ssh tunnel to ' - 'connect to the dspace api. 
can be read from the ' - secrets file') - - args = parser.parse_args() - - inst = input('To edit production server, enter the name of the secrets ' - 'file: ') - - secrets = dsFunc.instSelect(inst) - - baseURL = secrets.baseURL - email = secrets.email - password = secrets.password - filePath = secrets.filePath - verify = secrets.verify - skipColl = secrets.skipColl - - if not args.rtimeout: - args.rtimeout = default_response_timeout - - if not args.limit: - args.limit = default_limit - - if not args.baseURL: - args.baseURL = secrets.baseURL - - if not args.email: - args.email = secrets.email - - if not args.password: - args.password = secrets.password - - if not args.filePath: - args.filePath = secrets.filePath - - if not args.verify: - args.verify = secrets.verify - - if args.handle: - handle = args.handle - else: - handle = input('Enter handle: ') - - if args.verbose: - print('verbosity turned on') - - if args.handle: - print('retreiving object with handle {}'.format(args.handle)) - - if args.formats: - print('filtering results to the following bitstream ' - 'formats: {}'.format(args.formats)) - else: - print('returning bitstreams of any format') - - if args.bundles: - print('filtering results to the following bundles: ' - '{}'.format(args.bundles)) - else: - print('returning bitstreams from any bundle') - - if args.download: - print('downloading bitstreams') - - if args.rtimeout: - print('response_timeout set to {}'.format(args.rtimeout)) - - # end: argument parsing - - urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - - startTime = time.time() - data = {'email': args.email, 'password': args.password} - header = {'content-type': 'application/json', 'accept': 'application/json'} - session = requests.post(args.baseURL + '/rest/login', headers=header, - verify=args.verify, params=data, - timeout=args.rtimeout).cookies['JSESSIONID'] - cookies = {'JSESSIONID': session} - print('authenticated') - - # NOTE: expanding items (of collections) and bitstreams (of items) to get - # the count - endpoint = args.baseURL + '/rest/handle/' + handle - endpoint += '?expand=items,bitstreams' - dsObject = requests.get(endpoint, headers=header, cookies=cookies, - verify=args.verify, timeout=args.rtimeout) - dsObject.raise_for_status() # ensure we notice bad responses - dsObject = dsObject.json() - if args.verbose: - print(dsObject) - dsObjectID = dsObject['uuid'] - # TODO: extend - if dsObject['type'] == 'collection': - if args.verbose: - print(dsObject['type']) - - itemCount = len(dsObject['items']) - print('{} items'.format(itemCount)) - for collItem in dsObject['items']: - endpoint = args.baseURL + collItem['link'] + '?expand=bitstreams' - item = requests.get(endpoint, headers=header, cookies=cookies, - verify=args.verify, timeout=args.rtimeout) - item.raise_for_status() # ensure we notice bad responses - item = item.json() - processItem(item, args) - - elif dsObject['type'] == 'item': - processItem(dsObject, args) - - else: - print('object is of an invalid type for this script ({}). 
please ' - 'enter the handle of an item or a ' - 'collection.'.format(dsObject['type'])) - - logout = requests.post(args.baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=args.verify, - timeout=args.rtimeout) - - elapsedTime = time.time() - startTime - m, s = divmod(elapsedTime, 60) - h, m = divmod(m, 60) - print('Total script run time: {:01.0f}:{:02.0f}:{:02.0f}'.format(h, m, s)) - - -def processItem(dsObject, args): - if args.verbose: - print(dsObject['type']) - - itemHandle = dsObject['handle'] - handleID = re.sub(r'.*\/', '', itemHandle) - itemPath = args.filePath + '/' + handleID + '/' - if not os.path.exists(itemPath): - os.makedirs(itemPath) - - f = csv.writer(open(itemPath + handleID + '_bitstreams.csv', 'w')) - f.writerow(['sequenceId'] + ['name'] + ['format'] + ['bundleName']) - - itemID = dsObject['uuid'] - bitstreamCount = len(dsObject['bitstreams']) - dlBitstreams = [] - offset = 0 - limit = args.limit - bitstreams = '' - # while bitstreams != []: - while bitstreamCount > 0: - # don't retreive more bitstreams than we have left - if limit > bitstreamCount: - limit = bitstreamCount - print('bitstreamCount: {0} offset: {1} ' - 'limit: {2}'.format(bitstreamCount, offset, limit)) - bitstreams = requests.get(args.baseURL + '/rest/items/' + str(itemID) - + '/bitstreams?limit=' + str(limit) - + '&offset=' + str(offset), headers=header, - cookies=cookies, verify=args.verify, - timeout=args.rtimeout) - bitstreams.raise_for_status() # ensure we notice bad responses - bitstreams = bitstreams.json() - for bitstream in bitstreams: - if ((args.formats and bitstream['format'] in args.formats - or not args.formats) - and (args.bundles and bitstream['bundleName'] in args.bundles - or not args.bundles)): - if args.verbose: - print(bitstream) - sequenceId = str(bitstream['sequenceId']) - fileName = bitstream['name'] - fileFormat = bitstream['format'] - bundleName = bitstream['bundleName'] - f.writerow([sequenceId] + [fileName] + [fileFormat] - + [bundleName]) - - if args.download: - dlBitstreams.append(bitstream) - offset += limit - bitstreamCount -= limit - - for dlBitstream in dlBitstreams: - if not os.path.isfile(itemPath + dlBitstream['name']): - response = requests.get(args.baseURL - + str(dlBitstream['retrieveLink']), - headers=header, cookies=cookies, - verify=args.verify, timeout=args.rtimeout) - response.raise_for_status() # ensure we notice bad responses - file = open(itemPath + dlBitstream['name'], 'wb') - file.write(response.content) - file.close() - - -if __name__ == "__main__": - main() diff --git a/getCollectionMetadataJson.py b/getCollectionMetadataJson.py deleted file mode 100644 index 24b1ace..0000000 --- a/getCollectionMetadataJson.py +++ /dev/null @@ -1,63 +0,0 @@ -import json -import requests -import time -import urllib3 -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -handle = input('Enter handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, - verify=verify, params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + 
'/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -endpoint = baseURL + '/rest/handle/' + handle -collection = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -collectionID = collection['uuid'] -collectionTitle = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -endpoint = baseURL + '/rest/collections/' + str(collectionID) + '/items' -output = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - -itemList = [] -for i in range(0, len(output)): - name = output[i]['name'] - itemID = output[i]['uuid'] - itemList.append(itemID) - -f = open(filePath + handle.replace('/', '-') + '.json', 'w') -metadataGroup = [] -for itemID in itemList: - metadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify).json() - metadataGroup.append(metadata) -json.dump(metadataGroup, f) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/getCompleteAndUniqueValuesForAllKeys.py b/getCompleteAndUniqueValuesForAllKeys.py deleted file mode 100644 index 9ea91ea..0000000 --- a/getCompleteAndUniqueValuesForAllKeys.py +++ /dev/null @@ -1,119 +0,0 @@ -import requests -import csv -import time -import os.path -from collections import Counter -from datetime import datetime -import urllib3 -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -date = datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '/' -filePathComplete = filePath + 'completeValueLists' + date -filePathUnique = filePath + 'uniqueValueLists' + date - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, - verify=verify, params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -collectionIds = [] -endpoint = baseURL + '/rest/communities' -communities = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -for i in range(0, len(communities)): - communityID = communities[i]['uuid'] - collections = requests.get(baseURL + '/rest/communities/' - + str(communityID) + '/collections', - headers=header, cookies=cookies, - verify=verify).json() - for j in range(0, len(collections)): - collectionID = collections[j]['uuid'] - if collectionID not in skipColl: - collectionIds.append(collectionID) - -os.mkdir(filePathComplete) -os.mkdir(filePathUnique) - -for number, collectionID in enumerate(collectionIds): - collectionsRemaining = len(collectionIds) - number - print(collectionID, 'Collections remaining: ', collectionsRemaining) - collSels = '&collSel[]=' + collectionID - offset = 0 - recordsEdited = 0 - items = '' - while items != []: - setTime = time.time() - endpoint = baseURL - + 
'/rest/filtered-items?query_field[]=*&query_op[]=exists&query_val[]=' - + collSels + '&expand=metadata&limit=20&offset=' + str(offset) - response = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - items = response['items'] - for item in items: - metadata = item['metadata'] - for i in range(0, len(metadata)): - if metadata[i]['key'] != 'dc.description.provenance': - key = metadata[i]['key'] - try: - value = metadata[i]['value'] - except ValueError: - value = '' - for i in range(0, len(metadata)): - if metadata[i]['key'] == 'dc.identifier.uri': - uri = metadata[i]['value'] - if os.path.isfile(filePathComplete + key - + 'ValuesComplete.csv') is False: - f = csv.writer(open(filePathComplete + key - + 'ValuesComplete.csv', 'w')) - f.writerow(['handle'] + ['value']) - f.writerow([uri] + [value]) - else: - f = csv.writer(open(filePathComplete + key - + 'ValuesComplete.csv', 'a')) - f.writerow([uri] + [value]) - offset = offset + 20 - print(offset) - - dsFunc.elapsedTime(setTime, 'Set run time') - - dsFunc.elapsedTime(startTime, 'Collection run time') - -dsFunc.elapsedTime(startTime, 'Complete value list creation time') -# -for fileName in os.listdir(filePathComplete): - reader = csv.DictReader(open(filePathComplete + fileName)) - fileName = fileName.replace('Complete', 'Unique') - valueList = [] - for row in reader: - valueList.append(row['value']) - valueListCount = Counter(valueList) - f = csv.writer(open(filePathUnique + fileName, 'w')) - f.writerow(['value'] + ['count']) - for key, value in valueListCount.items(): - f.writerow([key] + [str(value).zfill(6)]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/getCompleteAndUniqueValuesForAllKeysInCommunity.py b/getCompleteAndUniqueValuesForAllKeysInCommunity.py deleted file mode 100644 index 80d37cb..0000000 --- a/getCompleteAndUniqueValuesForAllKeysInCommunity.py +++ /dev/null @@ -1,120 +0,0 @@ -import requests -import csv -import time -import os.path -from collections import Counter -from datetime import datetime -import urllib3 -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -handle = input('Enter community handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -itemList = [] -endpoint = baseURL + '/rest/handle/' + handle -community = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -communityName = community['name'].replace(' ', '') -communityID = community['uuid'] - -date = datetime.now().strftime('%Y-%m-%d %H.%M.%S') -filePathComplete = filePath + 'completeValueLists' + communityName + date + '/' -filePathUnique = filePath + 'uniqueValueLists' + communityName + date + '/' - 
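Both getCompleteAndUniqueValues* scripts finish by rolling each per-key "ValuesComplete" CSV up into a "ValuesUnique" CSV of value/count pairs. A small sketch of that roll-up, with placeholder file paths:

import csv
from collections import Counter


def write_unique_value_counts(completePath, uniquePath):
    """Summarize a complete value-list CSV into unique value/count rows.

    Same Counter-based roll-up these scripts perform per metadata key; the
    zero-padded count matches their CSV output.
    """
    with open(completePath) as csvfile:
        values = [row['value'] for row in csv.DictReader(csvfile)]
    with open(uniquePath, 'w') as out:
        writer = csv.writer(out)
        writer.writerow(['value', 'count'])
        for value, count in Counter(values).items():
            writer.writerow([value, str(count).zfill(6)])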
-collections = requests.get(baseURL + '/rest/communities/' + str(communityID) - + '/collections', headers=header, cookies=cookies, - verify=verify).json() -for j in range(0, len(collections)): - collectionID = collections[j]['uuid'] - if collectionID not in skipColl: - offset = 0 - items = '' - while items != []: - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) - + '/items?limit=100&offset=' + str(offset), - headers=header, cookies=cookies, - verify=verify) - while items.status_code != 200: - time.sleep(5) - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) - + '/items?limit=100&offset=' - + str(offset), headers=header, - cookies=cookies, verify=verify) - items = items.json() - for k in range(0, len(items)): - itemID = items[k]['uuid'] - itemList.append(itemID) - offset = offset + 100 - -dsFunc.elapsedTime(startTime, 'Item list creation time') - -os.mkdir(filePathComplete) -os.mkdir(filePathUnique) -for number, itemID in enumerate(itemList): - itemsRemaining = len(itemList) - number - print('Items remaining: ', itemsRemaining, 'ItemID: ', itemID) - metadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify).json() - for l in range(0, len(metadata)): - if metadata[l]['key'] != 'dc.description.provenance': - key = metadata[l]['key'] - try: - value = metadata[l]['value'] - except ValueError: - value = '' - fileName = filePathComplete + key + 'ValuesComplete.csv' - if os.path.isfile(fileName) is False: - f = csv.writer(open(fileName, 'w')) - f.writerow(['itemID'] + ['value']) - f.writerow([itemID] + [value]) - else: - f = csv.writer(open(filePathComplete + key - + 'ValuesComplete.csv', 'a')) - f.writerow([itemID] + [value]) - -dsFunc.elapsedTime(startTime, 'Complete value list creation time') - -for fileName in os.listdir(filePathComplete): - reader = csv.DictReader(open(filePathComplete + fileName)) - fileName = fileName.replace('Complete', 'Unique') - valueList = [] - for row in reader: - valueList.append(row['value']) - valueListCount = Counter(valueList) - f = csv.writer(open(filePathUnique + fileName, 'w')) - f.writerow(['value'] + ['count']) - for key, value in valueListCount.items(): - f.writerow([key] + [str(value).zfill(6)]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/getFacultyNamesFromETDs.py b/getFacultyNamesFromETDs.py deleted file mode 100644 index f1dd4de..0000000 --- a/getFacultyNamesFromETDs.py +++ /dev/null @@ -1,98 +0,0 @@ -import requests -import time -import csv -from datetime import datetime -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-i', '--handle', help='handle of the community to ' - 'retreive. 
optional - if not provided, the script will ' - 'ask for input') -args = parser.parse_args() - -if args.handle: - handle = args.handle -else: - handle = input('Enter community handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -endpoint = baseURL + '/rest/handle/' + handle -community = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -communityID = community['uuid'] -collections = requests.get(baseURL + '/rest/communities/' + str(communityID) - + '/collections', headers=header, cookies=cookies, - verify=verify).json() -collSels = '' -for j in range(0, len(collections)): - collectionID = collections[j]['uuid'] - collSel = '&collSel[]=' + collectionID - collSels = collSels + collSel - -date = datetime.now().strftime('%Y-%m-%d %H.%M.%S') - -f = csv.writer(open(filePath + 'EtdFacultyNames' + date + '.csv', 'w')) -f.writerow(['name']) - -nameFields = ['dc.contributor.advisor', 'dc.contributor.committeeMember'] - -facultyNames = [] - -offset = 0 -recordsEdited = 0 -items = '' -while items != []: - endpoint = baseURL + '/rest/filtered-items?&query_val[]=' + collSels - endpoint += '&limit=200&offset=' + str(offset) - print(endpoint) - response = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - items = response['items'] - for item in items: - itemMetadataProcessed = [] - itemLink = item['link'] - metadata = requests.get(baseURL + itemLink + '/metadata', - headers=header, cookies=cookies, - verify=verify).json() - for metadataElement in metadata: - if metadataElement['key'] in nameFields: - facultyName = metadataElement['value'] - if facultyName not in facultyNames: - facultyNames.append(facultyName) - offset = offset + 200 - print(offset) - -for facultyName in facultyNames: - f.writerow([facultyName]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/getGlobalLanguageValues.py b/getGlobalLanguageValues.py deleted file mode 100644 index 0abfc7c..0000000 --- a/getGlobalLanguageValues.py +++ /dev/null @@ -1,89 +0,0 @@ -import requests -import csv -import time -import urllib3 -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] 
-print('authenticated', userFullName) - -itemList = [] -endpoint = baseURL + '/rest/communities' -communities = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -for i in range(0, len(communities)): - communityID = communities[i]['uuid'] - collections = requests.get(baseURL + '/rest/communities/' - + str(communityID) + '/collections', - headers=header, cookies=cookies, - verify=verify).json() - for j in range(0, len(collections)): - collectionID = collections[j]['uuid'] - if collectionID not in skipColl: - offset = 0 - items = '' - while items != []: - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) - + '/items?limit=100&offset=' - + str(offset), headers=header, - cookies=cookies, verify=verify) - while items.status_code != 200: - time.sleep(5) - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) - + '/items?limit=100&offset=' - + str(offset), headers=header, - cookies=cookies, verify=verify) - items = items.json() - for k in range(0, len(items)): - itemID = items[k]['uuid'] - itemList.append(itemID) - offset = offset + 100 - -dsFunc.elapsedTime(startTime, 'Item list creation time') - -valueList = [] -for number, itemID in enumerate(itemList): - itemsRemaining = len(itemList) - number - print('Items remaining: ', itemsRemaining, 'ItemID: ', itemID) - metadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify).json() - for l in range(0, len(metadata)): - metadataValue = metadata[l]['language'] - if metadataValue not in valueList: - valueList.append(metadataValue) - -f = csv.writer(open(filePath + 'globalLanguageValues.csv', 'w')) -f.writerow(['language']) -for m in range(0, len(valueList)): - f.writerow([valueList[m]]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/getHandlesAndBitstreamsFromCollection.py b/getHandlesAndBitstreamsFromCollection.py deleted file mode 100644 index e6d7b26..0000000 --- a/getHandlesAndBitstreamsFromCollection.py +++ /dev/null @@ -1,95 +0,0 @@ -import requests -import time -import csv -import urllib3 -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -handle = input('Enter handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -endpoint = baseURL + '/rest/handle/' + handle -collection = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -collectionID = collection['uuid'] -collectionTitle = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -itemList = {} -offset = 0 -items = '' -while items != []: - items = requests.get(baseURL + '/rest/collections/' + 
str(collectionID) - + '/items?limit=200&offset=' + str(offset), - headers=header, cookies=cookies, verify=verify) - while items.status_code != 200: - time.sleep(5) - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) + '/items?limit=200&offset=' - + str(offset), headers=header, cookies=cookies, - verify=verify) - items = items.json() - for k in range(0, len(items)): - itemID = items[k]['uuid'] - itemID = '/rest/items/' + itemID - itemHandle = items[k]['handle'] - itemList[itemID] = itemHandle - offset = offset + 200 - print(offset) - -handle = handle.replace('/', '-') -f = csv.writer(open(filePath + handle + 'handlesAndBitstreams.csv', 'w')) -f.writerow(['bitstream'] + ['handle'] + ['title'] + ['date'] + ['description']) - -for k, v in itemList.items(): - itemID = k - itemHandle = v - print(itemID) - metadata = requests.get(baseURL + itemID + '/metadata', headers=header, - cookies=cookies, verify=verify).json() - title = '' - date = '' - description = '' - for i in range(0, len(metadata)): - if metadata[i]['key'] == 'dc.title': - title = metadata[i]['value'] - if metadata[i]['key'] == 'dc.date.issued': - date = metadata[i]['value'] - if metadata[i]['key'] == 'dc.description.abstract': - description = metadata[i]['value'] - - bitstreams = requests.get(baseURL + itemID + '/bitstreams', headers=header, - cookies=cookies, verify=verify).json() - for bitstream in bitstreams: - fileName = bitstream['name'] - fileName.replace('.jpg', '') - f.writerow([fileName] + [itemHandle] + [title] + [date] - + [description]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/getLanguageValuesForKeys.py b/getLanguageValuesForKeys.py deleted file mode 100644 index a3ba664..0000000 --- a/getLanguageValuesForKeys.py +++ /dev/null @@ -1,93 +0,0 @@ -import requests -import csv -import time -import urllib3 -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, - verify=verify, params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -itemList = [] -endpoint = baseURL + '/rest/communities' -communities = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -for i in range(0, len(communities)): - communityID = communities[i]['uuid'] - collections = requests.get(baseURL + '/rest/communities/' - + str(communityID) + '/collections', - headers=header, cookies=cookies, - verify=verify).json() - for j in range(0, len(collections)): - collectionID = collections[j]['uuid'] - if collectionID not in skipColl: - offset = 0 - items = '' - while items != []: - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) - + '/items?limit=200&offset=' - + str(offset), headers=header, - cookies=cookies, verify=verify) 
- while items.status_code != 200: - time.sleep(5) - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) - + '/items?limit=200&offset=' - + str(offset), headers=header, - cookies=cookies, verify=verify) - items = items.json() - for k in range(0, len(items)): - itemID = items[k]['uuid'] - itemList.append(itemID) - offset = offset + 200 - -dsFunc.elapsedTime(startTime, 'Item list creation time') - -valueList = [] -for number, itemID in enumerate(itemList): - itemsRemaining = len(itemList) - number - print('Items remaining: ', itemsRemaining, 'ItemID: ', itemID) - metadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify).json() - for l in range(0, len(metadata)): - metadataKeyLanguagePair = {} - metadataKey = metadata[l]['key'] - metadataLanguage = metadata[l]['language'] - metadataKeyLanguagePair[metadataKey] = metadataLanguage - if metadataKeyLanguagePair not in valueList: - valueList.append(metadataKeyLanguagePair) - -f = csv.writer(open(filePath + 'keyLanguageValues.csv', 'w')) -f.writerow(['key'] + ['language']) -for m in range(0, len(valueList)): - for k, v in valueList[m].iteritems(): - f.writerow([k] + [v]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/getRecordsAndValuesForKey.py b/getRecordsAndValuesForKey.py deleted file mode 100644 index af79ccd..0000000 --- a/getRecordsAndValuesForKey.py +++ /dev/null @@ -1,77 +0,0 @@ -import requests -import csv -import time -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-k', '--key', help='the key to be searched. 
optional - ' - 'if not provided, the script will ask for input') -args = parser.parse_args() - -if args.key: - key = args.key -else: - key = input('Enter the key: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -f = csv.writer(open(filePath + 'recordsWith' + key + '.csv', 'w')) -f.writerow(['itemID'] + ['uri'] + [key]) -offset = 0 -recordsEdited = 0 -items = '' -itemLinks = [] -while items != []: - endpoint = baseURL + '/rest/filtered-items?query_field[]=' + key - endpoint += '&query_op[]=exists&query_val[]=&limit=200&offset=' - endpoint += str(offset) - print(endpoint) - response = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - items = response['items'] - for item in items: - itemMetadataProcessed = [] - itemLink = item['link'] - itemLinks.append(itemLink) - offset = offset + 200 - print(offset) -for itemLink in itemLinks: - metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify).json() - for l in range(0, len(metadata)): - if metadata[l]['key'] == key: - metadataValue = metadata[l]['value'] - for l in range(0, len(metadata)): - if metadata[l]['key'] == 'dc.identifier.uri': - uri = metadata[l]['value'] - f.writerow([itemLink] + [uri] + [metadataValue]) -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/getRecordsAndValuesForKeyInCollection.py b/getRecordsAndValuesForKeyInCollection.py deleted file mode 100644 index 3f2b005..0000000 --- a/getRecordsAndValuesForKeyInCollection.py +++ /dev/null @@ -1,92 +0,0 @@ -import requests -import csv -import time -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-k', '--key', help='the key to be searched. optional - ' - 'if not provided, the script will ask for input') -parser.add_argument('-i', '--handle', help='handle of the collection to ' - 'retreive. 
optional - if not provided, the script will ' - 'ask for input') -args = parser.parse_args() - -if args.key: - key = args.key -else: - key = input('Enter the key: ') -if args.handle: - handle = args.handle -else: - handle = input('Enter collection handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -endpoint = baseURL + '/rest/handle/' + handle -collection = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -collectionID = collection['uuid'] -collSels = '&collSel[]=' + collectionID - -f = csv.writer(open(filePath + 'recordsWith' + key + handle.replace('/', '-') - + '.csv', 'w')) -f.writerow(['itemID'] + ['uri'] + [key]) -offset = 0 -recordsEdited = 0 -items = '' -itemLinks = [] -while items != []: - endpoint = baseURL + '/rest/filtered-items?query_field[]=' + key - endpoint += '&query_op[]=exists&query_val[]=' + collSels - endpoint += '&limit=200&offset=' + str(offset) - print(endpoint) - response = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - items = response['items'] - for item in items: - itemMetadataProcessed = [] - itemLink = item['link'] - itemLinks.append(itemLink) - offset = offset + 200 - print(offset) -for itemLink in itemLinks: - metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify).json() - for l in range(0, len(metadata)): - if metadata[l]['key'] == key: - metadataValue = metadata[l]['value'] - for l in range(0, len(metadata)): - if metadata[l]['key'] == 'dc.identifier.uri': - uri = metadata[l]['value'] - f.writerow([itemLink] + [uri] + [metadataValue]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/getRecordsWithKeyAndValue.py b/getRecordsWithKeyAndValue.py deleted file mode 100644 index 335e404..0000000 --- a/getRecordsWithKeyAndValue.py +++ /dev/null @@ -1,84 +0,0 @@ -import requests -import csv -import time -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-k', '--key', help='the key to be searched. optional - ' - 'if not provided, the script will ask for input') -parser.add_argument('-v', '--value', help='the value to be searched. 
optional ' - '- if not provided, the script will ask for input') -args = parser.parse_args() - -if args.key: - key = args.key -else: - key = input('Enter the key: ') -if args.value: - value = args.value -else: - value = input('Enter the value: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -f = csv.writer(open(filePath + 'Key=' + key + ' Value=' + value + '.csv', 'w')) -f.writerow(['itemID'] + ['uri'] + ['key'] + ['value']) -offset = 0 -recordsEdited = 0 -items = '' -itemLinks = [] -while items != []: - endpoint = baseURL + '/rest/filtered-items?query_field[]=' + key - endpoint += '&query_op[]=equals&query_val[]=' + value - endpoint += '&limit=200&offset=' + str(offset) - print(endpoint) - response = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - items = response['items'] - for item in items: - itemMetadataProcessed = [] - itemLink = item['link'] - itemLinks.append(itemLink) - offset = offset + 200 - print(offset) -for itemLink in itemLinks: - metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify).json() - for i in range(0, len(metadata)): - if metadata[i]['key'] == key and metadata[i]['value'] == value: - metadataValue = metadata[i]['value'] - for i in range(0, len(metadata)): - if metadata[i]['key'] == 'dc.identifier.uri': - uri = metadata[i]['value'] - f.writerow([itemLink] + [uri] + [key] + [metadataValue]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/identifyItemsMissingKeyInCommunity.py b/identifyItemsMissingKeyInCommunity.py deleted file mode 100644 index ffa7d0a..0000000 --- a/identifyItemsMissingKeyInCommunity.py +++ /dev/null @@ -1,97 +0,0 @@ -import requests -import time -import csv -from datetime import datetime -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-k', '--key', help='the key to be searched. optional - ' - 'if not provided, the script will ask for input') -parser.add_argument('-i', '--handle', help='handle of the community to ' - 'retreive. 
optional - if not provided, the script will ' - 'ask for input') -args = parser.parse_args() - -if args.key: - key = args.key -else: - key = input('Enter the key to be searched: ') - -if args.handle: - handle = args.handle -else: - handle = input('Enter collection handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, - verify=verify, params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -endpoint = baseURL + '/rest/handle/' + handle -community = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -communityID = community['uuid'] -collections = requests.get(baseURL + '/rest/communities/' + str(communityID) - + '/collections', headers=header, cookies=cookies, - verify=verify).json() -collSels = '' -for j in range(0, len(collections)): - collectionID = collections[j]['uuid'] - collSel = '&collSel[]=' + collectionID - collSels = collSels + collSel - -date = datetime.now().strftime('%Y-%m-%d %H.%M.%S') -f = csv.writer(open(filePath + 'recordsMissing' + key + date + '.csv', 'w')) -f.writerow(['itemID'] + ['key']) -offset = 0 -recordsEdited = 0 -items = '' -itemLinks = [] -while items != []: - endpoint = baseURL + '/rest/filtered-items?query_field[]=' + key - endpoint += '&query_op[]=doesnt_exist&query_val[]=' + collSels - endpoint += '&limit=200&offset=' + str(offset) - print(endpoint) - response = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - items = response['items'] - for item in items: - itemMetadataProcessed = [] - itemLink = item['link'] - offset = offset + 200 - print(offset) -for itemLink in itemLinks: - metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify).json() - for metadataElement in metadata: - itemMetadataProcessed.append(metadataElement['key']) - if key not in itemMetadataProcessed: - f.writerow([itemLink]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/metadataCollectionsKeysMatrix.py b/metadataCollectionsKeysMatrix.py deleted file mode 100644 index 7d1bc45..0000000 --- a/metadataCollectionsKeysMatrix.py +++ /dev/null @@ -1,154 +0,0 @@ -import requests -import time -import csv -import urllib3 -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -# authentication -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() 
-userFullName = status['fullname'] -print('authenticated', userFullName) - -endpoint = baseURL + '/rest/communities' -communities = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - -# create list of all item IDs -itemList = [] -endpoint = baseURL + '/rest/communities' -communities = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -for i in range(0, len(communities)): - communityID = communities[i]['uuid'] - collections = requests.get(baseURL + '/rest/communities/' - + str(communityID) + '/collections', - headers=header, cookies=cookies, - verify=verify).json() - for j in range(0, len(collections)): - collectionID = collections[j]['uuid'] - print(collectionID) - if collectionID not in skipColl: - offset = 0 - items = '' - while items != []: - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) - + '/items?limit=200&offset=' - + str(offset), headers=header, - cookies=cookies, verify=verify) - while items.status_code != 200: - time.sleep(5) - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) - + '/items?limit=200&offset=' - + str(offset), headers=header, - cookies=cookies, verify=verify) - items = items.json() - for k in range(0, len(items)): - itemID = items[k]['uuid'] - itemList.append(itemID) - offset = offset + 200 - print(offset) - -dsFunc.elapsedTime(startTime, 'Item list creation time') - -# retrieve metadata from all items -keyList = [] -for itemID in itemList: - print(itemID) - metadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify).json() - for i in range(0, len(metadata)): - key = metadata[i]['key'] - if key not in keyList: - keyList.append(key) - -keyListHeader = ['collectionNameColumn'] -keyList.sort() -keyListHeader = keyListHeader + keyList -f = csv.writer(open(filePath + 'collectionsKeysMatrix.csv', 'w')) -f.writerow(keyListHeader) - -for i in range(0, len(communities)): - communityID = communities[i]['uuid'] - communityName = communities[i]['name'] - collections = requests.get(baseURL + '/rest/communities/' - + str(communityID) + '/collections', - headers=header, cookies=cookies, - verify=verify).json() - for j in range(0, len(collections)): - collectionID = collections[j]['uuid'] - if collectionID not in skipColl: - print('Collection skipped') - else: - collectionItemList = [] - collectionName = collections[j]['name'] - fullName = communityName + ' - ' + collectionName - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) + '/items?limit=5000', - headers=header, cookies=cookies, - verify=verify) - while items.status_code != 200: - time.sleep(5) - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) + '/items?limit=5000', - headers=header, cookies=cookies, - verify=verify) - items = items.json() - for i in range(0, len(items)): - itemID = items[i]['uuid'] - collectionItemList.append(itemID) - - collectionKeyCount = {} - for key in keyList: - collectionKeyCount[key] = 0 - for itemID in collectionItemList: - print(itemID) - metadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, - cookies=cookies, verify=verify).json() - for i in range(0, len(metadata)): - itemKey = metadata[i]['key'] - for key in keyList: - if itemKey == key: - collectionKeyCount[key] += 1 - - collectionKeyCountList = [] - for k, v in collectionKeyCount.items(): - collectionKeyCountList.append(k + ' ' + str(v)) - collectionKeyCountList.sort() - 
updatedCollKeyCountList = [] - for entry in collectionKeyCountList: - count = entry[entry.index(' ') + 1:] - updatedCollKeyCountList.append(count) - fullName = [fullName] - updatedCollKeyCountList = fullName + updatedCollKeyCountList - f.writerow(updatedCollKeyCountList) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/metadataOverview.py b/metadataOverview.py deleted file mode 100644 index 9da9783..0000000 --- a/metadataOverview.py +++ /dev/null @@ -1,151 +0,0 @@ -import requests -import time -import csv -from collections import Counter -import urllib3 -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -# authentication -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -f = csv.writer(open(filePath + 'collectionStats.csv', 'w')) -f.writerow(['Name'] + ['collectionID'] + ['collectionHandle'] - + ['numberOfItems']) - -itemList = [] -endpoint = baseURL + '/rest/communities' -communities = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -for i in range(0, len(communities)): - communityID = communities[i]['uuid'] - communityName = communities[i]['name'] - collections = requests.get(baseURL + '/rest/communities/' - + str(communityID) + '/collections', - headers=header, cookies=cookies, - verify=verify).json() - for j in range(0, len(collections)): - collectionID = collections[j]['uuid'] - collectionID = collections[j]['uuid'] - numberItems = collections[j]['numberItems'] - collectionName = collections[j]['name'] - collectionHandle = collections[j]['handle'] - fullName = communityName + ' - ' + collectionName - print(collectionID) - if collectionID not in skipColl: - offset = 0 - items = '' - while items != []: - f.writerow([fullName] + [collectionID] + [collectionHandle] - + [str(numberItems).zfill(6)]) - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) - + '/items?limit=200&offset=' - + str(offset), headers=header, - cookies=cookies, verify=verify) - while items.status_code != 200: - time.sleep(5) - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) - + '/items?limit=200&offset=' - + str(offset), headers=header, - cookies=cookies, verify=verify) - items = items.json() - for k in range(0, len(items)): - itemID = items[k]['uuid'] - concat = str(communityID) + ':' + str(collectionID) + '|' - + str(itemID) - itemList.append(concat) - offset = offset + 200 - print(offset) - -dsFunc.elapsedTime(startTime, 'Item list creation time') - -# retrieve metadata from all items -keyList = [] -dcTypeList = [] -keyCount = [] -f = csv.writer(open(filePath + 'dspaceIDs.csv', 'w')) -f.writerow(['communityID'] + ['collectionID'] + ['itemID']) -for concat 
in itemList: - communityID = concat[:concat.find(':')] - collectionID = concat[concat.find(':') + 1:concat.find('|')] - itemID = concat[concat.find('|') + 1:] - f.writerow([communityID] + [collectionID] + [itemID]) - concat = concat[:concat.find('|')] - print(itemID) - metadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify).json() - for i in range(0, len(metadata)): - key = metadata[i]['key'] - keyCount.append(key) - keyConcat = concat + '|' + metadata[i]['key'] - if keyConcat not in keyList: - keyList.append(keyConcat) - if metadata[i]['key'] == 'dc.type': - dcType = metadata[i]['value'] - if dcType not in dcTypeList: - dcTypeList.append(dcType) - -print('writing types') -f = csv.writer(open(filePath + 'dspaceTypes.csv', 'w')) -f.writerow(['type']) -for dcType in dcTypeList: - f.writerow([dcType]) - -print('writing global key counts') -f = csv.writer(open(filePath + 'keyCount.csv', 'w')) -f.writerow(['key'] + ['count']) -countDict = Counter(keyCount) -for key, value in countDict.items(): - f.writerow([key] + [str(value).zfill(6)]) - -print('writing collection metadata keys') -f = csv.writer(open(filePath + 'collectionMetadataKeys.csv', 'w')) -f.writerow(['fullName'] + ['collectionID'] + ['collectionHandle'] + ['key']) -for concat in keyList: - communityID = concat[:concat.find(':')] - collectionID = concat[concat.find(':') + 1:concat.find('|')] - key = concat[concat.rfind('|') + 1:] - additionalDataCommunity = requests.get(baseURL + '/rest/communities/' - + str(communityID), headers=header, - cookies=cookies, - verify=verify).json() - communityName = additionalDataCommunity['name'] - additionalDataCollection = requests.get(baseURL + '/rest/collections/' - + str(collectionID), - headers=header, cookies=cookies, - verify=verify).json() - collectionName = additionalDataCollection['name'] - collectionHandle = additionalDataCollection['handle'] - fullName = communityName + ' - ' + collectionName - f.writerow([fullName] + [collectionID] + [collectionHandle] + [key]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/overwriteExistingMetadata.py b/overwriteExistingMetadata.py deleted file mode 100644 index ab15143..0000000 --- a/overwriteExistingMetadata.py +++ /dev/null @@ -1,116 +0,0 @@ -import json -import requests -import time -import csv -from datetime import datetime -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-f', '--fileName', help='the name of the CSV with ' - 'handles and file identifiers. 
optional - if not ' - 'provided, the script will ask for input') -args = parser.parse_args() -if args.fileName: - fileName = args.fileName -else: - fileName = input('Enter file name: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -handleIdDict = {} -with open(fileName) as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - fileIdentifier = row['fileId'] - handle = row['handle'] - handleIdDict[fileIdentifier] = handle -print(handleIdDict) -id = input('test') - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -collectionMetadata = json.load(open('metadataOverwrite.json')) - -f = csv.writer(open(filePath + 'metadataOverwrite' - + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w')) -f.writerow(['itemID'] + ['delete'] + ['post']) - -for k, v in handleIdDict.items(): - for itemMetadata in collectionMetadata: - updatedItemMetadata = {} - updatedItemMetadataList = [] - for element in itemMetadata['metadata']: - if element['key'] == 'fileIdentifier': - fileIdentifier = element['value'] - else: - updatedItemMetadataList.append(element) - uriElement = {} - uriElement['key'] = 'dc.identifier.uri' - uriElement['value'] = 'http://jhir.library.jhu.edu/handle/' + v - updatedItemMetadataList.append(uriElement) - provNote = ('Item metadata updated through a batch process on ' - + datetime.now().strftime('%Y-%m-%d %H:%M:%S') + '.') - provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = provNote - provNoteElement['language'] = 'en_US' - updatedItemMetadataList.append(provNoteElement) - - if fileIdentifier == k: - print(fileIdentifier) - endpoint = baseURL + '/rest/handle/' + v - item = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - itemID = item['uuid'] - metadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, - cookies=cookies, verify=verify).json() - for l in range(0, len(metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - if metadata[l]['key'] == 'dc.description.provenance': - updatedItemMetadataList.append(metadata[l]) - if metadata[l]['key'] == 'dc.date.available': - updatedItemMetadataList.append(metadata[l]) - if metadata[l]['key'] == 'dc.date.accessioned': - updatedItemMetadataList.append(metadata[l]) - updatedItemMetadata = json.dumps(updatedItemMetadataList) - delete = requests.delete(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, - cookies=cookies, verify=verify) - print(delete) - post = requests.put(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify, data=updatedItemMetadata) - print(post) - f.writerow([itemID] + [delete] + [post]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/postCollection.py b/postCollection.py deleted file mode 100644 index 845e865..0000000 --- a/postCollection.py +++ /dev/null @@ 
-1,215 +0,0 @@ -import json -import requests -import datetime -import time -import os -import csv -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-d', '--directory', help='the directory of the files. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-e', '--fileExtension', help='the file extension. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-i', '--communityHandle', help='handle of the community. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-n', '--collectionName', help='the name of the ' - 'collection. optional - if not provided, the script will ' - 'ask for input') -args = parser.parse_args() - -if args.directory: - directory = args.directory -else: - directory = input('Enter directory (C:/Test/): ') -if args.fileExtension: - fileExtension = args.fileExtension -else: - fileExtension = input('Enter file extension: ') -if args.communityHandle: - communityHandle = args.communityHandle -else: - communityHandle = input('Enter community handle: ') -if args.collectionName: - collectionName = args.collectionName -else: - collectionName = input('Enter collection name: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) -headerFileUpload = {'accept': 'application/json'} - -# create file list and export csv -fileList = {} -for root, dirs, files in os.walk(directory, topdown=True): - print('building file list') - for file in files: - if file.endswith(fileExtension): - fullFilePath = os.path.join(root, file).replace('\\', '/') - fileList[file[:file.index('.')]] = fullFilePath - -dsFunc.elapsedTime(startTime, 'File list creation time') - -f = csv.writer(open(collectionName.replace(' ', '') + 'fileList.csv', 'w')) -f.writerow(['itemID']) - -for k, v in fileList.items(): - f.writerow([v[v.rindex('/') + 1:]]) - -f2 = open('fileListDict.txt', 'w') -f2.write(json.dumps(fileList)) - -# Use this section of code if 'fileListDict.txt' has already been generated and -# comment out lines 64-83. This is useful if uploading a very large collection -# as generating the file list will take some time. 
-# f3=open('fileListDict.txt', 'r') -# fileList = json.load(f3) - -# Get community ID -endpoint = baseURL + '/rest/handle/' + communityHandle -community = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -communityID = str(community['uuid']) - -# Post collection -collection = json.dumps({'name': collectionName}) -post = requests.post(baseURL + '/rest/communities/' + communityID - + '/collections', headers=header, cookies=cookies, - verify=verify, data=collection).json() -collectionID = post['link'] - -# Post items -collectionMetadata = json.load(open(directory + '/metadata.json')) -for itemMetadata in collectionMetadata: - fileExists = '' - updatedItemMetadata = {} - updatedItemMetadataList = [] - for element in itemMetadata['metadata']: - if element['key'] == 'fileIdentifier': - fileIdentifier = element['value'] - else: - updatedItemMetadataList.append(element) - updatedItemMetadata['metadata'] = updatedItemMetadataList - updatedItemMetadata = json.dumps(updatedItemMetadata) - for k in fileList: - if fileIdentifier in k: - fileExists = True - if fileExists is True: - print(fileIdentifier) - post = requests.post(baseURL + collectionID + '/items', headers=header, - cookies=cookies, verify=verify, - data=updatedItemMetadata).json() - print(json.dumps(post)) - itemID = post['link'] - - # #Post bitstream - front and back. Deprecated method - # for k, v in fileList.items(): - # if k == fileIdentifier + '-Front': - # bitstream = fileList[k] - # fileName = bitstream[bitstream.rfind('/') + 1:] - # data = open(bitstream, 'rb') - # post = requests.post(baseURL + itemID + '/bitstreams?name=' - # + fileName, headers=headerFileUpload, - # verify=verify, data=data).json() - # print(post) - # - # for k, v in fileList.items(): - # if k == fileIdentifier + '-Back': - # bitstream = fileList[k] - # fileName = bitstream[bitstream.rfind('/') + 1:] - # data = open(bitstream, 'rb') - # post = requests.post(baseURL + itemID + '/bitstreams?name=' - # + fileName, headers=headerFileUpload, - # verify=verify, data=data).json() - # print(post) - - # Post bitstream - starts with file identifier - for k, v in fileList.items(): - if k.startswith(fileIdentifier): - bitstream = fileList[k] - fileName = bitstream[bitstream.rfind('/') + 1:] - data = open(bitstream, 'rb') - post = requests.post(baseURL + itemID + '/bitstreams?name=' - + fileName, headers=headerFileUpload, - cookies=cookies, verify=verify, - data=data).json() - print(json.dumps(post)) - - # Create provenance notes - provNote = {} - provNote['key'] = 'dc.description.provenance' - provNote['language'] = 'en_US' - utc = datetime.datetime.utcnow() - utcTime = utc.strftime('%Y-%m-%dT%H:%M:%SZ') - bitstreams = requests.get(baseURL + itemID + '/bitstreams', - headers=header, cookies=cookies, - verify=verify).json() - bitstreamCount = len(bitstreams) - provNoteValue = ('Submitted by ' + userFullName + ' (' + email + ')' - + ' on ' + utcTime + ' (GMT). No. of bitstreams: ' - + str(bitstreamCount)) - for bitstream in bitstreams: - fileName = bitstream['name'] - size = str(bitstream['sizeBytes']) - checksum = bitstream['checkSum']['value'] - algorithm = bitstream['checkSum']['checkSumAlgorithm'] - provNoteValue = (provNoteValue + ' ' + fileName + ': ' + size - + ' bytes, checkSum: ' + checksum + ' (' - + algorithm + ')') - provNote['value'] = provNoteValue - - provNote2 = {} - provNote2['key'] = 'dc.description.provenance' - provNote2['language'] = 'en_US' - - provNote2Value = ('Made available in DSpace on ' + utcTime - + ' (GMT). No. 
of bitstreams: ' - + str(bitstreamCount)) - for bitstream in bitstreams: - fileName = bitstream['name'] - size = str(bitstream['sizeBytes']) - checksum = bitstream['checkSum']['value'] - algorithm = bitstream['checkSum']['checkSumAlgorithm'] - provNote2Value = (provNote2Value + ' ' + fileName + ': ' + size - + ' bytes, checkSum: ' + checksum + ' (' - + algorithm + ')') - provNote2['value'] = provNote2Value - - # Post provenance notes - provNote = json.dumps([provNote, provNote2]) - post = requests.put(baseURL + itemID + '/metadata', headers=header, - cookies=cookies, verify=verify, data=provNote) - print(post) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/pull-request-template.md b/pull-request-template.md deleted file mode 100644 index 4106c26..0000000 --- a/pull-request-template.md +++ /dev/null @@ -1,5 +0,0 @@ -#### What does this PR do? - - -#### Includes new or updated dependencies? -YES|NO diff --git a/removeDuplicateKeyValuePairsFromItems.py b/removeDuplicateKeyValuePairsFromItems.py deleted file mode 100644 index c1eda9f..0000000 --- a/removeDuplicateKeyValuePairsFromItems.py +++ /dev/null @@ -1,124 +0,0 @@ -import json -import requests -import time -import csv -from datetime import datetime -import urllib3 -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -itemList = [] -endpoint = baseURL + '/rest/communities' -communities = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -for i in range(0, len(communities)): - communityID = communities[i]['uuid'] - collections = requests.get(baseURL + '/rest/communities/' - + str(communityID) + '/collections', - headers=header, cookies=cookies, - verify=verify).json() - for j in range(0, len(collections)): - collectionID = collections[j]['uuid'] - if collectionID not in skipColl: - offset = 0 - items = '' - while items != []: - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) - + '/items?limit=200&offset=' - + str(offset), headers=header, - cookies=cookies, verify=verify) - while items.status_code != 200: - time.sleep(5) - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) - + '/items?limit=200&offset=' - + str(offset), headers=header, - cookies=cookies, verify=verify) - items = items.json() - for k in range(0, len(items)): - itemID = items[k]['uuid'] - itemList.append(itemID) - offset = offset + 200 - -dsFunc.elapsedTime(startTime, 'Item list creation time') - -f = csv.writer(open(filePath + 'DuplicateKeysRemoved' - + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w')) -f.writerow(['itemID'] + ['key:value']) -for number, 
itemID in enumerate(itemList): - itemMetadataProcessed = [] - keyValueList = [] - itemsRemaining = len(itemList) - number - print('Items remaining: ', itemsRemaining, 'ItemID: ', itemID) - metadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify).json() - changeRecord = False - for metadataElement in metadata: - metadataElement.pop('schema', None) - metadataElement.pop('element', None) - metadataElement.pop('qualifier', None) - key = metadataElement['key'] - try: - value = metadataElement['value'] - except ValueError: - value = '' - if key != 'dc.description.provenance': - keyValue = {'key': key, 'value': value} - if keyValue not in keyValueList: - itemMetadataProcessed.append(metadataElement) - keyValueList.append(keyValue) - else: - f.writerow([itemID] + [keyValue]) - currTime = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - provNote = 'A duplicate element, \'' + key + ': ' + value - provNote += ',\' was removed through a batch process' - provNote += 'on ' + currTime + '.' - provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = provNote - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - changeRecord = True - if changeRecord is True: - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - print(itemID) - delete = requests.delete(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, - cookies=cookies, verify=verify) - print(delete) - post = requests.put(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify, data=itemMetadataProcessed) - print(post) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/replaceKey.py b/replaceKey.py deleted file mode 100644 index 01ffc7e..0000000 --- a/replaceKey.py +++ /dev/null @@ -1,120 +0,0 @@ -import json -import requests -import time -import csv -from datetime import datetime -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-1', '--replacedKey', help='the key to be replaced. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-2', '--replacementKey', help='the replacement key. 
' - 'optional - if not provided, the script will ask for ' - 'input') -args = parser.parse_args() - -if args.replacedKey: - replacedKey = args.replacedKey -else: - replacedKey = input('Enter the key to be replaced: ') -if args.replacementKey: - replacementKey = args.replacementKey -else: - replacementKey = input('Enter the replacement key: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -date = datetime.now().strftime('%Y-%m-%d %H.%M.%S') -f = csv.writer(open(filePath + 'replaceKey' + date + '.csv', 'w')) -f.writerow(['itemID'] + ['replacedKey'] + ['replacedValue'] + ['delete'] - + ['post']) -offset = 0 -recordsEdited = 0 -items = '' -itemLinks = [] -while items != []: - endpoint = baseURL + '/rest/filtered-items?query_field[]=' - endpoint += replacedKey - endpoint += '&query_op[]=exists&query_val[]=&limit=200&offset=' - endpoint += str(offset) - print(endpoint) - response = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - items = response['items'] - for item in items: - itemMetadataProcessed = [] - itemLink = item['link'] - itemLinks.append(itemLink) - offset = offset + 200 - print(offset) -for itemLink in itemLinks: - itemMetadataProcessed = [] - print(itemLink) - metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify).json() - for l in range(0, len(metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - if metadata[l]['key'] == replacedKey: - replacedElement = metadata[l] - updatedMetadataElement = {} - updatedMetadataElement['key'] = replacementKey - updatedMetadataElement['value'] = replacedElement['value'] - updatedMetadataElement['language'] = replacedElement['language'] - print(updatedMetadataElement) - itemMetadataProcessed.append(updatedMetadataElement) - provNote = '\'' + replacedKey + '\' was replaced by \'' - provNote += replacementKey - date = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - provNote += '\' through a batch process on ' + date + '.' 
- provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = provNote - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - else: - if metadata[l] not in itemMetadataProcessed: - itemMetadataProcessed.append(metadata[l]) - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - delete = requests.delete(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify) - print(delete) - post = requests.put(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify, - data=itemMetadataProcessed) - print(post) - f.writerow([itemLink] + [replacedElement['key']] - + [replacedElement['value']] + [delete] + [post]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/replaceKeyForCollection.py b/replaceKeyForCollection.py deleted file mode 100644 index f91e58c..0000000 --- a/replaceKeyForCollection.py +++ /dev/null @@ -1,132 +0,0 @@ -import json -import requests -import time -import csv -from datetime import datetime -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-1', '--replacedKey', help='the key to be replaced. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-2', '--replacementKey', help='the replacement key. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-i', '--handle', help='handle of the collection to ' - 'retreive. 
optional - if not provided, the script will ' - 'ask for input') -args = parser.parse_args() - -if args.replacedKey: - replacedKey = args.replacedKey -else: - replacedKey = input('Enter the key to be replaced: ') -if args.replacementKey: - replacementKey = args.replacementKey -else: - replacementKey = input('Enter the replacement key: ') -if args.handle: - handle = args.handle -else: - handle = input('Enter collection handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -endpoint = baseURL + '/rest/handle/' + handle -collection = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -collectionID = collection['uuid'] -collSels = '&collSel[]=' + collectionID -date = datetime.now().strftime('%Y-%m-%d %H.%M.%S') -f = csv.writer(open(filePath + 'replaceKey' + date + '.csv', 'w')) -f.writerow(['itemID'] + ['replacedKey'] + ['replacedValue'] + ['delete'] - + ['post']) -offset = 0 -recordsEdited = 0 -items = '' -itemLinks = [] -while items != []: - endpoint = baseURL + '/rest/filtered-items?query_field[]=' - endpoint += replacedKey + '&query_op[]=exists&query_val[]=' - endpoint += collSels + '&limit=200&offset=' + str(offset) - print(endpoint) - response = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - items = response['items'] - for item in items: - itemMetadataProcessed = [] - itemLink = item['link'] - itemLinks.append(itemLink) - offset = offset + 200 - print(offset) -for itemLink in itemLinks: - itemMetadataProcessed = [] - print(itemLink) - metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify).json() - for l in range(0, len(metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - if metadata[l]['key'] == replacedKey: - replacedElement = metadata[l] - updatedMetadataElement = {} - updatedMetadataElement['key'] = replacementKey - updatedMetadataElement['value'] = replacedElement['value'] - updatedMetadataElement['language'] = replacedElement['language'] - print(updatedMetadataElement) - itemMetadataProcessed.append(updatedMetadataElement) - provNote = '\'' + replacedKey + '\' was replaced by \'' - provNote += replacementKey - provNote += '\' through a batch process on ' - date = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + '.' 
- provNote += date - provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = provNote - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - else: - if metadata[l] not in itemMetadataProcessed: - itemMetadataProcessed.append(metadata[l]) - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - delete = requests.delete(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify) - print(delete) - post = requests.put(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify, - data=itemMetadataProcessed) - print(post) - f.writerow([itemLink] + [replacedElement['key']] - + [replacedElement['value']] + [delete] + [post]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/replaceKeyForCommunity.py b/replaceKeyForCommunity.py deleted file mode 100644 index e98957b..0000000 --- a/replaceKeyForCommunity.py +++ /dev/null @@ -1,139 +0,0 @@ -import json -import requests -import time -import csv -from datetime import datetime -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-1', '--replacedKey', help='the key to be replaced. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-2', '--replacementKey', help='the replacement key. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-i', '--handle', help='handle of the community to ' - 'retreive. 
optional - if not provided, the script will ' - 'ask for input') -args = parser.parse_args() - -if args.replacedKey: - replacedKey = args.replacedKey -else: - replacedKey = input('Enter the key to be replaced: ') -if args.replacementKey: - replacementKey = args.replacementKey -else: - replacementKey = input('Enter the replacement key: ') -if args.handle: - handle = args.handle -else: - handle = input('Enter community handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -endpoint = baseURL + '/rest/handle/' + handle -community = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -communityID = community['uuid'] -collections = requests.get(baseURL + '/rest/communities/' + str(communityID) - + '/collections', headers=header, cookies=cookies, - verify=verify).json() -collSels = '' -for j in range(0, len(collections)): - collectionID = collections[j]['uuid'] - collSel = '&collSel[]=' + collectionID - collSels = collSels + collSel -date = datetime.now().strftime('%Y-%m-%d %H.%M.%S') -f = csv.writer(open(filePath + 'replaceKey' + date + '.csv', 'w')) -f.writerow(['itemID'] + ['replacedKey'] + ['replacedValue'] + ['delete'] - + ['post']) -offset = 0 -recordsEdited = 0 -items = '' -itemLinks = [] -while items != []: - endpoint = baseURL + '/rest/filtered-items?query_field[]=' + replacedKey - endpoint += '&query_op[]=exists&query_val[]=' + collSels - endpoint += '&limit=200&offset=' + str(offset) - print(endpoint) - response = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - items = response['items'] - for item in items: - itemMetadataProcessed = [] - itemLink = item['link'] - itemLinks.append(itemLink) - offset = offset + 200 - print(offset) -for itemLink in itemLinks: - itemMetadataProcessed = [] - print(itemLink) - metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify).json() - for l in range(0, len(metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - if metadata[l]['key'] == replacedKey: - replacedElement = metadata[l] - updatedMetadataElement = {} - updatedMetadataElement['key'] = replacementKey - updatedMetadataElement['value'] = replacedElement['value'] - updatedMetadataElement['language'] = replacedElement['language'] - print(updatedMetadataElement) - itemMetadataProcessed.append(updatedMetadataElement) - provNote = '\'' + replacedKey + '\' was replaced by \'' - provNote += replacementKey - provNote += '\' through a batch process on ' - date = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - provNote += date + '.' 
- provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = provNote - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - else: - if metadata[l] not in itemMetadataProcessed: - itemMetadataProcessed.append(metadata[l]) - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - delete = requests.delete(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify) - print(delete) - post = requests.put(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify, - data=itemMetadataProcessed) - print(post) - f.writerow([itemLink] + [replacedElement['key']] - + [replacedElement['value']] + [delete] + [post]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/replaceKeyValuePairOnItemIdCSV.py b/replaceKeyValuePairOnItemIdCSV.py deleted file mode 100644 index f9da491..0000000 --- a/replaceKeyValuePairOnItemIdCSV.py +++ /dev/null @@ -1,92 +0,0 @@ -import json -import requests -import time -import csv -from datetime import datetime -import urllib3 -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -fileName = filePath + input('Enter fileName (including \'.csv\'): ') -replacedKey = input('Enter key: ') -replacementKey = replacedKey - -f = csv.writer(open(filePath + 'replacedKeyValuePair' - + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w')) -f.writerow(['itemID'] + ['replacedKey'] + ['replacedValue'] - + ['replacementValue'] + ['delete'] + ['post']) - -with open(fileName) as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - itemMetadataProcessed = [] - itemID = row['itemID'] - replacedValue = row['replacedValue'] - replacementValue = row['replacementValue'] - itemMetadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, - cookies=cookies, verify=verify).json() - for element in itemMetadata: - languageValue = element['language'] - key = element['key'] == replacedKey - value = element['value'] - if key and value == replacedValue: - updatedMetadataElement = {} - updatedMetadataElement['key'] = replacementKey - updatedMetadataElement['value'] = replacementValue - updatedMetadataElement['language'] = languageValue - itemMetadataProcessed.append(updatedMetadataElement) - - provNote = '\'' + replacedKey + ': ' + replacedValue - provNote += '\' was replaced by \'' + replacementKey - provNote += ': ' + replacementValue - provNote += '\' through a batch process on ' - currTime = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - provNote += currTime + '.' 
- provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = provNote - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - else: - itemMetadataProcessed.append(element) - print(itemMetadata) - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - - delete = requests.delete(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, - cookies=cookies, verify=verify) - print(delete) - post = requests.put(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify, data=itemMetadataProcessed) - print(post) - f.writerow([itemID] + [replacedKey] + [replacedValue] - + [replacementValue] + [delete] + [post]) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/replaceKeyValuePairsFromCSV.py b/replaceKeyValuePairsFromCSV.py deleted file mode 100644 index fd7618b..0000000 --- a/replaceKeyValuePairsFromCSV.py +++ /dev/null @@ -1,129 +0,0 @@ -import json -import requests -import time -import csv -from datetime import datetime -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-f', '--fileName', help='the CSV file of changes. ' - 'optional - if not provided, the script will ask for ' - 'input') -args = parser.parse_args() - -if args.fileName: - fileName = filePath + args.fileName -else: - fileName = filePath + input('Enter the CSV of changes ' - '(including \'.csv\'): ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -f = csv.writer(open(filePath + 'searchAndReplace' - + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w')) -f.writerow(['itemID'] + ['replacedKey'] + ['replacedValue'] + ['delete'] - + ['post']) -with open(fileName) as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - replacedKey = row['replacedKey'] - replacementKey = row['replacementKey'] - replacedValue = row['replacedValue'] - replacementValue = row['replacementValue'] - offset = 0 - recordsEdited = 0 - items = '' - itemLinks = [] - while items != []: - endpoint = baseURL + '/rest/filtered-items?query_field[]=' - endpoint += replacedKey - endpoint += '&query_op[]=equals&query_val[]=' - endpoint += replacedValue + '&limit=200&offset=' - endpoint += str(offset) - print(endpoint) - response = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - items = response['items'] - for item in items: - itemMetadataProcessed = [] - itemLink = item['link'] - itemLinks.append(itemLink) - offset = offset + 200 - print(offset) - for itemLink in itemLinks: - itemMetadataProcessed = [] - print(itemLink) - metadata = requests.get(baseURL + itemLink + '/metadata', - headers=header, 
cookies=cookies, - verify=verify).json() - for l in range(0, len(metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - languageValue = metadata[l]['language'] - key = metadata[l]['key'] - value = metadata[l]['value'] - if key == replacedKey and value == replacedValue: - replacedElement = metadata[l] - updatedMetadataElement = {} - updatedMetadataElement['key'] = replacementKey - updatedMetadataElement['value'] = replacementValue - updatedMetadataElement['language'] = languageValue - itemMetadataProcessed.append(updatedMetadataElement) - provNote = '\'' + replacedKey + ': ' + replacedValue - provNote += '\' was replaced by \'' - provNote += replacementKey + ': ' - provNote += replacementValue - provNote += '\' through a batch process on ' - currTime = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - provNote += currTime + '.' - provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = provNote - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - else: - if metadata[l] not in itemMetadataProcessed: - itemMetadataProcessed.append(metadata[l]) - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - delete = requests.delete(baseURL + itemLink + '/metadata', - headers=header, cookies=cookies, - verify=verify) - print(delete) - post = requests.put(baseURL + itemLink + '/metadata', - headers=header, cookies=cookies, verify=verify, - data=itemMetadataProcessed) - print(post) - f.writerow([itemLink] + [replacedElement['key']] - + [replacedElement['value']] + [delete] + [post]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/replaceUnnecessarySpaces.py b/replaceUnnecessarySpaces.py deleted file mode 100644 index fd4865c..0000000 --- a/replaceUnnecessarySpaces.py +++ /dev/null @@ -1,115 +0,0 @@ -import json -import requests -import csv -import time -from datetime import datetime -import urllib3 -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -communityHandle = input('Enter community handle: ') -key = input('Enter key: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -itemList = [] -endpoint = baseURL + '/rest/communities' -communities = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -for i in range(0, len(communities)): - communityID = communities[i]['uuid'] - collections = requests.get(baseURL + '/rest/communities/' - + str(communityID) + '/collections', - headers=header, cookies=cookies, - verify=verify).json() - for j in range(0, len(collections)): - collectionID = collections[j]['uuid'] - if collectionID not in skipColl: - 
offset = 0 - items = '' - while items != []: - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) - + '/items?limit=200&offset=' - + str(offset), headers=header, - cookies=cookies, verify=verify) - while items.status_code != 200: - time.sleep(5) - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) - + '/items?limit=200&offset=' - + str(offset), headers=header, - cookies=cookies, verify=verify) - items = items.json() - for k in range(0, len(items)): - itemID = items[k]['uuid'] - itemList.append(itemID) - offset = offset + 200 - -dsFunc.elapsedTime(startTime, 'Item list creation time') - -f = csv.writer(open(filePath + 'removeUnnecessarySpaces' - + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w')) -f.writerow(['itemID'] + ['replacedKey'] + ['replacedValue'] + ['delete'] - + ['post']) -for itemID in itemList: - itemMetadataProcessed = [] - metadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify).json() - for i in range(0, len(metadata)): - if metadata[i]['key'] == key: - metadataItem = json.dumps(metadata[i]) - if ' ' in metadataItem or ' ,' in metadataItem: - uptdMetadataElement = json.loads(metadataItem) - uptdMetadataElement = uptdMetadataElement.replace(' ', ' ') - uptdMetadataElement = uptdMetadataElement.replace(' ', ' ') - uptdMetadataElement = uptdMetadataElement.replace(' ,', ',') - itemMetadataProcessed.append(uptdMetadataElement) - f.writerow([itemID] + [metadata[i]['key']] - + [metadata[i]['value']]) - else: - itemMetadataProcessed.append(metadata[i]) - else: - itemMetadataProcessed.append(metadata[i]) - if json.dumps(itemMetadataProcessed) != json.dumps(metadata): - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - print('updated', itemID) - delete = requests.delete(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, - cookies=cookies, verify=verify) - print(delete) - post = requests.put(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify, data=itemMetadataProcessed) - print(post) - else: - print('not updated', itemID) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/replaceValueInCollection.py b/replaceValueInCollection.py deleted file mode 100644 index 2a37fd7..0000000 --- a/replaceValueInCollection.py +++ /dev/null @@ -1,145 +0,0 @@ -import json -import requests -import csv -import time -import urllib3 -import argparse -from datetime import datetime -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-k', '--key', help='the key to be searched. optional - ' - 'if not provided, the script will ask for input') -parser.add_argument('-1', '--replacedValue', help='the value to be replaced. ' - 'optional - if not provided, the script will ask for' - 'input') -parser.add_argument('-2', '--replacementValue', help='the replacement value. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-i', '--handle', help='handle of the collection to ' - 'retreive. 
optional - if not provided, the script will ' - 'ask for input') -args = parser.parse_args() - -if args.key: - key = args.key -else: - key = input('Enter the key: ') -if args.replacedValue: - replacedValue = args.replacedValue -else: - replacedValue = input('Enter the value to be replaced: ') -if args.replacementValue: - replacementValue = args.replacementValue -else: - replacementValue = input('Enter the replacement value: ') -if args.handle: - handle = args.handle -else: - handle = input('Enter collection handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -endpoint = baseURL + '/rest/handle/' + handle -collection = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -collectionID = collection['uuid'] -collSels = '&collSel[]=' + collectionID - -f = csv.writer(open(filePath + 'replacedValues' - + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w')) -f.writerow(['handle'] + ['replacedValue'] + ['replacementValue']) -offset = 0 -recordsEdited = 0 -items = '' -itemLinks = [] -while items != []: - endpoint = baseURL + '/rest/filtered-items?query_field[]=' + key - endpoint += '&query_op[]=equals&query_val[]=' + replacedValue - endpoint += collSels + '&limit=200&offset=' + str(offset) - print(endpoint) - replacedKey = key - replacementKey = key - response = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - items = response['items'] - for item in items: - itemMetadataProcessed = [] - itemLink = item['link'] - itemLinks.append(itemLink) - offset = offset + 200 - print(offset) -for itemLink in itemLinks: - itemMetadataProcessed = [] - print(itemLink) - metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify).json() - for l in range(0, len(metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - languageValue = metadata[l]['language'] - key = metadata[l]['key'] - value = metadata[l]['value'] - if key == replacedKey and key == replacedValue: - replacedElement = metadata[l] - updatedMetadataElement = {} - updatedMetadataElement['key'] = replacementKey - updatedMetadataElement['value'] = replacementValue - updatedMetadataElement['language'] = languageValue - itemMetadataProcessed.append(updatedMetadataElement) - provNote = '\'' + replacedKey + ': ' + replacedValue - provNote += '\' was replaced by \'' + replacementKey - provNote += ': ' + replacementValue - provNote += '\' through a batch process on ' - provNote += datetime.now().strftime('%Y-%m-%d %H:%M:%S') - provNote += '.' 
- provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = provNote - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - recordsEdited = recordsEdited + 1 - else: - if metadata[l] not in itemMetadataProcessed: - itemMetadataProcessed.append(metadata[l]) - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - print('updated', itemLink, recordsEdited) - delete = requests.delete(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify) - print(delete) - post = requests.put(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify, - data=itemMetadataProcessed) - print(post) - f.writerow([itemLink] + [updatedMetadataElement['key']] - + [updatedMetadataElement['value']] + [delete] + [post]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/replaceValueInCommunityFromCSV.py b/replaceValueInCommunityFromCSV.py deleted file mode 100644 index 15b81b4..0000000 --- a/replaceValueInCommunityFromCSV.py +++ /dev/null @@ -1,157 +0,0 @@ -# -*- coding: utf-8 -*- -import json -import requests -import csv -import time -import urllib3 -import argparse -from datetime import datetime -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-i', '--handle', help='handle of the community. optional ' - '- if not provided, the script will ask for input') -parser.add_argument('-f', '--fileName', help='the CSV file of changes. 
' - 'optional - if not provided, the script will ask for ' - 'input') -args = parser.parse_args() - -if args.fileName: - fileName = args.fileName -else: - fileName = input('Enter the CSV of changes (including \'.csv\'): ') -if args.handle: - handle = args.handle -else: - handle = input('Enter community handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -endpoint = baseURL + '/rest/handle/' + handle -community = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -communityID = community['uuid'] -collections = requests.get(baseURL + '/rest/communities/' + str(communityID) - + '/collections', headers=header, cookies=cookies, - verify=verify).json() -collSels = '' -for j in range(0, len(collections)): - collectionID = collections[j]['uuid'] - collSel = '&collSel[]=' + collectionID - collSels = collSels + collSel - -counter = 0 -f = csv.writer(open(filePath + 'replacedValues' - + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w')) -f.writerow(['handle'] + ['replacedValue'] + ['replacementValue']) -with open(fileName) as csvfile: - reader = csv.DictReader(csvfile) - rowCount = len(list(reader)) -with open(fileName) as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - rowCount -= 1 - replacedValue = row['replacedValue'] - replacementValue = row['replacementValue'] - print('Rows remaining: ', rowCount) - print(replacedValue, ' -- ', replacementValue) - if replacedValue != replacementValue: - print(replacedValue) - offset = 0 - recordsEdited = 0 - items = '' - itemLinks = [] - while items != []: - endpoint = baseURL + '/rest/filtered-items?' 
- endpoint += 'query_field[]=*&query_op[]=equals' - endpoint += '&query_val[]=' + replacedValue - endpoint += collSels + '&limit=200&offset=' - endpoint += str(offset) - print(endpoint) - response = requests.get(endpoint, headers=header, - cookies=cookies, verify=verify) - print(response) - response = response.json() - items = response['items'] - print(len(items), ' search results') - for item in items: - itemLink = item['link'] - itemLinks.append(itemLink) - offset = offset + 200 - print(offset) - for itemLink in itemLinks: - itemMetadataProcessed = [] - metadata = requests.get(baseURL + itemLink + '/metadata', - headers=header, cookies=cookies, - verify=verify).json() - counter += 1 - print(counter) - for l in range(0, len(metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - languageValue = metadata[l]['language'] - if metadata[l]['value'] == replacedValue: - key = metadata[l]['key'] - replacedElement = metadata[l] - updatedMetadataElement = {} - updatedMetadataElement['key'] = metadata[l]['key'] - updatedMetadataElement['value'] = replacementValue - updatedMetadataElement['language'] = languageValue - itemMetadataProcessed.append(updatedMetadataElement) - provNote = '\'' + key + ': ' + replacedValue - provNote += '\' was replaced by \'' + key - provNote += ': ' + replacementValue - provNote += '\' through a batch process on ' - currTime = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - provNote += currTime + '.' - provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = provNote - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - recordsEdited = recordsEdited + 1 - else: - if metadata[l] not in itemMetadataProcessed: - itemMetadataProcessed.append(metadata[l]) - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - print('updated', itemLink, recordsEdited) - delete = requests.delete(baseURL + itemLink + '/metadata', - headers=header, cookies=cookies, - verify=verify) - print(delete) - post = requests.put(baseURL + itemLink + '/metadata', - headers=header, cookies=cookies, - verify=verify, data=itemMetadataProcessed) - print(post) - f.writerow([itemLink] + [replacedValue] + [replacementValue] - + [delete] + [post]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/repositoryMetadataBackup.py b/repositoryMetadataBackup.py deleted file mode 100644 index 0e338c4..0000000 --- a/repositoryMetadataBackup.py +++ /dev/null @@ -1,97 +0,0 @@ -import json -import requests -import time -from datetime import datetime -import urllib3 -import os -import dsFunc -import argparse - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-p', '--handlePrefix', help='Enter the handle prefix') -args = parser.parse_args() - -if args.handlePrefix: - handlePrefix = args.handlePrefix -else: - handlePrefix = input('Enter the handle prefix: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 
'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -date = datetime.now().strftime('%Y-%m-%d %H.%M.%S') -endpoint = baseURL + '/rest/communities' -communities = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -backupDirectory = filePath + 'backup' + date + '/' -os.makedirs(backupDirectory) -for i in range(0, len(communities)): - communityID = communities[i]['uuid'] - collections = requests.get(baseURL + '/rest/communities/' - + str(communityID) + '/collections', - headers=header, cookies=cookies, - verify=verify).json() - for j in range(0, len(collections)): - collectionID = collections[j]['uuid'] - if collectionID not in skipColl: - collectionHandle = collections[j]['handle'] - collectionHandle = collectionHandle.replace(handlePrefix, '') - collectionHandle = collectionHandle.replace('/', '-') - print('collectionID: ', collectionID) - itemList = [] - offset = 0 - items = '' - while items != []: - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) - + '/items?limit=1000&offset=' - + str(offset), headers=header, - cookies=cookies, verify=verify) - while items.status_code != 200: - time.sleep(5) - items = requests.get(baseURL + '/rest/collections/' - + str(collectionID) - + '/items?limit=1000&offset=' - + str(offset), headers=header, - cookies=cookies, verify=verify) - items = items.json() - for k in range(0, len(items)): - itemID = items[k]['uuid'] - itemList.append(itemID) - offset = offset + 1000 - f = open(backupDirectory + collectionHandle + '.json', 'w') - collectionMetadata = [] - for itemID in itemList: - metadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, - cookies=cookies, verify=verify).json() - collectionMetadata.append(metadata) - json.dump(collectionMetadata, f) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/repositoryMetadataRestore.py b/repositoryMetadataRestore.py deleted file mode 100644 index eef640a..0000000 --- a/repositoryMetadataRestore.py +++ /dev/null @@ -1,76 +0,0 @@ -import json -import requests -import os -import time -import dsFunc -import argparse - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-p', '--handlePrefix', help='Enter the handle prefix') -args = parser.parse_args() - -if args.handlePrefix: - handlePrefix = args.handlePrefix -else: - handlePrefix = input('Enter the handle prefix: ') - -requests.packages.urllib3.disable_warnings() - -directory = filePath + input('Enter directory name: ') - -startTime = time.time() - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status 
= requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -for fileName in os.listdir(directory): - print(fileName) - metadataGroup = json.load(open(directory + '/' + fileName)) - for i in range(0, len(metadataGroup)): - metadata = metadataGroup[i] - itemMetadata = json.dumps(metadata) - for j in range(0, len(metadata)): - key = metadata[j]['key'] - value = metadata[j]['value'] - if key == 'dc.identifier.uri' and value.startswith(handlePrefix): - handle = metadata[j]['value'].replace(handlePrefix, '') - print(handle) - endpoint = baseURL + '/rest/handle/' + handle - item = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - itemID = item['uuid'] - print(fileName, itemID) - delete = requests.delete(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, - cookies=cookies, verify=verify) - print(delete) - post = requests.put(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, - cookies=cookies, verify=verify, - data=itemMetadata) - print(post) -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/splitFieldIntoMultipleFields.py b/splitFieldIntoMultipleFields.py deleted file mode 100644 index 4492e3f..0000000 --- a/splitFieldIntoMultipleFields.py +++ /dev/null @@ -1,141 +0,0 @@ -# -*- coding: utf-8 -*- -import json -import requests -import csv -import time -import urllib3 -from datetime import datetime -import ast -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-r', '--replacedKey', help='the key to be replaced. ' - 'optional - if not provided, the script will ask for ' - 'input') -parser.add_argument('-f', '--fileName', help='the CSV file of changes. 
' - 'optional - if not provided, the script will ask for ' - 'input') -args = parser.parse_args() - -if args.replacedKey: - replacedKey = args.replacedKey -else: - replacedKey = input('Enter the key to be replaced: ') -if args.fileName: - fileName = filePath + args.fileName -else: - fileName = filePath + input('Enter the file name of the CSV of changes ' - '(including \'.csv\'): ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -recordsEdited = 0 -elementsEdited = 0 -f = csv.writer(open(filePath + 'splitFieldIntoMultipleFields' - + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w')) -f.writerow(['itemID'] + ['replacedKey'] + ['replacementValueList'] - + ['delete'] + ['post']) -replacedElement = '' -with open(fileName) as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - replacedValue = row['value'] - print(replacedValue) - replacementValueList = ast.literal_eval(row['structuredList']) - offset = 0 - items = '' - itemLinks = [] - while items != []: - endpoint = baseURL + '/rest/filtered-items?query_field[]=' - endpoint += replacedKey - endpoint += '&query_op[]=equals&query_val[]=' - endpoint += replacedValue + '&limit=200&offset=' - endpoint += str(offset) - response = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - items = response['items'] - for item in items: - itemLink = item['link'] - itemLinks.append(itemLink) - offset = offset + 200 - print(offset) - for itemLink in itemLinks: - itemMetadataProcessed = [] - print(itemLink) - metadata = requests.get(baseURL + itemLink + '/metadata', - headers=header, cookies=cookies, - verify=verify).json() - for l in range(0, len(metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - languageValue = metadata[l]['language'] - key = metadata[l]['key'] - value = metadata[l]['value'] - if key == replacedKey and value == replacedValue: - print('match') - replacedElement = metadata[l] - for replacementValue in replacementValueList: - updatedMetadataElement = {} - updatedMetadataElement['key'] = replacedKey - updatedMetadataElement['value'] = replacementValue - updatedMetadataElement['language'] = languageValue - itemMetadataProcessed.append(updatedMetadataElement) - currTime = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - provNote = '\'' + replacedKey + ': ' + replacedValue - provNote += '\' split into \'' + replacedKey - provNote += ': ' + replacementValue - provNote += '\' through a batch process on ' - provNote += currTime + '.' 
- provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = provNote - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - elementsEdited = elementsEdited + 1 - else: - if metadata[l] not in itemMetadataProcessed: - itemMetadataProcessed.append(metadata[l]) - recordsEdited = recordsEdited + 1 - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - print('updated', itemLink, recordsEdited, elementsEdited) - delete = requests.delete(baseURL + itemLink + '/metadata', - headers=header, cookies=cookies, - verify=verify) - print(delete) - post = requests.put(baseURL + itemLink + '/metadata', - headers=header, cookies=cookies, verify=verify, - data=itemMetadataProcessed) - print(post) - f.writerow([itemLink] + [replacedKey] + [replacementValueList] - + [delete] + [post]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/tests.py b/tests.py deleted file mode 100644 index b576d59..0000000 --- a/tests.py +++ /dev/null @@ -1,48 +0,0 @@ -import dsFunc -import time -import unittest - - -class dsFuncTests(unittest.TestCase): - """Test dsFunc.py functions.""" - - def testElapsedTime(self): - """Test elapsed time function.""" - startTime = time.time() - sleepTime = 5 - time.sleep(sleepTime) - td = dsFunc.elapsedTime(startTime, 'Elapsed run time') - self.assertTrue(sleepTime <= int(td.seconds) <= sleepTime + 1) - - def testInstaSelect(self): - """Test instance select function.""" - instArray = ['secretsProd', '', 'secrets', '#$%#%##@'] - for inst in instArray: - if inst == 'secretsProd': - secrets = dsFunc.instSelect(inst) - self.assertTrue(secrets.__name__ == inst) - elif inst == 'secrets': - secrets = dsFunc.instSelect(inst) - self.assertTrue(secrets.__name__ == inst) - else: - secrets = dsFunc.instSelect(inst) - self.assertTrue(secrets.__name__ == 'secrets') - - def testAuth(self): - """Return email to confirm acceptance of credentials.""" - instArray = ['secretsProd', '', 'secrets', '#$%#%##@'] - for inst in instArray: - secrets = dsFunc.instSelect(inst) - email = secrets.email - baseURL = secrets.baseURL - password = secrets.password - verify = secrets.verify - cookies, header = dsFunc.auth(email, password, baseURL, verify) - - uName, authEmail = dsFunc.authConfirm(cookies, baseURL, header, - verify) - self.assertIn(email, authEmail) - - -if __name__ == '__main__': - unittest.main(warnings='ignore') diff --git a/updateLanguageTagsForKey.py b/updateLanguageTagsForKey.py deleted file mode 100644 index c8453c0..0000000 --- a/updateLanguageTagsForKey.py +++ /dev/null @@ -1,108 +0,0 @@ -import json -import requests -import time -import csv -from datetime import datetime -import urllib3 -import argparse -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -parser = argparse.ArgumentParser() -parser.add_argument('-k', '--key', help='the key to be updated. 
optional - if ' - 'not provided, the script will ask for input') -args = parser.parse_args() - -if args.key: - key = args.key -else: - key = input('Enter the key to be updated: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -f = csv.writer(open(filePath + 'languageTagUpdate' + key - + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w')) -f.writerow(['itemID'] + ['key']) -offset = 0 -recordsEdited = 0 -items = '' -itemLinks = [] -while items != []: - endpoint = baseURL + '/rest/filtered-items?query_field[]=' + key - endpoint += '&query_op[]=exists&query_val[]=&limit=200&offset=' - endpoint += str(offset) - print(endpoint) - response = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() - items = response['items'] - for item in items: - itemMetadataProcessed = [] - itemLink = item['link'] - itemLinks.append(itemLink) - offset = offset + 200 - print(offset) -for itemLink in itemLinks: - itemMetadataProcessed = [] - print(itemLink) - metadata = requests.get(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify).json() - for l in range(0, len(metadata)): - metadata[l].pop('schema', None) - metadata[l].pop('element', None) - metadata[l].pop('qualifier', None) - if metadata[l]['key'] == key and metadata[l]['language'] is None: - updatedMetadataElement = {} - updatedMetadataElement['key'] = metadata[l]['key'] - updatedMetadataElement['value'] = metadata[l]['value'] - updatedMetadataElement['language'] = 'en_US' - itemMetadataProcessed.append(updatedMetadataElement) - provNote = 'The language tag for \'' + metadata[l]['key'] + ': ' - provNote += metadata[l]['value'] - provNote += '\' was changed from \'null\' to \'en_US\' ' - provNote += 'through a batch process on ' - provNote += datetime.now().strftime('%Y-%m-%d %H:%M:%S') - provNote += '.' 
- provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = provNote - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - else: - itemMetadataProcessed.append(metadata[l]) - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - delete = requests.delete(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify) - print(delete) - post = requests.put(baseURL + itemLink + '/metadata', headers=header, - cookies=cookies, verify=verify, - data=itemMetadataProcessed) - print(post) - f.writerow([itemLink] + [key]) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') diff --git a/updateLanguageTagsForKeyInCollection.py b/updateLanguageTagsForKeyInCollection.py deleted file mode 100644 index afebe8f..0000000 --- a/updateLanguageTagsForKeyInCollection.py +++ /dev/null @@ -1,110 +0,0 @@ -import json -import requests -import time -import csv -from datetime import datetime -import urllib3 -import dsFunc - -inst = input('To edit production server, enter the name of the secrets file: ') -secrets = dsFunc.instSelect(inst) - -baseURL = secrets.baseURL -email = secrets.email -password = secrets.password -filePath = secrets.filePath -verify = secrets.verify -skipColl = secrets.skipColl - -key = input('Enter key: ') -collectionHandle = input('Enter collection handle: ') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -startTime = time.time() -data = {'email': email, 'password': password} -header = {'content-type': 'application/json', 'accept': 'application/json'} -session = requests.post(baseURL + '/rest/login', headers=header, verify=verify, - params=data).cookies['JSESSIONID'] -cookies = {'JSESSIONID': session} - - -status = requests.get(baseURL + '/rest/status', headers=header, - cookies=cookies, verify=verify).json() -userFullName = status['fullname'] -print('authenticated', userFullName) - -itemList = [] -endpoint = baseURL + '/rest/handle/' + collectionHandle -collection = requests.get(endpoint, headers=header, cookies=cookies, - verify=verify).json() -collectionID = collection['uuid'] -offset = 0 -items = '' -while items != []: - items = requests.get(baseURL + '/rest/collections/' + str(collectionID) - + '/items?limit=200&offset=' + str(offset), - headers=header, cookies=cookies, verify=verify) - while items.status_code != 200: - time.sleep(5) - items = requests.get(baseURL + '/rest/collections/' + str(collectionID) - + '/items?limit=200&offset=' + str(offset), - headers=header, cookies=cookies, verify=verify) - items = items.json() - for k in range(0, len(items)): - itemID = items[k]['uuid'] - itemList.append(itemID) - offset = offset + 200 - -dsFunc.elapsedTime(startTime, 'Item list creation time') - -f = csv.writer(open(filePath + 'languageTagUpdate' + key - + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv', 'w')) -f.writerow(['itemID'] + ['key']) -for number, itemID in enumerate(itemList): - itemMetadataProcessed = [] - itemsRemaining = len(itemList) - number - print('Items remaining: ', itemsRemaining, 'ItemID: ', itemID) - metadata = requests.get(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify).json() - for l in range(0, len(metadata)): - if metadata[l]['key'] == key and metadata[l]['language'] == '': - updatedMetadataElement = {} - updatedMetadataElement['key'] = 
metadata[l]['key'] - updatedMetadataElement['value'] = metadata[l]['value'] - updatedMetadataElement['language'] = 'en_US' - itemMetadataProcessed.append(updatedMetadataElement) - provNote = 'The language tag for \'' + metadata[l]['key'] + ': ' - provNote += metadata[l]['value'] - provNote += '\' was changed from \'null\' to \'en_US\' ' - provNote += 'through a batch process on ' - provNote += datetime.now().strftime('%Y-%m-%d %H:%M:%S') - provNote += '.' - provNoteElement = {} - provNoteElement['key'] = 'dc.description.provenance' - provNoteElement['value'] = provNote - provNoteElement['language'] = 'en_US' - itemMetadataProcessed.append(provNoteElement) - else: - itemMetadataProcessed.append(metadata[l]) - if 'The language tag for \'' + key in json.dumps(itemMetadataProcessed): - itemMetadataProcessed = json.dumps(itemMetadataProcessed) - print('updated', itemID) - delete = requests.delete(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, - cookies=cookies, verify=verify) - print(delete) - post = requests.put(baseURL + '/rest/items/' + str(itemID) - + '/metadata', headers=header, cookies=cookies, - verify=verify, data=itemMetadataProcessed) - print(post) - f.writerow([itemID] + [key]) - else: - print('not updated', itemID) - -logout = requests.post(baseURL + '/rest/logout', headers=header, - cookies=cookies, verify=verify) - -# print script run time -dsFunc.elapsedTime(startTime, 'Script run time') From d59078c1849dbd24684e2409f122a61fa2c0d87b Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Wed, 13 Nov 2019 19:19:59 -0500 Subject: [PATCH 02/22] init commit --- .gitignore | 138 ++++++++++++++++++++++++++++++--- LICENSE | 13 ++++ MANIFEST.in | 1 + Pipfile | 14 ++++ Pipfile.lock | 159 +++++++++++++++++++++++++++++++++++++++ README.md | 1 + dsaps/__init__.py | 0 dsaps/cli.py | 46 +++++++++++ dsaps/models.py | 120 +++++++++++++++++++++++++++++ pull-request-template.md | 5 ++ tests/__init__.py | 0 tests/test_models.py | 32 ++++++++ 12 files changed, 517 insertions(+), 12 deletions(-) create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 Pipfile create mode 100644 Pipfile.lock create mode 100644 README.md create mode 100644 dsaps/__init__.py create mode 100644 dsaps/cli.py create mode 100644 dsaps/models.py create mode 100644 pull-request-template.md create mode 100644 tests/__init__.py create mode 100644 tests/test_models.py diff --git a/.gitignore b/.gitignore index cd37cec..28f8a70 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,132 @@ $RECYCLE.BIN/ .Trashes .VolumeIcon.icns +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + # Directories potentially created on remote AFP share .AppleDB .AppleDesktop @@ -55,15 +181,3 @@ local/* *.json createItemMetadataFromCSV_* *.txt - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Rope project settings -.ropeproject diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..ce30b6c --- /dev/null +++ b/LICENSE @@ -0,0 +1,13 @@ +Copyright 2019 MIT Libraries + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..1aba38f --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include LICENSE diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..ea46530 --- /dev/null +++ b/Pipfile @@ -0,0 +1,14 @@ +[[source]] +name = "pypi" +url = "https://pypi.org/simple" +verify_ssl = true + +[dev-packages] +pytest = "*" + +[packages] +requests = "*" +structlog = "*" + +[requires] +python_version = "3.7" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..d8ff959 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,159 @@ +{ + "_meta": { + "hash": { + "sha256": "8f1c2617e2d35a7fba5feecc973dfb845aafbc0e16cbedddc3a50d7c23f768d9" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.7" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "certifi": { + "hashes": [ + "sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50", + "sha256:fd7c7c74727ddcf00e9acd26bba8da604ffec95bf1c2144e67aff7a8b50e6cef" + ], + "version": "==2019.9.11" + }, + "chardet": { + "hashes": [ + "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", + "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" + ], + "version": "==3.0.4" + }, + "idna": { + "hashes": [ + "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", + "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c" + ], + "version": "==2.8" + }, + "requests": { + "hashes": [ + "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4", + "sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31" + ], + "index": "pypi", + "version": "==2.22.0" + }, + "six": { + "hashes": [ + "sha256:1f1b7d42e254082a9db6279deae68afb421ceba6158efa6131de7b3003ee93fd", + "sha256:30f610279e8b2578cab6db20741130331735c781b56053c59c4076da27f06b66" + ], + "version": "==1.13.0" + }, + "structlog": { + "hashes": [ + "sha256:4287058cf4ce1a59bc5dea290d6386d37f29a37529c9a51cdf7387e51710152b", + "sha256:6640e6690fc31d5949bc614c1a630464d3aaa625284aeb7c6e486c3010d73e12" + ], + "index": "pypi", + "version": "==19.2.0" + }, + "urllib3": { + "hashes": [ + "sha256:a8a318824cc77d1fd4b2bec2ded92646630d7fe8619497b142c84a9e6f5a7293", + "sha256:f3c5fd51747d450d4dcf6f923c81f78f811aab8205fda64b0aba34a4e48b0745" + ], + "version": "==1.25.7" + } + }, + "develop": { + "atomicwrites": { + "hashes": [ + "sha256:03472c30eb2c5d1ba9227e4c2ca66ab8287fbfbbda3888aa93dc2e28fc6811b4", + "sha256:75a9445bac02d8d058d5e1fe689654ba5a6556a1dfd8ce6ec55a0ed79866cfa6" + ], + "version": "==1.3.0" + }, + "attrs": { + "hashes": [ + "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c", + "sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72" + ], + "version": "==19.3.0" + }, + "importlib-metadata": { + "hashes": [ + "sha256:aa18d7378b00b40847790e7c27e11673d7fed219354109d0e7b9e5b25dc3ad26", + "sha256:d5f18a79777f3aa179c145737780282e27b508fc8fd688cb17c7a813e8bd39af" + ], + "markers": "python_version < '3.8'", + "version": "==0.23" + }, + "more-itertools": { + "hashes": [ + "sha256:409cd48d4db7052af495b09dec721011634af3753ae1ef92d2b32f73a745f832", + "sha256:92b8c4b06dac4f0611c0729b2f2ede52b2e1bac1ab48f089c7ddc12e26bb60c4" + ], + "version": "==7.2.0" + }, + "packaging": { + "hashes": [ + "sha256:28b924174df7a2fa32c1953825ff29c61e2f5e082343165438812f00d3a7fc47", + 
"sha256:d9551545c6d761f3def1677baf08ab2a3ca17c56879e70fecba2fc4dde4ed108" + ], + "version": "==19.2" + }, + "pluggy": { + "hashes": [ + "sha256:0db4b7601aae1d35b4a033282da476845aa19185c1e6964b25cf324b5e4ec3e6", + "sha256:fa5fa1622fa6dd5c030e9cad086fa19ef6a0cf6d7a2d12318e10cb49d6d68f34" + ], + "version": "==0.13.0" + }, + "py": { + "hashes": [ + "sha256:64f65755aee5b381cea27766a3a147c3f15b9b6b9ac88676de66ba2ae36793fa", + "sha256:dc639b046a6e2cff5bbe40194ad65936d6ba360b52b3c3fe1d08a82dd50b5e53" + ], + "version": "==1.8.0" + }, + "pyparsing": { + "hashes": [ + "sha256:20f995ecd72f2a1f4bf6b072b63b22e2eb457836601e76d6e5dfcd75436acc1f", + "sha256:4ca62001be367f01bd3e92ecbb79070272a9d4964dce6a48a82ff0b8bc7e683a" + ], + "version": "==2.4.5" + }, + "pytest": { + "hashes": [ + "sha256:27abc3fef618a01bebb1f0d6d303d2816a99aa87a5968ebc32fe971be91eb1e6", + "sha256:58cee9e09242937e136dbb3dab466116ba20d6b7828c7620f23947f37eb4dae4" + ], + "index": "pypi", + "version": "==5.2.2" + }, + "six": { + "hashes": [ + "sha256:1f1b7d42e254082a9db6279deae68afb421ceba6158efa6131de7b3003ee93fd", + "sha256:30f610279e8b2578cab6db20741130331735c781b56053c59c4076da27f06b66" + ], + "version": "==1.13.0" + }, + "wcwidth": { + "hashes": [ + "sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e", + "sha256:f4ebe71925af7b40a864553f761ed559b43544f8f71746c2d756c7fe788ade7c" + ], + "version": "==0.1.7" + }, + "zipp": { + "hashes": [ + "sha256:3718b1cbcd963c7d4c5511a8240812904164b7f381b647143a89d3b98f9bcd8e", + "sha256:f06903e9f1f43b12d371004b4ac7b06ab39a44adc747266928ae6debfa7b3335" + ], + "version": "==0.6.0" + } + } +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..fecda7b --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# dsaps diff --git a/dsaps/__init__.py b/dsaps/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dsaps/cli.py b/dsaps/cli.py new file mode 100644 index 0000000..e831623 --- /dev/null +++ b/dsaps/cli.py @@ -0,0 +1,46 @@ +import time + +import click + +from dsaps import models + + +@click.group() +@click.option('--url', envvar='DSPACE_URL') +@click.option('-e', '--email', prompt='Enter email', + help='The email of the user for authentication.') +@click.option('-p', '--password', prompt='Enter password', + envvar='TEST_PASS', hide_input=True, + help='The password for authentication.') +@click.pass_context +def main(ctx, url, email, password): + ctx.obj = {} + print('Application start') + client = models.Client(url, email, password) + start_time = time.time() + ctx.obj['client'] = client + ctx.obj['start_time'] = start_time + + +@main.command() +@click.option('-f', '--field', prompt='Enter the field to be searched', + help='The field to search.') +@click.option('-s', '--string', prompt='Enter the string', + help='The field to search.') +@click.option('-t', '--search_type', prompt='Enter the type of search', + help='The type of search.', + type=click.Choice(['exists', 'doesnt_exist', 'equals', + 'not_equals', 'contains', 'doesnt_contain']), + default='contains') +@click.pass_context +def search(ctx, field, string, search_type): + # Temp function for testing + client = ctx.obj['client'] + start_time = ctx.obj['start_time'] + item_links = client.filtered_item_search(field, string, search_type) + print(item_links) + models.elapsed_time(start_time, 'Elapsed time') + + +if __name__ == '__main__': + main() diff --git a/dsaps/models.py b/dsaps/models.py new file mode 100644 index 0000000..883af0b --- /dev/null +++ b/dsaps/models.py @@ -0,0 +1,120 @@ +import datetime 
+from functools import partial +import operator +import requests +import time + +import attr + +op = operator.attrgetter('name') +Field = partial(attr.ib, default=None) + + +class Client: + def __init__(self, url, email, password): + self.url = url + print('Initializing client') + data = {'email': email, 'password': password} + header = {'content-type': 'application/json', 'accept': + 'application/json'} + session = requests.post(self.url + '/rest/login', headers=header, + params=data).cookies['JSESSIONID'] + cookies = {'JSESSIONID': session} + status = requests.get(self.url + '/rest/status', headers=header, + cookies=cookies).json() + self.user_full_name = status['fullname'] + self.cookies = cookies + self.header = header + print(f'Authenticated to {self.url} as 'f'{self.user_full_name}') + + def get_record(self, uuid, rec_type): + """Retrieve an individual record of a particular type.""" + url = f'{self.url}/rest/{rec_type}/{uuid}?expand=all' + record = requests.get(url, headers=self.header, + cookies=self.cookies).json() + if rec_type == 'items': + rec_obj = self._pop_inst(Item, record) + elif rec_type == 'communities': + rec_obj = self._pop_inst(Community, record) + elif rec_type == 'collections': + rec_obj = self._pop_inst(Collection, record) + else: + print('Invalid record type.') + exit() + return rec_obj + + def filtered_item_search(self, key, string, query_type, + selected_collections=''): + offset = 0 + items = '' + item_links = [] + while items != []: + endpoint = f'{self.url}/rest/filtered-items?query_field[]=' + endpoint += f'{key}&query_op[]={query_type}&query_val[]={string}' + endpoint += f'{selected_collections}&limit=200&offset={offset}' + print(endpoint) + response = requests.get(endpoint, headers=self.header, + cookies=self.cookies).json() + items = response['items'] + for item in items: + item_links.append(item['link']) + offset = offset + 200 + return item_links + + def _pop_inst(self, class_type, rec_obj): + """Populate class instance with data from record.""" + fields = [op(field) for field in attr.fields(class_type)] + kwargs = {k: v for k, v in rec_obj.items() if k in fields} + kwargs['objtype'] = rec_obj['type'] + if class_type == Community: + collections = self._build_uuid_list(rec_obj, kwargs, 'collections') + rec_obj['collections'] = collections + elif class_type == Collection: + items = self._build_uuid_list(rec_obj, kwargs, 'items') + rec_obj['items'] = items + rec_obj = class_type(**kwargs) + return rec_obj + + def _build_uuid_list(self, rec_obj, kwargs, children): + child_list = [] + for child in rec_obj[children]: + child_list.append(child['uuid']) + return child_list + + +@attr.s +class BaseRecord: + uuid = Field() + name = Field() + handle = Field() + link = Field() + objtype = Field() + + +@attr.s +class Item(BaseRecord): + metadata = Field() + bitstreams = Field() + + +@attr.s +class Community(BaseRecord): + collections = Field() + + +@attr.s +class Collection(BaseRecord): + items = Field() + + +@attr.s +class MetadataEntry(BaseRecord): + key = Field() + value = Field() + language = Field() + + +def elapsed_time(start_time, label): + """Calculate elapsed time.""" + td = datetime.timedelta(seconds=time.time() - start_time) + print(f'{label} : {td}') diff --git a/pull-request-template.md b/pull-request-template.md new file mode 100644 index 0000000..4106c26 --- /dev/null +++ b/pull-request-template.md @@ -0,0 +1,5 @@ +#### What does this PR do? + + +#### Includes new or updated dependencies? 
+YES|NO diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_models.py b/tests/test_models.py new file mode 100644 index 0000000..d66fd1d --- /dev/null +++ b/tests/test_models.py @@ -0,0 +1,32 @@ +import pytest +import requests_mock + +from dsaps import models + + +@pytest.fixture +def client(): + pass + + +# def test_get_record(client): +# """Test get_record function.""" +# rec_obj = client.get_record(uuid, rec_type) +# assert False + + +# def test_filtered_item_search(client): +# """Test filtered_item_search function.""" +# item_links = client.filtered_item_search(key, string, query_type, +# selected_collections='') +# assert False +# +# +# def test__pop_inst(client): +# rec_obj = client._pop_inst(class_type, rec_obj) +# assert False +# +# +# def test__build_uuid_list(client): +# child_list = client._build_uuid_list(self, rec_obj, kwargs, children) +# assert False From 6821987e801ef77daaad1ab2a7687f6b718e57a1 Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Thu, 14 Nov 2019 10:35:24 -0500 Subject: [PATCH 03/22] Update test_models.py --- .travis.yml | 7 +++++++ Pipfile | 2 ++ Pipfile.lock | 18 +++++++++++++++++- tests/test_models.py | 23 ++++++++++++++++++----- 4 files changed, 44 insertions(+), 6 deletions(-) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..3a2a5d7 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,7 @@ +language: python +python: + - "3.7" +install: + - pipenv install --dev +script: + - pipenv run pytest diff --git a/Pipfile b/Pipfile index ea46530..57a2a7c 100644 --- a/Pipfile +++ b/Pipfile @@ -5,10 +5,12 @@ verify_ssl = true [dev-packages] pytest = "*" +requests-mock = "*" [packages] requests = "*" structlog = "*" +attr = "*" [requires] python_version = "3.7" diff --git a/Pipfile.lock b/Pipfile.lock index d8ff959..64fb70d 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "8f1c2617e2d35a7fba5feecc973dfb845aafbc0e16cbedddc3a50d7c23f768d9" + "sha256": "f0f4eb8bc3615617b266bbd7623ed39a1954441f310c8d9f28b4e3c25a8509fc" }, "pipfile-spec": 6, "requires": { @@ -16,6 +16,14 @@ ] }, "default": { + "attr": { + "hashes": [ + "sha256:0b1aaddb85bd9e9c4bd75092f4440d6616ff40b0df0437f00771871670f7c9fd", + "sha256:9091548058d17f132596e61fa7518e504f76b9a4c61ca7d86e1f96dbf7d4775d" + ], + "index": "pypi", + "version": "==0.3.1" + }, "certifi": { "hashes": [ "sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50", @@ -45,6 +53,14 @@ "index": "pypi", "version": "==2.22.0" }, + "requests-mock": { + "hashes": [ + "sha256:510df890afe08d36eca5bb16b4aa6308a6f85e3159ad3013bac8b9de7bd5a010", + "sha256:88d3402dd8b3c69a9e4f9d3a73ad11b15920c6efd36bc27bf1f701cf4a8e4646" + ], + "index": "pypi", + "version": "==1.7.0" + }, "six": { "hashes": [ "sha256:1f1b7d42e254082a9db6279deae68afb421ceba6158efa6131de7b3003ee93fd", diff --git a/tests/test_models.py b/tests/test_models.py index d66fd1d..7b6cbaf 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,3 +1,4 @@ +import attr import pytest import requests_mock @@ -6,13 +7,25 @@ @pytest.fixture def client(): - pass + with requests_mock.Mocker() as m: + uri1 = 'mock://example.com/rest/login' + uri2 = 'mock://example.com/rest/status' + cookies = {'JSESSIONID': '11111111'} + json_object = {'fullname': 'User Name'} + m.post(uri1, cookies=cookies) + m.get(uri2, json=json_object) + client = models.Client('mock://example.com', 'test', 'test') + return client -# def 
test_get_record(client): -# """Test get_record function.""" -# rec_obj = client.get_record(uuid, rec_type) -# assert False +def test_get_record(client): + """Test get_record function.""" + with requests_mock.Mocker() as m: + uri = '/rest/items/123?expand=all' + json_object = {'metadata': {'title': 'Sample title'}, 'type': 'item'} + m.get(uri, json=json_object) + rec_obj = client.get_record('123', 'items') + assert attr.asdict(rec_obj)['metadata'] == json_object['metadata'] # def test_filtered_item_search(client): From b43188fedaeba5119d9bd38ac2cfa337214fe2c6 Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Fri, 15 Nov 2019 13:23:42 -0500 Subject: [PATCH 04/22] add structlog --- Pipfile | 4 +++ Pipfile.lock | 81 +++++++++++++++++++++++++++++++++++-------------- dsaps/cli.py | 24 +++++++++++++-- dsaps/models.py | 13 +++++--- 4 files changed, 93 insertions(+), 29 deletions(-) diff --git a/Pipfile b/Pipfile index 57a2a7c..d94a2cb 100644 --- a/Pipfile +++ b/Pipfile @@ -11,6 +11,10 @@ requests-mock = "*" requests = "*" structlog = "*" attr = "*" +click = "*" [requires] python_version = "3.7" + +[scripts] +dsaps = "python -c \"from dsaps.cli import main; main()\"" diff --git a/Pipfile.lock b/Pipfile.lock index 64fb70d..ed5ef2a 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "f0f4eb8bc3615617b266bbd7623ed39a1954441f310c8d9f28b4e3c25a8509fc" + "sha256": "b3b90945556e626398a7ce5d09116ed13635364ca3ddc5e01a9e5a6aced5f337" }, "pipfile-spec": 6, "requires": { @@ -38,6 +38,14 @@ ], "version": "==3.0.4" }, + "click": { + "hashes": [ + "sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13", + "sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7" + ], + "index": "pypi", + "version": "==7.0" + }, "idna": { "hashes": [ "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", @@ -53,14 +61,6 @@ "index": "pypi", "version": "==2.22.0" }, - "requests-mock": { - "hashes": [ - "sha256:510df890afe08d36eca5bb16b4aa6308a6f85e3159ad3013bac8b9de7bd5a010", - "sha256:88d3402dd8b3c69a9e4f9d3a73ad11b15920c6efd36bc27bf1f701cf4a8e4646" - ], - "index": "pypi", - "version": "==1.7.0" - }, "six": { "hashes": [ "sha256:1f1b7d42e254082a9db6279deae68afb421ceba6158efa6131de7b3003ee93fd", @@ -85,13 +85,6 @@ } }, "develop": { - "atomicwrites": { - "hashes": [ - "sha256:03472c30eb2c5d1ba9227e4c2ca66ab8287fbfbbda3888aa93dc2e28fc6811b4", - "sha256:75a9445bac02d8d058d5e1fe689654ba5a6556a1dfd8ce6ec55a0ed79866cfa6" - ], - "version": "==1.3.0" - }, "attrs": { "hashes": [ "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c", @@ -99,6 +92,27 @@ ], "version": "==19.3.0" }, + "certifi": { + "hashes": [ + "sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50", + "sha256:fd7c7c74727ddcf00e9acd26bba8da604ffec95bf1c2144e67aff7a8b50e6cef" + ], + "version": "==2019.9.11" + }, + "chardet": { + "hashes": [ + "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", + "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" + ], + "version": "==3.0.4" + }, + "idna": { + "hashes": [ + "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", + "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c" + ], + "version": "==2.8" + }, "importlib-metadata": { "hashes": [ "sha256:aa18d7378b00b40847790e7c27e11673d7fed219354109d0e7b9e5b25dc3ad26", @@ -123,10 +137,10 @@ }, "pluggy": { "hashes": [ - 
"sha256:0db4b7601aae1d35b4a033282da476845aa19185c1e6964b25cf324b5e4ec3e6", - "sha256:fa5fa1622fa6dd5c030e9cad086fa19ef6a0cf6d7a2d12318e10cb49d6d68f34" + "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0", + "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d" ], - "version": "==0.13.0" + "version": "==0.13.1" }, "py": { "hashes": [ @@ -144,11 +158,27 @@ }, "pytest": { "hashes": [ - "sha256:27abc3fef618a01bebb1f0d6d303d2816a99aa87a5968ebc32fe971be91eb1e6", - "sha256:58cee9e09242937e136dbb3dab466116ba20d6b7828c7620f23947f37eb4dae4" + "sha256:63344a2e3bce2e4d522fd62b4fdebb647c019f1f9e4ca075debbd13219db4418", + "sha256:f67403f33b2b1d25a6756184077394167fe5e2f9d8bdaab30707d19ccec35427" ], "index": "pypi", - "version": "==5.2.2" + "version": "==5.3.1" + }, + "requests": { + "hashes": [ + "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4", + "sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31" + ], + "index": "pypi", + "version": "==2.22.0" + }, + "requests-mock": { + "hashes": [ + "sha256:510df890afe08d36eca5bb16b4aa6308a6f85e3159ad3013bac8b9de7bd5a010", + "sha256:88d3402dd8b3c69a9e4f9d3a73ad11b15920c6efd36bc27bf1f701cf4a8e4646" + ], + "index": "pypi", + "version": "==1.7.0" }, "six": { "hashes": [ @@ -157,6 +187,13 @@ ], "version": "==1.13.0" }, + "urllib3": { + "hashes": [ + "sha256:a8a318824cc77d1fd4b2bec2ded92646630d7fe8619497b142c84a9e6f5a7293", + "sha256:f3c5fd51747d450d4dcf6f923c81f78f811aab8205fda64b0aba34a4e48b0745" + ], + "version": "==1.25.7" + }, "wcwidth": { "hashes": [ "sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e", diff --git a/dsaps/cli.py b/dsaps/cli.py index e831623..af68ffc 100644 --- a/dsaps/cli.py +++ b/dsaps/cli.py @@ -1,9 +1,14 @@ +import datetime +import logging import time import click +import structlog from dsaps import models +logger = structlog.get_logger() + @click.group() @click.option('--url', envvar='DSPACE_URL') @@ -15,7 +20,22 @@ @click.pass_context def main(ctx, url, email, password): ctx.obj = {} - print('Application start') + dt = datetime.datetime.utcnow().isoformat(timespec='seconds') + log_suffix = f'{dt}.log' + structlog.configure(processors=[ + structlog.stdlib.filter_by_level, + structlog.stdlib.add_log_level, + structlog.stdlib.PositionalArgumentsFormatter(), + structlog.processors.TimeStamper(fmt="iso"), + structlog.processors.JSONRenderer() + ], + context_class=dict, + logger_factory=structlog.stdlib.LoggerFactory()) + logging.basicConfig(format="%(message)s", + handlers=[logging.FileHandler(f'logs/log-{log_suffix}', + 'w')], + level=logging.INFO) + logger.info('Application start') client = models.Client(url, email, password) start_time = time.time() ctx.obj['client'] = client @@ -38,7 +58,7 @@ def search(ctx, field, string, search_type): client = ctx.obj['client'] start_time = ctx.obj['start_time'] item_links = client.filtered_item_search(field, string, search_type) - print(item_links) + logger.info(item_links) models.elapsed_time(start_time, 'Elapsed time') diff --git a/dsaps/models.py b/dsaps/models.py index 883af0b..66a906b 100644 --- a/dsaps/models.py +++ b/dsaps/models.py @@ -5,15 +5,18 @@ import time import attr +import structlog op = operator.attrgetter('name') Field = partial(attr.ib, default=None) +logger = structlog.get_logger() + class Client: def __init__(self, url, email, password): self.url = url - print('Initializing client') + logger.info('Initializing client') data = {'email': email, 'password': password} header = 
{'content-type': 'application/json', 'accept': 'application/json'} @@ -25,7 +28,7 @@ def __init__(self, url, email, password): self.user_full_name = status['fullname'] self.cookies = cookies self.header = header - print(f'Authenticated to {self.url} as 'f'{self.user_full_name}') + logger.info(f'Authenticated to {self.url} as 'f'{self.user_full_name}') def get_record(self, uuid, rec_type): """Retrieve an individual record of a particular type.""" @@ -39,7 +42,7 @@ def get_record(self, uuid, rec_type): elif rec_type == 'collections': rec_obj = self._pop_inst(Collection, record) else: - print('Invalid record type.') + logger.info('Invalid record type.') exit() return rec_obj @@ -52,7 +55,7 @@ def filtered_item_search(self, key, string, query_type, endpoint = f'{self.url}/rest/filtered-items?query_field[]=' endpoint += f'{key}&query_op[]={query_type}&query_val[]={string}' endpoint += f'{selected_collections}&limit=200&offset={offset}' - print(endpoint) + logger.info(endpoint) response = requests.get(endpoint, headers=self.header, cookies=self.cookies).json() items = response['items'] @@ -117,4 +120,4 @@ class MetadataEntry(BaseRecord): def elapsed_time(start_time, label): """Calculate elapsed time.""" td = datetime.timedelta(seconds=time.time() - start_time) - print(f'{label} : {td}') + logger.info(f'{label} : {td}') From db42f61cfb29c762a17b5e41069bd9dac194c2fb Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Tue, 26 Nov 2019 13:33:57 -0500 Subject: [PATCH 05/22] add tests --- dsaps/cli.py | 3 +- dsaps/models.py | 22 +++++++++----- tests/test_models.py | 71 +++++++++++++++++++++++++++++++------------- 3 files changed, 66 insertions(+), 30 deletions(-) diff --git a/dsaps/cli.py b/dsaps/cli.py index af68ffc..e209cc1 100644 --- a/dsaps/cli.py +++ b/dsaps/cli.py @@ -36,7 +36,8 @@ def main(ctx, url, email, password): 'w')], level=logging.INFO) logger.info('Application start') - client = models.Client(url, email, password) + client = models.Client(url) + client.authenticate(email, password) start_time = time.time() ctx.obj['client'] = client ctx.obj['start_time'] = start_time diff --git a/dsaps/models.py b/dsaps/models.py index 66a906b..c6c8538 100644 --- a/dsaps/models.py +++ b/dsaps/models.py @@ -14,9 +14,11 @@ class Client: - def __init__(self, url, email, password): + def __init__(self, url): self.url = url logger.info('Initializing client') + + def authenticate(self, email, password): data = {'email': email, 'password': password} header = {'content-type': 'application/json', 'accept': 'application/json'} @@ -52,12 +54,16 @@ def filtered_item_search(self, key, string, query_type, items = '' item_links = [] while items != []: - endpoint = f'{self.url}/rest/filtered-items?query_field[]=' - endpoint += f'{key}&query_op[]={query_type}&query_val[]={string}' - endpoint += f'{selected_collections}&limit=200&offset={offset}' - logger.info(endpoint) + endpoint = f'{self.url}/rest/filtered-items?' 
+ params = {'query_field[]': key, 'query_op[]': query_type, + 'query_val[]': string, '&collSel[]': + selected_collections, 'limit': 200, 'offset': offset} + logger.info(params) + print(endpoint) response = requests.get(endpoint, headers=self.header, - cookies=self.cookies).json() + params=params, cookies=self.cookies) + print(f'Response url: {response.url}') + response = response.json() items = response['items'] for item in items: item_links.append(item['link']) @@ -73,12 +79,12 @@ def _pop_inst(self, class_type, rec_obj): collections = self._build_uuid_list(rec_obj, kwargs, 'collections') rec_obj['collections'] = collections elif class_type == Collection: - items = self._build_uuid_list(rec_obj, kwargs, 'items') + items = self._build_uuid_list(rec_obj, 'items') rec_obj['items'] = items rec_obj = class_type(**kwargs) return rec_obj - def _build_uuid_list(self, rec_obj, kwargs, children): + def _build_uuid_list(self, rec_obj, children): child_list = [] for child in rec_obj[children]: child_list.append(child['uuid']) diff --git a/tests/test_models.py b/tests/test_models.py index 7b6cbaf..cb68397 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -7,15 +7,30 @@ @pytest.fixture def client(): + client = models.Client('mock://example.com') + client.header = {} + client.cookies = {} + client.user_full_name = '' + return client + + +def test_authenticate(client): + """Test authenticate function.""" with requests_mock.Mocker() as m: - uri1 = 'mock://example.com/rest/login' - uri2 = 'mock://example.com/rest/status' + url1 = '/rest/login' + url2 = '/rest/status' + email = 'test@test.mock' + password = '1234' + header = {'content-type': 'application/json', 'accept': + 'application/json'} cookies = {'JSESSIONID': '11111111'} json_object = {'fullname': 'User Name'} - m.post(uri1, cookies=cookies) - m.get(uri2, json=json_object) - client = models.Client('mock://example.com', 'test', 'test') - return client + m.post(url1, cookies=cookies) + m.get(url2, json=json_object) + client.authenticate(email, password) + assert client.user_full_name == 'User Name' + assert client.cookies == cookies + assert client.header == header def test_get_record(client): @@ -28,18 +43,32 @@ def test_get_record(client): assert attr.asdict(rec_obj)['metadata'] == json_object['metadata'] -# def test_filtered_item_search(client): -# """Test filtered_item_search function.""" -# item_links = client.filtered_item_search(key, string, query_type, -# selected_collections='') -# assert False -# -# -# def test__pop_inst(client): -# rec_obj = client._pop_inst(class_type, rec_obj) -# assert False -# -# -# def test__build_uuid_list(client): -# child_list = client._build_uuid_list(self, rec_obj, kwargs, children) -# assert False +def test_filtered_item_search(client): + """Test filtered_item_search function.""" + with requests_mock.Mocker() as m: + key = 'dc.title' + string = 'test' + query_type = 'contains' + endpoint = '/rest/filtered-items?' 
+ json_object_1 = {'items': [{'link': '1234'}]} + json_object_2 = {'items': []} + m.get(endpoint, [{'json': json_object_1}, {'json': json_object_2}]) + + item_links = client.filtered_item_search(key, string, query_type, + selected_collections='') + assert '1234' in item_links + + +def test__pop_inst(client): + class_type = models.Collection + rec_obj = {'name': 'Test title', 'type': 'collection', 'items': []} + rec_obj = client._pop_inst(class_type, rec_obj) + assert type(rec_obj) == class_type + assert rec_obj.name == 'Test title' + + +def test__build_uuid_list(client): + rec_obj = {'items': [{'uuid': '1234'}]} + children = 'items' + child_list = client._build_uuid_list(rec_obj, children) + assert '1234' in child_list From be4795ece97853764b7b8c3f359f127eeee90334 Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Mon, 16 Dec 2019 12:42:40 -0500 Subject: [PATCH 06/22] add file list func --- Pipfile | 1 + Pipfile.lock | 64 +++++++++++++++++++++++++++++++++----------- dsaps/models.py | 13 +++++++++ tests/test_models.py | 20 ++++++++++++++ 4 files changed, 82 insertions(+), 16 deletions(-) diff --git a/Pipfile b/Pipfile index d94a2cb..1da3184 100644 --- a/Pipfile +++ b/Pipfile @@ -12,6 +12,7 @@ requests = "*" structlog = "*" attr = "*" click = "*" +lxml = "*" [requires] python_version = "3.7" diff --git a/Pipfile.lock b/Pipfile.lock index ed5ef2a..2581cd3 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "b3b90945556e626398a7ce5d09116ed13635364ca3ddc5e01a9e5a6aced5f337" + "sha256": "f068635c34247a99e86662b9c499882fbb86089a20d7bd584193f44cae883b76" }, "pipfile-spec": 6, "requires": { @@ -26,10 +26,10 @@ }, "certifi": { "hashes": [ - "sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50", - "sha256:fd7c7c74727ddcf00e9acd26bba8da604ffec95bf1c2144e67aff7a8b50e6cef" + "sha256:017c25db2a153ce562900032d5bc68e9f191e44e9a0f762f373977de9df1fbb3", + "sha256:25b64c7da4cd7479594d035c08c2d809eb4aab3a26e5a990ea98cc450c320f1f" ], - "version": "==2019.9.11" + "version": "==2019.11.28" }, "chardet": { "hashes": [ @@ -53,6 +53,38 @@ ], "version": "==2.8" }, + "lxml": { + "hashes": [ + "sha256:00ac0d64949fef6b3693813fe636a2d56d97a5a49b5bbb86e4cc4cc50ebc9ea2", + "sha256:0571e607558665ed42e450d7bf0e2941d542c18e117b1ebbf0ba72f287ad841c", + "sha256:0e3f04a7615fdac0be5e18b2406529521d6dbdb0167d2a690ee328bef7807487", + "sha256:13cf89be53348d1c17b453867da68704802966c433b2bb4fa1f970daadd2ef70", + "sha256:217262fcf6a4c2e1c7cb1efa08bd9ebc432502abc6c255c4abab611e8be0d14d", + "sha256:223e544828f1955daaf4cefbb4853bc416b2ec3fd56d4f4204a8b17007c21250", + "sha256:277cb61fede2f95b9c61912fefb3d43fbd5f18bf18a14fae4911b67984486f5d", + "sha256:3213f753e8ae86c396e0e066866e64c6b04618e85c723b32ecb0909885211f74", + "sha256:4690984a4dee1033da0af6df0b7a6bde83f74e1c0c870623797cec77964de34d", + "sha256:4fcc472ef87f45c429d3b923b925704aa581f875d65bac80f8ab0c3296a63f78", + "sha256:61409bd745a265a742f2693e4600e4dbd45cc1daebe1d5fad6fcb22912d44145", + "sha256:678f1963f755c5d9f5f6968dded7b245dd1ece8cf53c1aa9d80e6734a8c7f41d", + "sha256:6c6d03549d4e2734133badb9ab1c05d9f0ef4bcd31d83e5d2b4747c85cfa21da", + "sha256:6e74d5f4d6ecd6942375c52ffcd35f4318a61a02328f6f1bd79fcb4ffedf969e", + "sha256:7b4fc7b1ecc987ca7aaf3f4f0e71bbfbd81aaabf87002558f5bc95da3a865bcd", + "sha256:7ed386a40e172ddf44c061ad74881d8622f791d9af0b6f5be20023029129bc85", + "sha256:8f54f0924d12c47a382c600c880770b5ebfc96c9fd94cf6f6bdc21caf6163ea7", + "sha256:ad9b81351fdc236bda538efa6879315448411a81186c836d4b80d6ca8217cdb9", + 
"sha256:bbd00e21ea17f7bcc58dccd13869d68441b32899e89cf6cfa90d624a9198ce85", + "sha256:c3c289762cc09735e2a8f8a49571d0e8b4f57ea831ea11558247b5bdea0ac4db", + "sha256:cf4650942de5e5685ad308e22bcafbccfe37c54aa7c0e30cd620c2ee5c93d336", + "sha256:cfcbc33c9c59c93776aa41ab02e55c288a042211708b72fdb518221cc803abc8", + "sha256:e301055deadfedbd80cf94f2f65ff23126b232b0d1fea28f332ce58137bcdb18", + "sha256:ebbfe24df7f7b5c6c7620702496b6419f6a9aa2fd7f005eb731cc80d7b4692b9", + "sha256:eff69ddbf3ad86375c344339371168640951c302450c5d3e9936e98d6459db06", + "sha256:f6ed60a62c5f1c44e789d2cf14009423cb1646b44a43e40a9cf6a21f077678a1" + ], + "index": "pypi", + "version": "==4.4.2" + }, "requests": { "hashes": [ "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4", @@ -94,10 +126,10 @@ }, "certifi": { "hashes": [ - "sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50", - "sha256:fd7c7c74727ddcf00e9acd26bba8da604ffec95bf1c2144e67aff7a8b50e6cef" + "sha256:017c25db2a153ce562900032d5bc68e9f191e44e9a0f762f373977de9df1fbb3", + "sha256:25b64c7da4cd7479594d035c08c2d809eb4aab3a26e5a990ea98cc450c320f1f" ], - "version": "==2019.9.11" + "version": "==2019.11.28" }, "chardet": { "hashes": [ @@ -115,18 +147,18 @@ }, "importlib-metadata": { "hashes": [ - "sha256:aa18d7378b00b40847790e7c27e11673d7fed219354109d0e7b9e5b25dc3ad26", - "sha256:d5f18a79777f3aa179c145737780282e27b508fc8fd688cb17c7a813e8bd39af" + "sha256:073a852570f92da5f744a3472af1b61e28e9f78ccf0c9117658dc32b15de7b45", + "sha256:d95141fbfa7ef2ec65cfd945e2af7e5a6ddbd7c8d9a25e66ff3be8e3daf9f60f" ], "markers": "python_version < '3.8'", - "version": "==0.23" + "version": "==1.3.0" }, "more-itertools": { "hashes": [ - "sha256:409cd48d4db7052af495b09dec721011634af3753ae1ef92d2b32f73a745f832", - "sha256:92b8c4b06dac4f0611c0729b2f2ede52b2e1bac1ab48f089c7ddc12e26bb60c4" + "sha256:b84b238cce0d9adad5ed87e745778d20a3f8487d0f0cb8b8a586816c7496458d", + "sha256:c833ef592a0324bcc6a60e48440da07645063c453880c9477ceb22490aec1564" ], - "version": "==7.2.0" + "version": "==8.0.2" }, "packaging": { "hashes": [ @@ -158,11 +190,11 @@ }, "pytest": { "hashes": [ - "sha256:63344a2e3bce2e4d522fd62b4fdebb647c019f1f9e4ca075debbd13219db4418", - "sha256:f67403f33b2b1d25a6756184077394167fe5e2f9d8bdaab30707d19ccec35427" + "sha256:6b571215b5a790f9b41f19f3531c53a45cf6bb8ef2988bc1ff9afb38270b25fa", + "sha256:e41d489ff43948babd0fad7ad5e49b8735d5d55e26628a58673c39ff61d95de4" ], "index": "pypi", - "version": "==5.3.1" + "version": "==5.3.2" }, "requests": { "hashes": [ diff --git a/dsaps/models.py b/dsaps/models.py index c6c8538..52b0146 100644 --- a/dsaps/models.py +++ b/dsaps/models.py @@ -1,10 +1,12 @@ import datetime from functools import partial +import os import operator import requests import time import attr +from lxml import html import structlog op = operator.attrgetter('name') @@ -123,6 +125,17 @@ class MetadataEntry(BaseRecord): language = Field() +def build_file_list_remote(directory_url, file_extension): + """Build list of files in local directory.""" + file_list = {} + response = requests.get(directory_url) + links = html.fromstring(response.content).iterlinks() + for link in links: + if link[2].endswith(file_extension): + file_list[link[2]] = f'{directory_url}{link[2]}' + return file_list + + def elapsed_time(start_time, label): """Calculate elapsed time.""" td = datetime.timedelta(seconds=time.time() - start_time) diff --git a/tests/test_models.py b/tests/test_models.py index cb68397..c3e12ae 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ 
-1,3 +1,5 @@ +import os + import attr import pytest import requests_mock @@ -60,6 +62,7 @@ def test_filtered_item_search(client): def test__pop_inst(client): + """Test _pop_inst function.""" class_type = models.Collection rec_obj = {'name': 'Test title', 'type': 'collection', 'items': []} rec_obj = client._pop_inst(class_type, rec_obj) @@ -68,7 +71,24 @@ def test__pop_inst(client): def test__build_uuid_list(client): + """Test _build_uuid_list function.""" rec_obj = {'items': [{'uuid': '1234'}]} children = 'items' child_list = client._build_uuid_list(rec_obj, children) assert '1234' in child_list + + +def test_build_file_list_remote(): + """Test build_file_list_remote function.""" + content = '' + content += 'Index of /pdf

</title></head><body><h1>Index of /'
+    content += 'pdf</h1><table><tr><th>Name</th><th>Last modified</th>'
+    content += '<th>Size</th></tr><tr><td><a href="999.pdf">999.pdf</a></td>'
+    content += '<td>2001-02-16 11:59</td><td>107K</td></tr></table></body></html>
    ' + with requests_mock.Mocker() as m: + directory_url = 'mock://test.com/pdfs/' + file_extension = 'pdf' + m.get(directory_url, text=content) + file_list = models.build_file_list_remote(directory_url, + file_extension) + assert '999.pdf' in file_list From 90c9478e2b4782a38db5ea027d534a477d1bc93d Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Mon, 16 Dec 2019 12:42:40 -0500 Subject: [PATCH 07/22] Create setup.py --- dsaps/models.py | 1 - setup.py | 23 +++++++++++++++++++++++ tests/test_models.py | 2 -- 3 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 setup.py diff --git a/dsaps/models.py b/dsaps/models.py index 52b0146..873174d 100644 --- a/dsaps/models.py +++ b/dsaps/models.py @@ -1,6 +1,5 @@ import datetime from functools import partial -import os import operator import requests import time diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..15bcbdf --- /dev/null +++ b/setup.py @@ -0,0 +1,23 @@ +from setuptools import setup, find_packages + +setup( + name='DSpace API Python Scripts', + version='1.0.0', + description='', + packages=find_packages(exclude=['tests']), + author='Eric Hanson', + author_email='ehanson@mit.edu', + install_requires=[ + 'requests', + 'structlog', + 'attr', + 'click', + 'lxml', + ], + entry_points={ + 'console_scripts': [ + 'dsaps=dsaps.cli:main', + ] + }, + python_requires='>=3.7.1', +) diff --git a/tests/test_models.py b/tests/test_models.py index c3e12ae..403ce8b 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,5 +1,3 @@ -import os - import attr import pytest import requests_mock From edb6c79be4b2b3b57da7a79c672fda39699f4f1b Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Tue, 14 Jan 2020 10:26:40 -0500 Subject: [PATCH 08/22] new coll func --- dsaps/cli.py | 17 +++++++++++++++++ dsaps/models.py | 28 +++++++++++++++++++++------- tests/test_models.py | 28 ++++++++++++++++++---------- 3 files changed, 56 insertions(+), 17 deletions(-) diff --git a/dsaps/cli.py b/dsaps/cli.py index e209cc1..2f4a9f5 100644 --- a/dsaps/cli.py +++ b/dsaps/cli.py @@ -63,5 +63,22 @@ def search(ctx, field, string, search_type): models.elapsed_time(start_time, 'Elapsed time') +@main.command() +@click.option('-c', '--comm_handle', prompt='Enter the community handle', + help='The handle of the community in which to create the ,' + 'collection.') +@click.option('-n', '--coll_name', prompt='Enter the name of the collection', + help='The name of the collection to be created.') +@click.pass_context +def newcoll(ctx, comm_handle, coll_name): + client = ctx.obj['client'] + coll_id = client.post_coll_to_comm(comm_handle, coll_name) + logger.info(coll_id) + # STEPS TO ADD + # post items to collections + # post bistreams to item_links + # post prov notes + + if __name__ == '__main__': main() diff --git a/dsaps/models.py b/dsaps/models.py index 873174d..d399d56 100644 --- a/dsaps/models.py +++ b/dsaps/models.py @@ -16,17 +16,20 @@ class Client: def __init__(self, url): - self.url = url + header = {'content-type': 'application/json', 'accept': + 'application/json'} + self.url = url.rstrip('/') + self.cookies = None + self.header = header logger.info('Initializing client') def authenticate(self, email, password): + header = self.header data = {'email': email, 'password': password} - header = {'content-type': 'application/json', 'accept': - 'application/json'} - session = requests.post(self.url + '/rest/login', headers=header, + session = requests.post(f'{self.url}/login', headers=header, params=data).cookies['JSESSIONID'] cookies = 
{'JSESSIONID': session} - status = requests.get(self.url + '/rest/status', headers=header, + status = requests.get(f'{self.url}/status', headers=header, cookies=cookies).json() self.user_full_name = status['fullname'] self.cookies = cookies @@ -35,7 +38,7 @@ def authenticate(self, email, password): def get_record(self, uuid, rec_type): """Retrieve an individual record of a particular type.""" - url = f'{self.url}/rest/{rec_type}/{uuid}?expand=all' + url = f'{self.url}/{rec_type}/{uuid}?expand=all' record = requests.get(url, headers=self.header, cookies=self.cookies).json() if rec_type == 'items': @@ -55,7 +58,7 @@ def filtered_item_search(self, key, string, query_type, items = '' item_links = [] while items != []: - endpoint = f'{self.url}/rest/filtered-items?' + endpoint = f'{self.url}/filtered-items?' params = {'query_field[]': key, 'query_op[]': query_type, 'query_val[]': string, '&collSel[]': selected_collections, 'limit': 200, 'offset': offset} @@ -71,6 +74,17 @@ def filtered_item_search(self, key, string, query_type, offset = offset + 200 return item_links + def post_coll_to_comm(self, comm_handle, coll_name): + endpoint = f'{self.url}/handle/{comm_handle}' + community = requests.get(endpoint, headers=self.header, + cookies=self.cookies).json() + comm_id = community['uuid'] + collection = {'name': coll_name} + endpoint2 = f'{self.url}/communities/{comm_id}/collections' + coll_id = requests.post(endpoint2, headers=self.header, + cookies=self.cookies, json=collection).json() + return coll_id['link'] + def _pop_inst(self, class_type, rec_obj): """Populate class instance with data from record.""" fields = [op(field) for field in attr.fields(class_type)] diff --git a/tests/test_models.py b/tests/test_models.py index 403ce8b..ba41103 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -7,7 +7,7 @@ @pytest.fixture def client(): - client = models.Client('mock://example.com') + client = models.Client('mock://example.com/') client.header = {} client.cookies = {} client.user_full_name = '' @@ -17,26 +17,21 @@ def client(): def test_authenticate(client): """Test authenticate function.""" with requests_mock.Mocker() as m: - url1 = '/rest/login' - url2 = '/rest/status' email = 'test@test.mock' password = '1234' - header = {'content-type': 'application/json', 'accept': - 'application/json'} cookies = {'JSESSIONID': '11111111'} json_object = {'fullname': 'User Name'} - m.post(url1, cookies=cookies) - m.get(url2, json=json_object) + m.post('mock://example.com/login', cookies=cookies) + m.get('mock://example.com/status', json=json_object) client.authenticate(email, password) assert client.user_full_name == 'User Name' assert client.cookies == cookies - assert client.header == header def test_get_record(client): """Test get_record function.""" with requests_mock.Mocker() as m: - uri = '/rest/items/123?expand=all' + uri = 'mock://example.com/items/123?expand=all' json_object = {'metadata': {'title': 'Sample title'}, 'type': 'item'} m.get(uri, json=json_object) rec_obj = client.get_record('123', 'items') @@ -49,7 +44,7 @@ def test_filtered_item_search(client): key = 'dc.title' string = 'test' query_type = 'contains' - endpoint = '/rest/filtered-items?' + endpoint = 'mock://example.com/filtered-items?' 
json_object_1 = {'items': [{'link': '1234'}]} json_object_2 = {'items': []} m.get(endpoint, [{'json': json_object_1}, {'json': json_object_2}]) @@ -59,6 +54,19 @@ def test_filtered_item_search(client): assert '1234' in item_links +def test_post_coll_to_comm(client): + with requests_mock.Mocker() as m: + comm_handle = '1234' + coll_name = 'Test Collection' + json_object_1 = {'uuid': 'a1b2'} + json_object_2 = {'link': '5678'} + m.get('mock://example.com/handle/1234', json=json_object_1) + m.post('mock://example.com/communities/a1b2/collections', + json=json_object_2) + coll_id = client.post_coll_to_comm(comm_handle, coll_name) + assert coll_id == '5678' + + def test__pop_inst(client): """Test _pop_inst function.""" class_type = models.Collection From 9cdc68b83a1ff44b8df63bbd8a140abf7d469876 Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Wed, 5 Feb 2020 11:55:25 -0500 Subject: [PATCH 09/22] post items and bitstreams (#11) * post items and bitstreams * Update cli.py * Update cli.py --- dsaps/cli.py | 39 ++++++++++++++++++++----- dsaps/models.py | 68 +++++++++++++++++++++++++++++++++++++------- tests/test_models.py | 65 +++++++++++++++++++++++++++++++++++++----- 3 files changed, 148 insertions(+), 24 deletions(-) diff --git a/dsaps/cli.py b/dsaps/cli.py index 2f4a9f5..0e30c12 100644 --- a/dsaps/cli.py +++ b/dsaps/cli.py @@ -1,5 +1,8 @@ import datetime +import glob +import json import logging +import os import time import click @@ -69,15 +72,37 @@ def search(ctx, field, string, search_type): 'collection.') @click.option('-n', '--coll_name', prompt='Enter the name of the collection', help='The name of the collection to be created.') +@click.option('-m', '--metadata', prompt='Enter the path of the metadata file', + help='The path of the JSON file of metadata.') +@click.option('-f', '--file_path', prompt='Enter the path', + help='The path of the content, a URL or local drive path.') +@click.option('-t', '--file_type', prompt='Enter the file type', + help='The file type to be uploaded.') +@click.option('-i', '--ingest_type', prompt='Enter the type of ingest', + help='The type of ingest to perform: local, remote.', + type=click.Choice(['local', 'remote'])) @click.pass_context -def newcoll(ctx, comm_handle, coll_name): +def newcoll(ctx, comm_handle, coll_name, metadata, file_path, file_type, + ingest_type): client = ctx.obj['client'] - coll_id = client.post_coll_to_comm(comm_handle, coll_name) - logger.info(coll_id) - # STEPS TO ADD - # post items to collections - # post bistreams to item_links - # post prov notes + start_time = ctx.obj['start_time'] + with open(metadata, encoding='UTF-8') as fp: + coll_metadata = json.load(fp) + coll_id = client.post_coll_to_comm(comm_handle, coll_name) + file_dict = {} + if ingest_type == 'local': + files = glob.glob(f'{file_path}/**/*.{file_type}', recursive=True) + for file in files: + file_name = os.path.splitext(os.path.basename(file))[0] + file_dict[file_name] = file + elif ingest_type == 'remote': + file_dict = models.build_file_dict_remote(file_path, file_type, + file_dict) + items = client.post_items_to_coll(coll_id, coll_metadata, file_dict, + ingest_type) + for item in items: + logger.info(f'Item posted: {item}') + models.elapsed_time(start_time, 'Total runtime:') if __name__ == '__main__': diff --git a/dsaps/models.py b/dsaps/models.py index d399d56..1ac98da 100644 --- a/dsaps/models.py +++ b/dsaps/models.py @@ -1,6 +1,7 @@ import datetime from functools import partial import operator +import os import requests import time @@ -24,6 +25,7 @@ def 
__init__(self, url): logger.info('Initializing client') def authenticate(self, email, password): + """Authenticate user to DSpace API.""" header = self.header data = {'email': email, 'password': password} session = requests.post(f'{self.url}/login', headers=header, @@ -54,6 +56,7 @@ def get_record(self, uuid, rec_type): def filtered_item_search(self, key, string, query_type, selected_collections=''): + """Performs a search against the filtered items endpoint.""" offset = 0 items = '' item_links = [] @@ -63,10 +66,9 @@ def filtered_item_search(self, key, string, query_type, 'query_val[]': string, '&collSel[]': selected_collections, 'limit': 200, 'offset': offset} logger.info(params) - print(endpoint) response = requests.get(endpoint, headers=self.header, params=params, cookies=self.cookies) - print(f'Response url: {response.url}') + logger.info(f'Response url: {response.url}') response = response.json() items = response['items'] for item in items: @@ -75,6 +77,7 @@ def filtered_item_search(self, key, string, query_type, return item_links def post_coll_to_comm(self, comm_handle, coll_name): + """Posts a collection to a specified community.""" endpoint = f'{self.url}/handle/{comm_handle}' community = requests.get(endpoint, headers=self.header, cookies=self.cookies).json() @@ -83,7 +86,52 @@ def post_coll_to_comm(self, comm_handle, coll_name): endpoint2 = f'{self.url}/communities/{comm_id}/collections' coll_id = requests.post(endpoint2, headers=self.header, cookies=self.cookies, json=collection).json() - return coll_id['link'] + coll_id = coll_id['uuid'] + logger.info(f'Collection posted: {coll_id}') + return coll_id + + def post_items_to_coll(self, coll_id, coll_metadata, file_dict, + ingest_type): + """Posts items to a specified collection.""" + for item_metadata in coll_metadata: + file_exists = '' + for element in [e for e in item_metadata['metadata'] + if e['key'] == 'file_identifier']: + file_identifier = element['value'] + item_metadata['metadata'].remove(element) + for k in [e for e in file_dict if file_identifier in e]: + file_exists = True + if file_exists is True: + endpoint = f'{self.url}/collections/{coll_id}/items' + item_id = requests.post(endpoint, headers=self.header, + cookies=self.cookies, + json=item_metadata).json() + item_id = item_id['uuid'] + bit_ids = self.post_bitstreams_to_item(item_id, + file_identifier, + file_dict, ingest_type) + for bit_id in bit_ids: + logger.info(f'Bitstream posted: {bit_id}') + yield item_id + + def post_bitstreams_to_item(self, item_id, file_identifier, file_dict, + ingest_type): + """Posts bitstreams to a specified item.""" + for k, v in file_dict.items(): + if k.startswith(file_identifier): + bitstream = file_dict[k] + file_name = os.path.basename(bitstream) + if ingest_type == 'local': + data = open(bitstream, 'rb') + elif ingest_type == 'remote': + data = requests.get(bitstream) + endpoint = (f'{self.url}/items/{item_id}' + + f'/bitstreams?name={file_name}') + header_upload = {'accept': 'application/json'} + bit_id = requests.post(endpoint, headers=header_upload, + cookies=self.cookies, data=data).json() + bit_id = bit_id['uuid'] + yield bit_id def _pop_inst(self, class_type, rec_obj): """Populate class instance with data from record.""" @@ -100,6 +148,7 @@ def _pop_inst(self, class_type, rec_obj): return rec_obj def _build_uuid_list(self, rec_obj, children): + """Builds a list of the uuids for an object's children.""" child_list = [] for child in rec_obj[children]: child_list.append(child['uuid']) @@ -138,15 +187,14 @@ class 
MetadataEntry(BaseRecord): language = Field() -def build_file_list_remote(directory_url, file_extension): - """Build list of files in local directory.""" - file_list = {} +def build_file_dict_remote(directory_url, file_type, file_dict): + """Build list of files in a remote directory.""" response = requests.get(directory_url) links = html.fromstring(response.content).iterlinks() - for link in links: - if link[2].endswith(file_extension): - file_list[link[2]] = f'{directory_url}{link[2]}' - return file_list + for link in [l for l in links if l[2].endswith(file_type)]: + file_identifier = link[2].replace(f'.{file_type}', '') + file_dict[file_identifier] = f'{directory_url}{link[2]}' + return file_dict def elapsed_time(start_time, label): diff --git a/tests/test_models.py b/tests/test_models.py index ba41103..cd89292 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -14,6 +14,16 @@ def client(): return client +@pytest.fixture +def sample_content(tmp_path): + content = 'test' + dir = tmp_path / 'sub' + dir.mkdir() + sample_content = dir / '123.pdf' + sample_content.write_text(content) + return sample_content + + def test_authenticate(client): """Test authenticate function.""" with requests_mock.Mocker() as m: @@ -55,11 +65,12 @@ def test_filtered_item_search(client): def test_post_coll_to_comm(client): + """Test post_coll_to_comm function.""" with requests_mock.Mocker() as m: comm_handle = '1234' coll_name = 'Test Collection' json_object_1 = {'uuid': 'a1b2'} - json_object_2 = {'link': '5678'} + json_object_2 = {'uuid': '5678'} m.get('mock://example.com/handle/1234', json=json_object_1) m.post('mock://example.com/communities/a1b2/collections', json=json_object_2) @@ -67,6 +78,45 @@ def test_post_coll_to_comm(client): assert coll_id == '5678' +def test_post_items_to_coll(client, sample_content): + """Test post_items_to_coll function.""" + with requests_mock.Mocker() as m: + coll_metadata = [{"metadata": [ + {"key": "file_identifier", + "value": "123"}, + {"key": "dc.title", "value": + "Monitoring Works: Getting Teachers", + "language": "en_US"}]}] + coll_id = '789' + ingest_type = 'local' + file_dict = {'123': sample_content} + json_object_1 = {'uuid': 'a1b2'} + m.post('mock://example.com/collections/789/items', json=json_object_1) + url = 'mock://example.com/items/a1b2/bitstreams?name=123.pdf' + json_object_2 = {'uuid': 'c3d4'} + m.post(url, json=json_object_2) + item_ids = client.post_items_to_coll(coll_id, coll_metadata, file_dict, + ingest_type) + for item_id in item_ids: + assert 'a1b2' == item_id + + +def test_post_bitstreams_to_item(client, sample_content): + """Test post_bitstreams_to_item function.""" + with requests_mock.Mocker() as m: + item_id = 'a1b2' + ingest_type = 'local' + file_identifier = '123' + file_dict = {'123': sample_content} + json_object_1 = {'uuid': 'c3d4'} + url = 'mock://example.com/items/a1b2/bitstreams?name=123.pdf' + m.post(url, json=json_object_1) + bit_ids = client.post_bitstreams_to_item(item_id, file_identifier, + file_dict, ingest_type) + for bit_id in bit_ids: + assert 'c3d4' == bit_id + + def test__pop_inst(client): """Test _pop_inst function.""" class_type = models.Collection @@ -84,8 +134,8 @@ def test__build_uuid_list(client): assert '1234' in child_list -def test_build_file_list_remote(): - """Test build_file_list_remote function.""" +def test_build_file_dict_remote(): + """Test build_file_dict_remote function.""" content = '' content += 'Index of /pdf

</title></head><body><h1>Index of /'
    content += 'pdf</h1><table><tr><th>Name</th><th>Last modified</th>'
    content += '<th>Size</th></tr><tr><td><a href="999.pdf">999.pdf</a></td>'
    content += '<td>2001-02-16 11:59</td><td>107K</td></tr></table></body></html>
authenticate function.""" + """Test authenticate method.""" with requests_mock.Mocker() as m: email = 'test@test.mock' password = '1234' @@ -39,7 +39,7 @@ def test_authenticate(client): def test_get_record(client): - """Test get_record function.""" + """Test get_record method.""" with requests_mock.Mocker() as m: uri = 'mock://example.com/items/123?expand=all' json_object = {'metadata': {'title': 'Sample title'}, 'type': 'item'} @@ -49,7 +49,7 @@ def test_get_record(client): def test_filtered_item_search(client): - """Test filtered_item_search function.""" + """Test filtered_item_search method.""" with requests_mock.Mocker() as m: key = 'dc.title' string = 'test' @@ -65,7 +65,7 @@ def test_filtered_item_search(client): def test_post_coll_to_comm(client): - """Test post_coll_to_comm function.""" + """Test post_coll_to_comm method.""" with requests_mock.Mocker() as m: comm_handle = '1234' coll_name = 'Test Collection' @@ -79,7 +79,7 @@ def test_post_coll_to_comm(client): def test_post_items_to_coll(client, sample_content): - """Test post_items_to_coll function.""" + """Test post_items_to_coll method.""" with requests_mock.Mocker() as m: coll_metadata = [{"metadata": [ {"key": "file_identifier", @@ -102,7 +102,7 @@ def test_post_items_to_coll(client, sample_content): def test_post_bitstreams_to_item(client, sample_content): - """Test post_bitstreams_to_item function.""" + """Test post_bitstreams_to_item method.""" with requests_mock.Mocker() as m: item_id = 'a1b2' ingest_type = 'local' @@ -118,7 +118,7 @@ def test_post_bitstreams_to_item(client, sample_content): def test__pop_inst(client): - """Test _pop_inst function.""" + """Test _pop_inst method.""" class_type = models.Collection rec_obj = {'name': 'Test title', 'type': 'collection', 'items': []} rec_obj = client._pop_inst(class_type, rec_obj) @@ -127,7 +127,7 @@ def test__pop_inst(client): def test__build_uuid_list(client): - """Test _build_uuid_list function.""" + """Test _build_uuid_list method.""" rec_obj = {'items': [{'uuid': '1234'}]} children = 'items' child_list = client._build_uuid_list(rec_obj, children) @@ -149,3 +149,9 @@ def test_build_file_dict_remote(): file_list = models.build_file_dict_remote(directory_url, file_type, file_dict) assert '999' in file_list + + +# # How to test this? 
Applies to asaps as well +# def test_create_csv_from_list(): +# """Test create_csv_from_list function.""" +# assert False From 62520ffd9f6fe67855051786370208f09de9fefc Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Sun, 19 Jan 2020 15:41:52 -0500 Subject: [PATCH 11/22] metadata json transform --- dsaps/cli.py | 45 ++++++++++++++++++++++++++++++++++++++++++++ dsaps/models.py | 36 +++++++++++++++++++++++++++++++++++ tests/test_models.py | 18 ++++++++++++++++++ 3 files changed, 99 insertions(+) diff --git a/dsaps/cli.py b/dsaps/cli.py index cfc4ad0..60478a1 100644 --- a/dsaps/cli.py +++ b/dsaps/cli.py @@ -145,5 +145,50 @@ def reconcile(metadata_csv, file_path, file_type): models.create_csv_from_list(metadata_matches, 'metadata_matches.csv') +@main.command() +@click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file', + help='The path of the CSV file of metadata.') +def metadatajson(metadata_csv): + with open(metadata_csv) as csvfile: + reader = csv.DictReader(csvfile) + metadata_group = [] + for row in reader: + metadata_rec = [] + models.metadata_csv(row, metadata_rec, 'fileIdentifier', + 'file_identifier', '', '') + models.metadata_csv(row, metadata_rec, 'dc.contributor.author', + 'author name - direct', '', '') + models.metadata_csv(row, metadata_rec, 'dc.contributor.advisor', + 'supervisor(s)', '', '') + models.metadata_csv(row, metadata_rec, 'dc.date.issued', + 'pub date', '', '') + models.metadata_csv(row, metadata_rec, 'dc.description.abstract', + 'Abstract', 'en_US', '') + models.metadata_direct(metadata_rec, 'dc.format.mimetype', + 'application/pdf', 'en_US') + models.metadata_direct(metadata_rec, 'dc.language.iso', 'en_US', + 'en_US') + models.metadata_direct(metadata_rec, 'dc.publisher', + 'Massachusetts Institute of Technology. 
' + 'Laboratory for Computer Science', 'en_US') + models.metadata_csv(row, metadata_rec, + 'dc.relation.ispartofseries', + 'file_identifier', 'en_US', '') + models.metadata_direct(metadata_rec, 'dc.rights', + 'Educational use permitted', 'en_US') + models.metadata_direct(metadata_rec, 'dc.rights.uri', + 'http://rightsstatements.org/vocab/' + 'InC-EDU/1.0/', 'en_US') + models.metadata_csv(row, metadata_rec, 'dc.title', 'Title', + 'en_US', '') + models.metadata_direct(metadata_rec, 'dc.type', 'Technical Report', + 'en_US') + item = {'metadata': metadata_rec} + metadata_group.append(item) + file_name = os.path.splitext(os.path.basename(metadata_csv))[0] + f = open(f'{file_name}.json', 'w') + json.dump(metadata_group, f) + + if __name__ == '__main__': main() diff --git a/dsaps/models.py b/dsaps/models.py index 8c13020..fdd4f76 100644 --- a/dsaps/models.py +++ b/dsaps/models.py @@ -211,3 +211,39 @@ def elapsed_time(start_time, label): """Calculate elapsed time.""" td = datetime.timedelta(seconds=time.time() - start_time) logger.info(f'{label} : {td}') + + +def metadata_csv(row, metadata_rec, key, field, language, delimiter): + """Create metadata elements from CSV, including fields with delimiters.""" + if row[field] != '': + if delimiter != '' and delimiter in row[field]: + values = row[field].split(delimiter) + for value in values: + if language != '': + metadata_elem = {'key': key, 'language': language, 'value': + value} + metadata_rec.append(metadata_elem) + else: + metadata_elem = {'key': key, 'value': value} + metadata_rec.append(metadata_elem) + else: + value = row[field] + if language != '': + metadata_elem = {'key': key, 'language': language, 'value': + value} + metadata_rec.append(metadata_elem) + else: + metadata_elem = {'key': key, 'value': value} + metadata_rec.append(metadata_elem) + else: + pass + + +def metadata_direct(metadata_rec, key, value, language): + """Create metadata element with specified value.""" + if language != '': + metadata_elem = {'key': key, 'language': language, 'value': value} + metadata_rec.append(metadata_elem) + else: + metadata_elem = {'key': key, 'value': value} + metadata_rec.append(metadata_elem) diff --git a/tests/test_models.py b/tests/test_models.py index 5bc8b49..c06f4a4 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -155,3 +155,21 @@ def test_build_file_dict_remote(): # def test_create_csv_from_list(): # """Test create_csv_from_list function.""" # assert False + + +def test_metadata_csv(): + """Test metadata_csv function.""" + metadata_rec = [] + row = {'title': 'Test title'} + models.metadata_csv(row, metadata_rec, 'dc.title', 'title', 'en_US', '') + assert metadata_rec[0]['key'] == 'dc.title' + assert metadata_rec[0]['value'] == 'Test title' + + +def test_metadata_direct(): + """Test metadata_direct function.""" + metadata_rec = [] + value = 'No one may ever view this content.' + models.metadata_direct(metadata_rec, 'dc.rights', value, 'en_US') + assert metadata_rec[0]['key'] == 'dc.rights' + assert metadata_rec[0]['value'] == 'No one may ever view this content.' 
From 3362540d58fbaf07be27d998f0c52642deb845e6 Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Mon, 2 Mar 2020 11:30:50 -0500 Subject: [PATCH 12/22] PR updates --- dsaps/cli.py | 70 +++++++++++++++++++++++--------------------- dsaps/models.py | 49 ++++++++++++------------------- tests/test_models.py | 19 ++++++------ 3 files changed, 63 insertions(+), 75 deletions(-) diff --git a/dsaps/cli.py b/dsaps/cli.py index 60478a1..9881ed2 100644 --- a/dsaps/cli.py +++ b/dsaps/cli.py @@ -47,6 +47,11 @@ def main(ctx, url, email, password): ctx.obj['start_time'] = start_time +@click.group() +def aux(): + pass + + @main.command() @click.option('-f', '--field', prompt='Enter the field to be searched', help='The field to search.') @@ -106,7 +111,7 @@ def newcoll(ctx, comm_handle, coll_name, metadata, file_path, file_type, models.elapsed_time(start_time, 'Total runtime:') -@main.command() +@aux.command() @click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file', help='The path of the CSV file of metadata.') @click.option('-f', '--file_path', prompt='Enter the path', @@ -145,50 +150,47 @@ def reconcile(metadata_csv, file_path, file_type): models.create_csv_from_list(metadata_matches, 'metadata_matches.csv') -@main.command() +@aux.command() @click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file', help='The path of the CSV file of metadata.') def metadatajson(metadata_csv): with open(metadata_csv) as csvfile: reader = csv.DictReader(csvfile) metadata_group = [] + mapping_dict = {'fileIdentifier': ['file_identifier'], + 'dc.contributor.author': ['author name - direct'], + 'dc.contributor.advisor': ['supervisor(s)'], + 'dc.date.issued': ['pub date'], + 'dc.description.abstract': ['Abstract', 'en_US'], + 'dc.title': ['Title', 'en_US'], + 'dc.relation.ispartofseries': ['file_identifier']} for row in reader: metadata_rec = [] - models.metadata_csv(row, metadata_rec, 'fileIdentifier', - 'file_identifier', '', '') - models.metadata_csv(row, metadata_rec, 'dc.contributor.author', - 'author name - direct', '', '') - models.metadata_csv(row, metadata_rec, 'dc.contributor.advisor', - 'supervisor(s)', '', '') - models.metadata_csv(row, metadata_rec, 'dc.date.issued', - 'pub date', '', '') - models.metadata_csv(row, metadata_rec, 'dc.description.abstract', - 'Abstract', 'en_US', '') - models.metadata_direct(metadata_rec, 'dc.format.mimetype', - 'application/pdf', 'en_US') - models.metadata_direct(metadata_rec, 'dc.language.iso', 'en_US', - 'en_US') - models.metadata_direct(metadata_rec, 'dc.publisher', - 'Massachusetts Institute of Technology. ' - 'Laboratory for Computer Science', 'en_US') - models.metadata_csv(row, metadata_rec, - 'dc.relation.ispartofseries', - 'file_identifier', 'en_US', '') - models.metadata_direct(metadata_rec, 'dc.rights', - 'Educational use permitted', 'en_US') - models.metadata_direct(metadata_rec, 'dc.rights.uri', - 'http://rightsstatements.org/vocab/' - 'InC-EDU/1.0/', 'en_US') - models.metadata_csv(row, metadata_rec, 'dc.title', 'Title', - 'en_US', '') - models.metadata_direct(metadata_rec, 'dc.type', 'Technical Report', - 'en_US') + metadata_rec = models.create_metadata_rec(mapping_dict, row, + metadata_rec) + metadata_rec.append({'key': 'dc.format.mimetype', 'language': + 'en_US', 'value': 'application/pdf'}) + metadata_rec.append({'key': 'dc.language.iso', 'language': + 'en_US', 'value': 'en_US'}) + metadata_rec.append({'key': 'dc.publisher', 'language': 'en_US', + 'value': 'Massachusetts Institute of ' + 'Technology. 
Laboratory for Computer' + 'Science'}) + metadata_rec.append({'key': 'dc.rights', 'language': 'en_US', + 'value': 'Educational use permitted'}) + metadata_rec.append({'key': 'dc.rights.uri', 'language': 'en_US', + 'value': 'http://rightsstatements.org/vocab/' + 'InC-EDU/1.0/'}) + metadata_rec.append({'key': 'dc.type', 'language': 'en_US', + 'value': 'Technical Report'}) item = {'metadata': metadata_rec} metadata_group.append(item) file_name = os.path.splitext(os.path.basename(metadata_csv))[0] - f = open(f'{file_name}.json', 'w') - json.dump(metadata_group, f) + with open(f'{file_name}.json', 'w') as f: + json.dump(metadata_group, f) + +cli = click.CommandCollection(sources=[main, aux]) if __name__ == '__main__': - main() + cli() diff --git a/dsaps/models.py b/dsaps/models.py index fdd4f76..aabc523 100644 --- a/dsaps/models.py +++ b/dsaps/models.py @@ -213,37 +213,24 @@ def elapsed_time(start_time, label): logger.info(f'{label} : {td}') -def metadata_csv(row, metadata_rec, key, field, language, delimiter): - """Create metadata elements from CSV, including fields with delimiters.""" - if row[field] != '': - if delimiter != '' and delimiter in row[field]: - values = row[field].split(delimiter) - for value in values: - if language != '': - metadata_elem = {'key': key, 'language': language, 'value': - value} - metadata_rec.append(metadata_elem) - else: - metadata_elem = {'key': key, 'value': value} - metadata_rec.append(metadata_elem) - else: - value = row[field] - if language != '': - metadata_elem = {'key': key, 'language': language, 'value': - value} - metadata_rec.append(metadata_elem) - else: - metadata_elem = {'key': key, 'value': value} - metadata_rec.append(metadata_elem) +def metadata_csv(row, key, field, language=None): + """Create metadata element from CSV.""" + value = row[field] + if language is not None: + metadata_elem = {'key': key, 'language': language, 'value': + value} else: - pass + metadata_elem = {'key': key, 'value': value} + return metadata_elem -def metadata_direct(metadata_rec, key, value, language): - """Create metadata element with specified value.""" - if language != '': - metadata_elem = {'key': key, 'language': language, 'value': value} - metadata_rec.append(metadata_elem) - else: - metadata_elem = {'key': key, 'value': value} - metadata_rec.append(metadata_elem) +def create_metadata_rec(mapping_dict, row, metadata_rec): + """Create metadata record from CSV.""" + for k, v in mapping_dict.items(): + if len(v) == 2: + metadata_elem = metadata_csv(row, k, v[0], v[1]) + else: + metadata_elem = metadata_csv(row, k, v[0]) + if metadata_elem['value'] != '': + metadata_rec.append(metadata_elem) + return metadata_rec diff --git a/tests/test_models.py b/tests/test_models.py index c06f4a4..a61e813 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -159,17 +159,16 @@ def test_build_file_dict_remote(): def test_metadata_csv(): """Test metadata_csv function.""" - metadata_rec = [] row = {'title': 'Test title'} - models.metadata_csv(row, metadata_rec, 'dc.title', 'title', 'en_US', '') - assert metadata_rec[0]['key'] == 'dc.title' - assert metadata_rec[0]['value'] == 'Test title' + metadata_elem = models.metadata_csv(row, 'dc.title', 'title', 'en_US') + assert metadata_elem['key'] == 'dc.title' + assert metadata_elem['value'] == 'Test title' -def test_metadata_direct(): - """Test metadata_direct function.""" +def test_create_metadata_rec(): metadata_rec = [] - value = 'No one may ever view this content.' 
- models.metadata_direct(metadata_rec, 'dc.rights', value, 'en_US') - assert metadata_rec[0]['key'] == 'dc.rights' - assert metadata_rec[0]['value'] == 'No one may ever view this content.' + row = {'title': 'Test title'} + mapping_dict = {'dc.title': ['title']} + metadata_rec = models.create_metadata_rec(mapping_dict, row, metadata_rec) + assert metadata_rec[0]['key'] == 'dc.title' + assert metadata_rec[0]['value'] == 'Test title' From f238420c1268d8988601faa75790224e98760e5a Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Tue, 2 Mar 2021 15:11:41 -0500 Subject: [PATCH 13/22] get id from handle and misc refactoring --- .travis.yml | 2 +- Pipfile | 4 +-- dsaps/cli.py | 35 ++++---------------------- dsaps/models.py | 59 +++++++++++++++++++++++++++++++------------- setup.py | 6 ++--- tests/test_models.py | 58 +++++++++++++++++++++++++++---------------- 6 files changed, 90 insertions(+), 74 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3a2a5d7..4b1d45a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: python python: - - "3.7" + - "3.8" install: - pipenv install --dev script: diff --git a/Pipfile b/Pipfile index 1da3184..c02ecb3 100644 --- a/Pipfile +++ b/Pipfile @@ -10,12 +10,12 @@ requests-mock = "*" [packages] requests = "*" structlog = "*" -attr = "*" +attrs = "*" click = "*" lxml = "*" [requires] -python_version = "3.7" +python_version = "3.8" [scripts] dsaps = "python -c \"from dsaps.cli import main; main()\"" diff --git a/dsaps/cli.py b/dsaps/cli.py index 9881ed2..92a7145 100644 --- a/dsaps/cli.py +++ b/dsaps/cli.py @@ -24,6 +24,8 @@ @click.pass_context def main(ctx, url, email, password): ctx.obj = {} + if os.path.isdir('logs') is False: + os.mkdir('logs') dt = datetime.datetime.utcnow().isoformat(timespec='seconds') log_suffix = f'{dt}.log' structlog.configure(processors=[ @@ -47,31 +49,6 @@ def main(ctx, url, email, password): ctx.obj['start_time'] = start_time -@click.group() -def aux(): - pass - - -@main.command() -@click.option('-f', '--field', prompt='Enter the field to be searched', - help='The field to search.') -@click.option('-s', '--string', prompt='Enter the string', - help='The field to search.') -@click.option('-t', '--search_type', prompt='Enter the type of search', - help='The type of search.', - type=click.Choice(['exists', 'doesnt_exist', 'equals', - 'not_equals', 'contains', 'doesnt_contain']), - default='contains') -@click.pass_context -def search(ctx, field, string, search_type): - # Temp function for testing - client = ctx.obj['client'] - start_time = ctx.obj['start_time'] - item_links = client.filtered_item_search(field, string, search_type) - logger.info(item_links) - models.elapsed_time(start_time, 'Elapsed time') - - @main.command() @click.option('-c', '--comm_handle', prompt='Enter the community handle', help='The handle of the community in which to create the ,' @@ -111,7 +88,7 @@ def newcoll(ctx, comm_handle, coll_name, metadata, file_path, file_type, models.elapsed_time(start_time, 'Total runtime:') -@aux.command() +@main.command() @click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file', help='The path of the CSV file of metadata.') @click.option('-f', '--file_path', prompt='Enter the path', @@ -150,7 +127,7 @@ def reconcile(metadata_csv, file_path, file_type): models.create_csv_from_list(metadata_matches, 'metadata_matches.csv') -@aux.command() +@main.command() @click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file', help='The path of the CSV file of metadata.') def 
metadatajson(metadata_csv): @@ -190,7 +167,5 @@ def metadatajson(metadata_csv): json.dump(metadata_group, f) -cli = click.CommandCollection(sources=[main, aux]) - if __name__ == '__main__': - cli() + main() diff --git a/dsaps/models.py b/dsaps/models.py index aabc523..577e669 100644 --- a/dsaps/models.py +++ b/dsaps/models.py @@ -77,6 +77,13 @@ def filtered_item_search(self, key, string, query_type, offset = offset + 200 return item_links + def get_id_from_handle(self, handle): + """Posts a collection to a specified community.""" + endpoint = f'{self.url}/handle/{handle}' + rec_obj = requests.get(endpoint, headers=self.header, + cookies=self.cookies).json() + return rec_obj['uuid'] + def post_coll_to_comm(self, comm_handle, coll_name): """Posts a collection to a specified community.""" endpoint = f'{self.url}/handle/{comm_handle}' @@ -119,13 +126,17 @@ def post_bitstreams_to_item(self, item_id, file_identifier, file_dict, ingest_type): """Posts bitstreams to a specified item.""" for k, v in file_dict.items(): + bitstreams = [] if k.startswith(file_identifier): - bitstream = file_dict[k] - file_name = os.path.basename(bitstream) + bitstreams.append(k) + bitstreams.sort() + for bitstream in bitstreams: + bitstream_path = file_dict[bitstream] + file_name = os.path.basename(bitstream_path) if ingest_type == 'local': - data = open(bitstream, 'rb') + data = open(bitstream_path, 'rb') elif ingest_type == 'remote': - data = requests.get(bitstream) + data = requests.get(bitstream_path) endpoint = (f'{self.url}/items/{item_id}' + f'/bitstreams?name={file_name}') header_upload = {'accept': 'application/json'} @@ -192,7 +203,7 @@ def build_file_dict_remote(directory_url, file_type, file_dict): """Build list of files in a remote directory.""" response = requests.get(directory_url) links = html.fromstring(response.content).iterlinks() - for link in [l for l in links if l[2].endswith(file_type)]: + for link in [i for i in links if i[2].endswith(file_type)]: file_identifier = link[2].replace(f'.{file_type}', '') file_dict[file_identifier] = f'{directory_url}{link[2]}' return file_dict @@ -213,24 +224,38 @@ def elapsed_time(start_time, label): logger.info(f'{label} : {td}') -def metadata_csv(row, key, field, language=None): +def metadata_csv(row, key, field, language=None, delimiter=''): """Create metadata element from CSV.""" - value = row[field] - if language is not None: - metadata_elem = {'key': key, 'language': language, 'value': - value} - else: - metadata_elem = {'key': key, 'value': value} - return metadata_elem + metadata_elems = [] + if row[field] != '': + if delimiter != '' and delimiter in row[field]: + values = row[field].split(delimiter) + for value in values: + if language is not None: + metadata_elem = {'key': key, 'language': language, 'value': + value} + metadata_elems.append(metadata_elem) + else: + metadata_elem = {'key': key, 'value': value} + metadata_elems.append(metadata_elem) + else: + value = row[field] + if language is not None: + metadata_elem = {'key': key, 'language': language, 'value': + value} + else: + metadata_elem = {'key': key, 'value': value} + metadata_elems.append(metadata_elem) + return metadata_elems def create_metadata_rec(mapping_dict, row, metadata_rec): """Create metadata record from CSV.""" for k, v in mapping_dict.items(): - if len(v) == 2: - metadata_elem = metadata_csv(row, k, v[0], v[1]) + if len(v) == 3: + metadata_elems = metadata_csv(row, k, v[0], v[1], v[2]) else: - metadata_elem = metadata_csv(row, k, v[0]) - if metadata_elem['value'] != '': + 
metadata_elems = metadata_csv(row, k, v[0]) + for metadata_elem in metadata_elems: metadata_rec.append(metadata_elem) return metadata_rec diff --git a/setup.py b/setup.py index 15bcbdf..33de900 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages setup( - name='DSpace API Python Scripts', + name='dsaps', version='1.0.0', description='', packages=find_packages(exclude=['tests']), @@ -10,7 +10,7 @@ install_requires=[ 'requests', 'structlog', - 'attr', + 'attrs', 'click', 'lxml', ], @@ -19,5 +19,5 @@ 'dsaps=dsaps.cli:main', ] }, - python_requires='>=3.7.1', + python_requires='>=3.8', ) diff --git a/tests/test_models.py b/tests/test_models.py index a61e813..a87611d 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -30,9 +30,9 @@ def test_authenticate(client): email = 'test@test.mock' password = '1234' cookies = {'JSESSIONID': '11111111'} - json_object = {'fullname': 'User Name'} + user_json = {'fullname': 'User Name'} m.post('mock://example.com/login', cookies=cookies) - m.get('mock://example.com/status', json=json_object) + m.get('mock://example.com/status', json=user_json) client.authenticate(email, password) assert client.user_full_name == 'User Name' assert client.cookies == cookies @@ -42,10 +42,10 @@ def test_get_record(client): """Test get_record method.""" with requests_mock.Mocker() as m: uri = 'mock://example.com/items/123?expand=all' - json_object = {'metadata': {'title': 'Sample title'}, 'type': 'item'} - m.get(uri, json=json_object) + rec_json = {'metadata': {'title': 'Sample title'}, 'type': 'item'} + m.get(uri, json=rec_json) rec_obj = client.get_record('123', 'items') - assert attr.asdict(rec_obj)['metadata'] == json_object['metadata'] + assert attr.asdict(rec_obj)['metadata'] == rec_json['metadata'] def test_filtered_item_search(client): @@ -55,25 +55,35 @@ def test_filtered_item_search(client): string = 'test' query_type = 'contains' endpoint = 'mock://example.com/filtered-items?' 
- json_object_1 = {'items': [{'link': '1234'}]} - json_object_2 = {'items': []} - m.get(endpoint, [{'json': json_object_1}, {'json': json_object_2}]) + results_json1 = {'items': [{'link': '1234'}]} + results_json2 = {'items': []} + m.get(endpoint, [{'json': results_json1}, {'json': results_json2}]) item_links = client.filtered_item_search(key, string, query_type, selected_collections='') assert '1234' in item_links +def test_get_id_from_handle(client): + """Test get_id_from_handle method.""" + with requests_mock.Mocker() as m: + handle = 'mock://example.com/handle/111.1111' + rec_json = {'uuid': '123'} + m.get(handle, json=rec_json) + id = client.get_id_from_handle('111.1111') + assert id == '123' + + def test_post_coll_to_comm(client): """Test post_coll_to_comm method.""" with requests_mock.Mocker() as m: comm_handle = '1234' coll_name = 'Test Collection' - json_object_1 = {'uuid': 'a1b2'} - json_object_2 = {'uuid': '5678'} - m.get('mock://example.com/handle/1234', json=json_object_1) + comm_json = {'uuid': 'a1b2'} + coll_json = {'uuid': '5678'} + m.get('mock://example.com/handle/1234', json=comm_json) m.post('mock://example.com/communities/a1b2/collections', - json=json_object_2) + json=coll_json) coll_id = client.post_coll_to_comm(comm_handle, coll_name) assert coll_id == '5678' @@ -86,15 +96,17 @@ def test_post_items_to_coll(client, sample_content): "value": "123"}, {"key": "dc.title", "value": "Monitoring Works: Getting Teachers", - "language": "en_US"}]}] + "language": "en_US"}, + {"key": "dc.relation.isversionof", + "value": "repo/0/ao/123"}]}] coll_id = '789' ingest_type = 'local' file_dict = {'123': sample_content} - json_object_1 = {'uuid': 'a1b2'} - m.post('mock://example.com/collections/789/items', json=json_object_1) + item_json = {'uuid': 'a1b2', 'handle': '1111.1/1111'} + m.post('mock://example.com/collections/789/items', json=item_json) url = 'mock://example.com/items/a1b2/bitstreams?name=123.pdf' - json_object_2 = {'uuid': 'c3d4'} - m.post(url, json=json_object_2) + b_json = {'uuid': 'c3d4'} + m.post(url, json=b_json) item_ids = client.post_items_to_coll(coll_id, coll_metadata, file_dict, ingest_type) for item_id in item_ids: @@ -108,9 +120,9 @@ def test_post_bitstreams_to_item(client, sample_content): ingest_type = 'local' file_identifier = '123' file_dict = {'123': sample_content} - json_object_1 = {'uuid': 'c3d4'} + b_json = {'uuid': 'c3d4'} url = 'mock://example.com/items/a1b2/bitstreams?name=123.pdf' - m.post(url, json=json_object_1) + m.post(url, json=b_json) bit_ids = client.post_bitstreams_to_item(item_id, file_identifier, file_dict, ingest_type) for bit_id in bit_ids: @@ -161,8 +173,12 @@ def test_metadata_csv(): """Test metadata_csv function.""" row = {'title': 'Test title'} metadata_elem = models.metadata_csv(row, 'dc.title', 'title', 'en_US') - assert metadata_elem['key'] == 'dc.title' - assert metadata_elem['value'] == 'Test title' + assert metadata_elem[0]['key'] == 'dc.title' + assert metadata_elem[0]['value'] == 'Test title' + + +# def test_create_ingest_report(): +# assert False def test_create_metadata_rec(): From ef87125577aca3cdb0aee61868890933f04e4639 Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Thu, 11 Mar 2021 14:00:31 -0500 Subject: [PATCH 14/22] PR updates --- dsaps/models.py | 79 +++++++++++++++++++++----------------------- tests/test_models.py | 77 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 100 insertions(+), 56 deletions(-) diff --git a/dsaps/models.py b/dsaps/models.py index 577e669..7f0b400 100644 --- a/dsaps/models.py +++ 
b/dsaps/models.py @@ -1,3 +1,4 @@ +import collections import csv import datetime from functools import partial @@ -78,7 +79,7 @@ def filtered_item_search(self, key, string, query_type, return item_links def get_id_from_handle(self, handle): - """Posts a collection to a specified community.""" + """Retrieves UUID for an object based on its handle.""" endpoint = f'{self.url}/handle/{handle}' rec_obj = requests.get(endpoint, headers=self.header, cookies=self.cookies).json() @@ -124,26 +125,29 @@ def post_items_to_coll(self, coll_id, coll_metadata, file_dict, def post_bitstreams_to_item(self, item_id, file_identifier, file_dict, ingest_type): - """Posts bitstreams to a specified item.""" - for k, v in file_dict.items(): - bitstreams = [] - if k.startswith(file_identifier): - bitstreams.append(k) - bitstreams.sort() - for bitstream in bitstreams: - bitstream_path = file_dict[bitstream] - file_name = os.path.basename(bitstream_path) - if ingest_type == 'local': - data = open(bitstream_path, 'rb') - elif ingest_type == 'remote': - data = requests.get(bitstream_path) - endpoint = (f'{self.url}/items/{item_id}' - + f'/bitstreams?name={file_name}') - header_upload = {'accept': 'application/json'} - bit_id = requests.post(endpoint, headers=header_upload, - cookies=self.cookies, data=data).json() - bit_id = bit_id['uuid'] - yield bit_id + """Post a sorted set of bitstreams to a specified item.""" + file_dict = collections.OrderedDict(sorted(file_dict.items())) + for bitstream, v in file_dict.items(): + bit_id = self.post_bitstream(item_id, file_identifier, file_dict, + ingest_type, bitstream) + yield bit_id + + def post_bitstream(self, item_id, file_identifier, file_dict, ingest_type, + bitstream): + """Post a bitstream to a specified item.""" + bitstream_path = file_dict[bitstream] + file_name = os.path.basename(bitstream_path) + if ingest_type == 'local': + data = open(bitstream_path, 'rb') + elif ingest_type == 'remote': + data = requests.get(bitstream_path) + endpoint = (f'{self.url}/items/{item_id}' + + f'/bitstreams?name={file_name}') + header_upload = {'accept': 'application/json'} + bit_id = requests.post(endpoint, headers=header_upload, + cookies=self.cookies, data=data).json() + bit_id = bit_id['uuid'] + return bit_id def _pop_inst(self, class_type, rec_obj): """Populate class instance with data from record.""" @@ -224,38 +228,29 @@ def elapsed_time(start_time, label): logger.info(f'{label} : {td}') -def metadata_csv(row, key, field, language=None, delimiter=''): - """Create metadata element from CSV.""" +def metadata_elems_from_row(row, key, field, language=None, delimiter=''): + """Create a metadata element from a CSV row.""" metadata_elems = [] if row[field] != '': - if delimiter != '' and delimiter in row[field]: + if delimiter: values = row[field].split(delimiter) - for value in values: - if language is not None: - metadata_elem = {'key': key, 'language': language, 'value': - value} - metadata_elems.append(metadata_elem) - else: - metadata_elem = {'key': key, 'value': value} - metadata_elems.append(metadata_elem) else: - value = row[field] - if language is not None: - metadata_elem = {'key': key, 'language': language, 'value': - value} - else: - metadata_elem = {'key': key, 'value': value} - metadata_elems.append(metadata_elem) + values = [row[field]] + for value in values: + metadata_elem = {'key': key, 'language': language, 'value': + value} + metadata_elems.append({k: v for k, v in metadata_elem.items() + if v is not None}) return metadata_elems def create_metadata_rec(mapping_dict, 
row, metadata_rec): - """Create metadata record from CSV.""" + """Create metadata record from a series of metadata elements.""" for k, v in mapping_dict.items(): if len(v) == 3: - metadata_elems = metadata_csv(row, k, v[0], v[1], v[2]) + metadata_elems = metadata_elems_from_row(row, k, v[0], v[1], v[2]) else: - metadata_elems = metadata_csv(row, k, v[0]) + metadata_elems = metadata_elems_from_row(row, k, v[0]) for metadata_elem in metadata_elems: metadata_rec.append(metadata_elem) return metadata_rec diff --git a/tests/test_models.py b/tests/test_models.py index a87611d..49224cb 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -15,11 +15,20 @@ def client(): @pytest.fixture -def sample_content(tmp_path): +def sample_content_1(tmp_path): content = 'test' dir = tmp_path / 'sub' dir.mkdir() - sample_content = dir / '123.pdf' + sample_content = dir / '123_1.pdf' + sample_content.write_text(content) + return sample_content + + +@pytest.fixture +def sample_content_2(tmp_path): + content = 'test' + dir = tmp_path / 'sub' + sample_content = dir / '123_2.pdf' sample_content.write_text(content) return sample_content @@ -88,7 +97,7 @@ def test_post_coll_to_comm(client): assert coll_id == '5678' -def test_post_items_to_coll(client, sample_content): +def test_post_items_to_coll(client, sample_content_1): """Test post_items_to_coll method.""" with requests_mock.Mocker() as m: coll_metadata = [{"metadata": [ @@ -101,10 +110,10 @@ def test_post_items_to_coll(client, sample_content): "value": "repo/0/ao/123"}]}] coll_id = '789' ingest_type = 'local' - file_dict = {'123': sample_content} + file_dict = {'123': sample_content_1} item_json = {'uuid': 'a1b2', 'handle': '1111.1/1111'} m.post('mock://example.com/collections/789/items', json=item_json) - url = 'mock://example.com/items/a1b2/bitstreams?name=123.pdf' + url = 'mock://example.com/items/a1b2/bitstreams?name=123_1.pdf' b_json = {'uuid': 'c3d4'} m.post(url, json=b_json) item_ids = client.post_items_to_coll(coll_id, coll_metadata, file_dict, @@ -113,20 +122,42 @@ def test_post_items_to_coll(client, sample_content): assert 'a1b2' == item_id -def test_post_bitstreams_to_item(client, sample_content): +def test_post_bitstreams_to_item(client, sample_content_1, sample_content_2): """Test post_bitstreams_to_item method.""" with requests_mock.Mocker() as m: item_id = 'a1b2' ingest_type = 'local' file_identifier = '123' - file_dict = {'123': sample_content} - b_json = {'uuid': 'c3d4'} - url = 'mock://example.com/items/a1b2/bitstreams?name=123.pdf' - m.post(url, json=b_json) + file_dict = {'123_2': sample_content_2, '123_1': sample_content_1} + b_json_1 = {'uuid': 'c3d4'} + url_1 = 'mock://example.com/items/a1b2/bitstreams?name=123_1.pdf' + m.post(url_1, json=b_json_1) + b_json_2 = {'uuid': 'e5f6'} + url_2 = 'mock://example.com/items/a1b2/bitstreams?name=123_2.pdf' + m.post(url_2, json=b_json_2) bit_ids = client.post_bitstreams_to_item(item_id, file_identifier, file_dict, ingest_type) + bit_ids_output = [] for bit_id in bit_ids: - assert 'c3d4' == bit_id + bit_ids_output.append(bit_id) + assert bit_ids_output[0] == 'c3d4' + assert bit_ids_output[1] == 'e5f6' + + +def test_post_bitstream(client, sample_content_1): + """Test post_bitstream method.""" + with requests_mock.Mocker() as m: + item_id = 'a1b2' + ingest_type = 'local' + file_identifier = '123' + file_dict = {'123': sample_content_1} + b_json = {'uuid': 'c3d4'} + url = 'mock://example.com/items/a1b2/bitstreams?name=123_1.pdf' + bitstream = '123' + m.post(url, json=b_json) + bit_id = 
client.post_bitstream(item_id, file_identifier, file_dict, + ingest_type, bitstream) + assert 'c3d4' == bit_id def test__pop_inst(client): @@ -169,12 +200,30 @@ def test_build_file_dict_remote(): # assert False -def test_metadata_csv(): - """Test metadata_csv function.""" +def test_metadata_elems_from_row(): + """Test metadata_elems_from_row function.""" row = {'title': 'Test title'} - metadata_elem = models.metadata_csv(row, 'dc.title', 'title', 'en_US') + metadata_elem = models.metadata_elems_from_row(row, 'dc.title', 'title', + 'en_US') assert metadata_elem[0]['key'] == 'dc.title' assert metadata_elem[0]['value'] == 'Test title' + assert metadata_elem[0]['language'] == 'en_US' + metadata_elem = models.metadata_elems_from_row(row, 'dc.title', 'title') + assert metadata_elem[0]['key'] == 'dc.title' + assert metadata_elem[0]['value'] == 'Test title' + assert 'language' not in metadata_elem[0] + row = {'title': ''} + metadata_elem = models.metadata_elems_from_row(row, 'dc.title', 'title') + assert metadata_elem == [] + row = {'title': 'Test title 1|Test title 2'} + metadata_elem = models.metadata_elems_from_row(row, 'dc.title', 'title', + 'en_US', '|') + assert metadata_elem[0]['key'] == 'dc.title' + assert metadata_elem[0]['value'] == 'Test title 1' + assert metadata_elem[0]['language'] == 'en_US' + assert metadata_elem[1]['key'] == 'dc.title' + assert metadata_elem[1]['value'] == 'Test title 2' + assert metadata_elem[1]['language'] == 'en_US' # def test_create_ingest_report(): From 05c977ea4705c2260baf62efcd22b9359415ad11 Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Fri, 12 Mar 2021 10:20:14 -0500 Subject: [PATCH 15/22] conftest --- dsaps/models.py | 2 +- tests/conftest.py | 69 ++++++++++++++++ tests/test_models.py | 183 ++++++++++++------------------------------- 3 files changed, 122 insertions(+), 132 deletions(-) create mode 100644 tests/conftest.py diff --git a/dsaps/models.py b/dsaps/models.py index 7f0b400..49be4fe 100644 --- a/dsaps/models.py +++ b/dsaps/models.py @@ -215,7 +215,7 @@ def build_file_dict_remote(directory_url, file_type, file_dict): def create_csv_from_list(list_name, output): """Creates CSV file from list content.""" - with open(output, 'w') as f: + with open(f'{output}.csv', 'w') as f: writer = csv.writer(f) writer.writerow(['id']) for item in list_name: diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..d115826 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,69 @@ +from click.testing import CliRunner +import pytest +import requests_mock + +from dsaps import models + + +@pytest.fixture(autouse=True) +def client(): + client = models.Client('mock://example.com/') + client.header = {} + client.cookies = {} + client.user_full_name = '' + return client + + +@pytest.fixture(autouse=True) +def ds_mock(): + with requests_mock.Mocker() as m: + cookies = {'JSESSIONID': '11111111'} + m.post('mock://example.com/login', cookies=cookies) + user_json = {'fullname': 'User Name'} + m.get('mock://example.com/status', json=user_json) + rec_json = {'metadata': {'title': 'Sample title'}, 'type': 'item'} + m.get('mock://example.com/items/123?expand=all', json=rec_json) + results_json1 = {'items': [{'link': '1234'}]} + results_json2 = {'items': []} + m.get('mock://example.com/filtered-items?', [{'json': results_json1}, + {'json': results_json2}]) + rec_json = {'uuid': '123'} + m.get('mock://example.com/handle/111.1111', json=rec_json) + comm_json = {'uuid': 'a1b2'} + m.get('mock://example.com/handle/1234', json=comm_json) + coll_json = {'uuid': 
'5678'} + m.post('mock://example.com/communities/a1b2/collections', + json=coll_json) + item_json = {'uuid': 'a1b2', 'handle': '1111.1/1111'} + m.post('mock://example.com/collections/789/items', json=item_json) + b_json_1 = {'uuid': 'c3d4'} + url_1 = 'mock://example.com/items/a1b2/bitstreams?name=123_1.pdf' + m.post(url_1, json=b_json_1) + b_json_2 = {'uuid': 'e5f6'} + url_2 = 'mock://example.com/items/a1b2/bitstreams?name=123_2.pdf' + m.post(url_2, json=b_json_2) + yield m + + +@pytest.fixture(autouse=True) +def runner(): + return CliRunner() + + +@pytest.fixture(autouse=True) +def sample_content_1(tmp_path): + content = 'test' + dir = tmp_path / 'sub' + dir.mkdir() + sample_content = dir / '123_1.pdf' + sample_content.write_text(content) + return sample_content + + +@pytest.fixture(autouse=True) +def sample_content_2(tmp_path): + content = 'test' + dir = tmp_path / 'sub' + sample_content = dir / '123_2.pdf' + sample_content.write_text(content) + return sample_content diff --git a/tests/test_models.py b/tests/test_models.py index 49224cb..86be464 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,162 +1,78 @@ +import csv + import attr -import pytest import requests_mock from dsaps import models -@pytest.fixture -def client(): - client = models.Client('mock://example.com/') - client.header = {} - client.cookies = {} - client.user_full_name = '' - return client - - -@pytest.fixture -def sample_content_1(tmp_path): - content = 'test' - dir = tmp_path / 'sub' - dir.mkdir() - sample_content = dir / '123_1.pdf' - sample_content.write_text(content) - return sample_content - - -@pytest.fixture -def sample_content_2(tmp_path): - content = 'test' - dir = tmp_path / 'sub' - sample_content = dir / '123_2.pdf' - sample_content.write_text(content) - return sample_content - - def test_authenticate(client): """Test authenticate method.""" - with requests_mock.Mocker() as m: - email = 'test@test.mock' - password = '1234' - cookies = {'JSESSIONID': '11111111'} - user_json = {'fullname': 'User Name'} - m.post('mock://example.com/login', cookies=cookies) - m.get('mock://example.com/status', json=user_json) - client.authenticate(email, password) - assert client.user_full_name == 'User Name' - assert client.cookies == cookies + email = 'test@test.mock' + password = '1234' + client.authenticate(email, password) + assert client.user_full_name == 'User Name' + assert client.cookies == {'JSESSIONID': '11111111'} def test_get_record(client): """Test get_record method.""" - with requests_mock.Mocker() as m: - uri = 'mock://example.com/items/123?expand=all' - rec_json = {'metadata': {'title': 'Sample title'}, 'type': 'item'} - m.get(uri, json=rec_json) - rec_obj = client.get_record('123', 'items') - assert attr.asdict(rec_obj)['metadata'] == rec_json['metadata'] + rec_obj = client.get_record('123', 'items') + assert attr.asdict(rec_obj)['metadata'] == {'title': 'Sample title'} def test_filtered_item_search(client): """Test filtered_item_search method.""" - with requests_mock.Mocker() as m: - key = 'dc.title' - string = 'test' - query_type = 'contains' - endpoint = 'mock://example.com/filtered-items?' 
- results_json1 = {'items': [{'link': '1234'}]} - results_json2 = {'items': []} - m.get(endpoint, [{'json': results_json1}, {'json': results_json2}]) - - item_links = client.filtered_item_search(key, string, query_type, - selected_collections='') - assert '1234' in item_links + key = 'dc.title' + string = 'test' + query_type = 'contains' + item_links = client.filtered_item_search(key, string, query_type, + selected_collections='') + assert '1234' in item_links def test_get_id_from_handle(client): """Test get_id_from_handle method.""" - with requests_mock.Mocker() as m: - handle = 'mock://example.com/handle/111.1111' - rec_json = {'uuid': '123'} - m.get(handle, json=rec_json) - id = client.get_id_from_handle('111.1111') - assert id == '123' + id = client.get_id_from_handle('111.1111') + assert id == '123' def test_post_coll_to_comm(client): """Test post_coll_to_comm method.""" - with requests_mock.Mocker() as m: - comm_handle = '1234' - coll_name = 'Test Collection' - comm_json = {'uuid': 'a1b2'} - coll_json = {'uuid': '5678'} - m.get('mock://example.com/handle/1234', json=comm_json) - m.post('mock://example.com/communities/a1b2/collections', - json=coll_json) - coll_id = client.post_coll_to_comm(comm_handle, coll_name) - assert coll_id == '5678' + comm_handle = '1234' + coll_name = 'Test Collection' + coll_id = client.post_coll_to_comm(comm_handle, coll_name) + assert coll_id == '5678' def test_post_items_to_coll(client, sample_content_1): """Test post_items_to_coll method.""" - with requests_mock.Mocker() as m: - coll_metadata = [{"metadata": [ - {"key": "file_identifier", - "value": "123"}, - {"key": "dc.title", "value": - "Monitoring Works: Getting Teachers", - "language": "en_US"}, - {"key": "dc.relation.isversionof", - "value": "repo/0/ao/123"}]}] - coll_id = '789' - ingest_type = 'local' - file_dict = {'123': sample_content_1} - item_json = {'uuid': 'a1b2', 'handle': '1111.1/1111'} - m.post('mock://example.com/collections/789/items', json=item_json) - url = 'mock://example.com/items/a1b2/bitstreams?name=123_1.pdf' - b_json = {'uuid': 'c3d4'} - m.post(url, json=b_json) - item_ids = client.post_items_to_coll(coll_id, coll_metadata, file_dict, - ingest_type) - for item_id in item_ids: - assert 'a1b2' == item_id + coll_metadata = [{"metadata": [ + {"key": "file_identifier", + "value": "123"}, + {"key": "dc.title", "value": + "Monitoring Works: Getting Teachers", + "language": "en_US"}, + {"key": "dc.relation.isversionof", + "value": "repo/0/ao/123"}]}] + coll_id = '789' + ingest_type = 'local' + file_dict = {'123': sample_content_1} + item_ids = client.post_items_to_coll(coll_id, coll_metadata, file_dict, + ingest_type) + for item_id in item_ids: + assert 'a1b2' == item_id def test_post_bitstreams_to_item(client, sample_content_1, sample_content_2): """Test post_bitstreams_to_item method.""" - with requests_mock.Mocker() as m: - item_id = 'a1b2' - ingest_type = 'local' - file_identifier = '123' - file_dict = {'123_2': sample_content_2, '123_1': sample_content_1} - b_json_1 = {'uuid': 'c3d4'} - url_1 = 'mock://example.com/items/a1b2/bitstreams?name=123_1.pdf' - m.post(url_1, json=b_json_1) - b_json_2 = {'uuid': 'e5f6'} - url_2 = 'mock://example.com/items/a1b2/bitstreams?name=123_2.pdf' - m.post(url_2, json=b_json_2) - bit_ids = client.post_bitstreams_to_item(item_id, file_identifier, - file_dict, ingest_type) - bit_ids_output = [] - for bit_id in bit_ids: - bit_ids_output.append(bit_id) - assert bit_ids_output[0] == 'c3d4' - assert bit_ids_output[1] == 'e5f6' - - -def 
test_post_bitstream(client, sample_content_1): - """Test post_bitstream method.""" - with requests_mock.Mocker() as m: - item_id = 'a1b2' - ingest_type = 'local' - file_identifier = '123' - file_dict = {'123': sample_content_1} - b_json = {'uuid': 'c3d4'} - url = 'mock://example.com/items/a1b2/bitstreams?name=123_1.pdf' - bitstream = '123' - m.post(url, json=b_json) - bit_id = client.post_bitstream(item_id, file_identifier, file_dict, - ingest_type, bitstream) + item_id = 'a1b2' + ingest_type = 'local' + file_identifier = '123' + file_dict = {'123': sample_content_1} + bit_ids = client.post_bitstreams_to_item(item_id, file_identifier, + file_dict, ingest_type) + for bit_id in bit_ids: assert 'c3d4' == bit_id @@ -194,10 +110,15 @@ def test_build_file_dict_remote(): assert '999' in file_list -# # How to test this? Applies to asaps as well -# def test_create_csv_from_list(): -# """Test create_csv_from_list function.""" -# assert False +def test_create_csv_from_list(runner): + """Test create_csv_from_list function.""" + with runner.isolated_filesystem(): + list_name = ['123'] + models.create_csv_from_list(list_name, 'output') + with open('output.csv') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + assert row['id'] == '123' def test_metadata_elems_from_row(): From 4c1d830b41ca94fc3c335fa9c5ad5da046cf6a6d Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Tue, 16 Mar 2021 12:20:53 -0400 Subject: [PATCH 16/22] reconcile refactor --- dsaps/cli.py | 32 ++--------------- dsaps/workflows.py | 79 +++++++++++++++++++++++++++++++++++++++++ tests/conftest.py | 37 +++++++++---------- tests/test_cli.py | 30 ++++++++++++++++ tests/test_models.py | 28 +++++++++++---- tests/test_workflows.py | 79 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 231 insertions(+), 54 deletions(-) create mode 100644 dsaps/workflows.py create mode 100644 tests/test_cli.py create mode 100644 tests/test_workflows.py diff --git a/dsaps/cli.py b/dsaps/cli.py index 92a7145..8f3390c 100644 --- a/dsaps/cli.py +++ b/dsaps/cli.py @@ -9,7 +9,7 @@ import click import structlog -from dsaps import models +from dsaps import models, workflows logger = structlog.get_logger() @@ -96,35 +96,7 @@ def newcoll(ctx, comm_handle, coll_name, metadata, file_path, file_type, @click.option('-t', '--file_type', prompt='Enter the file type', help='The file type to be uploaded.') def reconcile(metadata_csv, file_path, file_type): - if file_path.startswith('http'): - file_dict = models.build_file_dict_remote(file_path, file_type, {}) - else: - files = glob.glob(f'{file_path}/**/*.{file_type}', recursive=True) - for file in files: - file_name = os.path.splitext(os.path.basename(file))[0] - file_dict[file_name] = file - metadata_ids = [] - with open(metadata_csv) as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - value = row['file_identifier'] - metadata_ids.append(value) - file_matches = [] - file_ids = [] - for file_id, v in file_dict.items(): - file_ids.append(file_id) - for metadata_id in [m for m in metadata_ids if file_id == m]: - file_matches.append(file_id) - metadata_matches = [] - for metadata_id in metadata_ids: - for file_id in file_dict: - if file_id == metadata_id: - metadata_matches.append(metadata_id) - no_files = set(metadata_ids) - set(metadata_matches) - no_metadata = set(file_ids) - set(file_matches) - models.create_csv_from_list(no_metadata, 'no_metadata.csv') - models.create_csv_from_list(no_files, 'no_files.csv') - models.create_csv_from_list(metadata_matches, 'metadata_matches.csv') + 
workflows.reconcile_files_and_metadata(metadata_csv, file_path, file_type) @main.command() diff --git a/dsaps/workflows.py b/dsaps/workflows.py new file mode 100644 index 0000000..ffc43c7 --- /dev/null +++ b/dsaps/workflows.py @@ -0,0 +1,79 @@ +import csv +import glob +import os + +from dsaps import models + + +def create_file_dict_and_list(file_path, file_type): + """Creates a dict of file IDs and file paths and a list of file IDs.""" + if file_path.startswith('http'): + file_dict = models.build_file_dict_remote(file_path, file_type, {}) + else: + files = glob.glob(f'{file_path}/**/*.{file_type}', recursive=True) + file_dict = {} + file_ids = [] + for file in files: + file_name = os.path.splitext(os.path.basename(file))[0] + file_dict[file_name] = file + file_ids.append(file_name) + return file_dict, file_ids + + +def create_metadata_id_list(metadata_csv): + """Creates a list of IDs from a metadata CSV""" + metadata_ids = [] + with open(metadata_csv) as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + value = row['file_identifier'] + metadata_ids.append(value) + return metadata_ids + + +def match_files_to_metadata(file_dict, file_ids, metadata_ids): + """Creates a list of files matched to metadata records.""" + file_matches = [] + for file_id, v in file_dict.items(): + for metadata_id in [m for m in metadata_ids + if file_id.startswith(m)]: + file_matches.append(file_id) + return file_matches + + +def match_metadata_to_files(file_dict, metadata_ids): + """Creates a list of metadata records matched to files.""" + metadata_matches = [] + for metadata_id in metadata_ids: + for file_id in file_dict: + if file_id.startswith(metadata_id): + metadata_matches.append(metadata_id) + return metadata_matches + + +def reconcile_files_and_metadata(metadata_csv, file_path, file_type): + """Runs a reconciliation of files and metadata.""" + file_dict, file_ids = create_file_dict_and_list(file_path, file_type) + metadata_ids = create_metadata_id_list(metadata_csv) + metadata_matches = match_metadata_to_files(file_dict, metadata_ids) + file_matches = match_files_to_metadata(file_dict, file_ids, metadata_ids) + no_files = set(metadata_ids) - set(metadata_matches) + no_metadata = set(file_ids) - set(file_matches) + models.create_csv_from_list(no_metadata, 'no_metadata') + models.create_csv_from_list(no_files, 'no_files') + models.create_csv_from_list(metadata_matches, 'metadata_matches') + update_metadata_csv(metadata_csv, metadata_matches) + + +def update_metadata_csv(metadata_csv, metadata_matches): + """Creates an updated CSV of metadata records with matching files.""" + with open(metadata_csv) as csvfile: + reader = csv.DictReader(csvfile) + upd_md_file_name = f'updated-{os.path.basename(metadata_csv)}' + with open(f'{upd_md_file_name}', 'w') as updated_csv: + writer = csv.DictWriter(updated_csv, fieldnames=reader.fieldnames) + writer.writeheader() + csvfile.seek(0) + for row in reader: + if row['file_identifier'] in metadata_matches: + writer.writerow(row) diff --git a/tests/conftest.py b/tests/conftest.py index d115826..e695383 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,5 @@ +import csv + from click.testing import CliRunner import pytest import requests_mock @@ -37,10 +39,10 @@ def ds_mock(): item_json = {'uuid': 'a1b2', 'handle': '1111.1/1111'} m.post('mock://example.com/collections/789/items', json=item_json) b_json_1 = {'uuid': 'c3d4'} - url_1 = 'mock://example.com/items/a1b2/bitstreams?name=123_1.pdf' + url_1 = 
'mock://example.com/items/a1b2/bitstreams?name=test_01.pdf' m.post(url_1, json=b_json_1) b_json_2 = {'uuid': 'e5f6'} - url_2 = 'mock://example.com/items/a1b2/bitstreams?name=123_2.pdf' + url_2 = 'mock://example.com/items/a1b2/bitstreams?name=test_02.pdf' m.post(url_2, json=b_json_2) yield m @@ -51,19 +53,18 @@ def runner(): @pytest.fixture(autouse=True) -def sample_content_1(tmp_path): - content = 'test' - dir = tmp_path / 'sub' - dir.mkdir() - sample_content = dir / '123_1.pdf' - sample_content.write_text(content) - return sample_content - - -@pytest.fixture(autouse=True) -def sample_content_2(tmp_path): - content = 'test' - dir = tmp_path / 'sub' - sample_content = dir / '123_2.pdf' - sample_content.write_text(content) - return sample_content +def sample_files_dir(tmp_path): + sample_files_dir = tmp_path / 'files' + sample_files_dir.mkdir() + with open(f'{sample_files_dir}/test_01.pdf', 'w'): + pass + with open(f'{sample_files_dir}/test_02.pdf', 'w'): + pass + with open(f'{sample_files_dir}/best_01.pdf', 'w'): + pass + with open(f'{sample_files_dir}/metadata.csv', 'w') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['uri'] + ['title'] + ['file_identifier']) + writer.writerow(['/repo/0/ao/123'] + ['Test Item'] + ['test']) + writer.writerow(['/repo/0/ao/456'] + ['Tast Item'] + ['tast']) + return str(sample_files_dir) diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..ca4bdf1 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,30 @@ +import csv +import os +import requests_mock + +from dsaps.cli import main + + +def test_reconcile(runner): + """Test reconcile command.""" + with requests_mock.Mocker() as m: + with runner.isolated_filesystem(): + os.mkdir('files') + with open('metadata.csv', 'w') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['uri'] + ['title'] + ['file_identifier']) + writer.writerow(['/repo/0/ao/123'] + ['Test Item'] + ['test']) + cookies = {'JSESSIONID': '11111111'} + user_json = {'fullname': 'User Name'} + m.post('mock://example.com/login', cookies=cookies) + m.get('mock://example.com/status', json=user_json) + result = runner.invoke(main, + ['--url', 'mock://example.com/', + '--email', 'test@test.mock', + '--password', '1234', + 'reconcile', + '--metadata_csv', 'metadata.csv', + '--file_path', 'files', + '--file_type', 'pdf' + ]) + assert result.exit_code == 0 diff --git a/tests/test_models.py b/tests/test_models.py index 86be464..830349b 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -45,11 +45,11 @@ def test_post_coll_to_comm(client): assert coll_id == '5678' -def test_post_items_to_coll(client, sample_content_1): +def test_post_items_to_coll(client, sample_files_dir): """Test post_items_to_coll method.""" coll_metadata = [{"metadata": [ {"key": "file_identifier", - "value": "123"}, + "value": "test"}, {"key": "dc.title", "value": "Monitoring Works: Getting Teachers", "language": "en_US"}, @@ -57,23 +57,39 @@ def test_post_items_to_coll(client, sample_content_1): "value": "repo/0/ao/123"}]}] coll_id = '789' ingest_type = 'local' - file_dict = {'123': sample_content_1} + file_dict = {'test_01': f'{sample_files_dir}/test_01.pdf'} item_ids = client.post_items_to_coll(coll_id, coll_metadata, file_dict, ingest_type) for item_id in item_ids: assert 'a1b2' == item_id -def test_post_bitstreams_to_item(client, sample_content_1, sample_content_2): +def test_post_bitstreams_to_item(client, sample_files_dir): """Test post_bitstreams_to_item method.""" item_id = 'a1b2' ingest_type = 'local' 
file_identifier = '123' - file_dict = {'123': sample_content_1} + file_dict = {'test_02': f'{sample_files_dir}/test_02.pdf', + 'test_01': f'{sample_files_dir}/test_01.pdf'} bit_ids = client.post_bitstreams_to_item(item_id, file_identifier, file_dict, ingest_type) + bit_ids_output = [] for bit_id in bit_ids: - assert 'c3d4' == bit_id + bit_ids_output.append(bit_id) + assert bit_ids_output[0] == 'c3d4' + assert bit_ids_output[1] == 'e5f6' + + +def test_post_bitstream(client, sample_files_dir): + """Test post_bitstream method.""" + item_id = 'a1b2' + ingest_type = 'local' + file_identifier = '123' + file_dict = {'test_01': f'{sample_files_dir}/test_01.pdf'} + bitstream = 'test_01' + bit_id = client.post_bitstream(item_id, file_identifier, file_dict, + ingest_type, bitstream) + assert 'c3d4' == bit_id def test__pop_inst(client): diff --git a/tests/test_workflows.py b/tests/test_workflows.py new file mode 100644 index 0000000..360d286 --- /dev/null +++ b/tests/test_workflows.py @@ -0,0 +1,79 @@ +import csv + +from dsaps import workflows + + +def test_create_file_dict_and_id_list(runner, sample_files_dir): + """Test create_file_dict_and_id_list function.""" + file_path = sample_files_dir + file_dict, file_ids = workflows.create_file_dict_and_list(sample_files_dir, + 'pdf') + assert file_dict['test_02'] == f'{file_path}/test_02.pdf' + assert file_dict['test_01'] == f'{file_path}/test_01.pdf' + assert file_dict['best_01'] == f'{file_path}/best_01.pdf' + for id in ['test_02', 'test_01', 'best_01']: + assert id in file_ids + + +def test_create_metadata_id_list(runner, sample_files_dir): + """Test create_metadata_id_list function.""" + metadata_path = f'{sample_files_dir}/metadata.csv' + metadata_ids = workflows.create_metadata_id_list(metadata_path) + assert 'test' in metadata_ids + + +def test_match_files_to_metadata(): + """Test match_files_to_metadata function.""" + file_dict = {'test_01': 'files/test_01.pdf'} + file_ids = ['test_01'] + metadata_ids = ['test', 'tast'] + file_matches = workflows.match_files_to_metadata(file_dict, file_ids, + metadata_ids) + assert len(file_matches) == 1 + assert 'test_01' in file_matches + + +def test_match_metadata_to_files(): + """Test match_metadata_to_files function.""" + file_dict = {'test_01': 'files/test_01.pdf', + 'tast_01': 'files/tast_01.pdf'} + metadata_ids = ['test'] + file_matches = workflows.match_metadata_to_files(file_dict, metadata_ids) + assert len(file_matches) == 1 + assert 'test' in file_matches + + +def test_reconcile_files_and_metadata(runner, sample_files_dir): + """Test reconcile function.""" + with runner.isolated_filesystem(): + metadata_path = f'{sample_files_dir}/metadata.csv' + workflows.reconcile_files_and_metadata(metadata_path, sample_files_dir, + 'pdf') + with open('updated-metadata.csv') as csvfile2: + reader = csv.DictReader(csvfile2) + for row in reader: + assert row['uri'] == '/repo/0/ao/123' + assert row['title'] == 'Test Item' + assert row['file_identifier'] == 'test' + with open('no_metadata.csv') as csvfile3: + reader = csv.DictReader(csvfile3) + for row in reader: + assert row['id'] == 'best_01' + with open('no_files.csv') as csvfile4: + reader = csv.DictReader(csvfile4) + for row in reader: + assert row['id'] == 'tast' + + +def test_update_metadata_csv(runner, sample_files_dir): + """Test update_metadata_csv function.""" + with runner.isolated_filesystem(): + metadata_matches = ['test'] + workflows.update_metadata_csv(f'{sample_files_dir}/metadata.csv', + metadata_matches) + with open('updated-metadata.csv') as csvfile2: 
+ reader = csv.DictReader(csvfile2) + for row in reader: + assert row['uri'] == '/repo/0/ao/123' + assert row['title'] == 'Test Item' + assert row['file_identifier'] == 'test' From c8d52b6617d38d7678262a74688b83f61964f301 Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Thu, 18 Mar 2021 12:00:29 -0400 Subject: [PATCH 17/22] PR updates --- dsaps/cli.py | 11 ++++-- dsaps/models.py | 6 +-- dsaps/workflows.py | 38 +++++++++---------- tests/conftest.py | 36 ++++++++++++------ tests/test_cli.py | 37 ++++++------------ tests/test_models.py | 44 ++++++++++------------ tests/test_workflows.py | 83 +++++++++++++++++++---------------------- 7 files changed, 124 insertions(+), 131 deletions(-) diff --git a/dsaps/cli.py b/dsaps/cli.py index 8f3390c..ed46716 100644 --- a/dsaps/cli.py +++ b/dsaps/cli.py @@ -91,12 +91,17 @@ def newcoll(ctx, comm_handle, coll_name, metadata, file_path, file_type, @main.command() @click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file', help='The path of the CSV file of metadata.') +@click.option('-o', '--output_path', prompt='Enter the output path', + default='', help='The path of the output files, include ' + '/ at the end of the path') @click.option('-f', '--file_path', prompt='Enter the path', - help='The path of the content, a URL or local drive path.') + help='The path of the content, a URL or local drive path.' + 'Include / at the end of a local drive path.') @click.option('-t', '--file_type', prompt='Enter the file type', help='The file type to be uploaded.') -def reconcile(metadata_csv, file_path, file_type): - workflows.reconcile_files_and_metadata(metadata_csv, file_path, file_type) +def reconcile(metadata_csv, file_path, file_type, output_path): + workflows.reconcile_files_and_metadata(metadata_csv, output_path, + file_path, file_type) @main.command() diff --git a/dsaps/models.py b/dsaps/models.py index 49be4fe..e389a7c 100644 --- a/dsaps/models.py +++ b/dsaps/models.py @@ -128,11 +128,11 @@ def post_bitstreams_to_item(self, item_id, file_identifier, file_dict, """Post a sorted set of bitstreams to a specified item.""" file_dict = collections.OrderedDict(sorted(file_dict.items())) for bitstream, v in file_dict.items(): - bit_id = self.post_bitstream(item_id, file_identifier, file_dict, - ingest_type, bitstream) + bit_id = self.post_bitstream(item_id, file_dict, ingest_type, + bitstream) yield bit_id - def post_bitstream(self, item_id, file_identifier, file_dict, ingest_type, + def post_bitstream(self, item_id, file_dict, ingest_type, bitstream): """Post a bitstream to a specified item.""" bitstream_path = file_dict[bitstream] diff --git a/dsaps/workflows.py b/dsaps/workflows.py index ffc43c7..21affd8 100644 --- a/dsaps/workflows.py +++ b/dsaps/workflows.py @@ -5,19 +5,17 @@ from dsaps import models -def create_file_dict_and_list(file_path, file_type): - """Creates a dict of file IDs and file paths and a list of file IDs.""" +def create_file_dict(file_path, file_type): + """Creates a dict of file IDs and file paths.""" if file_path.startswith('http'): file_dict = models.build_file_dict_remote(file_path, file_type, {}) else: files = glob.glob(f'{file_path}/**/*.{file_type}', recursive=True) file_dict = {} - file_ids = [] for file in files: file_name = os.path.splitext(os.path.basename(file))[0] file_dict[file_name] = file - file_ids.append(file_name) - return file_dict, file_ids + return file_dict def create_metadata_id_list(metadata_csv): @@ -31,7 +29,7 @@ def create_metadata_id_list(metadata_csv): return metadata_ids -def 
match_files_to_metadata(file_dict, file_ids, metadata_ids): +def match_files_to_metadata(file_dict, metadata_ids): """Creates a list of files matched to metadata records.""" file_matches = [] for file_id, v in file_dict.items(): @@ -45,35 +43,37 @@ def match_metadata_to_files(file_dict, metadata_ids): """Creates a list of metadata records matched to files.""" metadata_matches = [] for metadata_id in metadata_ids: - for file_id in file_dict: - if file_id.startswith(metadata_id): - metadata_matches.append(metadata_id) + for file_id in [f for f in file_dict + if f.startswith(metadata_id)]: + metadata_matches.append(metadata_id) return metadata_matches -def reconcile_files_and_metadata(metadata_csv, file_path, file_type): +def reconcile_files_and_metadata(metadata_csv, output_path, file_path, + file_type): """Runs a reconciliation of files and metadata.""" - file_dict, file_ids = create_file_dict_and_list(file_path, file_type) + file_dict = create_file_dict(file_path, file_type) + file_ids = file_dict.keys() metadata_ids = create_metadata_id_list(metadata_csv) metadata_matches = match_metadata_to_files(file_dict, metadata_ids) - file_matches = match_files_to_metadata(file_dict, file_ids, metadata_ids) + file_matches = match_files_to_metadata(file_dict, metadata_ids) no_files = set(metadata_ids) - set(metadata_matches) no_metadata = set(file_ids) - set(file_matches) - models.create_csv_from_list(no_metadata, 'no_metadata') - models.create_csv_from_list(no_files, 'no_files') - models.create_csv_from_list(metadata_matches, 'metadata_matches') - update_metadata_csv(metadata_csv, metadata_matches) + models.create_csv_from_list(no_metadata, f'{output_path}no_metadata') + models.create_csv_from_list(no_files, f'{output_path}no_files') + models.create_csv_from_list(metadata_matches, + f'{output_path}metadata_matches') + update_metadata_csv(metadata_csv, output_path, metadata_matches) -def update_metadata_csv(metadata_csv, metadata_matches): +def update_metadata_csv(metadata_csv, output_path, metadata_matches): """Creates an updated CSV of metadata records with matching files.""" with open(metadata_csv) as csvfile: reader = csv.DictReader(csvfile) upd_md_file_name = f'updated-{os.path.basename(metadata_csv)}' - with open(f'{upd_md_file_name}', 'w') as updated_csv: + with open(f'{output_path}{upd_md_file_name}', 'w') as updated_csv: writer = csv.DictWriter(updated_csv, fieldnames=reader.fieldnames) writer.writeheader() - csvfile.seek(0) for row in reader: if row['file_identifier'] in metadata_matches: writer.writerow(row) diff --git a/tests/conftest.py b/tests/conftest.py index e695383..2778674 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,7 +7,7 @@ from dsaps import models -@pytest.fixture(autouse=True) +@pytest.fixture() def client(): client = models.Client('mock://example.com/') client.header = {} @@ -17,7 +17,7 @@ def client(): @pytest.fixture(autouse=True) -def ds_mock(): +def web_mock(): with requests_mock.Mocker() as m: cookies = {'JSESSIONID': '11111111'} m.post('mock://example.com/login', cookies=cookies) @@ -44,27 +44,39 @@ def ds_mock(): b_json_2 = {'uuid': 'e5f6'} url_2 = 'mock://example.com/items/a1b2/bitstreams?name=test_02.pdf' m.post(url_2, json=b_json_2) + m.get('mock://remoteserver.com/files/test_01.pdf', content=b'') yield m -@pytest.fixture(autouse=True) +@pytest.fixture() def runner(): return CliRunner() -@pytest.fixture(autouse=True) -def sample_files_dir(tmp_path): - sample_files_dir = tmp_path / 'files' - sample_files_dir.mkdir() - with 
open(f'{sample_files_dir}/test_01.pdf', 'w'): +@pytest.fixture() +def input_dir(tmp_path): + input_dir = tmp_path / 'files' + input_dir.mkdir() + input_2nd_lvl = input_dir / 'more_files' + input_2nd_lvl.mkdir() + with open(f'{input_dir}/test_01.pdf', 'w'): pass - with open(f'{sample_files_dir}/test_02.pdf', 'w'): + with open(f'{input_2nd_lvl}/test_02.pdf', 'w'): pass - with open(f'{sample_files_dir}/best_01.pdf', 'w'): + with open(f'{input_dir}/best_01.pdf', 'w'): pass - with open(f'{sample_files_dir}/metadata.csv', 'w') as csvfile: + with open(f'{input_dir}/test_01.jpg', 'w'): + pass + with open(f'{input_dir}/metadata.csv', 'w') as csvfile: writer = csv.writer(csvfile) writer.writerow(['uri'] + ['title'] + ['file_identifier']) writer.writerow(['/repo/0/ao/123'] + ['Test Item'] + ['test']) writer.writerow(['/repo/0/ao/456'] + ['Tast Item'] + ['tast']) - return str(sample_files_dir) + return str(f'{input_dir}/') + + +@pytest.fixture() +def output_dir(tmp_path): + output_dir = tmp_path / 'output' + output_dir.mkdir() + return str(f'{output_dir}/') diff --git a/tests/test_cli.py b/tests/test_cli.py index ca4bdf1..42fdc0a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,30 +1,17 @@ -import csv -import os -import requests_mock - from dsaps.cli import main -def test_reconcile(runner): +def test_reconcile(runner, input_dir, output_dir): """Test reconcile command.""" - with requests_mock.Mocker() as m: - with runner.isolated_filesystem(): - os.mkdir('files') - with open('metadata.csv', 'w') as csvfile: - writer = csv.writer(csvfile) - writer.writerow(['uri'] + ['title'] + ['file_identifier']) - writer.writerow(['/repo/0/ao/123'] + ['Test Item'] + ['test']) - cookies = {'JSESSIONID': '11111111'} - user_json = {'fullname': 'User Name'} - m.post('mock://example.com/login', cookies=cookies) - m.get('mock://example.com/status', json=user_json) - result = runner.invoke(main, - ['--url', 'mock://example.com/', - '--email', 'test@test.mock', - '--password', '1234', - 'reconcile', - '--metadata_csv', 'metadata.csv', - '--file_path', 'files', - '--file_type', 'pdf' - ]) + result = runner.invoke(main, + ['--url', 'mock://example.com/', + '--email', 'test@test.mock', + '--password', '1234', + 'reconcile', + '--metadata_csv', + f'{input_dir}/metadata.csv', + '--file_path', 'files', + '--file_type', 'pdf', + '--output_path', f'{output_dir}' + ]) assert result.exit_code == 0 diff --git a/tests/test_models.py b/tests/test_models.py index 830349b..6dae282 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -45,7 +45,7 @@ def test_post_coll_to_comm(client): assert coll_id == '5678' -def test_post_items_to_coll(client, sample_files_dir): +def test_post_items_to_coll(client, input_dir): """Test post_items_to_coll method.""" coll_metadata = [{"metadata": [ {"key": "file_identifier", @@ -57,38 +57,35 @@ def test_post_items_to_coll(client, sample_files_dir): "value": "repo/0/ao/123"}]}] coll_id = '789' ingest_type = 'local' - file_dict = {'test_01': f'{sample_files_dir}/test_01.pdf'} + file_dict = {'test_01': f'{input_dir}test_01.pdf'} item_ids = client.post_items_to_coll(coll_id, coll_metadata, file_dict, ingest_type) for item_id in item_ids: assert 'a1b2' == item_id -def test_post_bitstreams_to_item(client, sample_files_dir): +def test_post_bitstreams_to_item(client, input_dir): """Test post_bitstreams_to_item method.""" item_id = 'a1b2' ingest_type = 'local' file_identifier = '123' - file_dict = {'test_02': f'{sample_files_dir}/test_02.pdf', - 'test_01': f'{sample_files_dir}/test_01.pdf'} + 
file_dict = {'test_02': f'{input_dir}more_files/test_02.pdf', + 'test_01': f'{input_dir}test_01.pdf'} bit_ids = client.post_bitstreams_to_item(item_id, file_identifier, file_dict, ingest_type) - bit_ids_output = [] - for bit_id in bit_ids: - bit_ids_output.append(bit_id) - assert bit_ids_output[0] == 'c3d4' - assert bit_ids_output[1] == 'e5f6' + assert next(bit_ids) == 'c3d4' + assert next(bit_ids) == 'e5f6' -def test_post_bitstream(client, sample_files_dir): +def test_post_bitstream(client, input_dir): """Test post_bitstream method.""" item_id = 'a1b2' - ingest_type = 'local' - file_identifier = '123' - file_dict = {'test_01': f'{sample_files_dir}/test_01.pdf'} + file_dict = {'test_01': f'{input_dir}test_01.pdf'} bitstream = 'test_01' - bit_id = client.post_bitstream(item_id, file_identifier, file_dict, - ingest_type, bitstream) + bit_id = client.post_bitstream(item_id, file_dict, 'local', bitstream) + assert 'c3d4' == bit_id + file_dict = {'test_01': 'mock://remoteserver.com/files/test_01.pdf'} + bit_id = client.post_bitstream(item_id, file_dict, 'remote', bitstream) assert 'c3d4' == bit_id @@ -126,15 +123,14 @@ def test_build_file_dict_remote(): assert '999' in file_list -def test_create_csv_from_list(runner): +def test_create_csv_from_list(output_dir): """Test create_csv_from_list function.""" - with runner.isolated_filesystem(): - list_name = ['123'] - models.create_csv_from_list(list_name, 'output') - with open('output.csv') as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - assert row['id'] == '123' + list_name = ['123'] + models.create_csv_from_list(list_name, f'{output_dir}output') + with open(f'{output_dir}output.csv') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + assert row['id'] == '123' def test_metadata_elems_from_row(): diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 360d286..e32f9b8 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -3,32 +3,27 @@ from dsaps import workflows -def test_create_file_dict_and_id_list(runner, sample_files_dir): - """Test create_file_dict_and_id_list function.""" - file_path = sample_files_dir - file_dict, file_ids = workflows.create_file_dict_and_list(sample_files_dir, - 'pdf') - assert file_dict['test_02'] == f'{file_path}/test_02.pdf' - assert file_dict['test_01'] == f'{file_path}/test_01.pdf' - assert file_dict['best_01'] == f'{file_path}/best_01.pdf' - for id in ['test_02', 'test_01', 'best_01']: - assert id in file_ids +def test_create_file_dict(input_dir): + """Test create_file_dict function.""" + file_dict = workflows.create_file_dict(input_dir, 'pdf') + assert file_dict['test_02'] == f'{input_dir}more_files/test_02.pdf' + assert file_dict['test_01'] == f'{input_dir}test_01.pdf' + assert file_dict['best_01'] == f'{input_dir}best_01.pdf' -def test_create_metadata_id_list(runner, sample_files_dir): +def test_create_metadata_id_list(input_dir): """Test create_metadata_id_list function.""" - metadata_path = f'{sample_files_dir}/metadata.csv' + metadata_path = f'{input_dir}metadata.csv' metadata_ids = workflows.create_metadata_id_list(metadata_path) assert 'test' in metadata_ids + assert 'tast' in metadata_ids def test_match_files_to_metadata(): """Test match_files_to_metadata function.""" file_dict = {'test_01': 'files/test_01.pdf'} - file_ids = ['test_01'] metadata_ids = ['test', 'tast'] - file_matches = workflows.match_files_to_metadata(file_dict, file_ids, - metadata_ids) + file_matches = workflows.match_files_to_metadata(file_dict, metadata_ids) assert 
len(file_matches) == 1 assert 'test_01' in file_matches @@ -43,37 +38,35 @@ def test_match_metadata_to_files(): assert 'test' in file_matches -def test_reconcile_files_and_metadata(runner, sample_files_dir): +def test_reconcile_files_and_metadata(input_dir, output_dir): """Test reconcile function.""" - with runner.isolated_filesystem(): - metadata_path = f'{sample_files_dir}/metadata.csv' - workflows.reconcile_files_and_metadata(metadata_path, sample_files_dir, - 'pdf') - with open('updated-metadata.csv') as csvfile2: - reader = csv.DictReader(csvfile2) - for row in reader: - assert row['uri'] == '/repo/0/ao/123' - assert row['title'] == 'Test Item' - assert row['file_identifier'] == 'test' - with open('no_metadata.csv') as csvfile3: - reader = csv.DictReader(csvfile3) - for row in reader: - assert row['id'] == 'best_01' - with open('no_files.csv') as csvfile4: - reader = csv.DictReader(csvfile4) - for row in reader: - assert row['id'] == 'tast' + metadata_path = f'{input_dir}metadata.csv' + workflows.reconcile_files_and_metadata(metadata_path, output_dir, + input_dir, 'pdf') + with open(f'{output_dir}updated-metadata.csv') as csvfile2: + reader = csv.DictReader(csvfile2) + for row in reader: + assert row['uri'] == '/repo/0/ao/123' + assert row['title'] == 'Test Item' + assert row['file_identifier'] == 'test' + with open(f'{output_dir}no_metadata.csv') as csvfile3: + reader = csv.DictReader(csvfile3) + for row in reader: + assert row['id'] == 'best_01' + with open(f'{output_dir}no_files.csv') as csvfile4: + reader = csv.DictReader(csvfile4) + for row in reader: + assert row['id'] == 'tast' -def test_update_metadata_csv(runner, sample_files_dir): +def test_update_metadata_csv(input_dir, output_dir): """Test update_metadata_csv function.""" - with runner.isolated_filesystem(): - metadata_matches = ['test'] - workflows.update_metadata_csv(f'{sample_files_dir}/metadata.csv', - metadata_matches) - with open('updated-metadata.csv') as csvfile2: - reader = csv.DictReader(csvfile2) - for row in reader: - assert row['uri'] == '/repo/0/ao/123' - assert row['title'] == 'Test Item' - assert row['file_identifier'] == 'test' + metadata_matches = ['test'] + workflows.update_metadata_csv(f'{input_dir}metadata.csv', output_dir, + metadata_matches) + with open(f'{output_dir}updated-metadata.csv') as csvfile2: + reader = csv.DictReader(csvfile2) + for row in reader: + assert row['uri'] == '/repo/0/ao/123' + assert row['title'] == 'Test Item' + assert row['file_identifier'] == 'test' From 4aa51821beaa8381a88ec29df2aac871ac6ba4d0 Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Fri, 9 Apr 2021 14:51:42 -0400 Subject: [PATCH 18/22] major refactor (#19) * major refactor * Restructure code examples * PR updates * Code review fixes Co-authored-by: Helen Bailey --- .gitignore | 3 + Pipfile.lock | 243 ++++++++-------- config/aspace_mapping.json | 17 ++ dsaps/cli.py | 172 +++++------ dsaps/{workflows.py => helpers.py} | 60 ++-- dsaps/metadata.py | 78 +++++ dsaps/models.py | 269 ++++++++---------- tests/conftest.py | 121 +++++--- tests/fixtures/aspace_metadata_delimited.csv | 3 + .../fixtures/aspace_metadata_no_delimiter.csv | 2 + tests/fixtures/metadata_delim.csv | 3 + tests/fixtures/metadata_num_col.csv | 2 + tests/fixtures/standard_mapping.json | 22 ++ tests/test_cli.py | 61 +++- tests/test_helpers.py | 97 +++++++ tests/test_metadata.py | 61 ++++ tests/test_models.py | 209 ++++++-------- tests/test_workflows.py | 72 ----- 18 files changed, 860 insertions(+), 635 deletions(-) create mode 100644 
config/aspace_mapping.json rename dsaps/{workflows.py => helpers.py} (55%) create mode 100644 dsaps/metadata.py create mode 100644 tests/fixtures/aspace_metadata_delimited.csv create mode 100644 tests/fixtures/aspace_metadata_no_delimiter.csv create mode 100644 tests/fixtures/metadata_delim.csv create mode 100644 tests/fixtures/metadata_num_col.csv create mode 100644 tests/fixtures/standard_mapping.json create mode 100644 tests/test_helpers.py create mode 100644 tests/test_metadata.py delete mode 100644 tests/test_workflows.py diff --git a/.gitignore b/.gitignore index 28f8a70..92037cf 100644 --- a/.gitignore +++ b/.gitignore @@ -178,6 +178,9 @@ local/* !data/.keep .profile *.csv +!tests/fixtures/*.csv *.json +!config/*.json +!tests/fixtures/*.json createItemMetadataFromCSV_* *.txt diff --git a/Pipfile.lock b/Pipfile.lock index 2581cd3..33f1610 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,11 +1,11 @@ { "_meta": { "hash": { - "sha256": "f068635c34247a99e86662b9c499882fbb86089a20d7bd584193f44cae883b76" + "sha256": "49000269c1d938e1cf8e815b9f7d86df95693fb5fe1262e610db0b28739889b2" }, "pipfile-spec": 6, "requires": { - "python_version": "3.7" + "python_version": "3.8" }, "sources": [ { @@ -16,229 +16,230 @@ ] }, "default": { - "attr": { + "attrs": { "hashes": [ - "sha256:0b1aaddb85bd9e9c4bd75092f4440d6616ff40b0df0437f00771871670f7c9fd", - "sha256:9091548058d17f132596e61fa7518e504f76b9a4c61ca7d86e1f96dbf7d4775d" + "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6", + "sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700" ], "index": "pypi", - "version": "==0.3.1" + "version": "==20.3.0" }, "certifi": { "hashes": [ - "sha256:017c25db2a153ce562900032d5bc68e9f191e44e9a0f762f373977de9df1fbb3", - "sha256:25b64c7da4cd7479594d035c08c2d809eb4aab3a26e5a990ea98cc450c320f1f" + "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c", + "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830" ], - "version": "==2019.11.28" + "version": "==2020.12.5" }, "chardet": { "hashes": [ - "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", - "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" + "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa", + "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5" ], - "version": "==3.0.4" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==4.0.0" }, "click": { "hashes": [ - "sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13", - "sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7" + "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a", + "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc" ], "index": "pypi", - "version": "==7.0" + "version": "==7.1.2" }, "idna": { "hashes": [ - "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", - "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c" + "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", + "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" ], - "version": "==2.8" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.10" }, "lxml": { "hashes": [ - "sha256:00ac0d64949fef6b3693813fe636a2d56d97a5a49b5bbb86e4cc4cc50ebc9ea2", - 
"sha256:0571e607558665ed42e450d7bf0e2941d542c18e117b1ebbf0ba72f287ad841c", - "sha256:0e3f04a7615fdac0be5e18b2406529521d6dbdb0167d2a690ee328bef7807487", - "sha256:13cf89be53348d1c17b453867da68704802966c433b2bb4fa1f970daadd2ef70", - "sha256:217262fcf6a4c2e1c7cb1efa08bd9ebc432502abc6c255c4abab611e8be0d14d", - "sha256:223e544828f1955daaf4cefbb4853bc416b2ec3fd56d4f4204a8b17007c21250", - "sha256:277cb61fede2f95b9c61912fefb3d43fbd5f18bf18a14fae4911b67984486f5d", - "sha256:3213f753e8ae86c396e0e066866e64c6b04618e85c723b32ecb0909885211f74", - "sha256:4690984a4dee1033da0af6df0b7a6bde83f74e1c0c870623797cec77964de34d", - "sha256:4fcc472ef87f45c429d3b923b925704aa581f875d65bac80f8ab0c3296a63f78", - "sha256:61409bd745a265a742f2693e4600e4dbd45cc1daebe1d5fad6fcb22912d44145", - "sha256:678f1963f755c5d9f5f6968dded7b245dd1ece8cf53c1aa9d80e6734a8c7f41d", - "sha256:6c6d03549d4e2734133badb9ab1c05d9f0ef4bcd31d83e5d2b4747c85cfa21da", - "sha256:6e74d5f4d6ecd6942375c52ffcd35f4318a61a02328f6f1bd79fcb4ffedf969e", - "sha256:7b4fc7b1ecc987ca7aaf3f4f0e71bbfbd81aaabf87002558f5bc95da3a865bcd", - "sha256:7ed386a40e172ddf44c061ad74881d8622f791d9af0b6f5be20023029129bc85", - "sha256:8f54f0924d12c47a382c600c880770b5ebfc96c9fd94cf6f6bdc21caf6163ea7", - "sha256:ad9b81351fdc236bda538efa6879315448411a81186c836d4b80d6ca8217cdb9", - "sha256:bbd00e21ea17f7bcc58dccd13869d68441b32899e89cf6cfa90d624a9198ce85", - "sha256:c3c289762cc09735e2a8f8a49571d0e8b4f57ea831ea11558247b5bdea0ac4db", - "sha256:cf4650942de5e5685ad308e22bcafbccfe37c54aa7c0e30cd620c2ee5c93d336", - "sha256:cfcbc33c9c59c93776aa41ab02e55c288a042211708b72fdb518221cc803abc8", - "sha256:e301055deadfedbd80cf94f2f65ff23126b232b0d1fea28f332ce58137bcdb18", - "sha256:ebbfe24df7f7b5c6c7620702496b6419f6a9aa2fd7f005eb731cc80d7b4692b9", - "sha256:eff69ddbf3ad86375c344339371168640951c302450c5d3e9936e98d6459db06", - "sha256:f6ed60a62c5f1c44e789d2cf14009423cb1646b44a43e40a9cf6a21f077678a1" + "sha256:079f3ae844f38982d156efce585bc540c16a926d4436712cf4baee0cce487a3d", + "sha256:0fbcf5565ac01dff87cbfc0ff323515c823081c5777a9fc7703ff58388c258c3", + "sha256:122fba10466c7bd4178b07dba427aa516286b846b2cbd6f6169141917283aae2", + "sha256:1b7584d421d254ab86d4f0b13ec662a9014397678a7c4265a02a6d7c2b18a75f", + "sha256:26e761ab5b07adf5f555ee82fb4bfc35bf93750499c6c7614bd64d12aaa67927", + "sha256:289e9ca1a9287f08daaf796d96e06cb2bc2958891d7911ac7cae1c5f9e1e0ee3", + "sha256:2a9d50e69aac3ebee695424f7dbd7b8c6d6eb7de2a2eb6b0f6c7db6aa41e02b7", + "sha256:33bb934a044cf32157c12bfcfbb6649807da20aa92c062ef51903415c704704f", + "sha256:3439c71103ef0e904ea0a1901611863e51f50b5cd5e8654a151740fde5e1cade", + "sha256:39b78571b3b30645ac77b95f7c69d1bffc4cf8c3b157c435a34da72e78c82468", + "sha256:4289728b5e2000a4ad4ab8da6e1db2e093c63c08bdc0414799ee776a3f78da4b", + "sha256:4bff24dfeea62f2e56f5bab929b4428ae6caba2d1eea0c2d6eb618e30a71e6d4", + "sha256:542d454665a3e277f76954418124d67516c5f88e51a900365ed54a9806122b83", + "sha256:5a0a14e264069c03e46f926be0d8919f4105c1623d620e7ec0e612a2e9bf1c04", + "sha256:66e575c62792c3f9ca47cb8b6fab9e35bab91360c783d1606f758761810c9791", + "sha256:74f7d8d439b18fa4c385f3f5dfd11144bb87c1da034a466c5b5577d23a1d9b51", + "sha256:7610b8c31688f0b1be0ef882889817939490a36d0ee880ea562a4e1399c447a1", + "sha256:76fa7b1362d19f8fbd3e75fe2fb7c79359b0af8747e6f7141c338f0bee2f871a", + "sha256:7728e05c35412ba36d3e9795ae8995e3c86958179c9770e65558ec3fdfd3724f", + "sha256:8157dadbb09a34a6bd95a50690595e1fa0af1a99445e2744110e3dca7831c4ee", + "sha256:820628b7b3135403540202e60551e741f9b6d3304371712521be939470b454ec", + 
"sha256:884ab9b29feaca361f7f88d811b1eea9bfca36cf3da27768d28ad45c3ee6f969", + "sha256:89b8b22a5ff72d89d48d0e62abb14340d9e99fd637d046c27b8b257a01ffbe28", + "sha256:92e821e43ad382332eade6812e298dc9701c75fe289f2a2d39c7960b43d1e92a", + "sha256:b007cbb845b28db4fb8b6a5cdcbf65bacb16a8bd328b53cbc0698688a68e1caa", + "sha256:bc4313cbeb0e7a416a488d72f9680fffffc645f8a838bd2193809881c67dd106", + "sha256:bccbfc27563652de7dc9bdc595cb25e90b59c5f8e23e806ed0fd623755b6565d", + "sha256:c4f05c5a7c49d2fb70223d0d5bcfbe474cf928310ac9fa6a7c6dddc831d0b1d4", + "sha256:ce256aaa50f6cc9a649c51be3cd4ff142d67295bfc4f490c9134d0f9f6d58ef0", + "sha256:d2e35d7bf1c1ac8c538f88d26b396e73dd81440d59c1ef8522e1ea77b345ede4", + "sha256:df7c53783a46febb0e70f6b05df2ba104610f2fb0d27023409734a3ecbb78fb2", + "sha256:efac139c3f0bf4f0939f9375af4b02c5ad83a622de52d6dfa8e438e8e01d0eb0", + "sha256:efd7a09678fd8b53117f6bae4fa3825e0a22b03ef0a932e070c0bdbb3a35e654", + "sha256:f2380a6376dfa090227b663f9678150ef27543483055cc327555fb592c5967e2", + "sha256:f8380c03e45cf09f8557bdaa41e1fa7c81f3ae22828e1db470ab2a6c96d8bc23", + "sha256:f90ba11136bfdd25cae3951af8da2e95121c9b9b93727b1b896e3fa105b2f586" ], "index": "pypi", - "version": "==4.4.2" + "version": "==4.6.3" }, "requests": { "hashes": [ - "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4", - "sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31" + "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804", + "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e" ], "index": "pypi", - "version": "==2.22.0" - }, - "six": { - "hashes": [ - "sha256:1f1b7d42e254082a9db6279deae68afb421ceba6158efa6131de7b3003ee93fd", - "sha256:30f610279e8b2578cab6db20741130331735c781b56053c59c4076da27f06b66" - ], - "version": "==1.13.0" + "version": "==2.25.1" }, "structlog": { "hashes": [ - "sha256:4287058cf4ce1a59bc5dea290d6386d37f29a37529c9a51cdf7387e51710152b", - "sha256:6640e6690fc31d5949bc614c1a630464d3aaa625284aeb7c6e486c3010d73e12" + "sha256:62f06fc0ee32fb8580f0715eea66cb87271eb7efb0eaf9af6b639cba8981de47", + "sha256:d9d2d890532e8db83c6977a2a676fb1889922ff0c26ad4dc0ecac26f9fafbc57" ], "index": "pypi", - "version": "==19.2.0" + "version": "==21.1.0" }, "urllib3": { "hashes": [ - "sha256:a8a318824cc77d1fd4b2bec2ded92646630d7fe8619497b142c84a9e6f5a7293", - "sha256:f3c5fd51747d450d4dcf6f923c81f78f811aab8205fda64b0aba34a4e48b0745" + "sha256:2f4da4594db7e1e110a944bb1b551fdf4e6c136ad42e4234131391e21eb5b0df", + "sha256:e7b021f7241115872f92f43c6508082facffbd1c048e3c6e2bb9c2a157e28937" ], - "version": "==1.25.7" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", + "version": "==1.26.4" } }, "develop": { "attrs": { "hashes": [ - "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c", - "sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72" + "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6", + "sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700" ], - "version": "==19.3.0" + "index": "pypi", + "version": "==20.3.0" }, "certifi": { "hashes": [ - "sha256:017c25db2a153ce562900032d5bc68e9f191e44e9a0f762f373977de9df1fbb3", - "sha256:25b64c7da4cd7479594d035c08c2d809eb4aab3a26e5a990ea98cc450c320f1f" + "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c", + "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830" ], - "version": "==2019.11.28" + "version": "==2020.12.5" }, 
"chardet": { "hashes": [ - "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", - "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" + "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa", + "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5" ], - "version": "==3.0.4" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==4.0.0" }, "idna": { "hashes": [ - "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", - "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c" - ], - "version": "==2.8" - }, - "importlib-metadata": { - "hashes": [ - "sha256:073a852570f92da5f744a3472af1b61e28e9f78ccf0c9117658dc32b15de7b45", - "sha256:d95141fbfa7ef2ec65cfd945e2af7e5a6ddbd7c8d9a25e66ff3be8e3daf9f60f" + "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", + "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" ], - "markers": "python_version < '3.8'", - "version": "==1.3.0" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.10" }, - "more-itertools": { + "iniconfig": { "hashes": [ - "sha256:b84b238cce0d9adad5ed87e745778d20a3f8487d0f0cb8b8a586816c7496458d", - "sha256:c833ef592a0324bcc6a60e48440da07645063c453880c9477ceb22490aec1564" + "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3", + "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32" ], - "version": "==8.0.2" + "version": "==1.1.1" }, "packaging": { "hashes": [ - "sha256:28b924174df7a2fa32c1953825ff29c61e2f5e082343165438812f00d3a7fc47", - "sha256:d9551545c6d761f3def1677baf08ab2a3ca17c56879e70fecba2fc4dde4ed108" + "sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5", + "sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a" ], - "version": "==19.2" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==20.9" }, "pluggy": { "hashes": [ "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0", "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d" ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==0.13.1" }, "py": { "hashes": [ - "sha256:64f65755aee5b381cea27766a3a147c3f15b9b6b9ac88676de66ba2ae36793fa", - "sha256:dc639b046a6e2cff5bbe40194ad65936d6ba360b52b3c3fe1d08a82dd50b5e53" + "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3", + "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a" ], - "version": "==1.8.0" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.10.0" }, "pyparsing": { "hashes": [ - "sha256:20f995ecd72f2a1f4bf6b072b63b22e2eb457836601e76d6e5dfcd75436acc1f", - "sha256:4ca62001be367f01bd3e92ecbb79070272a9d4964dce6a48a82ff0b8bc7e683a" + "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1", + "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b" ], - "version": "==2.4.5" + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.4.7" }, "pytest": { "hashes": [ - "sha256:6b571215b5a790f9b41f19f3531c53a45cf6bb8ef2988bc1ff9afb38270b25fa", - "sha256:e41d489ff43948babd0fad7ad5e49b8735d5d55e26628a58673c39ff61d95de4" + 
"sha256:671238a46e4df0f3498d1c3270e5deb9b32d25134c99b7d75370a68cfbe9b634", + "sha256:6ad9c7bdf517a808242b998ac20063c41532a570d088d77eec1ee12b0b5574bc" ], "index": "pypi", - "version": "==5.3.2" + "version": "==6.2.3" }, "requests": { "hashes": [ - "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4", - "sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31" + "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804", + "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e" ], "index": "pypi", - "version": "==2.22.0" + "version": "==2.25.1" }, "requests-mock": { "hashes": [ - "sha256:510df890afe08d36eca5bb16b4aa6308a6f85e3159ad3013bac8b9de7bd5a010", - "sha256:88d3402dd8b3c69a9e4f9d3a73ad11b15920c6efd36bc27bf1f701cf4a8e4646" + "sha256:11215c6f4df72702aa357f205cf1e537cffd7392b3e787b58239bde5fb3db53b", + "sha256:e68f46844e4cee9d447150343c9ae875f99fa8037c6dcf5f15bf1fe9ab43d226" ], "index": "pypi", - "version": "==1.7.0" + "version": "==1.8.0" }, "six": { "hashes": [ - "sha256:1f1b7d42e254082a9db6279deae68afb421ceba6158efa6131de7b3003ee93fd", - "sha256:30f610279e8b2578cab6db20741130331735c781b56053c59c4076da27f06b66" - ], - "version": "==1.13.0" - }, - "urllib3": { - "hashes": [ - "sha256:a8a318824cc77d1fd4b2bec2ded92646630d7fe8619497b142c84a9e6f5a7293", - "sha256:f3c5fd51747d450d4dcf6f923c81f78f811aab8205fda64b0aba34a4e48b0745" + "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", + "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" ], - "version": "==1.25.7" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.15.0" }, - "wcwidth": { + "toml": { "hashes": [ - "sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e", - "sha256:f4ebe71925af7b40a864553f761ed559b43544f8f71746c2d756c7fe788ade7c" + "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", + "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" ], - "version": "==0.1.7" + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==0.10.2" }, - "zipp": { + "urllib3": { "hashes": [ - "sha256:3718b1cbcd963c7d4c5511a8240812904164b7f381b647143a89d3b98f9bcd8e", - "sha256:f06903e9f1f43b12d371004b4ac7b06ab39a44adc747266928ae6debfa7b3335" + "sha256:2f4da4594db7e1e110a944bb1b551fdf4e6c136ad42e4234131391e21eb5b0df", + "sha256:e7b021f7241115872f92f43c6508082facffbd1c048e3c6e2bb9c2a157e28937" ], - "version": "==0.6.0" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", + "version": "==1.26.4" } } } diff --git a/config/aspace_mapping.json b/config/aspace_mapping.json new file mode 100644 index 0000000..8572479 --- /dev/null +++ b/config/aspace_mapping.json @@ -0,0 +1,17 @@ +{ + "file_identifier": { + "csv_field_name": "file_identifier", + "language": null, + "delimiter": "" + }, + "dc.title": { + "csv_field_name": "title", + "language": "en_US", + "delimiter": "" + }, + "dc.relation.isversionof": { + "csv_field_name": "uri", + "language": null, + "delimiter": "" + } +} \ No newline at end of file diff --git a/dsaps/cli.py b/dsaps/cli.py index ed46716..e045a55 100644 --- a/dsaps/cli.py +++ b/dsaps/cli.py @@ -1,6 +1,5 @@ import csv import datetime -import glob import json import logging import os @@ -9,18 +8,18 @@ import click import structlog -from dsaps import models, workflows +from dsaps.models import Client, Collection 
+from dsaps import helpers logger = structlog.get_logger() -@click.group() -@click.option('--url', envvar='DSPACE_URL') -@click.option('-e', '--email', prompt='Enter email', +@click.group(chain=True) +@click.option('--url', envvar='DSPACE_URL', required=True,) +@click.option('-e', '--email', envvar='TEST_EMAIL', required=True, help='The email of the user for authentication.') -@click.option('-p', '--password', prompt='Enter password', - envvar='TEST_PASS', hide_input=True, - help='The password for authentication.') +@click.option('-p', '--password', envvar='TEST_PASS', required=True, + hide_input=True, help='The password for authentication.') @click.pass_context def main(ctx, url, email, password): ctx.obj = {} @@ -42,106 +41,93 @@ def main(ctx, url, email, password): 'w')], level=logging.INFO) logger.info('Application start') - client = models.Client(url) + client = Client(url) client.authenticate(email, password) start_time = time.time() ctx.obj['client'] = client ctx.obj['start_time'] = start_time + ctx.obj['log_suffix'] = log_suffix @main.command() -@click.option('-c', '--comm_handle', prompt='Enter the community handle', - help='The handle of the community in which to create the ,' - 'collection.') -@click.option('-n', '--coll_name', prompt='Enter the name of the collection', - help='The name of the collection to be created.') -@click.option('-m', '--metadata', prompt='Enter the path of the metadata file', - help='The path of the JSON file of metadata.') -@click.option('-f', '--file_path', prompt='Enter the path', - help='The path of the content, a URL or local drive path.') -@click.option('-t', '--file_type', prompt='Enter the file type', - help='The file type to be uploaded.') -@click.option('-i', '--ingest_type', prompt='Enter the type of ingest', - help='The type of ingest to perform: local, remote.', - type=click.Choice(['local', 'remote'])) +@click.option('-m', '--metadata-csv', required=True, + type=click.Path(exists=True), + help='The full path to the CSV file of metadata for the items.') +@click.option('--field-map', required=True, type=click.Path(exists=True), + help='Path to JSON field mapping file') +@click.option('-d', '--directory', required=True, + help='The full path to the content, either a directory of files ' + 'or a URL for the storage location.') +@click.option('-t', '--file-type', + help='The file type to be uploaded, if limited to one file ' + 'type.', default='*') +@click.option('-r', '--ingest-report', is_flag=True, + help='Create ingest report for updating other systems.') +@click.option('-c', '--collection-handle', + help='The handle of the collection to which items are being ' + 'added.', default=None) @click.pass_context -def newcoll(ctx, comm_handle, coll_name, metadata, file_path, file_type, - ingest_type): +def additems(ctx, metadata_csv, field_map, directory, file_type, ingest_report, + collection_handle): + """Adds items to a specified collection from a metadata CSV, a field + mapping file, and a directory of files. 
May be run in conjunction with the + newcollection CLI commands.""" client = ctx.obj['client'] start_time = ctx.obj['start_time'] - with open(metadata, encoding='UTF-8') as fp: - coll_metadata = json.load(fp) - coll_id = client.post_coll_to_comm(comm_handle, coll_name) - file_dict = {} - if ingest_type == 'local': - files = glob.glob(f'{file_path}/**/*.{file_type}', recursive=True) - for file in files: - file_name = os.path.splitext(os.path.basename(file))[0] - file_dict[file_name] = file - elif ingest_type == 'remote': - file_dict = models.build_file_dict_remote(file_path, file_type, - file_dict) - items = client.post_items_to_coll(coll_id, coll_metadata, file_dict, - ingest_type) - for item in items: - logger.info(f'Item posted: {item}') - models.elapsed_time(start_time, 'Total runtime:') + if 'collection_uuid' not in ctx.obj and collection_handle is None: + raise click.UsageError('collection_handle option must be used or ' + 'additems must be run after newcollection ' + 'command.') + elif 'collection_uuid' in ctx.obj: + collection_uuid = ctx.obj['collection_uuid'] + else: + collection_uuid = client.get_uuid_from_handle(collection_handle) + with open(metadata_csv, 'r') as csvfile, open(field_map, 'r') as jsonfile: + metadata = csv.DictReader(csvfile) + mapping = json.load(jsonfile) + collection = Collection.from_csv(metadata, mapping) + for item in collection.items: + item.bitstreams_from_directory(directory, file_type) + collection.uuid = collection_uuid + items = collection.post_items(client) + if ingest_report: + report_name = metadata_csv.replace('.csv', '-ingest.csv') + helpers.create_ingest_report(items, report_name) + elapsed_time = datetime.timedelta(seconds=time.time() - start_time) + logger.info(f'Total runtime : {elapsed_time}') @main.command() -@click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file', - help='The path of the CSV file of metadata.') -@click.option('-o', '--output_path', prompt='Enter the output path', - default='', help='The path of the output files, include ' - '/ at the end of the path') -@click.option('-f', '--file_path', prompt='Enter the path', - help='The path of the content, a URL or local drive path.' - 'Include / at the end of a local drive path.') -@click.option('-t', '--file_type', prompt='Enter the file type', - help='The file type to be uploaded.') -def reconcile(metadata_csv, file_path, file_type, output_path): - workflows.reconcile_files_and_metadata(metadata_csv, output_path, - file_path, file_type) +@click.option('-c', '--community-handle', required=True, + help='The handle of the community in which to create the ,' + 'collection.') +@click.option('-n', '--collection-name', required=True, + help='The name of the collection to be created.') +@click.pass_context +def newcollection(ctx, community_handle, collection_name): + """Posts a new collection to a specified community. 
Used in conjunction + with the additems CLI command to populate the new collection with + items.""" + client = ctx.obj['client'] + collection_uuid = client.post_coll_to_comm(community_handle, + collection_name) + ctx.obj['collection_uuid'] = collection_uuid -@main.command() -@click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file', - help='The path of the CSV file of metadata.') -def metadatajson(metadata_csv): - with open(metadata_csv) as csvfile: - reader = csv.DictReader(csvfile) - metadata_group = [] - mapping_dict = {'fileIdentifier': ['file_identifier'], - 'dc.contributor.author': ['author name - direct'], - 'dc.contributor.advisor': ['supervisor(s)'], - 'dc.date.issued': ['pub date'], - 'dc.description.abstract': ['Abstract', 'en_US'], - 'dc.title': ['Title', 'en_US'], - 'dc.relation.ispartofseries': ['file_identifier']} - for row in reader: - metadata_rec = [] - metadata_rec = models.create_metadata_rec(mapping_dict, row, - metadata_rec) - metadata_rec.append({'key': 'dc.format.mimetype', 'language': - 'en_US', 'value': 'application/pdf'}) - metadata_rec.append({'key': 'dc.language.iso', 'language': - 'en_US', 'value': 'en_US'}) - metadata_rec.append({'key': 'dc.publisher', 'language': 'en_US', - 'value': 'Massachusetts Institute of ' - 'Technology. Laboratory for Computer' - 'Science'}) - metadata_rec.append({'key': 'dc.rights', 'language': 'en_US', - 'value': 'Educational use permitted'}) - metadata_rec.append({'key': 'dc.rights.uri', 'language': 'en_US', - 'value': 'http://rightsstatements.org/vocab/' - 'InC-EDU/1.0/'}) - metadata_rec.append({'key': 'dc.type', 'language': 'en_US', - 'value': 'Technical Report'}) - item = {'metadata': metadata_rec} - metadata_group.append(item) - file_name = os.path.splitext(os.path.basename(metadata_csv))[0] - with open(f'{file_name}.json', 'w') as f: - json.dump(metadata_group, f) +# @main.command() +# @click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file', +# help='The path of the CSV file of metadata.') +# @click.option('-o', '--output_path', prompt='Enter the output path', +# default='', help='The path of the output files, include ' +# '/ at the end of the path') +# @click.option('-f', '--file_path', prompt='Enter the path', +# help='The path of the content, a URL or local drive path.' 
+# 'Include / at the end of a local drive path.') +# @click.option('-t', '--file_type', prompt='Enter the file type', +# help='The file type to be uploaded.') +# def reconcile(metadata_csv, file_path, file_type, output_path): +# workflows.reconcile_files_and_metadata(metadata_csv, output_path, +# file_path, file_type) if __name__ == '__main__': diff --git a/dsaps/workflows.py b/dsaps/helpers.py similarity index 55% rename from dsaps/workflows.py rename to dsaps/helpers.py index 21affd8..15dabc7 100644 --- a/dsaps/workflows.py +++ b/dsaps/helpers.py @@ -2,30 +2,49 @@ import glob import os -from dsaps import models +import structlog + + +logger = structlog.get_logger() + + +def create_csv_from_list(list_name, output): + """Creates CSV file from list content.""" + with open(f'{output}.csv', 'w') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['id']) + for item in list_name: + writer.writerow([item]) def create_file_dict(file_path, file_type): """Creates a dict of file IDs and file paths.""" - if file_path.startswith('http'): - file_dict = models.build_file_dict_remote(file_path, file_type, {}) - else: - files = glob.glob(f'{file_path}/**/*.{file_type}', recursive=True) - file_dict = {} - for file in files: - file_name = os.path.splitext(os.path.basename(file))[0] - file_dict[file_name] = file + files = glob.glob(f'{file_path}/**/*.{file_type}', recursive=True) + file_dict = {} + for file in files: + file_name = os.path.splitext(os.path.basename(file))[0] + file_dict[file_name] = file return file_dict +def create_ingest_report(items, file_name): + """Creates ingest report of other systems' identifiers with a newly created + DSpace handle.""" + with open(f'{file_name}', 'w') as writecsv: + writer = csv.writer(writecsv) + writer.writerow(['uri', 'link']) + for item in items: + writer.writerow([item.source_system_identifier] + + [f'https://hdl.handle.net/{item.handle}']) + + def create_metadata_id_list(metadata_csv): """Creates a list of IDs from a metadata CSV""" metadata_ids = [] with open(metadata_csv) as csvfile: reader = csv.DictReader(csvfile) - for row in reader: - value = row['file_identifier'] - metadata_ids.append(value) + for row in [r for r in reader if r['file_identifier'] != '']: + metadata_ids.append(row['file_identifier']) return metadata_ids @@ -49,23 +68,6 @@ def match_metadata_to_files(file_dict, metadata_ids): return metadata_matches -def reconcile_files_and_metadata(metadata_csv, output_path, file_path, - file_type): - """Runs a reconciliation of files and metadata.""" - file_dict = create_file_dict(file_path, file_type) - file_ids = file_dict.keys() - metadata_ids = create_metadata_id_list(metadata_csv) - metadata_matches = match_metadata_to_files(file_dict, metadata_ids) - file_matches = match_files_to_metadata(file_dict, metadata_ids) - no_files = set(metadata_ids) - set(metadata_matches) - no_metadata = set(file_ids) - set(file_matches) - models.create_csv_from_list(no_metadata, f'{output_path}no_metadata') - models.create_csv_from_list(no_files, f'{output_path}no_files') - models.create_csv_from_list(metadata_matches, - f'{output_path}metadata_matches') - update_metadata_csv(metadata_csv, output_path, metadata_matches) - - def update_metadata_csv(metadata_csv, output_path, metadata_matches): """Creates an updated CSV of metadata records with matching files.""" with open(metadata_csv) as csvfile: diff --git a/dsaps/metadata.py b/dsaps/metadata.py new file mode 100644 index 0000000..dd59fdf --- /dev/null +++ b/dsaps/metadata.py @@ -0,0 +1,78 @@ +import csv + 
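# For orientation, a sketch of the expected output shape, assuming the fixture
# tests/fixtures/metadata_delim.csv added later in this patch, whose first data
# row is:
#     /repo/0/ao/123,Test Item,test,"Smith, Jane"
# With the mapping_dict hard-coded in create_json_metadata() below,
# create_json_metadata('tests/fixtures/metadata_delim.csv', 'delimited') should
# return one record per CSV row; for that first row, roughly:
#     {'metadata': [{'key': 'file_identifier', 'value': 'test'},
#                   {'key': 'dc.title', 'language': 'en_US', 'value': 'Test Item'},
#                   {'key': 'dc.relation.isversionof', 'value': '/repo/0/ao/123'},
#                   {'key': 'dc.contributor.author', 'value': 'Smith, Jane'}]}
# Empty CSV values are skipped, and the 'language' key is dropped when the
# mapping supplies no language (see metadata_elems_from_row below).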
+ +def create_json_metadata(metadata_csv, multiple_terms): + """Creates JSON metadata from a CSV.""" + with open(metadata_csv) as csvfile: + reader = csv.DictReader(csvfile) + metadata_group = [] + # WE SHOULD DISCUSS HOW TO HANDLE MAPPING DICT CHANGES + mapping_dict = {'file_identifier': ['file_identifier'], + 'dc.title': ['title', 'en_US'], + 'dc.relation.isversionof': ['uri'], + 'dc.contributor.author': ['authors', None, '|'] + } + for row in reader: + metadata_rec = [] + if multiple_terms == 'delimited': + metadata_rec = create_metadata_rec_delim(mapping_dict, row, + metadata_rec) + else: + metadata_rec = create_metadata_rec_num_col(row, metadata_rec) + item = {'metadata': metadata_rec} + metadata_group.append(item) + return metadata_group + + +def create_metadata_rec_delim(mapping_dict, row, metadata_rec): + """Uses a mapping dict to create a metadata record from a series of metadata + elements.""" + for k, v in mapping_dict.items(): + if len(v) == 3: + metadata_elems = metadata_elems_from_row(row, k, v[0], v[1], v[2]) + elif len(v) == 2: + metadata_elems = metadata_elems_from_row(row, k, v[0], v[1]) + else: + metadata_elems = metadata_elems_from_row(row, k, v[0]) + for metadata_elem in metadata_elems: + metadata_rec.append(metadata_elem) + return metadata_rec + + +def create_metadata_rec_num_col(row, metadata_rec): + """Uses a CSV that contains DC property column names and numbered columns + for multiple terms to create a metadata record from a series of metadata + elements.""" + for csv_key, csv_value in row.items(): + if csv_value is not None: + if csv_key[-1].isdigit(): + dc_key = csv_key[:-2] + else: + dc_key = csv_key + # THE FIELDS THAT SHOULDN'T RECEIVE A LANG TAG IS ALSO LIKELY + # CHANGE WITH THE MAPPING DICT + if dc_key not in ['dc.contributor.author', 'file_identifier', + 'dc.relation.isversionof', 'dc.date.issued']: + metadata_elems = metadata_elems_from_row(row, dc_key, csv_key, + 'en_US') + else: + metadata_elems = metadata_elems_from_row(row, dc_key, csv_key) + for metadata_elem in metadata_elems: + metadata_rec.append(metadata_elem) + return metadata_rec + + +def metadata_elems_from_row(row, key, field, language=None, delimiter=''): + """Create a metadata element from a CSV row.""" + metadata_elems = [] + if row[field] != '': + if delimiter: + values = row[field].split(delimiter) + else: + values = [row[field]] + for value in values: + metadata_elem = {'key': key, 'language': language, 'value': + value} + metadata_elems.append({k: v for k, v in metadata_elem.items() + if v is not None}) + return metadata_elems diff --git a/dsaps/models.py b/dsaps/models.py index e389a7c..40100d0 100644 --- a/dsaps/models.py +++ b/dsaps/models.py @@ -1,20 +1,17 @@ -import collections -import csv -import datetime from functools import partial +import glob import operator import os -import requests -import time import attr -from lxml import html +import requests import structlog -op = operator.attrgetter('name') Field = partial(attr.ib, default=None) +Group = partial(attr.ib, default=[]) logger = structlog.get_logger() +op = operator.attrgetter('name') class Client: @@ -40,22 +37,6 @@ def authenticate(self, email, password): self.header = header logger.info(f'Authenticated to {self.url} as 'f'{self.user_full_name}') - def get_record(self, uuid, rec_type): - """Retrieve an individual record of a particular type.""" - url = f'{self.url}/{rec_type}/{uuid}?expand=all' - record = requests.get(url, headers=self.header, - cookies=self.cookies).json() - if rec_type == 'items': - rec_obj = 
self._pop_inst(Item, record) - elif rec_type == 'communities': - rec_obj = self._pop_inst(Community, record) - elif rec_type == 'collections': - rec_obj = self._pop_inst(Collection, record) - else: - logger.info('Invalid record type.') - exit() - return rec_obj - def filtered_item_search(self, key, string, query_type, selected_collections=''): """Performs a search against the filtered items endpoint.""" @@ -78,76 +59,64 @@ def filtered_item_search(self, key, string, query_type, offset = offset + 200 return item_links - def get_id_from_handle(self, handle): + def get_uuid_from_handle(self, handle): """Retrieves UUID for an object based on its handle.""" - endpoint = f'{self.url}/handle/{handle}' - rec_obj = requests.get(endpoint, headers=self.header, + hdl_endpoint = f'{self.url}/handle/{handle}' + rec_obj = requests.get(hdl_endpoint, headers=self.header, cookies=self.cookies).json() return rec_obj['uuid'] + def get_record(self, uuid, rec_type): + """Retrieve an individual record of a particular type.""" + url = f'{self.url}/{rec_type}/{uuid}?expand=all' + record = requests.get(url, headers=self.header, + cookies=self.cookies).json() + if rec_type == 'items': + rec_obj = self._pop_inst(Item, record) + elif rec_type == 'communities': + rec_obj = self._pop_inst(Community, record) + elif rec_type == 'collections': + rec_obj = self._pop_inst(Collection, record) + else: + logger.info('Invalid record type.') + exit() + return rec_obj + + def post_bitstream(self, item_uuid, bitstream): + """Posts a bitstream to a specified item and returns the bitstream + ID.""" + endpoint = (f'{self.url}/items/{item_uuid}' + f'/bitstreams?name={bitstream.name}') + header_upload = {'accept': 'application/json'} + data = open(bitstream.file_path, 'rb') + response = requests.post(endpoint, headers=header_upload, + cookies=self.cookies, data=data).json() + bitstream_uuid = response['uuid'] + return bitstream_uuid + def post_coll_to_comm(self, comm_handle, coll_name): """Posts a collection to a specified community.""" - endpoint = f'{self.url}/handle/{comm_handle}' - community = requests.get(endpoint, headers=self.header, + hdl_endpoint = f'{self.url}/handle/{comm_handle}' + community = requests.get(hdl_endpoint, headers=self.header, cookies=self.cookies).json() - comm_id = community['uuid'] - collection = {'name': coll_name} - endpoint2 = f'{self.url}/communities/{comm_id}/collections' - coll_id = requests.post(endpoint2, headers=self.header, - cookies=self.cookies, json=collection).json() - coll_id = coll_id['uuid'] - logger.info(f'Collection posted: {coll_id}') - return coll_id - - def post_items_to_coll(self, coll_id, coll_metadata, file_dict, - ingest_type): - """Posts items to a specified collection.""" - for item_metadata in coll_metadata: - file_exists = '' - for element in [e for e in item_metadata['metadata'] - if e['key'] == 'file_identifier']: - file_identifier = element['value'] - item_metadata['metadata'].remove(element) - for k in [e for e in file_dict if file_identifier in e]: - file_exists = True - if file_exists is True: - endpoint = f'{self.url}/collections/{coll_id}/items' - item_id = requests.post(endpoint, headers=self.header, - cookies=self.cookies, - json=item_metadata).json() - item_id = item_id['uuid'] - bit_ids = self.post_bitstreams_to_item(item_id, - file_identifier, - file_dict, ingest_type) - for bit_id in bit_ids: - logger.info(f'Bitstream posted: {bit_id}') - yield item_id - - def post_bitstreams_to_item(self, item_id, file_identifier, file_dict, - ingest_type): - """Post a sorted set 
of bitstreams to a specified item.""" - file_dict = collections.OrderedDict(sorted(file_dict.items())) - for bitstream, v in file_dict.items(): - bit_id = self.post_bitstream(item_id, file_dict, ingest_type, - bitstream) - yield bit_id - - def post_bitstream(self, item_id, file_dict, ingest_type, - bitstream): - """Post a bitstream to a specified item.""" - bitstream_path = file_dict[bitstream] - file_name = os.path.basename(bitstream_path) - if ingest_type == 'local': - data = open(bitstream_path, 'rb') - elif ingest_type == 'remote': - data = requests.get(bitstream_path) - endpoint = (f'{self.url}/items/{item_id}' - + f'/bitstreams?name={file_name}') - header_upload = {'accept': 'application/json'} - bit_id = requests.post(endpoint, headers=header_upload, - cookies=self.cookies, data=data).json() - bit_id = bit_id['uuid'] - return bit_id + comm_uuid = community['uuid'] + uuid_endpoint = f'{self.url}/communities/{comm_uuid}/collections' + coll_uuid = requests.post(uuid_endpoint, headers=self.header, + cookies=self.cookies, + json={'name': coll_name}).json() + coll_uuid = coll_uuid['uuid'] + logger.info(f'Collection posted: {coll_uuid}') + return coll_uuid + + def post_item_to_collection(self, collection_uuid, item): + """Posts item to a specified collection and returns the item ID.""" + endpoint = f'{self.url}/collections/{collection_uuid}/items' + post_response = requests.post( + endpoint, headers=self.header, cookies=self.cookies, + json={'metadata': attr.asdict(item)['metadata']}).json() + item_uuid = post_response['uuid'] + item_handle = post_response['handle'] + return item_uuid, item_handle def _pop_inst(self, class_type, rec_obj): """Populate class instance with data from record.""" @@ -181,9 +150,28 @@ class BaseRecord: @attr.s -class Item(BaseRecord): - metadata = Field() - bitstreams = Field() +class Collection(BaseRecord): + items = Group() + + def post_items(self, client): + for item in self.items: + item_uuid, item_handle = client.post_item_to_collection(self.uuid, + item) + item.uuid = item_uuid + item.handle = item_handle + logger.info(f'Item posted: {item_uuid}') + for bitstream in item.bitstreams: + bitstream_uuid = client.post_bitstream(item_uuid, bitstream) + bitstream.uuid = bitstream_uuid + logger.info(f'Bitstream posted: {bitstream_uuid}') + yield item + + @classmethod + def from_csv(cls, csv_reader, field_map): + items = [ + Item.from_row(row, field_map) for row in csv_reader + ] + return cls(items=items) @attr.s @@ -192,65 +180,56 @@ class Community(BaseRecord): @attr.s -class Collection(BaseRecord): - items = Field() +class Item(BaseRecord): + metadata = Group() + bitstreams = Group() + file_identifier = Field() + source_system_identifier = Field() + + def bitstreams_from_directory(self, directory, file_type='*'): + files = glob.iglob( + f'{directory}/**/{self.file_identifier}*.{file_type}', + recursive=True + ) + self.bitstreams = [ + Bitstream(name=os.path.basename(f), + file_path=f) for f in files + ] + self.bitstreams.sort(key=lambda x: x.name) + + @classmethod + def from_row(cls, row, field_map): + metadata = [] + for f in field_map: + field = row[field_map[f]['csv_field_name']] + if f == 'file_identifier': + file_identifier = field + continue # file_identifier is not included in DSpace metadata + if f == 'dc.relation.isversionof': + source_system_identifier = field + delimiter = field_map[f]['delimiter'] + language = field_map[f]['language'] + if delimiter: + metadata.extend([ + MetadataEntry(key=f, value=v, language=language) + for v in 
field.split(delimiter) + ]) + else: + metadata.append( + MetadataEntry(key=f, value=field, language=language) + ) + return cls(metadata=metadata, file_identifier=file_identifier, + source_system_identifier=source_system_identifier) @attr.s -class MetadataEntry(BaseRecord): +class Bitstream(): + name = Field() + file_path = Field() + + +@attr.s +class MetadataEntry(): key = Field() value = Field() language = Field() - - -def build_file_dict_remote(directory_url, file_type, file_dict): - """Build list of files in a remote directory.""" - response = requests.get(directory_url) - links = html.fromstring(response.content).iterlinks() - for link in [i for i in links if i[2].endswith(file_type)]: - file_identifier = link[2].replace(f'.{file_type}', '') - file_dict[file_identifier] = f'{directory_url}{link[2]}' - return file_dict - - -def create_csv_from_list(list_name, output): - """Creates CSV file from list content.""" - with open(f'{output}.csv', 'w') as f: - writer = csv.writer(f) - writer.writerow(['id']) - for item in list_name: - writer.writerow([item]) - - -def elapsed_time(start_time, label): - """Calculate elapsed time.""" - td = datetime.timedelta(seconds=time.time() - start_time) - logger.info(f'{label} : {td}') - - -def metadata_elems_from_row(row, key, field, language=None, delimiter=''): - """Create a metadata element from a CSV row.""" - metadata_elems = [] - if row[field] != '': - if delimiter: - values = row[field].split(delimiter) - else: - values = [row[field]] - for value in values: - metadata_elem = {'key': key, 'language': language, 'value': - value} - metadata_elems.append({k: v for k, v in metadata_elem.items() - if v is not None}) - return metadata_elems - - -def create_metadata_rec(mapping_dict, row, metadata_rec): - """Create metadata record from a series of metadata elements.""" - for k, v in mapping_dict.items(): - if len(v) == 3: - metadata_elems = metadata_elems_from_row(row, k, v[0], v[1], v[2]) - else: - metadata_elems = metadata_elems_from_row(row, k, v[0]) - for metadata_elem in metadata_elems: - metadata_rec.append(metadata_elem) - return metadata_rec diff --git a/tests/conftest.py b/tests/conftest.py index 2778674..c247b1f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,10 +1,11 @@ import csv +import json from click.testing import CliRunner import pytest import requests_mock -from dsaps import models +from dsaps import metadata, models @pytest.fixture() @@ -16,43 +17,6 @@ def client(): return client -@pytest.fixture(autouse=True) -def web_mock(): - with requests_mock.Mocker() as m: - cookies = {'JSESSIONID': '11111111'} - m.post('mock://example.com/login', cookies=cookies) - user_json = {'fullname': 'User Name'} - m.get('mock://example.com/status', json=user_json) - rec_json = {'metadata': {'title': 'Sample title'}, 'type': 'item'} - m.get('mock://example.com/items/123?expand=all', json=rec_json) - results_json1 = {'items': [{'link': '1234'}]} - results_json2 = {'items': []} - m.get('mock://example.com/filtered-items?', [{'json': results_json1}, - {'json': results_json2}]) - rec_json = {'uuid': '123'} - m.get('mock://example.com/handle/111.1111', json=rec_json) - comm_json = {'uuid': 'a1b2'} - m.get('mock://example.com/handle/1234', json=comm_json) - coll_json = {'uuid': '5678'} - m.post('mock://example.com/communities/a1b2/collections', - json=coll_json) - item_json = {'uuid': 'a1b2', 'handle': '1111.1/1111'} - m.post('mock://example.com/collections/789/items', json=item_json) - b_json_1 = {'uuid': 'c3d4'} - url_1 = 
'mock://example.com/items/a1b2/bitstreams?name=test_01.pdf' - m.post(url_1, json=b_json_1) - b_json_2 = {'uuid': 'e5f6'} - url_2 = 'mock://example.com/items/a1b2/bitstreams?name=test_02.pdf' - m.post(url_2, json=b_json_2) - m.get('mock://remoteserver.com/files/test_01.pdf', content=b'') - yield m - - -@pytest.fixture() -def runner(): - return CliRunner() - - @pytest.fixture() def input_dir(tmp_path): input_dir = tmp_path / 'files' @@ -67,16 +31,87 @@ def input_dir(tmp_path): pass with open(f'{input_dir}/test_01.jpg', 'w'): pass - with open(f'{input_dir}/metadata.csv', 'w') as csvfile: - writer = csv.writer(csvfile) - writer.writerow(['uri'] + ['title'] + ['file_identifier']) - writer.writerow(['/repo/0/ao/123'] + ['Test Item'] + ['test']) - writer.writerow(['/repo/0/ao/456'] + ['Tast Item'] + ['tast']) return str(f'{input_dir}/') +@pytest.fixture() +def aspace_delimited_csv(): + with open('tests/fixtures/aspace_metadata_delimited.csv') as f: + reader = csv.DictReader(f) + yield reader + + +@pytest.fixture() +def json_metadata_delim(): + json_metadata = metadata.create_json_metadata( + 'tests/fixtures/metadata_delim.csv', 'delimited' + ) + return json_metadata + + +@pytest.fixture() +def aspace_mapping(): + with open('config/aspace_mapping.json') as f: + mapping = json.load(f) + yield mapping + + +@pytest.fixture() +def standard_mapping(): + with open('tests/fixtures/standard_mapping.json') as f: + mapping = json.load(f) + yield mapping + + +@pytest.fixture() +def json_metadata_num_col(): + json_metadata = metadata.create_json_metadata( + 'tests/fixtures/metadata_num_col.csv', 'num_columns' + ) + return json_metadata + + @pytest.fixture() def output_dir(tmp_path): output_dir = tmp_path / 'output' output_dir.mkdir() return str(f'{output_dir}/') + + +@pytest.fixture() +def runner(): + return CliRunner() + + +@pytest.fixture(autouse=True) +def web_mock(input_dir): + with requests_mock.Mocker() as m: + cookies = {'JSESSIONID': '11111111'} + m.post('mock://example.com/login', cookies=cookies) + user_json = {'fullname': 'User Name'} + m.get('mock://example.com/status', json=user_json) + rec_json = {'metadata': {'title': 'Sample title'}, 'type': 'item'} + m.get('mock://example.com/items/123?expand=all', json=rec_json) + results_json1 = {'items': [{'link': '1234'}]} + results_json2 = {'items': []} + m.get('mock://example.com/filtered-items?', [{'json': results_json1}, + {'json': results_json2}]) + rec_json = {'uuid': 'a1b2'} + m.get('mock://example.com/handle/111.1111', json=rec_json) + coll_json = {'uuid': 'c3d4'} + m.post('mock://example.com/communities/a1b2/collections', + json=coll_json) + item_json = {'uuid': 'e5f6', 'handle': '222.2222'} + m.post('mock://example.com/collections/c3d4/items', json=item_json) + b_json_1 = {'uuid': 'g7h8'} + url_1 = 'mock://example.com/items/e5f6/bitstreams?name=test_01.pdf' + m.post(url_1, json=b_json_1) + b_json_2 = {'uuid': 'i9j0'} + url_2 = 'mock://example.com/items/e5f6/bitstreams?name=test_02.pdf' + m.post(url_2, json=b_json_2) + m.get('mock://remoteserver.com/files/test_01.pdf', content=b'Sample') + coll_json = {'uuid': 'k1l2'} + m.get('mock://example.com/handle/333.3333', json=coll_json) + item_json_2 = {'uuid': 'e5f6', 'handle': '222.2222'} + m.post('mock://example.com/collections/k1l2/items', json=item_json_2) + yield m diff --git a/tests/fixtures/aspace_metadata_delimited.csv b/tests/fixtures/aspace_metadata_delimited.csv new file mode 100644 index 0000000..8482b09 --- /dev/null +++ b/tests/fixtures/aspace_metadata_delimited.csv @@ -0,0 +1,3 @@ 
+uri,title,file_identifier,authors +/repo/0/ao/456,Tast Item,tast,"Smith, John|Smith, Jane" +/repo/0/ao/123,Test Item,test,"Smith, Jane" \ No newline at end of file diff --git a/tests/fixtures/aspace_metadata_no_delimiter.csv b/tests/fixtures/aspace_metadata_no_delimiter.csv new file mode 100644 index 0000000..a078983 --- /dev/null +++ b/tests/fixtures/aspace_metadata_no_delimiter.csv @@ -0,0 +1,2 @@ +uri,title,file_identifier,authors +/repo/0/ao/123,Test Item,test,"Smith, Jane" diff --git a/tests/fixtures/metadata_delim.csv b/tests/fixtures/metadata_delim.csv new file mode 100644 index 0000000..1d7fb4c --- /dev/null +++ b/tests/fixtures/metadata_delim.csv @@ -0,0 +1,3 @@ +uri,title,file_identifier,authors +/repo/0/ao/123,Test Item,test,"Smith, Jane" +/repo/0/ao/456,Tast Item,tast,"Smith, John|Smith, Jane" diff --git a/tests/fixtures/metadata_num_col.csv b/tests/fixtures/metadata_num_col.csv new file mode 100644 index 0000000..0c064d0 --- /dev/null +++ b/tests/fixtures/metadata_num_col.csv @@ -0,0 +1,2 @@ +dc.relation.isversionof,file_identifier,dc.title,dc.contributor.author_1,dc.contributor.author_2 +/repo/0/ao/456,tast,Tast Item,"Smith, John","Smith, Jane" diff --git a/tests/fixtures/standard_mapping.json b/tests/fixtures/standard_mapping.json new file mode 100644 index 0000000..4a5713a --- /dev/null +++ b/tests/fixtures/standard_mapping.json @@ -0,0 +1,22 @@ +{ + "file_identifier": { + "csv_field_name": "file_identifier", + "language": null, + "delimiter": "" + }, + "dc.title": { + "csv_field_name": "title", + "language": "en_US", + "delimiter": "" + }, + "dc.relation.isversionof": { + "csv_field_name": "uri", + "language": null, + "delimiter": "" + }, + "dc.contributor.author": { + "csv_field_name": "authors", + "language": null, + "delimiter": "|" + } +} \ No newline at end of file diff --git a/tests/test_cli.py b/tests/test_cli.py index 42fdc0a..f2fb973 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,17 +1,60 @@ from dsaps.cli import main -def test_reconcile(runner, input_dir, output_dir): - """Test reconcile command.""" +def test_additems(runner, input_dir): + """Test adding items to a collection.""" result = runner.invoke(main, ['--url', 'mock://example.com/', '--email', 'test@test.mock', '--password', '1234', - 'reconcile', - '--metadata_csv', - f'{input_dir}/metadata.csv', - '--file_path', 'files', - '--file_type', 'pdf', - '--output_path', f'{output_dir}' - ]) + 'additems', + '--metadata-csv', + 'tests/fixtures/metadata_delim.csv', + '--field-map', + 'tests/fixtures/standard_mapping.json', + '--directory', input_dir, + '--file-type', 'pdf', + '--collection-handle', '333.3333']) assert result.exit_code == 0 + result = runner.invoke(main, + ['--url', 'mock://example.com/', + '--email', 'test@test.mock', + '--password', '1234', + 'newcollection', + '--community-handle', '111.1111', + '--collection-name', 'Test Collection', + 'additems', + '--metadata-csv', + 'tests/fixtures/metadata_delim.csv', + '--field-map', + 'tests/fixtures/standard_mapping.json', + '--directory', input_dir, + '--file-type', 'pdf']) + assert result.exit_code == 0 + + +def test_newcollection(runner, input_dir): + """Test newcoll command.""" + result = runner.invoke(main, + ['--url', 'mock://example.com/', + '--email', 'test@test.mock', + '--password', '1234', + 'newcollection', + '--community-handle', '111.1111', + '--collection-name', 'Test Collection']) + assert result.exit_code == 0 + + +# def test_reconcile(runner, input_dir, output_dir): +# """Test reconcile command.""" +# result = 
runner.invoke(main, +# ['--url', 'mock://example.com/', +# '--email', 'test@test.mock', +# '--password', '1234', +# 'reconcile', +# '--metadata_csv', 'tests/fixtures/metadata_delim.csv', +# '--file_path', input_dir, +# '--file_type', 'pdf', +# '--output_path', output_dir +# ]) +# assert result.exit_code == 0 diff --git a/tests/test_helpers.py b/tests/test_helpers.py new file mode 100644 index 0000000..867fd38 --- /dev/null +++ b/tests/test_helpers.py @@ -0,0 +1,97 @@ +import csv + +from dsaps import helpers +from dsaps.models import Item + +# from dsaps.helpers import files_from_location +# +# +# def test_file_list_from_location_with_file_type(input_dir): +# files = files_from_location(input_dir, 'pdf') +# assert 3 == len(files) +# assert {'name': 'test_01', 'path': f'{input_dir}test_01.pdf'} in files +# assert {'name': 'test_02', +# 'path': f'{input_dir}more_files/test_02.pdf'} in files +# +# +# def test_file_list_from_location_without_file_type(input_dir): +# files = files_from_location(input_dir) +# assert 4 == len(files) +# assert {'name': 'test_01', 'path': f'{input_dir}test_01.pdf'} in files +# assert {'name': 'test_02', +# 'path': f'{input_dir}more_files/test_02.pdf'} in files +# assert {'name': 'test_01', 'path': f'{input_dir}test_01.jpg'} in files +# + + +def test_create_csv_from_list(output_dir): + """Test create_csv_from_list function.""" + list_name = ['123'] + helpers.create_csv_from_list(list_name, f'{output_dir}output') + with open(f'{output_dir}output.csv') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + assert row['id'] == '123' + + +def test_create_file_dict(input_dir): + """Test create_file_dict function.""" + file_dict = helpers.create_file_dict(input_dir, 'pdf') + assert file_dict['test_02'] == f'{input_dir}more_files/test_02.pdf' + assert file_dict['test_01'] == f'{input_dir}test_01.pdf' + assert file_dict['best_01'] == f'{input_dir}best_01.pdf' + + +def test_create_ingest_report(runner, output_dir): + """Test create_ingest_report function.""" + file_name = 'ingest_report.csv' + items = [ + Item(source_system_identifier='/repo/0/ao/123', + handle='111.1111') + ] + helpers.create_ingest_report(items, f'{output_dir}{file_name}') + with open(f'{output_dir}{file_name}') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + assert row['uri'] == '/repo/0/ao/123' + assert row['link'] == 'https://hdl.handle.net/111.1111' + + +def test_create_metadata_id_list(input_dir): + """Test create_metadata_id_list function.""" + metadata_path = 'tests/fixtures/metadata_delim.csv' + metadata_ids = helpers.create_metadata_id_list(metadata_path) + assert 'test' in metadata_ids + assert 'tast' in metadata_ids + + +def test_match_files_to_metadata(): + """Test match_files_to_metadata function.""" + file_dict = {'test_01': 'files/test_01.pdf'} + metadata_ids = ['test', 'tast'] + file_matches = helpers.match_files_to_metadata(file_dict, metadata_ids) + assert len(file_matches) == 1 + assert 'test_01' in file_matches + + +def test_match_metadata_to_files(): + """Test match_metadata_to_files function.""" + file_dict = {'test_01': 'files/test_01.pdf', + 'tast_01': 'files/tast_01.pdf'} + metadata_ids = ['test'] + file_matches = helpers.match_metadata_to_files(file_dict, metadata_ids) + assert len(file_matches) == 1 + assert 'test' in file_matches + + +def test_update_metadata_csv(input_dir, output_dir): + """Test update_metadata_csv function.""" + metadata_matches = ['test'] + helpers.update_metadata_csv('tests/fixtures/metadata_delim.csv', + output_dir, 
metadata_matches) + with open(f'{output_dir}updated-metadata_delim.csv') as csvfile2: + reader = csv.DictReader(csvfile2) + for row in reader: + assert row['uri'] == '/repo/0/ao/123' + assert row['title'] == 'Test Item' + assert row['file_identifier'] == 'test' diff --git a/tests/test_metadata.py b/tests/test_metadata.py new file mode 100644 index 0000000..96aaefa --- /dev/null +++ b/tests/test_metadata.py @@ -0,0 +1,61 @@ +import csv + +from dsaps import metadata + + +def test_create_json_metadata(input_dir, json_metadata_delim): + """Test create_json_metadata function.""" + md_group = metadata.create_json_metadata('tests/fixtures/metadata_delim.csv', + 'delimited') + assert md_group[0]['metadata'] == json_metadata_delim[0]['metadata'] + assert md_group[1]['metadata'] == json_metadata_delim[1]['metadata'] + + +def test_create_metadata_rec_delim(json_metadata_delim): + """Test create_metadata_rec function.""" + mapping_dict = {'file_identifier': ['file_identifier'], + 'dc.title': ['title', 'en_US'], + 'dc.relation.isversionof': ['uri'], + 'dc.contributor.author': ['authors', None, '|']} + with open('tests/fixtures/metadata_delim.csv') as csvfile: + reader = csv.DictReader(csvfile) + metadata_rec_1 = metadata.create_metadata_rec_delim(mapping_dict, + next(reader), []) + assert metadata_rec_1 == json_metadata_delim[0]['metadata'] + metadata_rec_2 = metadata.create_metadata_rec_delim(mapping_dict, + next(reader), []) + assert metadata_rec_2 == json_metadata_delim[1]['metadata'] + + +def test_create_metadata_rec_num_col(json_metadata_num_col): + """Test create_metadata_rec_num_col function.""" + with open('tests/fixtures/metadata_num_col.csv') as csvfile: + reader = csv.DictReader(csvfile) + metadata_rec = metadata.create_metadata_rec_num_col(next(reader), []) + assert metadata_rec == json_metadata_num_col[0]['metadata'] + + +def test_metadata_elems_from_row(): + """Test metadata_elems_from_row function.""" + row = {'title': 'Test title'} + metadata_elem = metadata.metadata_elems_from_row(row, 'dc.title', 'title', + 'en_US') + assert metadata_elem[0]['key'] == 'dc.title' + assert metadata_elem[0]['value'] == 'Test title' + assert metadata_elem[0]['language'] == 'en_US' + metadata_elem = metadata.metadata_elems_from_row(row, 'dc.title', 'title') + assert metadata_elem[0]['key'] == 'dc.title' + assert metadata_elem[0]['value'] == 'Test title' + assert 'language' not in metadata_elem[0] + row = {'title': ''} + metadata_elem = metadata.metadata_elems_from_row(row, 'dc.title', 'title') + assert metadata_elem == [] + row = {'title': 'Test title 1|Test title 2'} + metadata_elem = metadata.metadata_elems_from_row(row, 'dc.title', 'title', + 'en_US', '|') + assert metadata_elem[0]['key'] == 'dc.title' + assert metadata_elem[0]['value'] == 'Test title 1' + assert metadata_elem[0]['language'] == 'en_US' + assert metadata_elem[1]['key'] == 'dc.title' + assert metadata_elem[1]['value'] == 'Test title 2' + assert metadata_elem[1]['language'] == 'en_US' diff --git a/tests/test_models.py b/tests/test_models.py index 6dae282..f9db33e 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,7 +1,4 @@ -import csv - import attr -import requests_mock from dsaps import models @@ -15,12 +12,6 @@ def test_authenticate(client): assert client.cookies == {'JSESSIONID': '11111111'} -def test_get_record(client): - """Test get_record method.""" - rec_obj = client.get_record('123', 'items') - assert attr.asdict(rec_obj)['metadata'] == {'title': 'Sample title'} - - def test_filtered_item_search(client): """Test 
filtered_item_search method.""" key = 'dc.title' @@ -31,62 +22,54 @@ def test_filtered_item_search(client): assert '1234' in item_links -def test_get_id_from_handle(client): - """Test get_id_from_handle method.""" - id = client.get_id_from_handle('111.1111') - assert id == '123' +def test_get_uuid_from_handle(client): + """Test get_uuid_from_handle method.""" + id = client.get_uuid_from_handle('111.1111') + assert id == 'a1b2' -def test_post_coll_to_comm(client): - """Test post_coll_to_comm method.""" - comm_handle = '1234' - coll_name = 'Test Collection' - coll_id = client.post_coll_to_comm(comm_handle, coll_name) - assert coll_id == '5678' - - -def test_post_items_to_coll(client, input_dir): - """Test post_items_to_coll method.""" - coll_metadata = [{"metadata": [ - {"key": "file_identifier", - "value": "test"}, - {"key": "dc.title", "value": - "Monitoring Works: Getting Teachers", - "language": "en_US"}, - {"key": "dc.relation.isversionof", - "value": "repo/0/ao/123"}]}] - coll_id = '789' - ingest_type = 'local' - file_dict = {'test_01': f'{input_dir}test_01.pdf'} - item_ids = client.post_items_to_coll(coll_id, coll_metadata, file_dict, - ingest_type) - for item_id in item_ids: - assert 'a1b2' == item_id - - -def test_post_bitstreams_to_item(client, input_dir): - """Test post_bitstreams_to_item method.""" - item_id = 'a1b2' - ingest_type = 'local' - file_identifier = '123' - file_dict = {'test_02': f'{input_dir}more_files/test_02.pdf', - 'test_01': f'{input_dir}test_01.pdf'} - bit_ids = client.post_bitstreams_to_item(item_id, file_identifier, - file_dict, ingest_type) - assert next(bit_ids) == 'c3d4' - assert next(bit_ids) == 'e5f6' +def test_get_record(client): + """Test get_record method.""" + rec_obj = client.get_record('123', 'items') + assert attr.asdict(rec_obj)['metadata'] == {'title': 'Sample title'} def test_post_bitstream(client, input_dir): """Test post_bitstream method.""" - item_id = 'a1b2' - file_dict = {'test_01': f'{input_dir}test_01.pdf'} - bitstream = 'test_01' - bit_id = client.post_bitstream(item_id, file_dict, 'local', bitstream) - assert 'c3d4' == bit_id - file_dict = {'test_01': 'mock://remoteserver.com/files/test_01.pdf'} - bit_id = client.post_bitstream(item_id, file_dict, 'remote', bitstream) - assert 'c3d4' == bit_id + item_uuid = 'e5f6' + bitstream = models.Bitstream(name='test_01.pdf', + file_path=f'{input_dir}test_01.pdf') + bit_uuid = client.post_bitstream(item_uuid, bitstream) + assert bit_uuid == 'g7h8' + + +def test_post_coll_to_comm(client): + """Test post_coll_to_comm method.""" + comm_handle = '111.1111' + coll_name = 'Test Collection' + coll_uuid = client.post_coll_to_comm(comm_handle, coll_name) + assert coll_uuid == 'c3d4' + + +def test_post_item_to_collection(client, input_dir): + """Test post_item_to_collection method.""" + item = models.Item() + item.bitstreams = [ + models.Bitstream(name='test_01.pdf', + file_path=f'{input_dir}test_01.pdf') + ] + item.metadata = [ + models.MetadataEntry(key='file_identifier', value='test'), + models.MetadataEntry(key='dc.title', + value='Monitoring Works: Getting Teachers', + language='en_US'), + models.MetadataEntry(key='dc.relation.isversionof', + value='repo/0/ao/123') + ] + coll_uuid = 'c3d4' + item_uuid, item_handle = client.post_item_to_collection(coll_uuid, item) + assert item_uuid == 'e5f6' + assert item_handle == '222.2222' def test__pop_inst(client): @@ -106,67 +89,47 @@ def test__build_uuid_list(client): assert '1234' in child_list -def test_build_file_dict_remote(): - """Test build_file_dict_remote 
function."""
-    content = '<html><head>'
-    content += '<title>Index of /pdf</title></head><body><h1>Index of /'
-    content += 'pdf</h1><table><tr><th>Name</th><th>Last modified'
-    content += '</th><th>Size</th></tr><tr><td><a href="999.pdf">999.pdf</a></td><td>'
-    content += '2001-02-16 11:59</td><td>107K
    ' - with requests_mock.Mocker() as m: - directory_url = 'mock://test.com/pdfs/' - file_type = 'pdf' - file_dict = {} - m.get(directory_url, text=content) - file_list = models.build_file_dict_remote(directory_url, file_type, - file_dict) - assert '999' in file_list - - -def test_create_csv_from_list(output_dir): - """Test create_csv_from_list function.""" - list_name = ['123'] - models.create_csv_from_list(list_name, f'{output_dir}output') - with open(f'{output_dir}output.csv') as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - assert row['id'] == '123' - - -def test_metadata_elems_from_row(): - """Test metadata_elems_from_row function.""" - row = {'title': 'Test title'} - metadata_elem = models.metadata_elems_from_row(row, 'dc.title', 'title', - 'en_US') - assert metadata_elem[0]['key'] == 'dc.title' - assert metadata_elem[0]['value'] == 'Test title' - assert metadata_elem[0]['language'] == 'en_US' - metadata_elem = models.metadata_elems_from_row(row, 'dc.title', 'title') - assert metadata_elem[0]['key'] == 'dc.title' - assert metadata_elem[0]['value'] == 'Test title' - assert 'language' not in metadata_elem[0] - row = {'title': ''} - metadata_elem = models.metadata_elems_from_row(row, 'dc.title', 'title') - assert metadata_elem == [] - row = {'title': 'Test title 1|Test title 2'} - metadata_elem = models.metadata_elems_from_row(row, 'dc.title', 'title', - 'en_US', '|') - assert metadata_elem[0]['key'] == 'dc.title' - assert metadata_elem[0]['value'] == 'Test title 1' - assert metadata_elem[0]['language'] == 'en_US' - assert metadata_elem[1]['key'] == 'dc.title' - assert metadata_elem[1]['value'] == 'Test title 2' - assert metadata_elem[1]['language'] == 'en_US' - - -# def test_create_ingest_report(): -# assert False - - -def test_create_metadata_rec(): - metadata_rec = [] - row = {'title': 'Test title'} - mapping_dict = {'dc.title': ['title']} - metadata_rec = models.create_metadata_rec(mapping_dict, row, metadata_rec) - assert metadata_rec[0]['key'] == 'dc.title' - assert metadata_rec[0]['value'] == 'Test title' +def test_collection_from_csv(aspace_delimited_csv, aspace_mapping): + collection = models.Collection.from_csv( + aspace_delimited_csv, aspace_mapping + ) + assert 2 == len(collection.items) + + +def test_collection_post_items(client, input_dir, aspace_delimited_csv, + aspace_mapping): + collection = models.Collection.from_csv( + aspace_delimited_csv, aspace_mapping + ) + collection.uuid = 'c3d4' + items = collection.post_items(client) + for item in items: + assert item.handle == '222.2222' + assert item.uuid == 'e5f6' + + +def test_item_bitstreams_from_directory(input_dir): + item = models.Item(file_identifier='test') + item.bitstreams_from_directory(input_dir) + assert 3 == len(item.bitstreams) + assert item.bitstreams[0].name == 'test_01.jpg' + assert item.bitstreams[1].name == 'test_01.pdf' + assert item.bitstreams[2].name == 'test_02.pdf' + item.bitstreams_from_directory(input_dir, 'pdf') + assert 2 == len(item.bitstreams) + assert item.bitstreams[0].name == 'test_01.pdf' + assert item.bitstreams[1].name == 'test_02.pdf' + + +def test_item_from_row(aspace_delimited_csv, standard_mapping): + row = next(aspace_delimited_csv) + item = models.Item.from_row(row, standard_mapping) + assert attr.asdict(item)['metadata'] == [ + {'key': 'dc.title', 'value': 'Tast Item', 'language': 'en_US'}, + {'key': 'dc.relation.isversionof', 'value': '/repo/0/ao/456', + 'language': None}, + {'key': 'dc.contributor.author', 'value': 'Smith, John', + 'language': None}, + 
{'key': 'dc.contributor.author', 'value': 'Smith, Jane', + 'language': None} + ] diff --git a/tests/test_workflows.py b/tests/test_workflows.py deleted file mode 100644 index e32f9b8..0000000 --- a/tests/test_workflows.py +++ /dev/null @@ -1,72 +0,0 @@ -import csv - -from dsaps import workflows - - -def test_create_file_dict(input_dir): - """Test create_file_dict function.""" - file_dict = workflows.create_file_dict(input_dir, 'pdf') - assert file_dict['test_02'] == f'{input_dir}more_files/test_02.pdf' - assert file_dict['test_01'] == f'{input_dir}test_01.pdf' - assert file_dict['best_01'] == f'{input_dir}best_01.pdf' - - -def test_create_metadata_id_list(input_dir): - """Test create_metadata_id_list function.""" - metadata_path = f'{input_dir}metadata.csv' - metadata_ids = workflows.create_metadata_id_list(metadata_path) - assert 'test' in metadata_ids - assert 'tast' in metadata_ids - - -def test_match_files_to_metadata(): - """Test match_files_to_metadata function.""" - file_dict = {'test_01': 'files/test_01.pdf'} - metadata_ids = ['test', 'tast'] - file_matches = workflows.match_files_to_metadata(file_dict, metadata_ids) - assert len(file_matches) == 1 - assert 'test_01' in file_matches - - -def test_match_metadata_to_files(): - """Test match_metadata_to_files function.""" - file_dict = {'test_01': 'files/test_01.pdf', - 'tast_01': 'files/tast_01.pdf'} - metadata_ids = ['test'] - file_matches = workflows.match_metadata_to_files(file_dict, metadata_ids) - assert len(file_matches) == 1 - assert 'test' in file_matches - - -def test_reconcile_files_and_metadata(input_dir, output_dir): - """Test reconcile function.""" - metadata_path = f'{input_dir}metadata.csv' - workflows.reconcile_files_and_metadata(metadata_path, output_dir, - input_dir, 'pdf') - with open(f'{output_dir}updated-metadata.csv') as csvfile2: - reader = csv.DictReader(csvfile2) - for row in reader: - assert row['uri'] == '/repo/0/ao/123' - assert row['title'] == 'Test Item' - assert row['file_identifier'] == 'test' - with open(f'{output_dir}no_metadata.csv') as csvfile3: - reader = csv.DictReader(csvfile3) - for row in reader: - assert row['id'] == 'best_01' - with open(f'{output_dir}no_files.csv') as csvfile4: - reader = csv.DictReader(csvfile4) - for row in reader: - assert row['id'] == 'tast' - - -def test_update_metadata_csv(input_dir, output_dir): - """Test update_metadata_csv function.""" - metadata_matches = ['test'] - workflows.update_metadata_csv(f'{input_dir}metadata.csv', output_dir, - metadata_matches) - with open(f'{output_dir}updated-metadata.csv') as csvfile2: - reader = csv.DictReader(csvfile2) - for row in reader: - assert row['uri'] == '/repo/0/ao/123' - assert row['title'] == 'Test Item' - assert row['file_identifier'] == 'test' From 5c2b2f6b5b643e461ff0cb8cca22e6aa2f2cb85b Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Thu, 22 Apr 2021 14:40:46 -0400 Subject: [PATCH 19/22] reconcile refactor (#20) * reconcile refactor * PR updates * Update cli.py --- dsaps/cli.py | 74 ++++++++++++++++++++++++++------------- dsaps/helpers.py | 43 ++++++++--------------- dsaps/metadata.py | 78 ------------------------------------------ tests/conftest.py | 20 ++--------- tests/test_cli.py | 31 +++++++++-------- tests/test_helpers.py | 42 ++++++----------------- tests/test_metadata.py | 61 --------------------------------- 7 files changed, 94 insertions(+), 255 deletions(-) delete mode 100644 dsaps/metadata.py delete mode 100644 tests/test_metadata.py diff --git a/dsaps/cli.py b/dsaps/cli.py index e045a55..ac79117 100644 
--- a/dsaps/cli.py +++ b/dsaps/cli.py @@ -14,6 +14,14 @@ logger = structlog.get_logger() +def validate_path(ctx, param, value): + """Validates th formatting of The submitted path""" + if value[-1] == '/': + return value + else: + raise click.BadParameter('Include / at the end of the path.') + + @click.group(chain=True) @click.option('--url', envvar='DSPACE_URL', required=True,) @click.option('-e', '--email', envvar='TEST_EMAIL', required=True, @@ -51,11 +59,13 @@ def main(ctx, url, email, password): @main.command() @click.option('-m', '--metadata-csv', required=True, - type=click.Path(exists=True), - help='The full path to the CSV file of metadata for the items.') -@click.option('--field-map', required=True, type=click.Path(exists=True), - help='Path to JSON field mapping file') -@click.option('-d', '--directory', required=True, + type=click.Path(exists=True, file_okay=True, dir_okay=False), + help='The path to the CSV file of metadata for the items.') +@click.option('--field-map', required=True, + type=click.Path(exists=True, file_okay=True, dir_okay=False), + help='The path to JSON field mapping file.') +@click.option('-d', '--content-directory', required=True, + type=click.Path(exists=True, dir_okay=True, file_okay=False), help='The full path to the content, either a directory of files ' 'or a URL for the storage location.') @click.option('-t', '--file-type', @@ -67,11 +77,11 @@ def main(ctx, url, email, password): help='The handle of the collection to which items are being ' 'added.', default=None) @click.pass_context -def additems(ctx, metadata_csv, field_map, directory, file_type, ingest_report, - collection_handle): +def additems(ctx, metadata_csv, field_map, content_directory, file_type, + ingest_report, collection_handle): """Adds items to a specified collection from a metadata CSV, a field mapping file, and a directory of files. May be run in conjunction with the - newcollection CLI commands.""" + newcollection CLI command.""" client = ctx.obj['client'] start_time = ctx.obj['start_time'] if 'collection_uuid' not in ctx.obj and collection_handle is None: @@ -87,7 +97,7 @@ def additems(ctx, metadata_csv, field_map, directory, file_type, ingest_report, mapping = json.load(jsonfile) collection = Collection.from_csv(metadata, mapping) for item in collection.items: - item.bitstreams_from_directory(directory, file_type) + item.bitstreams_from_directory(content_directory, file_type) collection.uuid = collection_uuid items = collection.post_items(client) if ingest_report: @@ -114,20 +124,38 @@ def newcollection(ctx, community_handle, collection_name): ctx.obj['collection_uuid'] = collection_uuid -# @main.command() -# @click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file', -# help='The path of the CSV file of metadata.') -# @click.option('-o', '--output_path', prompt='Enter the output path', -# default='', help='The path of the output files, include ' -# '/ at the end of the path') -# @click.option('-f', '--file_path', prompt='Enter the path', -# help='The path of the content, a URL or local drive path.' 
-# 'Include / at the end of a local drive path.') -# @click.option('-t', '--file_type', prompt='Enter the file type', -# help='The file type to be uploaded.') -# def reconcile(metadata_csv, file_path, file_type, output_path): -# workflows.reconcile_files_and_metadata(metadata_csv, output_path, -# file_path, file_type) +@main.command() +@click.option('-m', '--metadata-csv', required=True, + type=click.Path(exists=True, file_okay=True, dir_okay=False), + help='The path of the CSV file of metadata.') +@click.option('-o', '--output-directory', + type=click.Path(exists=True, file_okay=False), + default=f'{os.getcwd()}/', callback=validate_path, + help='The path of the output files, include / at the end of the ' + 'path.') +@click.option('-d', '--content-directory', required=True, + help='The full path to the content, either a directory of files ' + 'or a URL for the storage location.') +@click.option('-t', '--file-type', + help='The file type to be uploaded, if limited to one file ' + 'type.', default='*') +def reconcile(metadata_csv, output_directory, content_directory, file_type): + """Runs a reconciliation of the specified files and metadata that produces + reports of files with no metadata, metadata with no files, metadata + matched to files, and an updated version of the metadata CSV with only + the records that have matching files.""" + file_ids = helpers.create_file_list(content_directory, file_type) + metadata_ids = helpers.create_metadata_id_list(metadata_csv) + metadata_matches = helpers.match_metadata_to_files(file_ids, metadata_ids) + file_matches = helpers.match_files_to_metadata(file_ids, metadata_ids) + no_files = set(metadata_ids) - set(metadata_matches) + no_metadata = set(file_ids) - set(file_matches) + helpers.create_csv_from_list(no_metadata, f'{output_directory}no_metadata') + helpers.create_csv_from_list(no_files, f'{output_directory}no_files') + helpers.create_csv_from_list(metadata_matches, + f'{output_directory}metadata_matches') + helpers.update_metadata_csv(metadata_csv, output_directory, + metadata_matches) if __name__ == '__main__': diff --git a/dsaps/helpers.py b/dsaps/helpers.py index 15dabc7..c8f4fbd 100644 --- a/dsaps/helpers.py +++ b/dsaps/helpers.py @@ -2,11 +2,6 @@ import glob import os -import structlog - - -logger = structlog.get_logger() - def create_csv_from_list(list_name, output): """Creates CSV file from list content.""" @@ -17,14 +12,11 @@ def create_csv_from_list(list_name, output): writer.writerow([item]) -def create_file_dict(file_path, file_type): - """Creates a dict of file IDs and file paths.""" +def create_file_list(file_path, file_type): + """Creates a list of file names.""" files = glob.glob(f'{file_path}/**/*.{file_type}', recursive=True) - file_dict = {} - for file in files: - file_name = os.path.splitext(os.path.basename(file))[0] - file_dict[file_name] = file - return file_dict + file_list = [os.path.basename(file) for file in files] + return file_list def create_ingest_report(items, file_name): @@ -43,37 +35,32 @@ def create_metadata_id_list(metadata_csv): metadata_ids = [] with open(metadata_csv) as csvfile: reader = csv.DictReader(csvfile) - for row in [r for r in reader if r['file_identifier'] != '']: - metadata_ids.append(row['file_identifier']) + metadata_ids = [row['file_identifier'] for row in reader + if row['file_identifier'] != ''] return metadata_ids -def match_files_to_metadata(file_dict, metadata_ids): +def match_files_to_metadata(file_list, metadata_ids): """Creates a list of files matched to metadata records.""" - 
file_matches = [] - for file_id, v in file_dict.items(): - for metadata_id in [m for m in metadata_ids - if file_id.startswith(m)]: - file_matches.append(file_id) + file_matches = [file_id for metadata_id in metadata_ids + for file_id in file_list + if file_id.startswith(metadata_id)] return file_matches -def match_metadata_to_files(file_dict, metadata_ids): +def match_metadata_to_files(file_list, metadata_ids): """Creates a list of metadata records matched to files.""" - metadata_matches = [] - for metadata_id in metadata_ids: - for file_id in [f for f in file_dict - if f.startswith(metadata_id)]: - metadata_matches.append(metadata_id) + metadata_matches = [metadata_id for f in file_list for metadata_id in + metadata_ids if f.startswith(metadata_id)] return metadata_matches -def update_metadata_csv(metadata_csv, output_path, metadata_matches): +def update_metadata_csv(metadata_csv, output_directory, metadata_matches): """Creates an updated CSV of metadata records with matching files.""" with open(metadata_csv) as csvfile: reader = csv.DictReader(csvfile) upd_md_file_name = f'updated-{os.path.basename(metadata_csv)}' - with open(f'{output_path}{upd_md_file_name}', 'w') as updated_csv: + with open(f'{output_directory}{upd_md_file_name}', 'w') as updated_csv: writer = csv.DictWriter(updated_csv, fieldnames=reader.fieldnames) writer.writeheader() for row in reader: diff --git a/dsaps/metadata.py b/dsaps/metadata.py deleted file mode 100644 index dd59fdf..0000000 --- a/dsaps/metadata.py +++ /dev/null @@ -1,78 +0,0 @@ -import csv - - -def create_json_metadata(metadata_csv, multiple_terms): - """Creates JSON metadata from a CSV.""" - with open(metadata_csv) as csvfile: - reader = csv.DictReader(csvfile) - metadata_group = [] - # WE SHOULD DISCUSS HOW TO HANDLE MAPPING DICT CHANGES - mapping_dict = {'file_identifier': ['file_identifier'], - 'dc.title': ['title', 'en_US'], - 'dc.relation.isversionof': ['uri'], - 'dc.contributor.author': ['authors', None, '|'] - } - for row in reader: - metadata_rec = [] - if multiple_terms == 'delimited': - metadata_rec = create_metadata_rec_delim(mapping_dict, row, - metadata_rec) - else: - metadata_rec = create_metadata_rec_num_col(row, metadata_rec) - item = {'metadata': metadata_rec} - metadata_group.append(item) - return metadata_group - - -def create_metadata_rec_delim(mapping_dict, row, metadata_rec): - """Uses a mapping dict to create a metadata record from a series of metadata - elements.""" - for k, v in mapping_dict.items(): - if len(v) == 3: - metadata_elems = metadata_elems_from_row(row, k, v[0], v[1], v[2]) - elif len(v) == 2: - metadata_elems = metadata_elems_from_row(row, k, v[0], v[1]) - else: - metadata_elems = metadata_elems_from_row(row, k, v[0]) - for metadata_elem in metadata_elems: - metadata_rec.append(metadata_elem) - return metadata_rec - - -def create_metadata_rec_num_col(row, metadata_rec): - """Uses a CSV that contains DC property column names and numbered columns - for multiple terms to create a metadata record from a series of metadata - elements.""" - for csv_key, csv_value in row.items(): - if csv_value is not None: - if csv_key[-1].isdigit(): - dc_key = csv_key[:-2] - else: - dc_key = csv_key - # THE FIELDS THAT SHOULDN'T RECEIVE A LANG TAG IS ALSO LIKELY - # CHANGE WITH THE MAPPING DICT - if dc_key not in ['dc.contributor.author', 'file_identifier', - 'dc.relation.isversionof', 'dc.date.issued']: - metadata_elems = metadata_elems_from_row(row, dc_key, csv_key, - 'en_US') - else: - metadata_elems = metadata_elems_from_row(row, 
dc_key, csv_key) - for metadata_elem in metadata_elems: - metadata_rec.append(metadata_elem) - return metadata_rec - - -def metadata_elems_from_row(row, key, field, language=None, delimiter=''): - """Create a metadata element from a CSV row.""" - metadata_elems = [] - if row[field] != '': - if delimiter: - values = row[field].split(delimiter) - else: - values = [row[field]] - for value in values: - metadata_elem = {'key': key, 'language': language, 'value': - value} - metadata_elems.append({k: v for k, v in metadata_elem.items() - if v is not None}) - return metadata_elems diff --git a/tests/conftest.py b/tests/conftest.py index c247b1f..fddeb8a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,7 +5,7 @@ import pytest import requests_mock -from dsaps import metadata, models +from dsaps import models @pytest.fixture() @@ -41,14 +41,6 @@ def aspace_delimited_csv(): yield reader -@pytest.fixture() -def json_metadata_delim(): - json_metadata = metadata.create_json_metadata( - 'tests/fixtures/metadata_delim.csv', 'delimited' - ) - return json_metadata - - @pytest.fixture() def aspace_mapping(): with open('config/aspace_mapping.json') as f: @@ -63,14 +55,6 @@ def standard_mapping(): yield mapping -@pytest.fixture() -def json_metadata_num_col(): - json_metadata = metadata.create_json_metadata( - 'tests/fixtures/metadata_num_col.csv', 'num_columns' - ) - return json_metadata - - @pytest.fixture() def output_dir(tmp_path): output_dir = tmp_path / 'output' @@ -84,7 +68,7 @@ def runner(): @pytest.fixture(autouse=True) -def web_mock(input_dir): +def web_mock(): with requests_mock.Mocker() as m: cookies = {'JSESSIONID': '11111111'} m.post('mock://example.com/login', cookies=cookies) diff --git a/tests/test_cli.py b/tests/test_cli.py index f2fb973..937ba80 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -12,7 +12,7 @@ def test_additems(runner, input_dir): 'tests/fixtures/metadata_delim.csv', '--field-map', 'tests/fixtures/standard_mapping.json', - '--directory', input_dir, + '--content-directory', input_dir, '--file-type', 'pdf', '--collection-handle', '333.3333']) assert result.exit_code == 0 @@ -28,7 +28,7 @@ def test_additems(runner, input_dir): 'tests/fixtures/metadata_delim.csv', '--field-map', 'tests/fixtures/standard_mapping.json', - '--directory', input_dir, + '--content-directory', input_dir, '--file-type', 'pdf']) assert result.exit_code == 0 @@ -45,16 +45,17 @@ def test_newcollection(runner, input_dir): assert result.exit_code == 0 -# def test_reconcile(runner, input_dir, output_dir): -# """Test reconcile command.""" -# result = runner.invoke(main, -# ['--url', 'mock://example.com/', -# '--email', 'test@test.mock', -# '--password', '1234', -# 'reconcile', -# '--metadata_csv', 'tests/fixtures/metadata_delim.csv', -# '--file_path', input_dir, -# '--file_type', 'pdf', -# '--output_path', output_dir -# ]) -# assert result.exit_code == 0 +def test_reconcile(runner, input_dir, output_dir): + """Test reconcile command.""" + result = runner.invoke(main, + ['--url', 'mock://example.com/', + '--email', 'test@test.mock', + '--password', '1234', + 'reconcile', + '--metadata-csv', + 'tests/fixtures/metadata_delim.csv', + '--output-directory', output_dir, + '--content-directory', input_dir, + '--file-type', 'pdf' + ]) + assert result.exit_code == 0 diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 867fd38..f00f9f5 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -3,26 +3,6 @@ from dsaps import helpers from dsaps.models import Item -# from dsaps.helpers 
import files_from_location -# -# -# def test_file_list_from_location_with_file_type(input_dir): -# files = files_from_location(input_dir, 'pdf') -# assert 3 == len(files) -# assert {'name': 'test_01', 'path': f'{input_dir}test_01.pdf'} in files -# assert {'name': 'test_02', -# 'path': f'{input_dir}more_files/test_02.pdf'} in files -# -# -# def test_file_list_from_location_without_file_type(input_dir): -# files = files_from_location(input_dir) -# assert 4 == len(files) -# assert {'name': 'test_01', 'path': f'{input_dir}test_01.pdf'} in files -# assert {'name': 'test_02', -# 'path': f'{input_dir}more_files/test_02.pdf'} in files -# assert {'name': 'test_01', 'path': f'{input_dir}test_01.jpg'} in files -# - def test_create_csv_from_list(output_dir): """Test create_csv_from_list function.""" @@ -34,12 +14,11 @@ def test_create_csv_from_list(output_dir): assert row['id'] == '123' -def test_create_file_dict(input_dir): - """Test create_file_dict function.""" - file_dict = helpers.create_file_dict(input_dir, 'pdf') - assert file_dict['test_02'] == f'{input_dir}more_files/test_02.pdf' - assert file_dict['test_01'] == f'{input_dir}test_01.pdf' - assert file_dict['best_01'] == f'{input_dir}best_01.pdf' +def test_create_file_list(input_dir): + """Test create_file_list function.""" + file_list = helpers.create_file_list(input_dir, 'pdf') + for file_id in ['test_02.pdf', 'test_01.pdf', 'best_01.pdf']: + assert file_id in file_list def test_create_ingest_report(runner, output_dir): @@ -67,19 +46,18 @@ def test_create_metadata_id_list(input_dir): def test_match_files_to_metadata(): """Test match_files_to_metadata function.""" - file_dict = {'test_01': 'files/test_01.pdf'} + file_list = ['test_01.pdf'] metadata_ids = ['test', 'tast'] - file_matches = helpers.match_files_to_metadata(file_dict, metadata_ids) + file_matches = helpers.match_files_to_metadata(file_list, metadata_ids) assert len(file_matches) == 1 - assert 'test_01' in file_matches + assert 'test_01.pdf' in file_matches def test_match_metadata_to_files(): """Test match_metadata_to_files function.""" - file_dict = {'test_01': 'files/test_01.pdf', - 'tast_01': 'files/tast_01.pdf'} + file_list = ['test_01.pdf', 'tast_01.pdf'] metadata_ids = ['test'] - file_matches = helpers.match_metadata_to_files(file_dict, metadata_ids) + file_matches = helpers.match_metadata_to_files(file_list, metadata_ids) assert len(file_matches) == 1 assert 'test' in file_matches diff --git a/tests/test_metadata.py b/tests/test_metadata.py deleted file mode 100644 index 96aaefa..0000000 --- a/tests/test_metadata.py +++ /dev/null @@ -1,61 +0,0 @@ -import csv - -from dsaps import metadata - - -def test_create_json_metadata(input_dir, json_metadata_delim): - """Test create_json_metadata function.""" - md_group = metadata.create_json_metadata('tests/fixtures/metadata_delim.csv', - 'delimited') - assert md_group[0]['metadata'] == json_metadata_delim[0]['metadata'] - assert md_group[1]['metadata'] == json_metadata_delim[1]['metadata'] - - -def test_create_metadata_rec_delim(json_metadata_delim): - """Test create_metadata_rec function.""" - mapping_dict = {'file_identifier': ['file_identifier'], - 'dc.title': ['title', 'en_US'], - 'dc.relation.isversionof': ['uri'], - 'dc.contributor.author': ['authors', None, '|']} - with open('tests/fixtures/metadata_delim.csv') as csvfile: - reader = csv.DictReader(csvfile) - metadata_rec_1 = metadata.create_metadata_rec_delim(mapping_dict, - next(reader), []) - assert metadata_rec_1 == json_metadata_delim[0]['metadata'] - metadata_rec_2 = 
metadata.create_metadata_rec_delim(mapping_dict, - next(reader), []) - assert metadata_rec_2 == json_metadata_delim[1]['metadata'] - - -def test_create_metadata_rec_num_col(json_metadata_num_col): - """Test create_metadata_rec_num_col function.""" - with open('tests/fixtures/metadata_num_col.csv') as csvfile: - reader = csv.DictReader(csvfile) - metadata_rec = metadata.create_metadata_rec_num_col(next(reader), []) - assert metadata_rec == json_metadata_num_col[0]['metadata'] - - -def test_metadata_elems_from_row(): - """Test metadata_elems_from_row function.""" - row = {'title': 'Test title'} - metadata_elem = metadata.metadata_elems_from_row(row, 'dc.title', 'title', - 'en_US') - assert metadata_elem[0]['key'] == 'dc.title' - assert metadata_elem[0]['value'] == 'Test title' - assert metadata_elem[0]['language'] == 'en_US' - metadata_elem = metadata.metadata_elems_from_row(row, 'dc.title', 'title') - assert metadata_elem[0]['key'] == 'dc.title' - assert metadata_elem[0]['value'] == 'Test title' - assert 'language' not in metadata_elem[0] - row = {'title': ''} - metadata_elem = metadata.metadata_elems_from_row(row, 'dc.title', 'title') - assert metadata_elem == [] - row = {'title': 'Test title 1|Test title 2'} - metadata_elem = metadata.metadata_elems_from_row(row, 'dc.title', 'title', - 'en_US', '|') - assert metadata_elem[0]['key'] == 'dc.title' - assert metadata_elem[0]['value'] == 'Test title 1' - assert metadata_elem[0]['language'] == 'en_US' - assert metadata_elem[1]['key'] == 'dc.title' - assert metadata_elem[1]['value'] == 'Test title 2' - assert metadata_elem[1]['language'] == 'en_US' From e2bbc460cbfec8913e17229e0edbdabc4a2d0104 Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Fri, 23 Apr 2021 14:43:50 -0400 Subject: [PATCH 20/22] add readme (#21) * add readme * Update cli.py Walking back inadvertent changes from a rebase * Update README.md --- README.md | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++ dsaps/cli.py | 2 +- 2 files changed, 72 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fecda7b..01e4fee 100644 --- a/README.md +++ b/README.md @@ -1 +1,72 @@ # dsaps + +This command line application provides several ways of interacting with the [DSpace](https://github.com/DSpace/DSpace) API. This application was written for DSpace 6.3, it has not been tested against other DSpace versions. + +## Installation +Clone the repository and install using [pipenv](https://github.com/pypa/pipenv): +``` +pipenv install +``` +After installation, run the application with: +``` +pipenv run dsaps +``` + +## Authentication + +To authenticate, use the following parameters + +Option (short) | Option (long)     | Description +------ | ------ | ----------- +N/A | --url | The DSpace API URL (e.g. https://dspace.mit.edu/rest), defaults to the DSPACE_URL environmental variable if nothing is specified +-e | --email | The email of the user for authentication. +-p | --password | The password for authentication. + +## Commands + +### additems +Adds items to a specified collection from a metadata CSV, a field mapping file, and a directory of files. May be run in conjunction with the newcollection CLI command. + +Option (short) | Option (long)             | Description +------ | ------ | ------- +-m | --metadata-csv | The path to the CSV file of metadata for the items. +-f | --field-map | The path to JSON field mapping file. +-d | --content-directory | The full path to the content, either a directory of files or a URL for the storage location. 
+-t | --file-type | The file type to be uploaded, if limited to one file type. +-r | --ingest-report| Create ingest report for updating other systems. +-c | --collection-handle | The handle of the collection to which items are being added. + + +#### Example Usage +``` +pipenv run dsaps --url https://dspace.com/rest -e abc@def.com -p ******** additems -m coll_metadata.csv -f config/aspace_mapping.json -d /files/pdfs -t pdf -r -c 111.1/111111 +``` + +### newcollection +Posts a new collection to a specified community. Used in conjunction with the additems CLI command to populate the new collection with items. + +Option (short) | Option (long)            | Description +------ | ------ | ------- +-c | --community-handle | The handle of the community in which to create the collection. +-n | --collection-name | The name of the collection to be created. + +#### Example Usage +``` +pipenv run dsaps --url https://dspace.com/rest -e abc@def.com -p ******** newcollection -c 222.2/222222 -n Test\ Collection additems -m coll_metadata.csv -f config/aspace_mapping.json -d /files/pdfs -t pdf -r +``` + +### reconcile +Runs a reconciliation of the specified files and metadata that produces reports of files with no metadata, metadata with no files, metadata matched to files, and an updated version of the metadata CSV with only the records that have matching files. + + +Option (short) | Option (long)             | Description +------ | ------ | ------- +-m | --metadata-csv | The path of the CSV file of metadata. +-o | --output-directory | The path of the output files, include / at the end of the path. +-d | --content-directory | The full path to the content, either a directory of files or a URL for the storage location. +-t | --file-type | The file type to be uploaded. 
+ +#### Example Usage +``` +pipenv run dsaps --url https://dspace.com/rest -e abc@def.com -p ******** reconcile -m coll_metadata.csv -o /output -d /files/pdfs -t pdf +``` diff --git a/dsaps/cli.py b/dsaps/cli.py index ac79117..087fb6c 100644 --- a/dsaps/cli.py +++ b/dsaps/cli.py @@ -61,7 +61,7 @@ def main(ctx, url, email, password): @click.option('-m', '--metadata-csv', required=True, type=click.Path(exists=True, file_okay=True, dir_okay=False), help='The path to the CSV file of metadata for the items.') -@click.option('--field-map', required=True, +@click.option('-f', '--field-map', required=True, type=click.Path(exists=True, file_okay=True, dir_okay=False), help='The path to JSON field mapping file.') @click.option('-d', '--content-directory', required=True, From 30cd18c7f050d3aeead3c94fc2826ca1c6552ff0 Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Mon, 26 Apr 2021 10:53:19 -0400 Subject: [PATCH 21/22] aspace mapping changes --- config/aspace_mapping.json | 22 ++++++++++++++++++- dsaps/models.py | 4 +++- tests/conftest.py | 7 ------ tests/fixtures/aspace_metadata_delimited.csv | 6 ++--- .../fixtures/aspace_metadata_no_delimiter.csv | 4 ++-- tests/fixtures/metadata_delim.csv | 3 --- tests/fixtures/standard_mapping.json | 22 ------------------- tests/test_cli.py | 12 +++++----- tests/test_helpers.py | 8 +++---- tests/test_models.py | 12 +++++----- 10 files changed, 45 insertions(+), 55 deletions(-) delete mode 100644 tests/fixtures/metadata_delim.csv delete mode 100644 tests/fixtures/standard_mapping.json diff --git a/config/aspace_mapping.json b/config/aspace_mapping.json index 8572479..ea41e3b 100644 --- a/config/aspace_mapping.json +++ b/config/aspace_mapping.json @@ -9,9 +9,29 @@ "language": "en_US", "delimiter": "" }, - "dc.relation.isversionof": { + "source_system_identifier": { "csv_field_name": "uri", "language": null, "delimiter": "" + }, + "dc.contributor.author": { + "csv_field_name": "author", + "language": null, + "delimiter": "|" + }, + "dc.description": { + "csv_field_name": "description", + "language": "en_US", + "delimiter": "" + }, + "dc.rights": { + "csv_field_name": "rights_statement", + "language": "en_US", + "delimiter": "" + }, + "dc.rights.uri": { + "csv_field_name": "rights_uri", + "language": null, + "delimiter": "" } } \ No newline at end of file diff --git a/dsaps/models.py b/dsaps/models.py index 40100d0..a95f088 100644 --- a/dsaps/models.py +++ b/dsaps/models.py @@ -205,8 +205,10 @@ def from_row(cls, row, field_map): if f == 'file_identifier': file_identifier = field continue # file_identifier is not included in DSpace metadata - if f == 'dc.relation.isversionof': + if f == 'source_system_identifier': source_system_identifier = field + continue # source_system_identifier is not included in DSpace + # metadata delimiter = field_map[f]['delimiter'] language = field_map[f]['language'] if delimiter: diff --git a/tests/conftest.py b/tests/conftest.py index fddeb8a..1b53bde 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -48,13 +48,6 @@ def aspace_mapping(): yield mapping -@pytest.fixture() -def standard_mapping(): - with open('tests/fixtures/standard_mapping.json') as f: - mapping = json.load(f) - yield mapping - - @pytest.fixture() def output_dir(tmp_path): output_dir = tmp_path / 'output' diff --git a/tests/fixtures/aspace_metadata_delimited.csv b/tests/fixtures/aspace_metadata_delimited.csv index 8482b09..5a0bab2 100644 --- a/tests/fixtures/aspace_metadata_delimited.csv +++ b/tests/fixtures/aspace_metadata_delimited.csv @@ -1,3 +1,3 @@ 
-uri,title,file_identifier,authors -/repo/0/ao/456,Tast Item,tast,"Smith, John|Smith, Jane" -/repo/0/ao/123,Test Item,test,"Smith, Jane" \ No newline at end of file +uri,title,file_identifier,author,description,rights_statement,rights_uri +/repo/0/ao/456,Tast Item,tast,"Smith, John|Smith, Jane","More info at /repo/0/ao/456","Totally Free","http://free.gov" +/repo/0/ao/123,Test Item,test,"Smith, Jane","More info at /repo/0/ao/123","Totally Free","http://free.gov" \ No newline at end of file diff --git a/tests/fixtures/aspace_metadata_no_delimiter.csv b/tests/fixtures/aspace_metadata_no_delimiter.csv index a078983..d131dde 100644 --- a/tests/fixtures/aspace_metadata_no_delimiter.csv +++ b/tests/fixtures/aspace_metadata_no_delimiter.csv @@ -1,2 +1,2 @@ -uri,title,file_identifier,authors -/repo/0/ao/123,Test Item,test,"Smith, Jane" +uri,title,file_identifier,author,description,rights_statement,rights_uri +/repo/0/ao/123,Test Item,test,"Smith, Jane","More info at /repo/0/ao/123","Totally Free","http://free.gov" diff --git a/tests/fixtures/metadata_delim.csv b/tests/fixtures/metadata_delim.csv deleted file mode 100644 index 1d7fb4c..0000000 --- a/tests/fixtures/metadata_delim.csv +++ /dev/null @@ -1,3 +0,0 @@ -uri,title,file_identifier,authors -/repo/0/ao/123,Test Item,test,"Smith, Jane" -/repo/0/ao/456,Tast Item,tast,"Smith, John|Smith, Jane" diff --git a/tests/fixtures/standard_mapping.json b/tests/fixtures/standard_mapping.json deleted file mode 100644 index 4a5713a..0000000 --- a/tests/fixtures/standard_mapping.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "file_identifier": { - "csv_field_name": "file_identifier", - "language": null, - "delimiter": "" - }, - "dc.title": { - "csv_field_name": "title", - "language": "en_US", - "delimiter": "" - }, - "dc.relation.isversionof": { - "csv_field_name": "uri", - "language": null, - "delimiter": "" - }, - "dc.contributor.author": { - "csv_field_name": "authors", - "language": null, - "delimiter": "|" - } -} \ No newline at end of file diff --git a/tests/test_cli.py b/tests/test_cli.py index 937ba80..6fc9846 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -9,9 +9,8 @@ def test_additems(runner, input_dir): '--password', '1234', 'additems', '--metadata-csv', - 'tests/fixtures/metadata_delim.csv', - '--field-map', - 'tests/fixtures/standard_mapping.json', + 'tests/fixtures/aspace_metadata_delimited.csv', + '--field-map', 'config/aspace_mapping.json', '--content-directory', input_dir, '--file-type', 'pdf', '--collection-handle', '333.3333']) @@ -25,9 +24,8 @@ def test_additems(runner, input_dir): '--collection-name', 'Test Collection', 'additems', '--metadata-csv', - 'tests/fixtures/metadata_delim.csv', - '--field-map', - 'tests/fixtures/standard_mapping.json', + 'tests/fixtures/aspace_metadata_delimited.csv', + '--field-map', 'config/aspace_mapping.json', '--content-directory', input_dir, '--file-type', 'pdf']) assert result.exit_code == 0 @@ -53,7 +51,7 @@ def test_reconcile(runner, input_dir, output_dir): '--password', '1234', 'reconcile', '--metadata-csv', - 'tests/fixtures/metadata_delim.csv', + 'tests/fixtures/aspace_metadata_delimited.csv', '--output-directory', output_dir, '--content-directory', input_dir, '--file-type', 'pdf' diff --git a/tests/test_helpers.py b/tests/test_helpers.py index f00f9f5..78e0729 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -38,7 +38,7 @@ def test_create_ingest_report(runner, output_dir): def test_create_metadata_id_list(input_dir): """Test create_metadata_id_list function.""" - metadata_path 
= 'tests/fixtures/metadata_delim.csv' + metadata_path = 'tests/fixtures/aspace_metadata_delimited.csv' metadata_ids = helpers.create_metadata_id_list(metadata_path) assert 'test' in metadata_ids assert 'tast' in metadata_ids @@ -65,10 +65,10 @@ def test_match_metadata_to_files(): def test_update_metadata_csv(input_dir, output_dir): """Test update_metadata_csv function.""" metadata_matches = ['test'] - helpers.update_metadata_csv('tests/fixtures/metadata_delim.csv', + helpers.update_metadata_csv('tests/fixtures/aspace_metadata_delimited.csv', output_dir, metadata_matches) - with open(f'{output_dir}updated-metadata_delim.csv') as csvfile2: - reader = csv.DictReader(csvfile2) + with open(f'{output_dir}updated-aspace_metadata_delimited.csv') as csvfile: + reader = csv.DictReader(csvfile) for row in reader: assert row['uri'] == '/repo/0/ao/123' assert row['title'] == 'Test Item' diff --git a/tests/test_models.py b/tests/test_models.py index f9db33e..bd3eef4 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -121,15 +121,17 @@ def test_item_bitstreams_from_directory(input_dir): assert item.bitstreams[1].name == 'test_02.pdf' -def test_item_from_row(aspace_delimited_csv, standard_mapping): +def test_item_from_row(aspace_delimited_csv, aspace_mapping): row = next(aspace_delimited_csv) - item = models.Item.from_row(row, standard_mapping) + item = models.Item.from_row(row, aspace_mapping) assert attr.asdict(item)['metadata'] == [ {'key': 'dc.title', 'value': 'Tast Item', 'language': 'en_US'}, - {'key': 'dc.relation.isversionof', 'value': '/repo/0/ao/456', - 'language': None}, {'key': 'dc.contributor.author', 'value': 'Smith, John', 'language': None}, {'key': 'dc.contributor.author', 'value': 'Smith, Jane', - 'language': None} + 'language': None}, + {'key': 'dc.description', 'value': 'More info at /repo/0/ao/456', + 'language': 'en_US'}, + {'key': 'dc.rights', 'value': 'Totally Free', 'language': 'en_US'}, + {'key': 'dc.rights.uri', 'value': 'http://free.gov', 'language': None} ] From 263d8932dc802770170526702c0f608be16b6438 Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Tue, 27 Apr 2021 10:48:42 -0400 Subject: [PATCH 22/22] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 01e4fee..be61a0e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # dsaps -This command line application provides several ways of interacting with the [DSpace](https://github.com/DSpace/DSpace) API. This application was written for DSpace 6.3, it has not been tested against other DSpace versions. +This command line application provides several ways of interacting with the [DSpace](https://github.com/DSpace/DSpace) API. This application was written for DSpace 6.3, it has not been tested against other DSpace versions. Previously, this branch of the repository was a set of self-contained scripts that could be run independently, those scripts can be found as a [release](https://github.com/MITLibraries/dspace-api-python-scripts/releases/tag/v1.0). ## Installation Clone the repository and install using [pipenv](https://github.com/pypa/pipenv):
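
A note on the reconcile refactor above: the matching in `dsaps/helpers.py` reduces to two list comprehensions over file names and `file_identifier` values. The snippet below is only a standalone sketch of that logic (not a call into the dsaps package); the sample names echo the test fixtures used in the patches above.

```python
# Sketch of the matching performed by the helpers in this patch series;
# the names below mirror the test fixtures (test_01.pdf, tast_01.pdf, etc.).
file_list = ['test_01.pdf', 'tast_01.pdf', 'best_01.pdf']  # like create_file_list output
metadata_ids = ['test', 'tast']                            # like create_metadata_id_list output

# Files whose names begin with a known file_identifier.
file_matches = [f for m in metadata_ids for f in file_list if f.startswith(m)]
# Identifiers that have at least one matching file.
metadata_matches = [m for f in file_list for m in metadata_ids if f.startswith(m)]

no_metadata = set(file_list) - set(file_matches)       # {'best_01.pdf'}
no_files = set(metadata_ids) - set(metadata_matches)   # set()
```

The reconcile command then writes each of these lists to CSV with `create_csv_from_list` and produces the updated metadata CSV from `metadata_matches`.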
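
The field-mapping behaviour exercised by `config/aspace_mapping.json` and `test_item_from_row` can be summarised in a few lines. The snippet below is a sketch of that CSV-row-to-metadata translation only, not the `dsaps.models` implementation; it uses a trimmed mapping and a sample row shaped like `tests/fixtures/aspace_metadata_delimited.csv`.

```python
import json

# Trimmed mapping in the same shape as config/aspace_mapping.json.
field_map = {
    'dc.title': {'csv_field_name': 'title', 'language': 'en_US', 'delimiter': ''},
    'dc.contributor.author': {'csv_field_name': 'author', 'language': None,
                              'delimiter': '|'},
}

# Row shaped like tests/fixtures/aspace_metadata_delimited.csv.
row = {'title': 'Tast Item', 'author': 'Smith, John|Smith, Jane'}

metadata = []
for key, rule in field_map.items():
    value = row.get(rule['csv_field_name'], '')
    if not value:
        continue
    # Multi-valued fields are split on the configured delimiter.
    values = value.split(rule['delimiter']) if rule['delimiter'] else [value]
    metadata.extend({'key': key, 'value': v, 'language': rule['language']}
                    for v in values)

print(json.dumps(metadata, indent=2))
```

As in the `models.py` change above, `file_identifier` and `source_system_identifier` are handled separately and never become DSpace metadata entries.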