Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions dsaps/cli.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import csv
import datetime
import glob
import json
Expand Down Expand Up @@ -105,5 +106,44 @@ def newcoll(ctx, comm_handle, coll_name, metadata, file_path, file_type,
models.elapsed_time(start_time, 'Total runtime:')


@main.command()
@click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file',
              help='The path of the CSV file of metadata.')
@click.option('-f', '--file_path', prompt='Enter the path',
              help='The path of the content, a URL or local drive path.')
@click.option('-t', '--file_type', prompt='Enter the file type',
              help='The file type to be uploaded.')
def reconcile(metadata_csv, file_path, file_type):
    """Reconcile the metadata CSV against the files found at file_path.

    Writes three CSV reports to the working directory:
    ``no_metadata.csv`` (files with no metadata row), ``no_files.csv``
    (metadata rows with no file), and ``metadata_matches.csv`` (ids
    present in both).
    """
    # Fix: file_dict was previously uninitialized, so the local-path
    # branch below raised NameError on first assignment.
    file_dict = {}
    if file_path.startswith('http'):
        file_dict = models.build_file_dict_remote(file_path, file_type,
                                                  file_dict)
    else:
        files = glob.glob(f'{file_path}/**/*.{file_type}', recursive=True)
        for file in files:
            # Key on the bare file name (no directory, no extension).
            file_name = os.path.splitext(os.path.basename(file))[0]
            file_dict[file_name] = file
    metadata_ids = []
    with open(metadata_csv) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            metadata_ids.append(row['file_identifier'])
    # Dict membership is O(1), replacing the previous O(n*m) nested
    # loops; order and duplicates of metadata_ids are preserved so the
    # metadata_matches.csv output is unchanged.
    metadata_matches = [m for m in metadata_ids if m in file_dict]
    no_files = set(metadata_ids) - set(metadata_matches)
    no_metadata = set(file_dict) - set(metadata_matches)
    models.create_csv_from_list(no_metadata, 'no_metadata.csv')
    models.create_csv_from_list(no_files, 'no_files.csv')
    models.create_csv_from_list(metadata_matches, 'metadata_matches.csv')


if __name__ == '__main__':
main()
10 changes: 10 additions & 0 deletions dsaps/models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import csv
import datetime
from functools import partial
import operator
Expand Down Expand Up @@ -197,6 +198,15 @@ def build_file_dict_remote(directory_url, file_type, file_dict):
return file_dict


def create_csv_from_list(list_name, output):
    """Create a single-column CSV file from an iterable of values.

    Writes a header row ``id`` followed by one row per item.

    Args:
        list_name: Iterable of values to write, one per row.
        output: Path of the CSV file to create (overwritten if present).
    """
    # newline='' is required by the csv module to avoid blank rows on
    # Windows; utf-8 is pinned so output doesn't depend on the locale.
    with open(output, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['id'])
        for item in list_name:
            writer.writerow([item])


def elapsed_time(start_time, label):
"""Calculate elapsed time."""
td = datetime.timedelta(seconds=time.time() - start_time)
Expand Down
22 changes: 14 additions & 8 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def sample_content(tmp_path):


def test_authenticate(client):
"""Test authenticate function."""
"""Test authenticate method."""
with requests_mock.Mocker() as m:
email = 'test@test.mock'
password = '1234'
Expand All @@ -39,7 +39,7 @@ def test_authenticate(client):


def test_get_record(client):
"""Test get_record function."""
"""Test get_record method."""
with requests_mock.Mocker() as m:
uri = 'mock://example.com/items/123?expand=all'
json_object = {'metadata': {'title': 'Sample title'}, 'type': 'item'}
Expand All @@ -49,7 +49,7 @@ def test_get_record(client):


def test_filtered_item_search(client):
"""Test filtered_item_search function."""
"""Test filtered_item_search method."""
with requests_mock.Mocker() as m:
key = 'dc.title'
string = 'test'
Expand All @@ -65,7 +65,7 @@ def test_filtered_item_search(client):


def test_post_coll_to_comm(client):
"""Test post_coll_to_comm function."""
"""Test post_coll_to_comm method."""
with requests_mock.Mocker() as m:
comm_handle = '1234'
coll_name = 'Test Collection'
Expand All @@ -79,7 +79,7 @@ def test_post_coll_to_comm(client):


def test_post_items_to_coll(client, sample_content):
"""Test post_items_to_coll function."""
"""Test post_items_to_coll method."""
with requests_mock.Mocker() as m:
coll_metadata = [{"metadata": [
{"key": "file_identifier",
Expand All @@ -102,7 +102,7 @@ def test_post_items_to_coll(client, sample_content):


def test_post_bitstreams_to_item(client, sample_content):
"""Test post_bitstreams_to_item function."""
"""Test post_bitstreams_to_item method."""
with requests_mock.Mocker() as m:
item_id = 'a1b2'
ingest_type = 'local'
Expand All @@ -118,7 +118,7 @@ def test_post_bitstreams_to_item(client, sample_content):


def test__pop_inst(client):
"""Test _pop_inst function."""
"""Test _pop_inst method."""
class_type = models.Collection
rec_obj = {'name': 'Test title', 'type': 'collection', 'items': []}
rec_obj = client._pop_inst(class_type, rec_obj)
Expand All @@ -127,7 +127,7 @@ def test__pop_inst(client):


def test__build_uuid_list(client):
"""Test _build_uuid_list function."""
"""Test _build_uuid_list method."""
rec_obj = {'items': [{'uuid': '1234'}]}
children = 'items'
child_list = client._build_uuid_list(rec_obj, children)
Expand All @@ -149,3 +149,9 @@ def test_build_file_dict_remote():
file_list = models.build_file_dict_remote(directory_url, file_type,
file_dict)
assert '999' in file_list


# # How to test this? Applies to asaps as well
# def test_create_csv_from_list():
# """Test create_csv_from_list function."""
# assert False