74 changes: 51 additions & 23 deletions dsaps/cli.py
@@ -14,6 +14,14 @@
logger = structlog.get_logger()


def validate_path(ctx, param, value):
"""Validates th formatting of The submitted path"""
if value[-1] == '/':
return value
else:
raise click.BadParameter('Include / at the end of the path.')
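
For reference, a minimal sketch of how the new `validate_path` callback behaves when Click passes an option value through it (the sample paths are invented; the import assumes the callback stays in dsaps/cli.py as added here):

```python
import click

from dsaps.cli import validate_path

# A value ending in '/' is returned unchanged.
assert validate_path(None, None, 'output/') == 'output/'

# Anything else is rejected, and Click reports it as a usage error for the
# option that declared callback=validate_path.
try:
    validate_path(None, None, 'output')
except click.BadParameter as err:
    print(err.message)  # Include / at the end of the path.
```
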


@click.group(chain=True)
@click.option('--url', envvar='DSPACE_URL', required=True,)
@click.option('-e', '--email', envvar='TEST_EMAIL', required=True,
@@ -51,11 +59,13 @@ def main(ctx, url, email, password):

@main.command()
@click.option('-m', '--metadata-csv', required=True,
type=click.Path(exists=True),
help='The full path to the CSV file of metadata for the items.')
@click.option('--field-map', required=True, type=click.Path(exists=True),
help='Path to JSON field mapping file')
@click.option('-d', '--directory', required=True,
type=click.Path(exists=True, file_okay=True, dir_okay=False),
help='The path to the CSV file of metadata for the items.')
@click.option('--field-map', required=True,
type=click.Path(exists=True, file_okay=True, dir_okay=False),
help='The path to JSON field mapping file.')
@click.option('-d', '--content-directory', required=True,
type=click.Path(exists=True, dir_okay=True, file_okay=False),
help='The full path to the content, either a directory of files '
'or a URL for the storage location.')
@click.option('-t', '--file-type',
@@ -67,11 +77,11 @@ def main(ctx, url, email, password):
help='The handle of the collection to which items are being '
'added.', default=None)
@click.pass_context
def additems(ctx, metadata_csv, field_map, directory, file_type, ingest_report,
collection_handle):
def additems(ctx, metadata_csv, field_map, content_directory, file_type,
ingest_report, collection_handle):
"""Adds items to a specified collection from a metadata CSV, a field
mapping file, and a directory of files. May be run in conjunction with the
newcollection CLI commands."""
newcollection CLI command."""
client = ctx.obj['client']
start_time = ctx.obj['start_time']
if 'collection_uuid' not in ctx.obj and collection_handle is None:
@@ -87,7 +97,7 @@ def additems(ctx, metadata_csv, field_map, directory, file_type, ingest_report,
mapping = json.load(jsonfile)
collection = Collection.from_csv(metadata, mapping)
for item in collection.items:
item.bitstreams_from_directory(directory, file_type)
item.bitstreams_from_directory(content_directory, file_type)
collection.uuid = collection_uuid
items = collection.post_items(client)
if ingest_report:
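
Taken together, the two `additems` hunks amount to the flow sketched below. This is a consolidation for readability, not code from the PR: the authenticated `client`, the use of `csv.DictReader`, the import path for `Collection`, and the resolved `collection_uuid` are assumptions; the `Collection` calls are the ones shown above.

```python
import csv
import json

from dsaps.models import Collection  # assumed import path


def ingest(client, metadata_csv, field_map, content_directory, file_type,
           collection_uuid):
    """Sketch of what additems does once the CLI group has logged in."""
    # Read the metadata CSV and the JSON field map (DictReader is an
    # assumption; the CSV-reading line falls outside the hunks shown above).
    with open(metadata_csv) as csvfile:
        metadata = list(csv.DictReader(csvfile))
    with open(field_map) as jsonfile:
        mapping = json.load(jsonfile)
    # Build the collection, attach files from --content-directory, and post.
    collection = Collection.from_csv(metadata, mapping)
    for item in collection.items:
        item.bitstreams_from_directory(content_directory, file_type)
    collection.uuid = collection_uuid
    return collection.post_items(client)
```
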
@@ -114,20 +124,38 @@ def newcollection(ctx, community_handle, collection_name):
ctx.obj['collection_uuid'] = collection_uuid


# @main.command()
# @click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file',
# help='The path of the CSV file of metadata.')
# @click.option('-o', '--output_path', prompt='Enter the output path',
# default='', help='The path of the output files, include '
# '/ at the end of the path')
# @click.option('-f', '--file_path', prompt='Enter the path',
# help='The path of the content, a URL or local drive path.'
# 'Include / at the end of a local drive path.')
# @click.option('-t', '--file_type', prompt='Enter the file type',
# help='The file type to be uploaded.')
# def reconcile(metadata_csv, file_path, file_type, output_path):
# workflows.reconcile_files_and_metadata(metadata_csv, output_path,
# file_path, file_type)
@main.command()
@click.option('-m', '--metadata-csv', required=True,
type=click.Path(exists=True, file_okay=True, dir_okay=False),
help='The path of the CSV file of metadata.')
@click.option('-o', '--output-directory',
type=click.Path(exists=True, file_okay=False),
default=f'{os.getcwd()}/', callback=validate_path,
help='The path of the output files, include / at the end of the '
'path.')
@click.option('-d', '--content-directory', required=True,
help='The full path to the content, either a directory of files '
'or a URL for the storage location.')
@click.option('-t', '--file-type',
help='The file type to be uploaded, if limited to one file '
'type.', default='*')
def reconcile(metadata_csv, output_directory, content_directory, file_type):
"""Runs a reconciliation of the specified files and metadata that produces
reports of files with no metadata, metadata with no files, metadata
matched to files, and an updated version of the metadata CSV with only
the records that have matching files."""
file_ids = helpers.create_file_list(content_directory, file_type)
metadata_ids = helpers.create_metadata_id_list(metadata_csv)
metadata_matches = helpers.match_metadata_to_files(file_ids, metadata_ids)
file_matches = helpers.match_files_to_metadata(file_ids, metadata_ids)
no_files = set(metadata_ids) - set(metadata_matches)
no_metadata = set(file_ids) - set(file_matches)
helpers.create_csv_from_list(no_metadata, f'{output_directory}no_metadata')
helpers.create_csv_from_list(no_files, f'{output_directory}no_files')
helpers.create_csv_from_list(metadata_matches,
f'{output_directory}metadata_matches')
helpers.update_metadata_csv(metadata_csv, output_directory,
metadata_matches)


if __name__ == '__main__':
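
To make the reconciliation arithmetic concrete, here is a small worked example of the match helpers and set differences the command relies on (the IDs are invented; the helpers are the ones shown in dsaps/helpers.py below):

```python
from dsaps import helpers

# Invented sample IDs: create_file_list returns base file names like these,
# and create_metadata_id_list returns the CSV's file_identifier column.
file_ids = ['alpha_001.pdf', 'beta_002.pdf', 'gamma_003.pdf']
metadata_ids = ['alpha_001', 'beta_002', 'delta_004']

metadata_matches = helpers.match_metadata_to_files(file_ids, metadata_ids)
file_matches = helpers.match_files_to_metadata(file_ids, metadata_ids)

print(metadata_matches)  # ['alpha_001', 'beta_002']
print(file_matches)      # ['alpha_001.pdf', 'beta_002.pdf']

# delta_004 has metadata but no file; gamma_003.pdf has a file but no metadata.
print(set(metadata_ids) - set(metadata_matches))  # {'delta_004'} -> no_files report
print(set(file_ids) - set(file_matches))          # {'gamma_003.pdf'} -> no_metadata report
```
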
43 changes: 15 additions & 28 deletions dsaps/helpers.py
@@ -2,11 +2,6 @@
import glob
import os

import structlog


logger = structlog.get_logger()


def create_csv_from_list(list_name, output):
"""Creates CSV file from list content."""
@@ -17,14 +12,11 @@ def create_csv_from_list(list_name, output):
writer.writerow([item])


def create_file_dict(file_path, file_type):
"""Creates a dict of file IDs and file paths."""
def create_file_list(file_path, file_type):
"""Creates a list of file names."""
files = glob.glob(f'{file_path}/**/*.{file_type}', recursive=True)
file_dict = {}
for file in files:
file_name = os.path.splitext(os.path.basename(file))[0]
file_dict[file_name] = file
return file_dict
file_list = [os.path.basename(file) for file in files]
return file_list
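
A quick sketch of what the rewritten `create_file_list` returns, assuming a content directory laid out as in the comments (the paths and file names are invented):

```python
from dsaps import helpers

# Assumed layout:
#   content/report_001.pdf
#   content/scans/report_002.pdf
#   content/notes.txt
# The recursive glob pattern f'{file_path}/**/*.{file_type}' matches files at
# any depth, and only base names are kept, so subdirectory paths drop out.
print(helpers.create_file_list('content', 'pdf'))
# e.g. ['report_001.pdf', 'report_002.pdf'] (order depends on the filesystem)

# The reconcile command's --file-type default of '*' picks up any extension.
print(helpers.create_file_list('content', '*'))
# e.g. ['report_001.pdf', 'report_002.pdf', 'notes.txt']
```
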


def create_ingest_report(items, file_name):
@@ -43,37 +35,32 @@ def create_metadata_id_list(metadata_csv):
metadata_ids = []
with open(metadata_csv) as csvfile:
reader = csv.DictReader(csvfile)
for row in [r for r in reader if r['file_identifier'] != '']:
metadata_ids.append(row['file_identifier'])
metadata_ids = [row['file_identifier'] for row in reader
if row['file_identifier'] != '']
return metadata_ids


def match_files_to_metadata(file_dict, metadata_ids):
def match_files_to_metadata(file_list, metadata_ids):
"""Creates a list of files matched to metadata records."""
file_matches = []
for file_id, v in file_dict.items():
for metadata_id in [m for m in metadata_ids
if file_id.startswith(m)]:
file_matches.append(file_id)
file_matches = [file_id for metadata_id in metadata_ids
for file_id in file_list
if file_id.startswith(metadata_id)]
return file_matches


def match_metadata_to_files(file_dict, metadata_ids):
def match_metadata_to_files(file_list, metadata_ids):
"""Creates a list of metadata records matched to files."""
metadata_matches = []
for metadata_id in metadata_ids:
for file_id in [f for f in file_dict
if f.startswith(metadata_id)]:
metadata_matches.append(metadata_id)
metadata_matches = [metadata_id for f in file_list for metadata_id in
metadata_ids if f.startswith(metadata_id)]
return metadata_matches


def update_metadata_csv(metadata_csv, output_path, metadata_matches):
def update_metadata_csv(metadata_csv, output_directory, metadata_matches):
"""Creates an updated CSV of metadata records with matching files."""
with open(metadata_csv) as csvfile:
reader = csv.DictReader(csvfile)
upd_md_file_name = f'updated-{os.path.basename(metadata_csv)}'
with open(f'{output_path}{upd_md_file_name}', 'w') as updated_csv:
with open(f'{output_directory}{upd_md_file_name}', 'w') as updated_csv:
writer = csv.DictWriter(updated_csv, fieldnames=reader.fieldnames)
writer.writeheader()
for row in reader:
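
And a short sketch of `update_metadata_csv` in use, continuing the invented IDs from the reconcile example above (the row filtering on `file_identifier` is implied by the docstring; the tail of the function is collapsed in this view):

```python
from dsaps import helpers

# metadata.csv (assumed) has file_identifier values alpha_001, beta_002,
# and delta_004; only the first two matched files during reconciliation.
metadata_matches = ['alpha_001', 'beta_002']

# Writes output/updated-metadata.csv: the original header plus only the rows
# whose file_identifier appears in metadata_matches (delta_004 is dropped).
helpers.update_metadata_csv('metadata.csv', 'output/', metadata_matches)
```
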
78 changes: 0 additions & 78 deletions dsaps/metadata.py

This file was deleted.

20 changes: 2 additions & 18 deletions tests/conftest.py
@@ -5,7 +5,7 @@
import pytest
import requests_mock

from dsaps import metadata, models
from dsaps import models


@pytest.fixture()
@@ -41,14 +41,6 @@ def aspace_delimited_csv():
yield reader


@pytest.fixture()
def json_metadata_delim():
json_metadata = metadata.create_json_metadata(
'tests/fixtures/metadata_delim.csv', 'delimited'
)
return json_metadata


@pytest.fixture()
def aspace_mapping():
with open('config/aspace_mapping.json') as f:
@@ -63,14 +55,6 @@ def standard_mapping():
yield mapping


@pytest.fixture()
def json_metadata_num_col():
json_metadata = metadata.create_json_metadata(
'tests/fixtures/metadata_num_col.csv', 'num_columns'
)
return json_metadata


@pytest.fixture()
def output_dir(tmp_path):
output_dir = tmp_path / 'output'
@@ -84,7 +68,7 @@ def runner():


@pytest.fixture(autouse=True)
def web_mock(input_dir):
def web_mock():
with requests_mock.Mocker() as m:
cookies = {'JSESSIONID': '11111111'}
m.post('mock://example.com/login', cookies=cookies)
31 changes: 16 additions & 15 deletions tests/test_cli.py
@@ -12,7 +12,7 @@ def test_additems(runner, input_dir):
'tests/fixtures/metadata_delim.csv',
'--field-map',
'tests/fixtures/standard_mapping.json',
'--directory', input_dir,
'--content-directory', input_dir,
'--file-type', 'pdf',
'--collection-handle', '333.3333'])
assert result.exit_code == 0
@@ -28,7 +28,7 @@ def test_additems(runner, input_dir):
'tests/fixtures/metadata_delim.csv',
'--field-map',
'tests/fixtures/standard_mapping.json',
'--directory', input_dir,
'--content-directory', input_dir,
'--file-type', 'pdf'])
assert result.exit_code == 0

@@ -45,16 +45,17 @@ def test_newcollection(runner, input_dir):
assert result.exit_code == 0


# def test_reconcile(runner, input_dir, output_dir):
# """Test reconcile command."""
# result = runner.invoke(main,
# ['--url', 'mock://example.com/',
# '--email', 'test@test.mock',
# '--password', '1234',
# 'reconcile',
# '--metadata_csv', 'tests/fixtures/metadata_delim.csv',
# '--file_path', input_dir,
# '--file_type', 'pdf',
# '--output_path', output_dir
# ])
# assert result.exit_code == 0
def test_reconcile(runner, input_dir, output_dir):
"""Test reconcile command."""
result = runner.invoke(main,
['--url', 'mock://example.com/',
'--email', 'test@test.mock',
'--password', '1234',
'reconcile',
'--metadata-csv',
'tests/fixtures/metadata_delim.csv',
'--output-directory', output_dir,
'--content-directory', input_dir,
'--file-type', 'pdf'
])
assert result.exit_code == 0