Skip to content

Commit 2fd06d3

Browse files
committed
PR updates
1 parent f28c217 commit 2fd06d3

File tree

4 files changed

+39
-31
lines changed

4 files changed

+39
-31
lines changed

dsaps/cli.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,16 @@
1313

1414
logger = structlog.get_logger()
1515

16+
cwd = os.getcwd()
17+
18+
19+
def validate_path(ctx, param, value):
20+
"""Validates th formatting of The submitted path"""
21+
if value[-1] == '/':
22+
return value
23+
else:
24+
raise click.BadParameter('Include / at the end of the path.')
25+
1626

1727
@click.group(chain=True)
1828
@click.option('--url', envvar='DSPACE_URL', required=True,)
@@ -51,11 +61,13 @@ def main(ctx, url, email, password):
5161

5262
@main.command()
5363
@click.option('-m', '--metadata-csv', required=True,
54-
type=click.Path(exists=True),
64+
type=click.Path(exists=True, file_okay=True, dir_okay=False),
5565
help='The path to the CSV file of metadata for the items.')
56-
@click.option('--field-map', required=True, type=click.Path(exists=True),
66+
@click.option('--field-map', required=True,
67+
type=click.Path(exists=True, file_okay=True, dir_okay=False),
5768
help='The path to JSON field mapping file.')
5869
@click.option('-d', '--content-directory', required=True,
70+
type=click.Path(exists=True, dir_okay=True, file_okay=False),
5971
help='The full path to the content, either a directory of files '
6072
'or a URL for the storage location.')
6173
@click.option('-t', '--file-type',
@@ -116,15 +128,19 @@ def newcollection(ctx, community_handle, collection_name):
116128

117129
@main.command()
118130
@click.option('-m', '--metadata-csv', required=True,
131+
type=click.Path(exists=True, file_okay=True, dir_okay=False),
119132
help='The path of the CSV file of metadata.')
120-
@click.option('-o', '--output-directory', default='', required=True,
133+
@click.option('-o', '--output-directory',
134+
type=click.Path(exists=True, dir_okay=True, file_okay=False),
135+
default=f'{cwd}/', callback=validate_path,
121136
help='The path of the output files, include / at the end of the '
122137
'path.')
123138
@click.option('-d', '--content-directory', required=True,
124139
help='The full path to the content, either a directory of files '
125140
'or a URL for the storage location.')
126-
@click.option('-t', '--file-type', required=True,
127-
help='The file type to be uploaded.')
141+
@click.option('-t', '--file-type',
142+
help='The file type to be uploaded, if limited to one file '
143+
'type.', default='*')
128144
def reconcile(metadata_csv, output_directory, content_directory, file_type):
129145
"""Runs a reconciliation of the specified files and metadata that produces
130146
reports of files with no metadata, metadata with no files, metadata

dsaps/helpers.py

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,9 @@ def create_csv_from_list(list_name, output):
1313

1414

1515
def create_file_list(file_path, file_type):
16-
"""Creates a dict of file IDs and file paths."""
16+
"""Creates a list of file names."""
1717
files = glob.glob(f'{file_path}/**/*.{file_type}', recursive=True)
18-
file_list = []
19-
for file in files:
20-
file_list.append(os.path.basename(file))
18+
file_list = [os.path.basename(file) for file in files]
2119
return file_list
2220

2321

@@ -37,37 +35,32 @@ def create_metadata_id_list(metadata_csv):
3735
metadata_ids = []
3836
with open(metadata_csv) as csvfile:
3937
reader = csv.DictReader(csvfile)
40-
for row in [r for r in reader if r['file_identifier'] != '']:
41-
metadata_ids.append(row['file_identifier'])
38+
metadata_ids = [row['file_identifier'] for row in reader
39+
if row['file_identifier'] != '']
4240
return metadata_ids
4341

4442

4543
def match_files_to_metadata(file_list, metadata_ids):
4644
"""Creates a list of files matched to metadata records."""
47-
file_matches = []
48-
for file_id in file_list:
49-
for metadata_id in [m for m in metadata_ids
50-
if file_id.startswith(m)]:
51-
file_matches.append(file_id)
45+
file_matches = [file_id for metadata_id in metadata_ids
46+
for file_id in file_list
47+
if file_id.startswith(metadata_id)]
5248
return file_matches
5349

5450

5551
def match_metadata_to_files(file_list, metadata_ids):
5652
"""Creates a list of metadata records matched to files."""
57-
metadata_matches = []
58-
for metadata_id in metadata_ids:
59-
for file_id in [f for f in file_list
60-
if f.startswith(metadata_id)]:
61-
metadata_matches.append(metadata_id)
53+
metadata_matches = [metadata_id for f in file_list for metadata_id in
54+
metadata_ids if f.startswith(metadata_id)]
6255
return metadata_matches
6356

6457

65-
def update_metadata_csv(metadata_csv, output_path, metadata_matches):
58+
def update_metadata_csv(metadata_csv, output_directory, metadata_matches):
6659
"""Creates an updated CSV of metadata records with matching files."""
6760
with open(metadata_csv) as csvfile:
6861
reader = csv.DictReader(csvfile)
6962
upd_md_file_name = f'updated-{os.path.basename(metadata_csv)}'
70-
with open(f'{output_path}{upd_md_file_name}', 'w') as updated_csv:
63+
with open(f'{output_directory}{upd_md_file_name}', 'w') as updated_csv:
7164
writer = csv.DictWriter(updated_csv, fieldnames=reader.fieldnames)
7265
writer.writeheader()
7366
for row in reader:

tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def runner():
6868

6969

7070
@pytest.fixture(autouse=True)
71-
def web_mock(input_dir):
71+
def web_mock():
7272
with requests_mock.Mocker() as m:
7373
cookies = {'JSESSIONID': '11111111'}
7474
m.post('mock://example.com/login', cookies=cookies)

tests/test_helpers.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def test_create_csv_from_list(output_dir):
1515

1616

1717
def test_create_file_list(input_dir):
18-
"""Test create_file_dict function."""
18+
"""Test create_file_listt function."""
1919
file_list = helpers.create_file_list(input_dir, 'pdf')
2020
for file_id in ['test_02.pdf', 'test_01.pdf', 'best_01.pdf']:
2121
assert file_id in file_list
@@ -46,19 +46,18 @@ def test_create_metadata_id_list(input_dir):
4646

4747
def test_match_files_to_metadata():
4848
"""Test match_files_to_metadata function."""
49-
file_dict = {'test_01': 'files/test_01.pdf'}
49+
file_list = ['test_01.pdf']
5050
metadata_ids = ['test', 'tast']
51-
file_matches = helpers.match_files_to_metadata(file_dict, metadata_ids)
51+
file_matches = helpers.match_files_to_metadata(file_list, metadata_ids)
5252
assert len(file_matches) == 1
53-
assert 'test_01' in file_matches
53+
assert 'test_01.pdf' in file_matches
5454

5555

5656
def test_match_metadata_to_files():
5757
"""Test match_metadata_to_files function."""
58-
file_dict = {'test_01': 'files/test_01.pdf',
59-
'tast_01': 'files/tast_01.pdf'}
58+
file_list = ['test_01.pdf', 'tast_01.pdf']
6059
metadata_ids = ['test']
61-
file_matches = helpers.match_metadata_to_files(file_dict, metadata_ids)
60+
file_matches = helpers.match_metadata_to_files(file_list, metadata_ids)
6261
assert len(file_matches) == 1
6362
assert 'test' in file_matches
6463

0 commit comments

Comments
 (0)