Skip to content

Commit

Permalink
Simplify function parsing for input files/directories, delete unit te…
Browse files Browse the repository at this point in the history
…st for empty directory
  • Loading branch information
Qui T Chau committed Jul 29, 2020
1 parent d104b69 commit 4a328bf
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 41 deletions.
62 changes: 28 additions & 34 deletions pds_doi_core/actions/draft.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,30 +54,35 @@ def _resolve_input_into_list_of_names(self, input):
"""Function receives an input which can be a single name, a directory, or a comma-separated list of these. Function will return a list of names.
"""
o_list_of_names = []
if ',' in input:
# Get names separated by comma.
split_tokens = input.split(',')
for token in split_tokens:
# Only save the file name if it is not an empty string as in the case of a comma being the last character:
# -i https://pds-imaging.jpl.nasa.gov/data/nsyt/insight_cameras/data/collection_data.xml,
# or no name provided with just a comma:
# -i ,
if len(token) > 0:

# Split the input using a comma, then inspect each token to check if it is a directory, a filename or a URL.
split_tokens = input.split(',')
for token in split_tokens:
# Only save the file name if it is not an empty string as in the case of a comma being the last character:
# -i https://pds-imaging.jpl.nasa.gov/data/nsyt/insight_cameras/data/collection_data.xml,
# or no name provided with just a comma:
# -i ,
if len(token) > 0:
if os.path.isdir(token):
# Get all file names in a directory.
# Note that the top level directory needs to precede the file name in the for loop.
list_of_names_from_token = [os.path.join(token,f) for f in os.listdir(token) if os.path.isfile(os.path.join(token, f))]

o_list_of_names.extend(list_of_names_from_token)
elif os.path.isfile(token):
# The token is the name of a file, add it.
o_list_of_names.append(token)
elif os.path.isdir(input):
# Get all file names in a directory.
# Note that the top level directory needs to precede the file name in the for loop.
o_list_of_names = [os.path.join(input,f) for f in os.listdir(input) if os.path.isfile(os.path.join(input, f))]
else:
# Get just one name.
o_list_of_names.append(input)
else:
# The token is a URL, add it.
o_list_of_names.append(token)

return o_list_of_names

def _transform_pds4_label_into_osti_record(self, input_file, node, submitter, contributor_value):
"""Function receives an XML input file and transforms it into an OSTI record.
"""

o_transformed_label = None
o_transformed_label = etree.Element("records") # If the file cannot be transformed, an XML text of an empty tree will be returned.

# parse input_file
if not input_file.startswith('http'):
Expand All @@ -86,14 +91,12 @@ def _transform_pds4_label_into_osti_record(self, input_file, node, submitter, co
xml_tree = etree.parse(input_file)
else:
logger.warn(f"Expecting .xml files only, encountering {input_file}")
return o_transformed_label
return etree.tostring(o_transformed_label).decode()
else:
# A URL gets read into memory.
response = requests.get(input_file)
xml_tree = etree.fromstring(response.content)

o_transformed_label = etree.Element("record")

doi_fields = self.m_doi_pds4_label.get_doi_fields_from_pds4(xml_tree)
doi_fields['publisher'] = self._config.get('OTHER', 'doi_publisher')
doi_fields['contributor'] = contributor_value
Expand Down Expand Up @@ -150,13 +153,6 @@ def run(self,

# The value of input can be a list of names, or a directory. Resolve that to a list of names.
list_of_names = self._resolve_input_into_list_of_names(input)
# Return immediately if there are no files found.
# Note that exit() function is not used as it would interfere with the unit test.
if len(list_of_names) == 0:
logger.error(f"Provided input parameter results in no file names parsed or input directory {input} is empty.")
# Return empty tree.
o_doi_label = etree.Element("records")
return etree.tostring(o_doi_label,pretty_print=True).decode()

# Create an empty tree with 'records' as the root tag.
# An element will be added from the output of each file parsed.
Expand All @@ -170,13 +166,11 @@ def run(self,
# Transform the PDS4 label to an OSTI record.
doi_label = self._transform_pds4_label_into_osti_record(input_file,node,submitter,contributor_value)

# The returned label can be None if it is not an expected XML file.
if doi_label:
# Concatenate each label to o_doi_labels to return.
doc = etree.fromstring(doi_label.encode())
for element in doc.iter():
if element.tag == 'record': # OSTI uses 'record' tag for each record.
o_doi_labels.append(copy.copy(element)) # Add the 'record' element to an empty tree the first time.
# Concatenate each label to o_doi_labels to return.
doc = etree.fromstring(doi_label.encode())
for element in doc.iter():
if element.tag == 'record': # OSTI uses 'record' tag for each record.
o_doi_labels.append(copy.copy(element)) # Add the 'record' element to an empty tree the first time.

# end for input_file in list_of_names:

Expand Down
7 changes: 0 additions & 7 deletions pds_doi_core/actions/test/draft_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,6 @@
class MyTestCase(unittest.TestCase):
_action = DOICoreActionDraft()

def test_local_dir_empty(self):
logger.info("test local dir empty")
osti_doi = self._action.run(input='input/draft_dir_zero_file',
node='img',
submitter='my_user@my_node.gov')
logger.info(osti_doi)

def test_local_dir_one_file(self):
logger.info("test local dir with one file")
osti_doi = self._action.run(input='input/draft_dir_one_file',
Expand Down

0 comments on commit 4a328bf

Please sign in to comment.