From fc960836f88a1c22fa37496385bbfc860aff0c81 Mon Sep 17 00:00:00 2001
From: Justin Reese
Date: Fri, 24 Jul 2020 17:47:05 -0400
Subject: [PATCH] Fix bug preventing files from being processed in SciBite ingest

---
 .../transform_utils/scibite_cord/scibite_cord.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/kg_covid_19/transform_utils/scibite_cord/scibite_cord.py b/kg_covid_19/transform_utils/scibite_cord/scibite_cord.py
index 6615332f..ceec029f 100644
--- a/kg_covid_19/transform_utils/scibite_cord/scibite_cord.py
+++ b/kg_covid_19/transform_utils/scibite_cord/scibite_cord.py
@@ -19,6 +19,7 @@
     'WD': 'http://www.wikidata.org/entity/'
 }
 
+
 class ScibiteCordTransform(Transform):
     """
     ScibiteCordTransform parses the SciBite annotations on CORD-19 dataset
@@ -94,14 +95,12 @@ def parse_annotations(self, node_handle: Any, edge_handle: Any,
         subsets = ['pmc_json', 'pdf_json']
         for subset in subsets:
             subset_dir = os.path.join(self.input_base_dir, subset)
-            for data_dir in os.listdir(subset_dir):
-                if os.path.isdir(os.path.join(subset_dir, data_dir)):
-                    for filename in os.listdir(os.path.join(subset_dir, data_dir)):
-                        file = os.path.join(subset_dir, data_dir, filename)
-                        doc = json.load(open(file))
-                        self.parse_annotation_doc(node_handle, edge_handle, doc, subset)
-
-    def parse_annotation_doc(self, node_handle, edge_handle, doc: Dict, subset: str = None) -> None:
+            for filename in os.listdir(subset_dir):
+                file = os.path.join(subset_dir, filename)
+                doc = json.load(open(file))
+                self.parse_annotation_doc(node_handle, edge_handle, doc)
+
+    def parse_annotation_doc(self, node_handle, edge_handle, doc: Dict) -> None:
         """Parse a JSON document corresponding to a publication.
 
         Args: