Skip to content

Commit

Permalink
Merge 8dc81d8 into 2f3cb47
Browse files (browse the repository at this point in the history)
  • Loading branch information
deepakunni3 authored Apr 15, 2020
2 parents 2f3cb47 + 8dc81d8 commit 7fe83b2
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 12 deletions.
19 changes: 9 additions & 10 deletions kg_covid_19/load_utils/merge_kg.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import logging
from typing import Dict, List
import yaml

import networkx as nx
from kgx import Transformer, NeoTransformer
from kgx.cli.utils import get_file_types, get_transformer

from kgx.operations.graph_merge import GraphMerge

def parse_load_config(yaml_file: str) -> Dict:
"""Parse load config YAML.
Expand All @@ -21,16 +21,17 @@ def parse_load_config(yaml_file: str) -> Dict:
return config


def load_and_merge(yaml_file: str) -> Transformer:
def load_and_merge(yaml_file: str) -> nx.MultiDiGraph:
"""Load and merge sources defined in the config YAML.
Args:
yaml_file: A string pointing to a KGX compatible config YAML.
Returns:
kgx.Transformer: The merged transformer that contains the merged graph.
networkx.MultiDiGraph: The merged graph.
"""
gm = GraphMerge()
config = parse_load_config(yaml_file)
transformers: List = []

Expand All @@ -52,19 +53,17 @@ def load_and_merge(yaml_file: str) -> Transformer:
logging.error("type {} not yet supported".format(target['type']))

# merge all subgraphs into a single graph
merged_transformer = Transformer()
merged_transformer.merge_graphs([x.graph for x in transformers])
merged_transformer.report()
merged_graph = gm.merge_all_graphs([x.graph for x in transformers])

# write the merged graph
if 'destination' in config:
destination = config['destination']
if destination['type'] in ['csv', 'tsv', 'ttl', 'json', 'tar']:
destination_transformer = get_transformer(destination['type'])(merged_transformer.graph)
destination_transformer = get_transformer(destination['type'])(merged_graph)
destination_transformer.save(destination['filename'], extension=destination['type'])
elif destination['type'] == 'neo4j':
destination_transformer = NeoTransformer(
merged_transformer.graph,
merged_graph,
uri=destination['uri'],
username=destination['username'],
password=destination['password']
Expand All @@ -73,4 +72,4 @@ def load_and_merge(yaml_file: str) -> Transformer:
else:
logging.error("type {} not yet supported for KGX load-and-merge operation.".format(destination['type']))

return merged_transformer
return merged_graph
5 changes: 3 additions & 2 deletions kg_covid_19/transform_utils/scibite_cord/scibite_cord.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,10 @@ def parse_annotation_doc(self, node_handle, edge_handle, doc: Dict, subset: str
"""
terms = set()
paper_id = doc['paper_id']

title = None
if 'metadata' in doc:
metadata = doc['metadata']
title = metadata['title'].replace('\n', ' ')
# extract hits from metadata
terms.update(self.extract_termite_hits(metadata))

Expand All @@ -125,7 +126,7 @@ def parse_annotation_doc(self, node_handle, edge_handle, doc: Dict, subset: str
header=self.node_header,
data=[
f"CORD:{paper_id}",
f"{metadata['title']}",
f"{title}",
"biolink:Publication",
""
]
Expand Down

0 comments on commit 7fe83b2

Please sign in to comment.