Skip to content

Commit

Permalink
Preserve columns from input files when making edges
Browse files Browse the repository at this point in the history
  • Loading branch information
justaddcoffee committed Jul 5, 2020
1 parent c670fd9 commit f40561f
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions kg_covid_19/edges.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@

def make_edges(nodes: str, edges: str, output_dir: str,
train_fraction: float, validation: bool,
min_degree: int, check_disconnected_nodes: bool = False) -> None:
min_degree: int, check_disconnected_nodes: bool = False,
remove_extra_cols: bool = False) -> None:
"""Prepare positive and negative edges for testing and training (see run.py edges
command for documentation)
Expand All @@ -25,12 +26,17 @@ def make_edges(nodes: str, edges: str, output_dir: str,
of nodes involved in the edge [2]
:param check_disconnected_nodes: should we check for disconnected nodes (i.e.
nodes with degree of 0) in input graph? [False]
:param remove_extra_cols: throw out columns other than ['subject', 'object',
'relation', 'edge_label'] [False]
Returns:
None.
"""
logging.info("Loading edge file %s" % edges)
edges_df: pd.DataFrame = tsv_to_df(edges, usecols=['subject', 'object', 'relation',
if remove_extra_cols:
edges_df: pd.DataFrame = tsv_to_df(edges, usecols=['subject', 'object', 'relation',
'edge_label'])
else:
edges_df: pd.DataFrame = tsv_to_df(edges)
logging.info("Loading node file %s" % nodes)
nodes_df: pd.DataFrame = tsv_to_df(nodes)

Expand Down

0 comments on commit f40561f

Please sign in to comment.