Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions spras/analysis/summary.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
from pathlib import Path
from statistics import median
from typing import Iterable
Expand Down Expand Up @@ -99,8 +100,9 @@ def summarize_networks(file_paths: Iterable[Path], node_table: pd.DataFrame, alg
# Algorithm parameters have format { algo : { hashcode : { parameter combos } } }
param_combo = algo_params[algo][hashcode]
del param_combo['_spras_run_name']
# TODO: sort parameters to provide stable summary table output
cur_nw_info.append(param_combo)
# We use json.dumps to properly serialize enums as strings,
# and sort parameters to provide stable summary table output.
cur_nw_info.append(json.dumps(param_combo, sort_keys=True))

# Save the current network information to the network summary list
nw_info.append(cur_nw_info)
Expand Down
5 changes: 0 additions & 5 deletions spras/omicsintegrator1.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,6 @@ class DummyMode(CaseInsensitiveEnum):
file = 'file'
"connect the dummy node to a specific list of nodes provided in a file"

# To make sure that DummyMode prints as `terminals`, etc. in JSON dictionaries
# (since they use object representation internally).
def __repr__(self) -> str:
return f"'{self.name}'"

class OmicsIntegrator1Params(BaseModel):
dummy_mode: Optional[DummyMode] = None
mu_squared: bool = False
Expand Down
25 changes: 13 additions & 12 deletions spras/omicsintegrator2.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@

class DummyMode(CaseInsensitiveEnum):
terminals = 'terminals'
"Connect to all terminals."
others = 'others'
"Connect to all nodes except for terminals."
all = 'all'
"Connect to all nodes in the interactome."

class OmicsIntegrator2Params(BaseModel):
w: float = 5
Expand All @@ -29,22 +32,17 @@ class OmicsIntegrator2Params(BaseModel):
g: float = 3
"Gamma: multiplicative edge penalty from degree of endpoints"

noise: Optional[float] = None
noise: float = 0.1
"Standard Deviation of the gaussian noise added to edges in Noisy Edges Randomizations."

noisy_edges: Optional[int] = None
noisy_edges: int = 0
"An integer specifying how many times to add noise to the given edge values and re-run."

random_terminals: Optional[int] = None
random_terminals: int = 0
"An integer specifying how many times to apply your given prizes to random nodes in the interactome and re-run"

dummy_mode: Optional[DummyMode] = None
"""
Tells the program which nodes in the interactome to connect the dummy node to. (default: terminals)
"terminals" = connect to all terminals
"others" = connect to all nodes except for terminals
"all" = connect to all nodes in the interactome.
"""
dummy_mode: DummyMode = DummyMode.terminals
"Tells the program which nodes in the interactome to connect the dummy node to."

seed: Optional[int] = None
"The random seed to use for this run."
Expand Down Expand Up @@ -106,7 +104,6 @@ def generate_inputs(data: Dataset, filename_map):
edges_df.to_csv(filename_map['edges'], sep='\t', index=False, columns=['Interactor1', 'Interactor2', 'cost'],
header=['protein1', 'protein2', 'cost'])

# TODO add reasonable default values
@staticmethod
def run(inputs, output_file, args=None, container_settings=None):
if not container_settings: container_settings = ProcessedContainerSettings()
Expand Down Expand Up @@ -195,7 +192,11 @@ def parse_output(raw_pathway_file, standardized_pathway_file, params):
df = add_rank_column(df)
df = reinsert_direction_col_undirected(df)
df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
else: # corrupted data
else:
# The raw output has only the protein1, protein2, and cost columns when no edges are in the
# solution (networkx has no solution edges to loop over, so the extra column is never created),
# and only protein1 and protein2 when the augmented forest contains no edges at all.
# Both of these outcomes should be treated as an empty network.
df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction'])

df, has_duplicates = duplicate_edges(df)
Expand Down
18 changes: 9 additions & 9 deletions test/analysis/expected_output/expected_egfr_summary.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
Name Number of nodes Number of edges Number of connected components Density Max degree Median degree Max diameter Average path length Nodes in prize Nodes in sources Nodes in targets Nodes in active Nodes in dummy Parameter combination
test/analysis/input/egfr/tps-egfr-domino-params-V3X4RW7_pathway.txt 48 45 3 0.0398936170212766 5 2.0 16 3.882808476926124 27 0 27 27 0 {'module_threshold': 0.05, 'slice_threshold': 0.3}
test/analysis/input/egfr/tps-egfr-meo-params-GKEDDFZ_pathway.txt 1877 12845 1 0.007295700506524384 469 6.0 6 2.7973618474338107 621 1 620 621 1 {'max_path_length': 3, 'local_search': True, 'rand_restarts': 10}
test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-3THRXWW_pathway.txt 28 20 8 0.05291005291005291 4 1.0 5 1.306439393939394 28 1 27 28 1 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 0.1, 'b': 10.0, 'd': 10, 'mu': 0.008, 'noise': None, 'g': 0.001, 'r': 0.01}
test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-5QH767V_pathway.txt 39 31 8 0.04183535762483131 6 1.0 5 1.5084498834498834 39 1 38 39 1 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 0.1, 'b': 2.0, 'd': 10, 'mu': 0.008, 'noise': None, 'g': 0.001, 'r': 0.01}
test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-ITO5EQS_pathway.txt 14 9 5 0.0989010989010989 4 1.0 2 1.1866666666666668 14 0 14 14 0 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 0.1, 'b': 0.55, 'd': 10, 'mu': 0.008, 'noise': None, 'g': 0.001, 'r': 0.01}
test/analysis/input/egfr/tps-egfr-omicsintegrator2-params-EHHWPMD_pathway.txt 593 591 2 0.0033669841848593955 32 1.0 30 6.72248989073389 531 1 530 531 1 {'w': 5.0, 'b': 4.0, 'g': 0.0, 'noise': None, 'noisy_edges': None, 'random_terminals': None, 'dummy_mode': None, 'seed': None}
test/analysis/input/egfr/tps-egfr-omicsintegrator2-params-IV3IPCJ_pathway.txt 704 702 2 0.002836867968446916 35 1.0 24 6.038766691954387 616 1 615 616 1 {'w': 5.0, 'b': 2.0, 'g': 3.0, 'noise': None, 'noisy_edges': None, 'random_terminals': None, 'dummy_mode': None, 'seed': None}
test/analysis/input/egfr/tps-egfr-pathlinker-params-7S4SLU6_pathway.txt 14 17 1 0.18681318681318682 6 2.0 7 2.857142857142857 6 1 5 6 1 {'k': 10}
test/analysis/input/egfr/tps-egfr-pathlinker-params-TCEMRS7_pathway.txt 25 32 1 0.10666666666666667 8 2.0 7 3.486666666666667 11 1 10 11 1 {'k': 20}
test/analysis/input/egfr/tps-egfr-domino-params-V3X4RW7_pathway.txt 48 45 3 0.0398936170212766 5 2.0 16 3.882808476926124 27 0 27 27 0 "{""module_threshold"": 0.05, ""slice_threshold"": 0.3}"
test/analysis/input/egfr/tps-egfr-meo-params-GKEDDFZ_pathway.txt 1877 12845 1 0.007295700506524384 469 6.0 6 2.7973618474338107 621 1 620 621 1 "{""local_search"": true, ""max_path_length"": 3, ""rand_restarts"": 10}"
test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-3THRXWW_pathway.txt 28 20 8 0.05291005291005291 4 1.0 5 1.306439393939394 28 1 27 28 1 "{""b"": 10.0, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.008, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.01, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 0.1}"
test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-5QH767V_pathway.txt 39 31 8 0.04183535762483131 6 1.0 5 1.5084498834498834 39 1 38 39 1 "{""b"": 2.0, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.008, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.01, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 0.1}"
test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-ITO5EQS_pathway.txt 14 9 5 0.0989010989010989 4 1.0 2 1.1866666666666668 14 0 14 14 0 "{""b"": 0.55, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.008, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.01, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 0.1}"
test/analysis/input/egfr/tps-egfr-omicsintegrator2-params-EHHWPMD_pathway.txt 593 591 2 0.0033669841848593955 32 1.0 30 6.72248989073389 531 1 530 531 1 "{""b"": 4.0, ""dummy_mode"": ""terminals"", ""g"": 0.0, ""noise"": 0.1, ""noisy_edges"": 0, ""random_terminals"": 0, ""seed"": null, ""w"": 5.0}"
test/analysis/input/egfr/tps-egfr-omicsintegrator2-params-IV3IPCJ_pathway.txt 704 702 2 0.002836867968446916 35 1.0 24 6.038766691954387 616 1 615 616 1 "{""b"": 2.0, ""dummy_mode"": ""terminals"", ""g"": 3.0, ""noise"": 0.1, ""noisy_edges"": 0, ""random_terminals"": 0, ""seed"": null, ""w"": 5.0}"
test/analysis/input/egfr/tps-egfr-pathlinker-params-7S4SLU6_pathway.txt 14 17 1 0.18681318681318682 6 2.0 7 2.857142857142857 6 1 5 6 1 "{""k"": 10}"
test/analysis/input/egfr/tps-egfr-pathlinker-params-TCEMRS7_pathway.txt 25 32 1 0.10666666666666667 8 2.0 7 3.486666666666667 11 1 10 11 1 "{""k"": 20}"
22 changes: 11 additions & 11 deletions test/analysis/expected_output/expected_example_summary.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
Name Number of nodes Number of edges Number of connected components Density Max degree Median degree Max diameter Average path length Nodes in prize Nodes in active Nodes in dummy Nodes in sources Nodes in targets Parameter combination
test/analysis/input/example/data0-allpairs-params-BEH6YB2_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {}
test/analysis/input/example/data0-domino-params-V3X4RW7_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 {'module_threshold': 0.05, 'slice_threshold': 0.3}
test/analysis/input/example/data0-meo-params-GKEDDFZ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'max_path_length': 3, 'local_search': True, 'rand_restarts': 10}
test/analysis/input/example/data0-mincostflow-params-SZPZVU6_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'flow': 1, 'capacity': 1}
test/analysis/input/example/data0-omicsintegrator1-params-E3LSEZQ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 0.0, 'b': 6.0, 'd': 10, 'mu': 0.0, 'noise': None, 'g': 0.001, 'r': 0.0}
test/analysis/input/example/data0-omicsintegrator1-params-NFIPHUX_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 0.0, 'b': 5.0, 'd': 10, 'mu': 0.0, 'noise': None, 'g': 0.001, 'r': 0.0}
test/analysis/input/example/data0-omicsintegrator1-params-SU2S63Y_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 5.0, 'b': 5.0, 'd': 10, 'mu': 0.0, 'noise': None, 'g': 0.001, 'r': 0.0}
test/analysis/input/example/data0-omicsintegrator1-params-V26JBGX_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 5.0, 'b': 6.0, 'd': 10, 'mu': 0.0, 'noise': None, 'g': 0.001, 'r': 0.0}
test/analysis/input/example/data0-omicsintegrator2-params-EHHWPMD_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 {'w': 5.0, 'b': 4.0, 'g': 0.0, 'noise': None, 'noisy_edges': None, 'random_terminals': None, 'dummy_mode': None, 'seed': None}
test/analysis/input/example/data0-omicsintegrator2-params-IV3IPCJ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'w': 5.0, 'b': 2.0, 'g': 3.0, 'noise': None, 'noisy_edges': None, 'random_terminals': None, 'dummy_mode': None, 'seed': None}
test/analysis/input/example/data0-pathlinker-params-6SWY7JS_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'k': 200}
test/analysis/input/example/data0-pathlinker-params-VQL7BDZ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'k': 100}
test/analysis/input/example/data0-domino-params-V3X4RW7_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 "{""module_threshold"": 0.05, ""slice_threshold"": 0.3}"
test/analysis/input/example/data0-meo-params-GKEDDFZ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""local_search"": true, ""max_path_length"": 3, ""rand_restarts"": 10}"
test/analysis/input/example/data0-mincostflow-params-SZPZVU6_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""capacity"": 1, ""flow"": 1}"
test/analysis/input/example/data0-omicsintegrator1-params-E3LSEZQ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""b"": 6.0, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.0, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.0, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 0.0}"
test/analysis/input/example/data0-omicsintegrator1-params-NFIPHUX_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 "{""b"": 5.0, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.0, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.0, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 0.0}"
test/analysis/input/example/data0-omicsintegrator1-params-SU2S63Y_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""b"": 5.0, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.0, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.0, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 5.0}"
test/analysis/input/example/data0-omicsintegrator1-params-V26JBGX_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 "{""b"": 6.0, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.0, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.0, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 5.0}"
test/analysis/input/example/data0-omicsintegrator2-params-EHHWPMD_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 "{""b"": 4.0, ""dummy_mode"": ""terminals"", ""g"": 0.0, ""noise"": 0.1, ""noisy_edges"": 0, ""random_terminals"": 0, ""seed"": null, ""w"": 5.0}"
test/analysis/input/example/data0-omicsintegrator2-params-IV3IPCJ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""b"": 2.0, ""dummy_mode"": ""terminals"", ""g"": 3.0, ""noise"": 0.1, ""noisy_edges"": 0, ""random_terminals"": 0, ""seed"": null, ""w"": 5.0}"
test/analysis/input/example/data0-pathlinker-params-6SWY7JS_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""k"": 200}"
test/analysis/input/example/data0-pathlinker-params-VQL7BDZ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""k"": 100}"
Loading