From d7f3d9d48edf4a9fa9d9b4b80779730777736d40 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 16 Jun 2020 21:38:08 -0700 Subject: [PATCH] Better tests for nodes and edges tsv files in STRING --- .../transform_utils/string_ppi/string_ppi.py | 1 - tests/test_string.py | 46 ++++++++++++++----- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/kg_covid_19/transform_utils/string_ppi/string_ppi.py b/kg_covid_19/transform_utils/string_ppi/string_ppi.py index c199cab9..bc321003 100644 --- a/kg_covid_19/transform_utils/string_ppi/string_ppi.py +++ b/kg_covid_19/transform_utils/string_ppi/string_ppi.py @@ -245,7 +245,6 @@ def run(self, data_file: Optional[str] = None) -> None: ) - def parse_stringdb_interactions(this_line: str, header_items: List) -> Dict: """Methods processes a line of text from Drug Central. diff --git a/tests/test_string.py b/tests/test_string.py index d7ccbf5d..d271b299 100644 --- a/tests/test_string.py +++ b/tests/test_string.py @@ -1,5 +1,6 @@ import os import tempfile +import pandas as pd from unittest import TestCase, skip from parameterized import parameterized @@ -9,15 +10,11 @@ class TestString(TestCase): """Tests the string ingest""" - @classmethod - def setUpClass(cls) -> None: - cls.input_dir = "tests/resources/string/" - cls.output_dir = tempfile.gettempdir() - cls.string_output_dir = os.path.join(cls.output_dir, "STRING") - cls.string = StringTransform(cls.input_dir, cls.output_dir) - def setUp(self) -> None: - pass + self.input_dir = "tests/resources/string/" + self.output_dir = tempfile.gettempdir() + self.string_output_dir = os.path.join(self.output_dir, "STRING") + self.string = StringTransform(self.input_dir, self.output_dir) @parameterized.expand([ ['ensembl2ncbi_map', dict, 'ENSG00000121410', 1], @@ -53,7 +50,32 @@ def test_run(self): self.assertTrue(isinstance(self.string.run, object)) self.string.run() self.assertTrue(os.path.isdir(self.string_output_dir)) - self.assertTrue( - os.path.isfile(os.path.join(self.string_output_dir, "nodes.tsv"))) - self.assertTrue( - os.path.isfile(os.path.join(self.string_output_dir, "edges.tsv"))) + + def test_nodes_file(self): + self.string.run() + node_file = os.path.join(self.string_output_dir, "nodes.tsv") + self.assertTrue(os.path.isfile(node_file)) + node_df = pd.read_csv(node_file, sep="\t", header=0) + self.assertEqual((10, 6), node_df.shape) + self.assertEqual(['id', 'name', 'category', 'description', 'alias', + 'provided_by'], list(node_df.columns)) + self.assertListEqual(['ENSEMBL:ENSP00000000233', 'ENSEMBL:ENSP00000272298', + 'ENSEMBL:ENSP00000253401', 'ENSEMBL:ENSP00000401445', + 'ENSEMBL:ENSP00000418915', 'ENSEMBL:ENSP00000327801', + 'ENSEMBL:ENSP00000466298', 'ENSEMBL:ENSP00000232564', + 'ENSEMBL:ENSP00000393379', 'ENSEMBL:ENSP00000371253'], + list(node_df.id.unique())) + + def test_edges_file(self): + self.string.run() + edge_file = os.path.join(self.string_output_dir, "edges.tsv") + self.assertTrue(os.path.isfile(edge_file)) + edge_df = pd.read_csv(edge_file, sep="\t", header=0) + self.assertEqual((9, 19), edge_df.shape) + self.assertEqual(['subject', 'edge_label', 'object', 'relation', 'provided_by', + 'combined_score', 'neighborhood', 'neighborhood_transferred', + 'fusion', 'cooccurence', 'homology', 'coexpression', + 'coexpression_transferred', 'experiments', + 'experiments_transferred', 'database', 'database_transferred', + 'textmining', 'textmining_transferred', ], + list(edge_df.columns))