flake fixes
dgeleta committed Jun 7, 2022
1 parent 436de16 · commit f4c332d
Showing 10 changed files with 33 additions and 26 deletions.
README.md: 2 changes (1 addition & 1 deletion)
@@ -48,7 +48,7 @@ If you find *OntoMerger* useful in your work or research, please consider adding
   title = {{OntoMerger: An Ontology Alignment Library for
            Creating Minimal and Connected Domain Knowledge
            Sub-graphs.}},
-  url = {http://arxiv.org/abs/???},
+  url = {https://arxiv.org/abs/2206.02238},
   year = {2022}
 }
 ```
onto_merger/alignment/merge_utils.py: 1 change (0 additions & 1 deletion)
@@ -51,7 +51,6 @@ def post_process_alignment_results(data_repo: DataRepository,
     :param alignment_priority_order: The alignment priority order.
     :return: The produced named tables.
     """
-
     # aggregate merges
     table_aggregated_merges = _produce_named_table_aggregated_merges(
         merges=data_repo.get(TABLE_MERGES_WITH_META_DATA).dataframe,
onto_merger/analyser/analysis_utils.py: 15 changes (7 additions & 8 deletions)
@@ -155,14 +155,13 @@ def produce_table_node_namespace_distribution(
     node_table = produce_table_with_namespace_column_for_node_ids(node_table)

     # count per NS, descending, with ratio of total
-    namespace_distribution_table = (
-        node_table
-        .groupby([ns_column])
-        .count()
-        .reset_index()
-        .sort_values(COLUMN_DEFAULT_ID, ascending=False)
-        .rename(columns={COLUMN_DEFAULT_ID: COLUMN_COUNT, ns_column: COLUMN_NAMESPACE})
-    )
+    namespace_distribution_table = node_table\
+        .groupby([ns_column])\
+        .count()\
+        .reset_index()\
+        .sort_values(COLUMN_DEFAULT_ID, ascending=False)\
+        .rename(columns={COLUMN_DEFAULT_ID: COLUMN_COUNT, ns_column: COLUMN_NAMESPACE})

     namespace_distribution_table[COLUMN_FREQUENCY] = namespace_distribution_table.apply(
         lambda x: f"{((x[COLUMN_COUNT] / node_table_count) * 100):.2f}%", axis=1
     )
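For readers less familiar with this pandas idiom, here is a toy run of the same chain (an illustration only: the literal column names below stand in for the repository's COLUMN_* constants and are not code from this commit):

    import pandas as pd

    node_table = pd.DataFrame({
        "default_id": ["MONDO:0001", "MONDO:0002", "HP:0001"],
        "namespace": ["MONDO", "MONDO", "HP"],
    })
    # same groupby/count/sort/rename chain as in the diff above
    namespace_distribution_table = node_table\
        .groupby(["namespace"])\
        .count()\
        .reset_index()\
        .sort_values("default_id", ascending=False)\
        .rename(columns={"default_id": "count"})
    print(namespace_distribution_table)
    #   namespace  count
    # 1     MONDO      2
    # 0        HP      1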
onto_merger/analyser/report_analyser.py: 2 changes (1 addition & 1 deletion)
@@ -314,7 +314,7 @@ def _produce_overview_analysis(self,
         section_dataset_name = SECTION_OVERVIEW
         logger.info(f"Producing report section '{section_dataset_name}' analysis...")

-        node_status_df = report_analyser_utils.produce_node_status_analyses(
+        report_analyser_utils.produce_node_status_analyses(
             data_manager=self._data_manager,
             data_repo=self._data_repo
         )
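The deleted assignment above was never read afterwards, which flake8 reports as F841 ("local variable is assigned to but never used"). A minimal sketch of the warning and the fix, with hypothetical names:

    def produce() -> int:
        return 42

    def analyse() -> None:
        result = produce()  # flake8 F841: 'result' is assigned to but never used

    def analyse_fixed() -> None:
        produce()  # the call is kept for its side effects; no warning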
onto_merger/analyser/report_analyser_utils.py: 19 changes (13 additions & 6 deletions)
@@ -319,6 +319,18 @@ def produce_step_node_analysis_plot(
         col_b: str,
         b_start_value: int,
 ) -> None:
+    """Produce alignment or connectivity step analysis plot.
+
+    :param step_report: The report table to be analysed.
+    :param section_dataset_name: The section where the plot will be displayed.
+    :param data_manager: The data manager instance.
+    :param col_count_a:
+    :param col_a:
+    :param col_count_b:
+    :param col_b:
+    :param b_start_value:
+    :return:
+    """
     col_step_counter = "step_counter"
     col_source = "step_counter"
     total_count = 0
@@ -419,7 +431,7 @@ def _produce_ge_validation_analysis(data_manager: DataManager, ) -> dict:


 def produce_ge_validation_analysis_as_table(data_manager: DataManager, ) -> DataFrame:
-    """Produce the data test result aggregation table
+    """Produce the data test result aggregation table.
     :param data_manager: The data manager instance.
     :return: The aggregated result table.
@@ -631,7 +643,6 @@ def produce_node_status_analyses(
     :param data_repo: The data repository containing the produced tables.
     :return: The analysis result table.
     """
-
     # INPUT
     nodes_input = len(data_repo.get(table_name=TABLE_NODES).dataframe)
     nodes_seed = len(data_repo.get(table_name=TABLE_NODES_SEED).dataframe)
@@ -825,7 +836,6 @@ def produce_mapping_analysis_for_mapped_nss(mappings: DataFrame) -> DataFrame:
     :param mappings: The mappings to be analysed.
     :return: The analysis result table.
     """
-
     col_nss_set = 'nss_set'
     df = analysis_utils.produce_table_with_namespace_column_for_node_ids(table=mappings)
     df[col_nss_set] = df.apply(
@@ -855,7 +865,6 @@ def produce_edges_analysis_for_mapped_or_connected_nss_heatmap(edges: DataFrame,
     :param edges: The edges to be analysed.
     :return: The analysis result table.
     """
-
     cols = [analysis_utils.get_namespace_column_name_for_column(COLUMN_SOURCE_ID),
             analysis_utils.get_namespace_column_name_for_column(COLUMN_TARGET_ID)]
     df = analysis_utils.produce_table_with_namespace_column_for_node_ids(table=edges)
@@ -1043,7 +1052,6 @@ def produce_overview_hierarchy_edge_comparison(
     :param data_repo: The data repository containing the produced tables.
     :return: The analysis result tables.
     """
-
     # input
     input_edges = analysis_utils.produce_table_with_namespace_column_for_node_ids(
         table=data_repo.get(TABLE_EDGES_HIERARCHY).dataframe)
@@ -1316,7 +1324,6 @@ def produce_runtime_tables(
     :param data_repo: The data repository containing the produced tables.
     :return: The analysis result tables.
     """
-
     # table
     runtime_table = _add_elapsed_seconds_column_to_runtime(
         runtime=data_repo.get(table_name=table_name).dataframe
onto_merger/data/data_manager.py: 1 change (1 addition & 0 deletions)
@@ -493,6 +493,7 @@ def get_data_tests_path(self) -> str:
         return os.path.join(self._project_folder_path, DIRECTORY_OUTPUT, DIRECTORY_INTERMEDIATE, DIRECTORY_DATA_TESTS)

     def get_domain_ontology_path(self) -> str:
+        """Produce the path for the domain ontology folder."""
         return os.path.join(self._project_folder_path, DIRECTORY_OUTPUT, DIRECTORY_DOMAIN_ONTOLOGY)

     def get_dropped_mappings_path(self) -> str:
onto_merger/data/dataclasses.py: 8 changes (4 additions & 4 deletions)
@@ -229,7 +229,7 @@ def __init__(
         self.elapsed = 0

     def task_finished(self) -> None:
-        """Stops the task runtime counter.
+        """Stop the task runtime counter.
         :return:
         """
@@ -275,7 +275,7 @@ def __init__(self, source_id: str, count_unmapped_node_ids: int):
         self.elapsed = 0

     def task_finished(self) -> None:
-        """Stops the task runtime counter.
+        """Stop the task runtime counter.
         :return:
         """
@@ -287,7 +287,7 @@ def task_finished(self) -> None:
 def convert_runtime_steps_to_named_table(
     steps: List[RuntimeData],
 ) -> NamedTable:
-    """Converts the runtime data to a named table.
+    """Convert the runtime data to a named table.
     :param steps: The list of runtime step objects.
     :return: The runtime data named table.
@@ -336,7 +336,7 @@ def convert_connectivity_steps_to_named_table(


 def format_datetime(date_time: datetime) -> str:
-    """Formats a date time to string.
+    """Format a date time to string.
     :param date_time: The date time.
     :return: The formatted date time as a string.
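The context lines above only hint at how the runtime counters behave. A minimal sketch of the pattern, assuming task_finished derives elapsed seconds from start and end timestamps (the real RuntimeData classes carry additional metadata such as task names):

    from datetime import datetime
    from typing import Optional

    class RuntimeCounter:  # hypothetical stand-in for the RuntimeData classes
        def __init__(self) -> None:
            self.start: datetime = datetime.now()
            self.end: Optional[datetime] = None
            self.elapsed: float = 0

        def task_finished(self) -> None:
            """Stop the task runtime counter."""
            self.end = datetime.now()
            self.elapsed = (self.end - self.start).total_seconds()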
onto_merger/report/__init__.py: 1 change (1 addition & 0 deletions)
@@ -0,0 +1 @@
+"""HTML report to display analysis and background info about OntoMerger."""
onto_merger/report/section_data_loader.py: 7 changes (3 additions & 4 deletions)
@@ -71,9 +71,8 @@ def _produce_section(title: str, section_name: str, subsections: List[dict], dat
         TITLE: title,
         LINK_TITLE: section_name,
         LOGO: _get_section_icon_file_name(section_name=section_name),
-        SUBSECTIONS: [
-            _produce_section_summary_subsection(section_name=section_name, data_manager=data_manager),
-        ] + subsections
+        SUBSECTIONS:
+            [_produce_section_summary_subsection(section_name=section_name, data_manager=data_manager)] + subsections
     }


@@ -642,5 +641,5 @@ def _get_figure_path(section_name: str, table_name: str) -> str:


 def _get_unique_id_for_description_table(section_name: str, table_name: str) -> str:
-    """Unique IDs are used in the toggle javascript."""
+    """Produce unique IDs that are used in the toggle javascript."""
     return f"{section_name}_{table_name}_description"
setup.cfg: 3 changes (2 additions & 1 deletion)
@@ -11,7 +11,8 @@ long_description_content_type = text/markdown
 [flake8]
 ignore =
     E203,
-    W503
+    W503,
+    BLK100
 exclude =
     .tox,
     .git,
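For context: E203 (whitespace before ':') and W503 (line break before binary operator) are the two flake8 defaults most often disabled when a formatter manages layout, and BLK100 is the "Black would make changes" code emitted by the flake8-black plugin, so ignoring it stops flake8 from enforcing Black's formatting. Assuming flake8 and flake8-black are installed, the check can be reproduced locally with:

    pip install flake8 flake8-black
    flake8 onto_merger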
