Fix logging spillover (#2174)

* Update pre-commit hooks versions
* Fix
* Ruff
* Update hooks and docs
* ruff --fix
* Use and apply Ruff formatter
* Use and apply Ruff formatter
* Fix further formatting
* Fix further formatting - 2
* Format linting, remove OrderedDict
* Fix linting
* Fix
* Fix
* Clean up
* Fix label on Per-Sequence GC Content
* Fix linting
* Linting
* Remove docs/modules/sentieon.md
* Fix
* Remove ordered dict
* Fix bug in custom content
* Add r specifiers to regex strings to avoid warnings. Use logger vs logging
* More regex escaping
* [automated] Update CHANGELOG.md

---------

Co-authored-by: MultiQC Bot <multiqc-bot@seqera.io>
MultiQC · Nov 14, 2023 · 398cf1a · 398cf1a
1 parent 48af59b
commit 398cf1a
Show file tree

Hide file tree

Showing 14 changed files with 54 additions and 56 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -18,6 +18,7 @@ Highlights:
 - Software versions: allow any string as a version tag ([#2166](https://github.com/ewels/MultiQC/pull/2166))
 - Remove position:absolute from table values ([#2169](https://github.com/ewels/MultiQC/pull/2169))
 - Fix custom anchors for kraken ([#2170](https://github.com/ewels/MultiQC/pull/2170))
+- Fix logging spillover ([#2174](https://github.com/ewels/MultiQC/pull/2174))
 
 ### New Modules
 

diff --git a/multiqc/modules/bbduk/bbduk.py b/multiqc/modules/bbduk/bbduk.py
@@ -82,7 +82,7 @@ def parse_logs(self, f):
                     "Result",
                 ]
                 for cat in cats:
-                    matches = re.search(f"{cat}:\s+(\d+) reads \(([\d\.]+)%\)\s+(\d+) bases \(([\d\.]+)%\)", line)
+                    matches = re.search(rf"{cat}:\s+(\d+) reads \(([\d\.]+)%\)\s+(\d+) bases \(([\d\.]+)%\)", line)
                     if matches:
                         self.bbduk_data[s_name][cat + " reads"] = int(matches.group(1))
                         self.bbduk_data[s_name][cat + " reads percent"] = float(matches.group(2))

diff --git a/multiqc/modules/biscuit/biscuit.py b/multiqc/modules/biscuit/biscuit.py
@@ -599,7 +599,7 @@ def parse_logs_qc_cv(f, fn):
             "q40_cpg_topgc",
         ]
         for t in targets:
-            m = re.search("{}\t([\d\.]+)\t([\d\.]+)\t([\d\.]+)".format(t), f, re.MULTILINE)
+            m = re.search(rf"{t}\t([\d\.]+)\t([\d\.]+)\t([\d\.]+)", f, re.MULTILINE)
             if m is not None:
                 data[t] = {"mu": float(m.group(1)), "sigma": float(m.group(2)), "cv": float(m.group(3))}
             else:

diff --git a/multiqc/modules/busco/busco.py b/multiqc/modules/busco/busco.py
@@ -57,7 +57,7 @@ def __init__(self):
         for lin in lineages:
             self.add_section(
                 name="Lineage Assessment" if lin is None else "Lineage: {}".format(lin),
-                anchor="busco-lineage-{}".format(re.sub("\W+", "_", str(lin))),
+                anchor="busco-lineage-{}".format(re.sub(r"\W+", "_", str(lin))),
                 plot=self.busco_plot(lin),
             )
 
@@ -97,7 +97,7 @@ def busco_plot(self, lin):
 
         # Config for the plot
         config = {
-            "id": "busco_plot_{}".format(re.sub("\W+", "_", str(lin))),
+            "id": "busco_plot_{}".format(re.sub(r"\W+", "_", str(lin))),
             "title": "BUSCO: Assessment Results" if lin is None else "BUSCO Assessment Results: {}".format(lin),
             "ylab": "# BUSCOs",
             "cpswitch_counts_label": "Number of BUSCOs",

diff --git a/multiqc/modules/cutadapt/cutadapt.py b/multiqc/modules/cutadapt/cutadapt.py
@@ -70,31 +70,31 @@ def parse_cutadapt_logs(self, f):
         fh = f["f"]
         regexes = {
             "1.7": {
-                "bp_processed": "Total basepairs processed:\s*([\d,]+) bp",
-                "bp_written": "Total written \(filtered\):\s*([\d,]+) bp",
-                "quality_trimmed": "Quality-trimmed:\s*([\d,]+) bp",
-                "r_processed": "Total reads processed:\s*([\d,]+)",
-                "pairs_processed": "Total read pairs processed:\s*([\d,]+)",
-                "r_with_adapters": "Reads with adapters:\s*([\d,]+)",
-                "r1_with_adapters": "Read 1 with adapter:\s*([\d,]+)",
-                "r2_with_adapters": "Read 2 with adapter:\s*([\d,]+)",
-                "r_too_short": "Reads that were too short:\s*([\d,]+)",
-                "pairs_too_short": "Pairs that were too short:\s*([\d,]+)",
-                "r_too_long": "Reads that were too long:\s*([\d,]+)",
-                "pairs_too_long": "Pairs that were too long:\s*([\d,]+)",
-                "r_too_many_N": "Reads with too many N:\s*([\d,]+)",
-                "pairs_too_many_N": "Pairs with too many N:\s*([\d,]+)",
-                "r_written": "Reads written \(passing filters\):\s*([\d,]+)",
-                "pairs_written": "Pairs written \(passing filters\):\s*([\d,]+)",
+                "bp_processed": r"Total basepairs processed:\s*([\d,]+) bp",
+                "bp_written": r"Total written \(filtered\):\s*([\d,]+) bp",
+                "quality_trimmed": r"Quality-trimmed:\s*([\d,]+) bp",
+                "r_processed": r"Total reads processed:\s*([\d,]+)",
+                "pairs_processed": r"Total read pairs processed:\s*([\d,]+)",
+                "r_with_adapters": r"Reads with adapters:\s*([\d,]+)",
+                "r1_with_adapters": r"Read 1 with adapter:\s*([\d,]+)",
+                "r2_with_adapters": r"Read 2 with adapter:\s*([\d,]+)",
+                "r_too_short": r"Reads that were too short:\s*([\d,]+)",
+                "pairs_too_short": r"Pairs that were too short:\s*([\d,]+)",
+                "r_too_long": r"Reads that were too long:\s*([\d,]+)",
+                "pairs_too_long": r"Pairs that were too long:\s*([\d,]+)",
+                "r_too_many_N": r"Reads with too many N:\s*([\d,]+)",
+                "pairs_too_many_N": r"Pairs with too many N:\s*([\d,]+)",
+                "r_written": r"Reads written \(passing filters\):\s*([\d,]+)",
+                "pairs_written": r"Pairs written \(passing filters\):\s*([\d,]+)",
             },
             "1.6": {
-                "r_processed": "Processed reads:\s*([\d,]+)",
-                "bp_processed": "Processed bases:\s*([\d,]+) bp",
-                "r_trimmed": "Trimmed reads:\s*([\d,]+)",
-                "quality_trimmed": "Quality-trimmed:\s*([\d,]+) bp",
-                "bp_trimmed": "Trimmed bases:\s*([\d,]+) bp",
-                "too_short": "Too short reads:\s*([\d,]+)",
-                "too_long": "Too long reads:\s*([\d,]+)",
+                "r_processed": r"Processed reads:\s*([\d,]+)",
+                "bp_processed": r"Processed bases:\s*([\d,]+) bp",
+                "r_trimmed": r"Trimmed reads:\s*([\d,]+)",
+                "quality_trimmed": r"Quality-trimmed:\s*([\d,]+) bp",
+                "bp_trimmed": r"Trimmed bases:\s*([\d,]+) bp",
+                "too_short": r"Too short reads:\s*([\d,]+)",
+                "too_long": r"Too long reads:\s*([\d,]+)",
             },
         }
         s_name = None
@@ -156,13 +156,13 @@ def parse_cutadapt_logs(self, f):
                     log_section = line.strip().strip("=").strip()
 
                 # Detect whether 3' or 5'
-                end_regex = re.search("Type: regular (\d)'", line)
+                end_regex = re.search(r"Type: regular (\d)'", line)
                 if end_regex:
                     end = end_regex.group(1)
 
                 if "Overview of removed sequences" in line:
                     if "' end" in line:
-                        res = re.search("(\d)' end", line)
+                        res = re.search(r"(\d)' end", line)
                         end = res.group(1)
 
                     # Initialise dictionaries for length data if not already done
@@ -181,8 +181,8 @@ def parse_cutadapt_logs(self, f):
                     self.cutadapt_length_obsexp[end][plot_sname] = dict()
 
                     # Nested loop to read this section while the regex matches
-                    for line in fh:
-                        r_seqs = re.search("^(\d+)\s+(\d+)\s+([\d\.]+)", line)
+                    for line2 in fh:
+                        r_seqs = re.search(r"^(\d+)\s+(\d+)\s+([\d\.]+)", line2)
                         if r_seqs:
                             a_len = int(r_seqs.group(1))
                             self.cutadapt_length_counts[end][plot_sname][a_len] = int(r_seqs.group(2))

diff --git a/multiqc/modules/dragen/overall_mean_cov.py b/multiqc/modules/dragen/overall_mean_cov.py
@@ -79,15 +79,15 @@ def collect_overall_mean_cov_data(self):
 
 # Official structure of files:   _overall_mean_cov.csv
 # Accepted structure of files: .+_overall_mean_cov.*.csv
-GEN_FILE_RGX = re.compile("(.+)_overall_mean_cov(.*)\.csv$")
+GEN_FILE_RGX = re.compile(r"(.+)_overall_mean_cov(.*)\.csv$")
 
 # Special case. Coverage metrics files have the following structure:
 # <output-prefix>.<coverage-region-prefix>_overall_mean_cov<arbitrary-suffix>.csv
-COV_FILE_RGX = re.compile("(.+)\.(.+)_overall_mean_cov(.*)\.csv$")
+COV_FILE_RGX = re.compile(r"(.+)\.(.+)_overall_mean_cov(.*)\.csv$")
 
 # General structure of lines is not defined.
 # Currently only 1 metric is present in the standard. It substitutes the line's regex.
-AVG_RGX = re.compile("Average alignment coverage over ([^,]+),([^,]+)$", re.IGNORECASE)
+AVG_RGX = re.compile(r"Average alignment coverage over ([^,]+),([^,]+)$", re.IGNORECASE)
 
 
 def parse_overall_mean_cov(file_handler):
@@ -126,7 +126,7 @@ def parse_overall_mean_cov(file_handler):
 
         # Otherwise check if line is empty. If not then report it and go to the next line.
         else:
-            if not re.search("^\s*$", line):
+            if not re.search(r"^\s*$", line):
                 log_data["unknown_metrics"].append(line)
             continue
 

diff --git a/multiqc/modules/featureCounts/feature_counts.py b/multiqc/modules/featureCounts/feature_counts.py
@@ -148,7 +148,7 @@ def featureCounts_chart(self):
         headers = {}
         for h in self.featurecounts_keys:
             nice_name = h.replace("Unassigned_", "Unassigned: ").replace("_", " ")
-            nice_name = re.sub(r"([a-z])([A-Z])", "\g<1> \g<2>", nice_name)
+            nice_name = re.sub(r"([a-z])([A-Z])", r"\g<1> \g<2>", nice_name)
             headers[h] = {"name": nice_name}
 
         # Config for the plot

diff --git a/multiqc/modules/homer/tagdirectory.py b/multiqc/modules/homer/tagdirectory.py
@@ -378,7 +378,7 @@ def parse_FreqDist_interChr(self, f):
         for line in f["f"]:
             if firstline:
                 firstline = False
-                interChr = float(re.sub("\)", "", line.split(":")[1]))
+                interChr = float(re.sub(r"\)", "", line.split(":")[1]))
             else:
                 break
         parsed_data["interChr"] = interChr

diff --git a/multiqc/modules/interop/interop.py b/multiqc/modules/interop/interop.py
@@ -149,7 +149,7 @@ def parse_summary_csv(f):
                         else:
                             linedata[header[idx]] = float(data[idx])
                     except ValueError:
-                        linedata[header[idx]] = re.sub(pattern="\+/-.*", repl="", string=data[idx])
+                        linedata[header[idx]] = re.sub(pattern=r"\+/-.*", repl="", string=data[idx])
                 metrics["details"]["Lane {} - {}".format(data[0], read)] = linedata
 
         return metrics, version

diff --git a/multiqc/modules/qorts/qorts.py b/multiqc/modules/qorts/qorts.py
@@ -116,7 +116,7 @@ def qorts_alignment_barplot(self):
         cats = {}
         for k in keys:
             name = k.replace("ReadPairs_", "").replace("_", ": ")
-            name = re.sub("([a-z])([A-Z])", "\g<1> \g<2>", name)
+            name = re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", name)
             cats[k] = {"name": name}
 
         # Config for the plot
@@ -168,7 +168,7 @@ def qorts_splice_loci_barplot(self):
         cats = {}
         for k in keys:
             name = k.replace("SpliceLoci_", "").replace("_", ": ")
-            name = re.sub("([a-z])([A-Z])", "\g<1> \g<2>", name)
+            name = re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", name)
             cats[k] = {"name": name}
 
         # Config for the plot
@@ -224,7 +224,7 @@ def qorts_splice_events_barplot(self):
         cats = {}
         for k in keys:
             name = k.replace("SpliceEvents_", "")
-            name = re.sub("([a-z])([A-Z])", "\g<1> \g<2>", name)
+            name = re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", name)
             cats[k] = {"name": name}
 
         # Config for the plot
@@ -277,7 +277,7 @@ def qorts_strandedness_plot(self):
         cats = {}
         for k in keys:
             name = k.replace("StrandTest_", "").replace("_", " ").replace("ambig", "ambig:")
-            name = re.sub("([a-z])([A-Z])", "\g<1> \g<2>", name)
+            name = re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", name)
             cats[k] = {"name": name.title()}
 
         # Config for the plot

diff --git a/multiqc/modules/qualimap/QM_BamQC.py b/multiqc/modules/qualimap/QM_BamQC.py
@@ -119,7 +119,7 @@ def parse_genome_results(self, f):
             for k, r in regexes.get(section, {}).items():
                 r_search = re.search(r, line)
                 if r_search:
-                    if "\d" in r:
+                    if r"\d" in r:
                         try:
                             d[k] = float(r_search.group(1).replace(",", ""))
                         except ValueError:

diff --git a/multiqc/modules/vep/vep.py b/multiqc/modules/vep/vep.py
@@ -42,17 +42,14 @@ def __init__(self):
         # Add version information
         for sample, data in self.vep_data.items():
             if "VEP run statistics" not in data:
-                print(data.keys())
                 continue
 
-            print(data["VEP run statistics"]["VEP version (API)"])
             vep_version, api_version = data["VEP run statistics"]["VEP version (API)"].strip().split(" ")
             api_version = api_version.replace("(", "").replace(")", "")
             self.add_software_version(vep_version, sample)
             # Only add API version if it's different to VEP version
             if vep_version != api_version:
                 self.add_software_version(api_version, sample, "VEP API")
-            print()
         # Filter to strip out ignored sample names
         self.vep_data = self.ignore_samples(self.vep_data)
 
@@ -102,8 +99,8 @@ def parse_vep_html(self, f):
         # The tables with the titles given below have common format inside the javascript section
         titles = [
             "Variant classes",
-            "Consequences \(most severe\)",
-            "Consequences \(all\)",
+            r"Consequences \(most severe\)",
+            r"Consequences \(all\)",
             "Coding consequences",
             "SIFT summary",
             "PolyPhen summary",
@@ -228,7 +225,7 @@ def add_stats_table(self):
     def bar_graph_variant_classes(self):
         title = "Variant classes"
         plot_data, plot_cats, plot_config = self._prep_bar_graph(title)
-        htmlid = re.sub("\W+", "_", title).lower()
+        htmlid = re.sub(r"\W+", "_", title).lower()
         if len(plot_data) == 0:
             return
 
@@ -266,7 +263,7 @@ def bar_graph_consequences(self):
     def bar_graph_sift(self):
         title = "SIFT summary"
         plot_data, plot_cats, plot_config = self._prep_bar_graph(title)
-        htmlid = re.sub("\W+", "_", title).lower()
+        htmlid = re.sub(r"\W+", "_", title).lower()
         if len(plot_data) == 0:
             return
 
@@ -295,7 +292,7 @@ def bar_graph_sift(self):
     def bar_graph_polyphen(self):
         title = "PolyPhen summary"
         plot_data, plot_cats, plot_config = self._prep_bar_graph(title)
-        htmlid = re.sub("\W+", "_", title).lower()
+        htmlid = re.sub(r"\W+", "_", title).lower()
         if len(plot_data) == 0:
             return
 
@@ -324,7 +321,7 @@ def bar_graph_polyphen(self):
     def bar_graph_variants_by_chromosome(self):
         title = "Variants by chromosome"
         plot_data, plot_cats, plot_config = self._prep_bar_graph(title)
-        htmlid = re.sub("\W+", "_", title).lower()
+        htmlid = re.sub(r"\W+", "_", title).lower()
         if len(plot_data) == 0:
             return
 
@@ -346,7 +343,7 @@ def bar_graph_variants_by_chromosome(self):
     def bar_graph_position_in_protein(self):
         title = "Position in protein"
         plot_data, plot_cats, plot_config = self._prep_bar_graph(title)
-        htmlid = re.sub("\W+", "_", title).lower()
+        htmlid = re.sub(r"\W+", "_", title).lower()
         if len(plot_data) == 0:
             return
 
@@ -370,7 +367,7 @@ def _prep_bar_graph(self, title):
             if title in self.vep_data[s_name]:
                 plot_data[s_name] = self.vep_data[s_name][title]
         plot_cats = dict()
-        htmlid = re.sub("\W+", "_", title).lower()
+        htmlid = re.sub(r"\W+", "_", title).lower()
         plotid = "{}_plot".format(htmlid)
         plot_config = {
             "id": plotid,

diff --git a/multiqc/utils/mqc_colour.py b/multiqc/utils/mqc_colour.py
@@ -402,7 +402,7 @@ def rgb_converter(x):
 
         except Exception as e:
             # Shouldn't crash all of MultiQC just for colours
-            logging.warning(f"{self.id + ': ' if self.id else ''}Error getting colour: {e}")
+            logger.warning(f"{self.id + ': ' if self.id else ''}Error getting colour: {e}")
             return ""
 
     def get_colours(self, name="GnBu"):

diff --git a/multiqc/utils/report.py b/multiqc/utils/report.py
@@ -26,7 +26,7 @@
 
 logger = config.logger
 
-# Treat defaultdict as normal dict for YAML output
+# Treat defaultdict and OrderedDict as normal dicts for YAML output
 yaml.add_representer(defaultdict, Representer.represent_dict)
 yaml.add_representer(OrderedDict, Representer.represent_dict)