Skip to content

Commit

Permalink
Fix logging spillover (#2174)
Browse files Browse the repository at this point in the history
* Update pre-commit hooks versions

* Fix

* Ruff

* Update hooks and docs

* ruff --fix

* Use and apply Ruff formatter

* Use and apply Ruff formatter

* Fix further formatting

* Fix further formatting - 2

* Format linting, remove OrderedDict

* Fix linting

* Fix

* Fix

* Clean up

* Fix label on Per-Sequence GC Content

* Fix linting

* Linting

* Remove docs/modules/sentieon.md

* Fix

* Remove ordered dict

* Fix bug in custom content

* Add r prefixes to regex strings to avoid invalid escape sequence warnings. Use logger instead of logging

* More regex escaping

* [automated] Update CHANGELOG.md

---------

Co-authored-by: MultiQC Bot <multiqc-bot@seqera.io>
  • Loading branch information
vladsavelyev and multiqc-bot committed Nov 14, 2023
1 parent 48af59b commit 398cf1a
Show file tree
Hide file tree
Showing 14 changed files with 54 additions and 56 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Highlights:
- Software versions: allow any string as a version tag ([#2166](https://github.com/ewels/MultiQC/pull/2166))
- Remove position:absolute from table values ([#2169](https://github.com/ewels/MultiQC/pull/2169))
- Fix custom anchors for kraken ([#2170](https://github.com/ewels/MultiQC/pull/2170))
- Fix logging spillover ([#2174](https://github.com/ewels/MultiQC/pull/2174))

### New Modules

Expand Down
2 changes: 1 addition & 1 deletion multiqc/modules/bbduk/bbduk.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def parse_logs(self, f):
"Result",
]
for cat in cats:
matches = re.search(f"{cat}:\s+(\d+) reads \(([\d\.]+)%\)\s+(\d+) bases \(([\d\.]+)%\)", line)
matches = re.search(rf"{cat}:\s+(\d+) reads \(([\d\.]+)%\)\s+(\d+) bases \(([\d\.]+)%\)", line)
if matches:
self.bbduk_data[s_name][cat + " reads"] = int(matches.group(1))
self.bbduk_data[s_name][cat + " reads percent"] = float(matches.group(2))
Expand Down
2 changes: 1 addition & 1 deletion multiqc/modules/biscuit/biscuit.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@ def parse_logs_qc_cv(f, fn):
"q40_cpg_topgc",
]
for t in targets:
m = re.search("{}\t([\d\.]+)\t([\d\.]+)\t([\d\.]+)".format(t), f, re.MULTILINE)
m = re.search(rf"{t}\t([\d\.]+)\t([\d\.]+)\t([\d\.]+)", f, re.MULTILINE)
if m is not None:
data[t] = {"mu": float(m.group(1)), "sigma": float(m.group(2)), "cv": float(m.group(3))}
else:
Expand Down
4 changes: 2 additions & 2 deletions multiqc/modules/busco/busco.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def __init__(self):
for lin in lineages:
self.add_section(
name="Lineage Assessment" if lin is None else "Lineage: {}".format(lin),
anchor="busco-lineage-{}".format(re.sub("\W+", "_", str(lin))),
anchor="busco-lineage-{}".format(re.sub(r"\W+", "_", str(lin))),
plot=self.busco_plot(lin),
)

Expand Down Expand Up @@ -97,7 +97,7 @@ def busco_plot(self, lin):

# Config for the plot
config = {
"id": "busco_plot_{}".format(re.sub("\W+", "_", str(lin))),
"id": "busco_plot_{}".format(re.sub(r"\W+", "_", str(lin))),
"title": "BUSCO: Assessment Results" if lin is None else "BUSCO Assessment Results: {}".format(lin),
"ylab": "# BUSCOs",
"cpswitch_counts_label": "Number of BUSCOs",
Expand Down
54 changes: 27 additions & 27 deletions multiqc/modules/cutadapt/cutadapt.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,31 +70,31 @@ def parse_cutadapt_logs(self, f):
fh = f["f"]
regexes = {
"1.7": {
"bp_processed": "Total basepairs processed:\s*([\d,]+) bp",
"bp_written": "Total written \(filtered\):\s*([\d,]+) bp",
"quality_trimmed": "Quality-trimmed:\s*([\d,]+) bp",
"r_processed": "Total reads processed:\s*([\d,]+)",
"pairs_processed": "Total read pairs processed:\s*([\d,]+)",
"r_with_adapters": "Reads with adapters:\s*([\d,]+)",
"r1_with_adapters": "Read 1 with adapter:\s*([\d,]+)",
"r2_with_adapters": "Read 2 with adapter:\s*([\d,]+)",
"r_too_short": "Reads that were too short:\s*([\d,]+)",
"pairs_too_short": "Pairs that were too short:\s*([\d,]+)",
"r_too_long": "Reads that were too long:\s*([\d,]+)",
"pairs_too_long": "Pairs that were too long:\s*([\d,]+)",
"r_too_many_N": "Reads with too many N:\s*([\d,]+)",
"pairs_too_many_N": "Pairs with too many N:\s*([\d,]+)",
"r_written": "Reads written \(passing filters\):\s*([\d,]+)",
"pairs_written": "Pairs written \(passing filters\):\s*([\d,]+)",
"bp_processed": r"Total basepairs processed:\s*([\d,]+) bp",
"bp_written": r"Total written \(filtered\):\s*([\d,]+) bp",
"quality_trimmed": r"Quality-trimmed:\s*([\d,]+) bp",
"r_processed": r"Total reads processed:\s*([\d,]+)",
"pairs_processed": r"Total read pairs processed:\s*([\d,]+)",
"r_with_adapters": r"Reads with adapters:\s*([\d,]+)",
"r1_with_adapters": r"Read 1 with adapter:\s*([\d,]+)",
"r2_with_adapters": r"Read 2 with adapter:\s*([\d,]+)",
"r_too_short": r"Reads that were too short:\s*([\d,]+)",
"pairs_too_short": r"Pairs that were too short:\s*([\d,]+)",
"r_too_long": r"Reads that were too long:\s*([\d,]+)",
"pairs_too_long": r"Pairs that were too long:\s*([\d,]+)",
"r_too_many_N": r"Reads with too many N:\s*([\d,]+)",
"pairs_too_many_N": r"Pairs with too many N:\s*([\d,]+)",
"r_written": r"Reads written \(passing filters\):\s*([\d,]+)",
"pairs_written": r"Pairs written \(passing filters\):\s*([\d,]+)",
},
"1.6": {
"r_processed": "Processed reads:\s*([\d,]+)",
"bp_processed": "Processed bases:\s*([\d,]+) bp",
"r_trimmed": "Trimmed reads:\s*([\d,]+)",
"quality_trimmed": "Quality-trimmed:\s*([\d,]+) bp",
"bp_trimmed": "Trimmed bases:\s*([\d,]+) bp",
"too_short": "Too short reads:\s*([\d,]+)",
"too_long": "Too long reads:\s*([\d,]+)",
"r_processed": r"Processed reads:\s*([\d,]+)",
"bp_processed": r"Processed bases:\s*([\d,]+) bp",
"r_trimmed": r"Trimmed reads:\s*([\d,]+)",
"quality_trimmed": r"Quality-trimmed:\s*([\d,]+) bp",
"bp_trimmed": r"Trimmed bases:\s*([\d,]+) bp",
"too_short": r"Too short reads:\s*([\d,]+)",
"too_long": r"Too long reads:\s*([\d,]+)",
},
}
s_name = None
Expand Down Expand Up @@ -156,13 +156,13 @@ def parse_cutadapt_logs(self, f):
log_section = line.strip().strip("=").strip()

# Detect whether 3' or 5'
end_regex = re.search("Type: regular (\d)'", line)
end_regex = re.search(r"Type: regular (\d)'", line)
if end_regex:
end = end_regex.group(1)

if "Overview of removed sequences" in line:
if "' end" in line:
res = re.search("(\d)' end", line)
res = re.search(r"(\d)' end", line)
end = res.group(1)

# Initialise dictionaries for length data if not already done
Expand All @@ -181,8 +181,8 @@ def parse_cutadapt_logs(self, f):
self.cutadapt_length_obsexp[end][plot_sname] = dict()

# Nested loop to read this section while the regex matches
for line in fh:
r_seqs = re.search("^(\d+)\s+(\d+)\s+([\d\.]+)", line)
for line2 in fh:
r_seqs = re.search(r"^(\d+)\s+(\d+)\s+([\d\.]+)", line2)
if r_seqs:
a_len = int(r_seqs.group(1))
self.cutadapt_length_counts[end][plot_sname][a_len] = int(r_seqs.group(2))
Expand Down
8 changes: 4 additions & 4 deletions multiqc/modules/dragen/overall_mean_cov.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,15 @@ def collect_overall_mean_cov_data(self):

# Official structure of files: _overall_mean_cov.csv
# Accepted structure of files: .+_overall_mean_cov.*.csv
GEN_FILE_RGX = re.compile("(.+)_overall_mean_cov(.*)\.csv$")
GEN_FILE_RGX = re.compile(r"(.+)_overall_mean_cov(.*)\.csv$")

# Special case. Coverage metrics files have the following structure:
# <output-prefix>.<coverage-region-prefix>_overall_mean_cov<arbitrary-suffix>.csv
COV_FILE_RGX = re.compile("(.+)\.(.+)_overall_mean_cov(.*)\.csv$")
COV_FILE_RGX = re.compile(r"(.+)\.(.+)_overall_mean_cov(.*)\.csv$")

# General structure of lines is not defined.
# Currently only 1 metric is present in the standard. It substitutes the line's regex.
AVG_RGX = re.compile("Average alignment coverage over ([^,]+),([^,]+)$", re.IGNORECASE)
AVG_RGX = re.compile(r"Average alignment coverage over ([^,]+),([^,]+)$", re.IGNORECASE)


def parse_overall_mean_cov(file_handler):
Expand Down Expand Up @@ -126,7 +126,7 @@ def parse_overall_mean_cov(file_handler):

# Otherwise check if line is empty. If not then report it and go to the next line.
else:
if not re.search("^\s*$", line):
if not re.search(r"^\s*$", line):
log_data["unknown_metrics"].append(line)
continue

Expand Down
2 changes: 1 addition & 1 deletion multiqc/modules/featureCounts/feature_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def featureCounts_chart(self):
headers = {}
for h in self.featurecounts_keys:
nice_name = h.replace("Unassigned_", "Unassigned: ").replace("_", " ")
nice_name = re.sub(r"([a-z])([A-Z])", "\g<1> \g<2>", nice_name)
nice_name = re.sub(r"([a-z])([A-Z])", r"\g<1> \g<2>", nice_name)
headers[h] = {"name": nice_name}

# Config for the plot
Expand Down
2 changes: 1 addition & 1 deletion multiqc/modules/homer/tagdirectory.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ def parse_FreqDist_interChr(self, f):
for line in f["f"]:
if firstline:
firstline = False
interChr = float(re.sub("\)", "", line.split(":")[1]))
interChr = float(re.sub(r"\)", "", line.split(":")[1]))
else:
break
parsed_data["interChr"] = interChr
Expand Down
2 changes: 1 addition & 1 deletion multiqc/modules/interop/interop.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def parse_summary_csv(f):
else:
linedata[header[idx]] = float(data[idx])
except ValueError:
linedata[header[idx]] = re.sub(pattern="\+/-.*", repl="", string=data[idx])
linedata[header[idx]] = re.sub(pattern=r"\+/-.*", repl="", string=data[idx])
metrics["details"]["Lane {} - {}".format(data[0], read)] = linedata

return metrics, version
Expand Down
8 changes: 4 additions & 4 deletions multiqc/modules/qorts/qorts.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def qorts_alignment_barplot(self):
cats = {}
for k in keys:
name = k.replace("ReadPairs_", "").replace("_", ": ")
name = re.sub("([a-z])([A-Z])", "\g<1> \g<2>", name)
name = re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", name)
cats[k] = {"name": name}

# Config for the plot
Expand Down Expand Up @@ -168,7 +168,7 @@ def qorts_splice_loci_barplot(self):
cats = {}
for k in keys:
name = k.replace("SpliceLoci_", "").replace("_", ": ")
name = re.sub("([a-z])([A-Z])", "\g<1> \g<2>", name)
name = re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", name)
cats[k] = {"name": name}

# Config for the plot
Expand Down Expand Up @@ -224,7 +224,7 @@ def qorts_splice_events_barplot(self):
cats = {}
for k in keys:
name = k.replace("SpliceEvents_", "")
name = re.sub("([a-z])([A-Z])", "\g<1> \g<2>", name)
name = re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", name)
cats[k] = {"name": name}

# Config for the plot
Expand Down Expand Up @@ -277,7 +277,7 @@ def qorts_strandedness_plot(self):
cats = {}
for k in keys:
name = k.replace("StrandTest_", "").replace("_", " ").replace("ambig", "ambig:")
name = re.sub("([a-z])([A-Z])", "\g<1> \g<2>", name)
name = re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", name)
cats[k] = {"name": name.title()}

# Config for the plot
Expand Down
2 changes: 1 addition & 1 deletion multiqc/modules/qualimap/QM_BamQC.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def parse_genome_results(self, f):
for k, r in regexes.get(section, {}).items():
r_search = re.search(r, line)
if r_search:
if "\d" in r:
if r"\d" in r:
try:
d[k] = float(r_search.group(1).replace(",", ""))
except ValueError:
Expand Down
19 changes: 8 additions & 11 deletions multiqc/modules/vep/vep.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,14 @@ def __init__(self):
# Add version information
for sample, data in self.vep_data.items():
if "VEP run statistics" not in data:
print(data.keys())
continue

print(data["VEP run statistics"]["VEP version (API)"])
vep_version, api_version = data["VEP run statistics"]["VEP version (API)"].strip().split(" ")
api_version = api_version.replace("(", "").replace(")", "")
self.add_software_version(vep_version, sample)
# Only add API version if it's different to VEP version
if vep_version != api_version:
self.add_software_version(api_version, sample, "VEP API")
print()
# Filter to strip out ignored sample names
self.vep_data = self.ignore_samples(self.vep_data)

Expand Down Expand Up @@ -102,8 +99,8 @@ def parse_vep_html(self, f):
# The tables with the titles given below have common format inside the javascript section
titles = [
"Variant classes",
"Consequences \(most severe\)",
"Consequences \(all\)",
r"Consequences \(most severe\)",
r"Consequences \(all\)",
"Coding consequences",
"SIFT summary",
"PolyPhen summary",
Expand Down Expand Up @@ -228,7 +225,7 @@ def add_stats_table(self):
def bar_graph_variant_classes(self):
title = "Variant classes"
plot_data, plot_cats, plot_config = self._prep_bar_graph(title)
htmlid = re.sub("\W+", "_", title).lower()
htmlid = re.sub(r"\W+", "_", title).lower()
if len(plot_data) == 0:
return

Expand Down Expand Up @@ -266,7 +263,7 @@ def bar_graph_consequences(self):
def bar_graph_sift(self):
title = "SIFT summary"
plot_data, plot_cats, plot_config = self._prep_bar_graph(title)
htmlid = re.sub("\W+", "_", title).lower()
htmlid = re.sub(r"\W+", "_", title).lower()
if len(plot_data) == 0:
return

Expand Down Expand Up @@ -295,7 +292,7 @@ def bar_graph_sift(self):
def bar_graph_polyphen(self):
title = "PolyPhen summary"
plot_data, plot_cats, plot_config = self._prep_bar_graph(title)
htmlid = re.sub("\W+", "_", title).lower()
htmlid = re.sub(r"\W+", "_", title).lower()
if len(plot_data) == 0:
return

Expand Down Expand Up @@ -324,7 +321,7 @@ def bar_graph_polyphen(self):
def bar_graph_variants_by_chromosome(self):
title = "Variants by chromosome"
plot_data, plot_cats, plot_config = self._prep_bar_graph(title)
htmlid = re.sub("\W+", "_", title).lower()
htmlid = re.sub(r"\W+", "_", title).lower()
if len(plot_data) == 0:
return

Expand All @@ -346,7 +343,7 @@ def bar_graph_variants_by_chromosome(self):
def bar_graph_position_in_protein(self):
title = "Position in protein"
plot_data, plot_cats, plot_config = self._prep_bar_graph(title)
htmlid = re.sub("\W+", "_", title).lower()
htmlid = re.sub(r"\W+", "_", title).lower()
if len(plot_data) == 0:
return

Expand All @@ -370,7 +367,7 @@ def _prep_bar_graph(self, title):
if title in self.vep_data[s_name]:
plot_data[s_name] = self.vep_data[s_name][title]
plot_cats = dict()
htmlid = re.sub("\W+", "_", title).lower()
htmlid = re.sub(r"\W+", "_", title).lower()
plotid = "{}_plot".format(htmlid)
plot_config = {
"id": plotid,
Expand Down
2 changes: 1 addition & 1 deletion multiqc/utils/mqc_colour.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ def rgb_converter(x):

except Exception as e:
# Shouldn't crash all of MultiQC just for colours
logging.warning(f"{self.id + ': ' if self.id else ''}Error getting colour: {e}")
logger.warning(f"{self.id + ': ' if self.id else ''}Error getting colour: {e}")
return ""

def get_colours(self, name="GnBu"):
Expand Down
2 changes: 1 addition & 1 deletion multiqc/utils/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

logger = config.logger

# Treat defaultdict as normal dict for YAML output
# Treat defaultdict and OrderedDict as normal dicts for YAML output
yaml.add_representer(defaultdict, Representer.represent_dict)
yaml.add_representer(OrderedDict, Representer.represent_dict)

Expand Down

0 comments on commit 398cf1a

Please sign in to comment.