Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New module: ngs-bits #2231

Merged
merged 37 commits into from
Jul 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
7d72ddf
implement ngs-bits as one module, readqc and mappingqc supported
jakobmatthes Sep 17, 2021
4db4208
CHANGELOG and README
jakobmatthes Sep 17, 2021
e02d4e7
prettier
jakobmatthes Sep 17, 2021
982f924
Merge remote-tracking branch 'upstream/master' into module_ngsbits
jakobmatthes Oct 13, 2021
0de81c3
Merge remote-tracking branch 'upstream/master' into module_ngsbits
jakobmatthes Nov 17, 2021
ff27693
add publication reference
jakobmatthes Nov 21, 2021
251dc70
Merge branch 'master' into module_ngsbits
jakobmatthes Mar 8, 2022
560e29d
Merge branch 'ewels:master' into module_ngsbits
caspargross Dec 13, 2022
6509bf0
Merge remote-tracking branch 'upstream/master' into module_ngsbits
jakobmatthes Feb 17, 2023
076477d
fix imports
jakobmatthes Feb 23, 2023
258f014
Merge branch 'module_ngsbits' of https://github.com/imgag/multiqc int…
caspargross Sep 18, 2023
ca775c4
Fix missing import
caspargross Sep 18, 2023
eadb3ca
Clean changelog
vladsavelyev Dec 14, 2023
414b97b
[automated] Update CHANGELOG.md
multiqc-bot Dec 14, 2023
77ace51
Slight refactor and linting fix
vladsavelyev Dec 14, 2023
e3ac957
Merge branch 'master' into ngsbits-2
vladsavelyev Dec 14, 2023
26a93c6
Add add_software_version calls and revert `if len(readqc)` check
vladsavelyev Dec 14, 2023
f5da41c
Merge branch 'master' into ngsbits-2
vladsavelyev Dec 15, 2023
ba0a46a
[automated] Update CHANGELOG.md
multiqc-bot Dec 15, 2023
8f47c9b
Merge branch 'main' into ngsbits-2
vladsavelyev Feb 6, 2024
c421367
[automated] Fix code linting
multiqc-bot Feb 6, 2024
5dd9223
Fix module order
vladsavelyev Feb 6, 2024
32d768d
Merge branch 'main' into ngsbits-2
vladsavelyev Feb 12, 2024
dca0ae6
[automated] Fix code linting
multiqc-bot Feb 12, 2024
d97bf79
Merge branch 'main' into ngsbits-2
vladsavelyev Feb 19, 2024
9601f35
Fix changelog
vladsavelyev Feb 19, 2024
64515bb
Fix changelog
vladsavelyev Feb 19, 2024
91de67a
Missing table titles
vladsavelyev Feb 19, 2024
fda5c83
Changelog CI: remove entry if [no changelog] was appended to the PR t…
vladsavelyev Feb 29, 2024
0318a9c
Merge branch 'main' into ngsbits-2
vladsavelyev Feb 29, 2024
b693a0a
Merge branch 'main' into ngsbits-2
vladsavelyev Feb 29, 2024
4671263
Merge branch 'main' into ngsbits-2
vladsavelyev Jul 20, 2024
fd69649
Abstract config.get_cov_thresholds function, refactor QualiMap
vladsavelyev Jul 20, 2024
13cb10a
Merge branch 'main' into ngsbits-2
vladsavelyev Jul 20, 2024
172f86b
Clean up, add gen stats, use `config.get_cov_thresholds`
vladsavelyev Jul 20, 2024
7c2e7fc
Add shared keys
vladsavelyev Jul 20, 2024
3236fa7
Merge branch 'main' into ngsbits-2
vladsavelyev Jul 21, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions multiqc/config_defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ fn_clean_exts:
- "ccs"
- "_NanoStats"
- ".cutadapt"
- ".qcML"
- ".mosdepth"
- "_gopeaks"
- ".readCounts"
Expand Down Expand Up @@ -409,6 +410,7 @@ module_order:
- mirtop
- sambamba
- glimpse
- ngsbits
# Post-alignment processing
- gopeaks
- homer
Expand Down
3 changes: 3 additions & 0 deletions multiqc/modules/ngsbits/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Re-export the module class at package level and declare it as the
# package's only public name.
from multiqc.modules.ngsbits.ngsbits import MultiqcModule

__all__ = ["MultiqcModule"]
196 changes: 196 additions & 0 deletions multiqc/modules/ngsbits/mappingqc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
import logging
from copy import copy
from typing import Dict

from multiqc import config, BaseMultiqcModule
from multiqc.modules.ngsbits.utils import parse_qcml_by
from multiqc.plots import table


log = logging.getLogger(__name__)


def _add_header(headers: Dict, keys: Dict, name: str, title: str, **settings) -> bool:
    """Register a table column for metric ``name`` if a report declared it.

    Args:
        headers: Column configuration being built; mutated in place.
        keys: Metric name -> tuple whose first element is the description.
        name: Metric name, used both as data key and as column key.
        title: Column title shown in the table.
        **settings: Further column options (``suffix``, ``scale``, ``max``...).

    Returns:
        True if the column was added, False if ``name`` is not in ``keys``
        (``headers`` is left untouched in that case).
    """
    if name not in keys:
        return False
    headers[name] = {
        "title": title,
        "description": keys[name][0],
        "format": "{:,.2f}",
        **settings,
    }
    return True


def parse_reports(module: BaseMultiqcModule) -> int:
    """Find ngs-bits MappingQC reports and parse their data.

    Parses every file matched by the ``ngsbits/mappingqc`` search pattern,
    writes the collected values to a data file, adds a "MappingQC" section
    with an overview table and contributes columns to General Statistics.

    Columns are only created for metrics that at least one parsed report
    declared, so reports produced without duplicate marking, paired-end
    reads, a target file or SNV data do not raise ``KeyError``.

    Args:
        module: The MultiQC module instance the results are attached to.

    Returns:
        Number of samples kept after ``module.ignore_samples``; 0 if none.
    """

    mappingqc: Dict = dict()
    mappingqc_keys: Dict = dict()

    for f in module.find_log_files("ngsbits/mappingqc"):
        values, params = parse_qcml_by(f["f"], "qualityParameter")

        if len(values) > 0:
            if f["s_name"] in mappingqc:
                log.debug(f'Duplicate sample name found! Overwriting: {f["s_name"]}')
            module.add_data_source(f, section="mappingqc")
            mappingqc[f["s_name"]] = values
            mappingqc_keys.update(params)

    # Filter to strip out ignored sample names
    mappingqc = module.ignore_samples(mappingqc)
    if len(mappingqc) == 0:
        return 0

    # Write to file (done before the unit conversion below, so the data file
    # keeps the metric names exactly as parsed)
    module.write_data_file(mappingqc, "multiqc_ngsbits_mappingqc")

    # Superfluous function call to confirm that it is used in this module
    # Replace None with actual version if it is available
    module.add_software_version(None)

    # Convert numbers given in megabases to bases
    mappingqc_keys["bases usable"] = ("Bases usable in total.", "")
    for kv in mappingqc.values():
        # A sample without this metric is left as is instead of raising
        if "bases usable (MB)" in kv:
            kv["bases usable"] = kv.pop("bases usable (MB)") * 1e6

    # Insertion order of `headers` defines the column order of the table
    headers: Dict = dict()
    _add_header(headers, mappingqc_keys, "bases usable", "Usable", scale="Blues", shared_key="base_count")
    _add_header(headers, mappingqc_keys, "mapped read %", "Mapped", suffix="%", max=100, scale="Reds")
    # expected even without target file
    _add_header(headers, mappingqc_keys, "on-target read %", "On-target", suffix="%", max=100, scale="Purples")

    # only available if duplicates marked
    _add_header(headers, mappingqc_keys, "duplicate read %", "Duplicates", suffix="%", max=100, scale="YlOrRd")

    # only available if paired-end; insert size is only considered when the
    # properly-paired metric is present
    if _add_header(
        headers, mappingqc_keys, "properly-paired read %", "Properly paired", suffix="%", max=100, scale="GnBu"
    ):
        _add_header(headers, mappingqc_keys, "insert size", "Insert size", suffix="bp", scale="RdYlGn")

    # only available if target file provided
    all_covs = (10, 20, 30, 50, 100, 200, 500)
    table_covs = (30, 100, 500)
    if _add_header(headers, mappingqc_keys, "target region read depth", "Target depth", suffix="x"):
        for x in all_covs:
            added = _add_header(
                headers,
                mappingqc_keys,
                f"target region {x:d}x %",
                f"Target {x:d}x",
                suffix="%",
                max=100,
                scale="YlGn",
                hidden=x not in table_covs,
            )
            if not added:
                # Stop at the first missing threshold
                break

    _add_header(headers, mappingqc_keys, "trimmed base %", "Trimmed", suffix="%", floor=1, scale="PuBu")
    _add_header(
        headers, mappingqc_keys, "clipped base %", "Clipped", suffix="%", floor=1, scale="PuRd", hidden=True
    )

    # only available if human
    _add_header(
        headers,
        mappingqc_keys,
        "SNV allele frequency deviation",
        "SNV AF deviation",
        suffix="",
        floor=0,
        ceiling=10,
        minRange=10,
        scale="Greys",
        hidden=True,
    )

    # overview table with all values
    module.add_section(
        name="MappingQC",
        anchor="ngsbits-mappingqc",
        description='<a href="https://github.com/imgag/ngs-bits/blob/master/doc/tools/MappingQC.md" target="_blank">MappingQC</a>'
        " calculates QC metrics on mapped NGS reads.",
        plot=table.plot(
            mappingqc,
            headers,
            pconfig={
                "namespace": "MappingQC",
                "id": "ngsbits_mappingqc_table",
                "title": "ngs-bits: MappingQC Summary",
            },
        ),
    )

    # General Statistics: reuse the table columns that exist. Optional
    # columns (duplicates, target coverage) may be absent and are skipped
    # rather than indexed unconditionally.
    gen_stats_keys = ["bases usable", "mapped read %", "on-target read %", "duplicate read %"]
    gen_stats_keys.extend(f"target region {x:d}x %" for x in table_covs)
    shown_by_default = {"bases usable", "target region 30x %"}

    gen_stats_headers: Dict = dict()
    for k in gen_stats_keys:
        if k in headers:
            # Shallow copy so toggling "hidden" does not alter the section table
            gen_stats_headers[k] = copy(headers[k])
            gen_stats_headers[k]["hidden"] = k not in shown_by_default

    module.general_stats_addcols(
        mappingqc,
        gen_stats_headers,
        namespace="MappingQC",
    )

    return len(mappingqc)
39 changes: 39 additions & 0 deletions multiqc/modules/ngsbits/ngsbits.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import logging

from multiqc.base_module import BaseMultiqcModule
from multiqc.modules.ngsbits.mappingqc import parse_reports as mappingqc_parse_reports
from multiqc.modules.ngsbits.readqc import parse_reports as readqc_parse_reports


log = logging.getLogger(__name__)


class MultiqcModule(BaseMultiqcModule):
    """
    The ngs-bits module parses XML output generated for several tools in the ngs-bits collection:
    * [ReadQC](https://github.com/imgag/ngs-bits/blob/master/doc/tools/ReadQC.md) for statistics on FASTQ files,
    * [MappingQC](https://github.com/imgag/ngs-bits/blob/master/doc/tools/MappingQC.md) for statistics on BAM files
    """

    def __init__(self):
        """Set up the module and run the MappingQC and ReadQC parsers.

        Raises:
            UserWarning: if neither parser reported any sample.
        """
        super().__init__(
            name="ngs-bits",
            anchor="ngsbits",
            href="https://github.com/imgag/ngs-bits",
            info="Calculating statistics from FASTQ, BAM, and VCF",
            doi="10.1093/bioinformatics/btx032",
        )

        # Submodule parsers, run in this order; each returns its sample count
        submodules = (
            ("MappingQC", mappingqc_parse_reports),
            ("ReadQC", readqc_parse_reports),
        )

        total_found = 0
        for tool_name, parse in submodules:
            found = parse(self)
            if found > 0:
                log.info(f"Found {found} {tool_name} reports")
            total_found += found

        # Exit if we didn't find anything
        if total_found == 0:
            raise UserWarning
Loading
Loading