Skip to content

Commit

Permalink
Merge pull request #1021 from massiddamt/master
Browse files Browse the repository at this point in the history
adding BBMap qchist
  • Loading branch information
ewels committed Jan 31, 2022
2 parents 3b3a8bb + 72ba1ea commit 057479f
Show file tree
Hide file tree
Showing 7 changed files with 94 additions and 5 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@

### Module updates

- **bbmap**
- **BBMap**
- Correctly handle adapter stats files with additional columns ([#1556](https://github.com/ewels/MultiQC/issues/1556))
- Added handling for `qchist` output ([#1021](https://github.com/ewels/MultiQC/issues/1021))
- **bclconvert**
- Handle change in output format in v3.9.3 with new `Quality_Metrics.csv` file ([#1563](https://github.com/ewels/MultiQC/issues/1563))
- **bcftools**
Expand Down
25 changes: 24 additions & 1 deletion multiqc/modules/bbmap/bbmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from __future__ import print_function
import logging
from collections import OrderedDict

from multiqc.utils import config
from multiqc.plots import table
from multiqc.modules.base_module import BaseMultiqcModule

Expand Down Expand Up @@ -73,6 +73,29 @@ def __init__(self):
plot=self.make_basic_table(file_type),
)

# Special case - qchist metric in General Stats
if "qchist" in self.mod_data:
    data = {}
    for s_name in self.mod_data["qchist"]:
        # The accumulator must be rebuilt for every sample: sharing one list
        # across samples makes each sample's value include the fractions of
        # all samples processed before it (and can push it above 100 %).
        fraction_gt_q30 = [
            d[1]  # second column of each row; presumably `fraction1` - confirm against the parser
            for qual, d in self.mod_data["qchist"][s_name]["data"].items()
            if int(qual) >= 30
        ]
        data[s_name] = {"pct_q30": sum(fraction_gt_q30) * 100.0}

    headers = {
        "pct_q30": {
            "title": "% Q30 bases",
            "description": "BBMap qchist - Percentage of bases with phred quality score >= 30",
            "suffix": " %",
            "scale": "RdYlGn",
            "format": "{:,.2f}",
            "min": 0,
            "max": 100,
        }
    }
    self.general_stats_addcols(data, headers)

def parse_logs(self, file_type, root, s_name, fn, f, **kw):

if self.is_ignore_sample(s_name):
Expand Down
20 changes: 20 additions & 0 deletions multiqc/modules/bbmap/bbmap_filetypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from .plot_indelhist import plot_indelhist
from .plot_mhist import plot_mhist
from .plot_qahist import plot_qahist
from .plot_qchist import plot_qchist
from .plot_qhist import plot_qhist


Expand All @@ -32,6 +33,7 @@ def __getitem__(self, keys):
"indelhist",
"mhist",
"qahist",
"qchist",
"qhist",
"aqhist",
"ehist",
Expand Down Expand Up @@ -325,6 +327,24 @@ def __getitem__(self, keys):
"plot_func": plot_qahist,
"plot_params": {},
},
"qchist": {
"title": "Count of bases with each quality value",
"descr": "Histogram of base qualities (`qchist`). "
"Plot shows the number of bases at each quality score. Zero counts are shown as `0.1` due to log axis.",
"help_text": "",
"cols": odict["Quality":int, "count1":int, "fraction1":float],
"plot_func": plot_qchist,
"plot_params": {
"xPlotBands": [
{"from": 30, "to": 100, "color": "#c3e6c3"},
{"from": 20, "to": 30, "color": "#e6dcc3"},
{"from": 0, "to": 20, "color": "#e6c3c3"},
],
"yLog": True,
"xlab": "Phred Score",
"ylab": "Counts",
},
},
"qhist": {
"title": "Sequence Quality Histograms",
"descr": "Quality histogram by position (`qhist`). "
Expand Down
41 changes: 41 additions & 0 deletions multiqc/modules/bbmap/plot_qchist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from itertools import chain

from multiqc.plots import linegraph


def plot_qchist(samples, file_type, **plot_args):
    """Build the line graph for BBMap 'qchist' histogram data.

    `samples` maps each sample name to a dict whose "data" entry maps an
    x value to a row; the first element of each row is used as the count.
    (Typically this is bbmap.MultiqcModule.mod_data[file_type].)

    `plot_args` must provide "plot_title" and "plot_params"; the latter is
    merged over the defaults computed here.

    Returns whatever `linegraph.plot` returns.
    """

    # Total of all counts over every sample and every x value
    total_count = 0
    for s_name in samples:
        for row in samples[s_name]["data"].values():
            total_count += int(row[0])

    # Walk the pooled (x, row) pairs in sorted order and stop once 99.9 % of
    # the total count has been consumed; x values beyond that are not plotted.
    remaining = total_count * 0.999
    seen_x = set()
    pooled = sorted(chain.from_iterable(samples[s_name]["data"].items() for s_name in samples))
    for x_val, row in pooled:
        seen_x.add(x_val)
        remaining -= row[0]
        if remaining < 0:
            xmax = x_val
            break
    else:
        xmax = max(seen_x)

    data = {}
    for s_name in samples:
        sample_data = samples[s_name]["data"]
        series = {}
        for x_val in seen_x:
            count = sample_data[x_val][0] if x_val in sample_data else 0
            # Add a count of 0.1 to zero counts, to avoid broken series in log axis
            series[x_val] = count + 0.1 if count == 0 else count
        data[s_name] = series

    plot_params = {
        "id": "bbmap-" + file_type + "_plot",
        "title": "BBTools: " + plot_args["plot_title"],
        "xmax": xmax,
    }
    plot_params.update(plot_args["plot_params"])

    return linegraph.plot(data, plot_params)
1 change: 1 addition & 0 deletions multiqc/utils/config_defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ fn_clean_exts:
- ".lhist"
- ".mhist"
- ".qahist"
- ".qchist"
- ".qhist"
- ".rpkm"
- ".selfSM"
Expand Down
3 changes: 3 additions & 0 deletions multiqc/utils/search_patterns.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ bbmap/mhist:
bbmap/qahist:
contents: "#Deviation"
num_lines: 1
bbmap/qchist:
contents_re: '#Quality count1 fraction1\n'
num_lines: 1
bbmap/qhist:
contents: "#BaseNum Read1_linear Read1_log Read1_measured Read2_linear Read2_log Read2_measured"
num_lines: 1
Expand Down
6 changes: 3 additions & 3 deletions multiqc/utils/util_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@ def robust_rmtree(path, logger=None, max_retries=10):
except OSError:
if logger:
logger.info("Unable to remove path: {}".format(path))
logger.info("Retrying after {} seconds".format(i ** 2))
logger.info("Retrying after {} seconds".format(i**2))
else:
print("Unable to remove path: {}".format(path), file=sys.stderr)
print("Retrying after {} seconds".format(i ** 2), file=sys.stderr)
time.sleep(i ** 2)
print("Retrying after {} seconds".format(i**2), file=sys.stderr)
time.sleep(i**2)

# Final attempt, pass any Exceptions up to caller.
shutil.rmtree(path)
Expand Down

0 comments on commit 057479f

Please sign in to comment.