MultiQC · vladsavelyev · Oct 17, 2023 · Oct 13, 2023 · Oct 13, 2023 · Oct 13, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -30,6 +30,7 @@
 - **HiCPro**: fix parsing scientific notation in hicpro-ashic. Thanks @Just-Roma ([#2126](https://github.com/ewels/MultiQC/pull/2126))
 - **Picard**: MarkDuplicates: Fix parsing mixed strings/numbers, account for missing trailing `0` ([#2083](https://github.com/ewels/MultiQC/pull/2083), [#2094](https://github.com/ewels/MultiQC/pull/2094))
 - **WhatsHap**: Process truncated input with no ALL chromosome ([#2095](https://github.com/ewels/MultiQC/pull/2095))
+- **Cellranger**: Count submodule updated to parse Antibody Capture summary ([#2118](https://github.com/ewels/MultiQC/pull/2118))
 
 ## [MultiQC v1.16](https://github.com/ewels/MultiQC/releases/tag/v1.16) - 2023-09-22
 

diff --git a/multiqc/modules/cellranger/count.py b/multiqc/modules/cellranger/count.py
@@ -6,7 +6,7 @@
 from collections import OrderedDict
 
 from multiqc import config
-from multiqc.plots import linegraph, table
+from multiqc.plots import bargraph, linegraph, table
 
 from ._utils import *
 
@@ -19,18 +19,22 @@ class CellRangerCountMixin:
 
     def parse_count_html(self):
         self.cellrangercount_data = dict()
+        self.cellrangercount_antibody_data = dict()
         self.cellrangercount_general_data = dict()
         self.cellrangercount_warnings = dict()
         self.cellrangercount_plots_conf = {"bc": dict(), "genes": dict()}
         self.cellrangercount_plots_data = {"bc": dict(), "genes": dict()}
         self.count_general_data_headers = OrderedDict()
         self.count_data_headers = OrderedDict()
+        self.antibody_data_headers = OrderedDict()
         self.count_warnings_headers = OrderedDict()
 
         for f in self.find_log_files("cellranger/count_html", filehandles=True):
             self.parse_count_report(f)
 
         self.cellrangercount_data = self.ignore_samples(self.cellrangercount_data)
+        if self.cellrangercount_antibody_data:
+            self.cellrangercount_antibody_data = self.ignore_samples(self.cellrangercount_antibody_data)
         self.cellrangercount_general_data = self.ignore_samples(self.cellrangercount_general_data)
         self.cellrangercount_warnings = self.ignore_samples(self.cellrangercount_warnings)
         for k in self.cellrangercount_plots_data.keys():
@@ -71,66 +75,98 @@ def parse_count_html(self):
             ],
         )
 
+        if self.cellrangercount_antibody_data:
+            self.antibody_data_headers["reads"] = {
+                "rid": "antibody_data_reads",
+                "title": "{} Reads".format(config.read_count_prefix),
+                "description": "Number of reads ({})".format(config.read_count_desc),
+                "modify": lambda x: x * config.read_count_multiplier,
+            }
+            self.antibody_data_headers = set_hidden_cols(
+                self.antibody_data_headers,
+                ["Q30 bc", "Q30 UMI", "Q30 read", "saturation", "umi per cell", "reads in aggregate bc"],
+            )
+
         if len(self.cellrangercount_general_data) == 0:
             return 0
 
-        else:
-            self.general_stats_addcols(self.cellrangercount_general_data, self.count_general_data_headers)
+        self.general_stats_addcols(self.cellrangercount_general_data, self.count_general_data_headers)
 
-            # Write parsed report data to a file
-            self.write_data_file(self.cellrangercount_data, "multiqc_cellranger_count")
+        # Write parsed report data to a file
+        self.write_data_file(self.cellrangercount_data, "multiqc_cellranger_count")
+        if self.cellrangercount_antibody_data:
+            self.write_data_file(self.cellrangercount_antibody_data, "multiqc_cellranger_antibody_count")
+
+        # Add sections to the report
+        if len(self.cellrangercount_warnings) > 0:
+            self.add_section(
+                name="Count - Warnings",
+                anchor="cellranger-count-warnings",
+                description="Warnings encountered during the analysis",
+                plot=table.plot(self.cellrangercount_warnings, self.count_warnings_headers, {"namespace": "Count"}),
+            )
 
-            # Add sections to the report
-            if len(self.cellrangercount_warnings) > 0:
-                self.add_section(
-                    name="Count - Warnings",
-                    anchor="cellranger-count-warnings",
-                    description="Warnings encountered during the analysis",
-                    plot=table.plot(self.cellrangercount_warnings, self.count_warnings_headers, {"namespace": "Count"}),
-                )
+        self.add_section(
+            name="Count - Summary stats",
+            anchor="cellranger-count-stats",
+            description="Summary QC metrics from Cell Ranger count",
+            plot=table.plot(self.cellrangercount_data, self.count_data_headers, {"namespace": "Count"}),
+        )
 
+        if self.cellrangercount_antibody_data:
             self.add_section(
-                name="Count - Summary stats",
-                anchor="cellranger-count-stats",
+                name="Antibody - Summary stats",
+                anchor="cellranger-antibody-stats",
                 description="Summary QC metrics from Cell Ranger count",
-                plot=table.plot(self.cellrangercount_data, self.count_data_headers, {"namespace": "Count"}),
+                plot=table.plot(
+                    self.cellrangercount_antibody_data, self.antibody_data_headers, {"namespace": "Antibody"}
+                ),
             )
 
+        self.add_section(
+            name="Count - BC rank plot",
+            anchor="cellranger-count-bcrank-plot",
+            description=self.cellrangercount_plots_conf["bc"]["description"],
+            helptext=self.cellrangercount_plots_conf["bc"]["helptext"],
+            plot=linegraph.plot(self.cellrangercount_plots_data["bc"], self.cellrangercount_plots_conf["bc"]["config"]),
+        )
+
+        if "antibody_counts" in self.cellrangercount_plots_conf:
             self.add_section(
-                name="Count - BC rank plot",
-                anchor="cellranger-count-bcrank-plot",
-                description=self.cellrangercount_plots_conf["bc"]["description"],
-                helptext=self.cellrangercount_plots_conf["bc"]["helptext"],
-                plot=linegraph.plot(
-                    self.cellrangercount_plots_data["bc"], self.cellrangercount_plots_conf["bc"]["config"]
+                name="Antibody - Counts Distribution Bargraph",
+                anchor="cellranger-antibody-counts",
+                description=self.cellrangercount_plots_conf["antibody_counts"]["description"],
+                helptext=self.cellrangercount_plots_conf["antibody_counts"]["helptext"],
+                plot=bargraph.plot(
+                    self.cellrangercount_plots_data["antibody_counts"],
+                    self.cellrangercount_plots_conf["antibody_counts"]["keys"],
+                    self.cellrangercount_plots_conf["antibody_counts"]["config"],
                 ),
             )
 
+        self.add_section(
+            name="Count - Median genes",
+            anchor="cellranger-count-genes-plot",
+            description=self.cellrangercount_plots_conf["genes"]["description"],
+            helptext=self.cellrangercount_plots_conf["genes"]["helptext"],
+            plot=linegraph.plot(
+                self.cellrangercount_plots_data["genes"], self.cellrangercount_plots_conf["genes"]["config"]
+            ),
+        )
+
+        if "saturation" in self.cellrangercount_plots_data:
             self.add_section(
-                name="Count - Median genes",
-                anchor="cellranger-count-genes-plot",
-                description=self.cellrangercount_plots_conf["genes"]["description"],
-                helptext=self.cellrangercount_plots_conf["genes"]["helptext"],
+                name="Count - Saturation plot",
+                anchor="cellranger-count-saturation-plot",
+                description=self.cellrangercount_plots_conf["saturation"]["description"],
+                helptext=self.cellrangercount_plots_conf["saturation"]["helptext"],
                 plot=linegraph.plot(
-                    self.cellrangercount_plots_data["genes"], self.cellrangercount_plots_conf["genes"]["config"]
+                    self.cellrangercount_plots_data["saturation"],
+                    self.cellrangercount_plots_conf["saturation"]["config"],
                 ),
             )
 
-            try:
-                self.add_section(
-                    name="Count - Saturation plot",
-                    anchor="cellranger-count-saturation-plot",
-                    description=self.cellrangercount_plots_conf["saturation"]["description"],
-                    helptext=self.cellrangercount_plots_conf["saturation"]["helptext"],
-                    plot=linegraph.plot(
-                        self.cellrangercount_plots_data["saturation"],
-                        self.cellrangercount_plots_conf["saturation"]["config"],
-                    ),
-                )
-            except KeyError:
-                pass
-
-            return len(self.cellrangercount_general_data)
+        return len(self.cellrangercount_general_data)
 
     def parse_count_report(self, f):
         """Go through the html report of cell ranger and extract the data in a dicts"""
@@ -202,7 +238,6 @@ def parse_count_report(self, f):
         )
 
         # Store full data from cell ranger count report
-        data = dict()
         data_rows = (
             summary["summary_tab"]["sequencing"]["table"]["rows"]
             + summary["summary_tab"]["cells"]["table"]["rows"]
@@ -242,14 +277,16 @@ def parse_count_report(self, f):
             "median umi/cell": "YlGn",
             "saturation": "YlOrRd",
         }
-        data, self.count_data_headers = update_dict(
+        table, self.count_data_headers = update_dict(
             data_general_stats,
             self.count_data_headers,
             data_rows,
             col_dict,
             colours,
             "Count",
         )
+        if not table:
+            return None
 
         # Extract warnings if any
         warnings = dict()
@@ -315,23 +352,112 @@ def parse_count_report(self, f):
             "bc": parse_bcknee_data(summary["summary_tab"]["cells"]["barcode_knee_plot"]["data"], s_name),
             "genes": {s_name: transform_data(summary["analysis_tab"]["median_gene_plot"]["plot"]["data"][0])},
         }
-        try:
+        if "seq_saturation_plot" in summary["analysis_tab"]:
             plots_data["saturation"] = {
                 s_name: transform_data(summary["analysis_tab"]["seq_saturation_plot"]["plot"]["data"][0])
             }
-        except KeyError:
-            pass
 
-        if len(data) > 0:
-            if s_name in self.cellrangercount_general_data:
-                log.debug("Duplicate sample name found in {}! Overwriting: {}".format(f["fn"], s_name))
-            self.add_data_source(f, s_name, module="cellranger", section="count")
-            self.cellrangercount_data[s_name] = data
-            self.cellrangercount_general_data[s_name] = data_general_stats
-            if len(warnings) > 0:
-                self.cellrangercount_warnings[s_name] = warnings
-            self.cellrangercount_plots_conf = plots
-            for k in plots_data.keys():
-                if k not in self.cellrangercount_plots_data.keys():
-                    self.cellrangercount_plots_data[k] = dict()
-                self.cellrangercount_plots_data[k].update(plots_data[k])
+        # Store full data for ANTIBODY capture
+        antibody_data = dict()
+        if "ANTIBODY_sequencing" in summary["summary_tab"]:
+            data_rows = (
+                summary["summary_tab"]["ANTIBODY_sequencing"]["table"]["rows"]
+                + summary["summary_tab"]["ANTIBODY_application"]["table"]["rows"]
+            )
+            col_dict = {
+                "Number of Reads": "reads",
+                "Valid Barcodes": "valid bc",
+                "Valid UMIs": "valid umi",
+                "Sequencing Saturation": "saturation",
+                "Q30 Bases in Barcode": "Q30 bc",
+                "Q30 Bases in Antibody Read": "Q30 read",
+                "Q30 Bases in UMI": "Q30 UMI",
+                "Fraction Antibody Reads": "antibody reads",
+                "Fraction Antibody Reads Usable": "antibody reads usable",
+                "Antibody Reads Usable per Cell": "antibody reads usable/cell",
+                "Fraction Antibody Reads in Aggregate Barcodes": "reads in aggregate bc",
+                "Fraction Unrecognized Antibody": "unrecognized antibody",
+                "Antibody Reads in Cells": "antibody reads in cells",
+                "Median UMIs per Cell (summed over all recognized antibody barcodes)": "umi per cell",
+            }
+            colours = {
+                "reads": "YlGn",
+                "antibody reads": "RdPu",
+                "reads in cells": "Blues",
+                "reads usable": "Greens",
+                "reads usable per cell": "Purples",
+                "reads in aggregate bc": "PuBuGn",
+                "valid bc": "Spectral",
+                "valid umi": "RdYlGn",
+                "Q30 bc": "YlGn",
+                "saturation": "YlOrRd",
+            }
+            antibody_data, self.antibody_data_headers = update_dict(
+                antibody_data,
+                self.antibody_data_headers,
+                data_rows,
+                col_dict,
+                colours,
+                "Antibody",
+            )
+
+            # Extract labels and values for the bargraph data
+            combined_data = {}
+            for label, value in zip(
+                summary["antibody_tab"]["antibody_treemap_plot"]["plot"]["data"][0]["labels"],
+                summary["antibody_tab"]["antibody_treemap_plot"]["plot"]["data"][0]["values"],
+            ):
+                label_match = re.search(r"<b>(.*?)\s+\((.*?)%\)</b>", label)
+                if label_match:
+                    label_value = label_match.group(1)
+                    value_ = round(value * 100, 2)
+                    combined_data[label_value] = value_
+
+            # Extract labels and number of cells for labelling the bargraph
+            combined_label = {}
+            for label, cells in zip(
+                summary["antibody_tab"]["antibody_treemap_plot"]["plot"]["data"][0]["labels"],
+                summary["antibody_tab"]["antibody_treemap_plot"]["plot"]["data"][0]["text"],
+            ):
+                label_match = re.search(r"<b>(.*?)\s+\((.*?)%\)</b>", label)
+                if label_match:
+                    label_value = label_match.group(1)
+                    combined_label[label_value] = label_value + ": " + cells
+
+            # Use the label from `combined_label` for the plot
+            keys = dict()
+            for key, value in combined_label.items():
+                keys[key] = {"name": value}
+
+            plots["antibody_counts"] = {
+                "config": {
+                    "id": "mqc_cellranger_antibody_counts",
+                    "title": "Cell Ranger: Distribution of Antibody Counts",
+                    "ylab": "% Total UMI",
+                    "ymax": 100,
+                    "cpswitch": False,
+                    "use_legend": False,
+                    "tt_decimals": 2,
+                    "tt_suffix": "%",
+                    "tt_percentages": False,
+                },
+                "keys": keys,
+                "description": "Antibody Counts Distribution Plot",
+                "helptext": "Relative composition of antibody counts for features with at least 1 UMI. Box size represents fraction of total UMIs from cell barcodes that are derived from this antibody. Hover over a box to view more information on a particular antibody, including number of associated barcodes.",
+            }
+            plots_data["antibody_counts"] = {s_name: combined_data}
+
+        if s_name in self.cellrangercount_general_data:
+            log.debug("Duplicate sample name found in {}! Overwriting: {}".format(f["fn"], s_name))
+        self.add_data_source(f, s_name, module="cellranger", section="count")
+        self.cellrangercount_data[s_name] = table
+        if "antibody_tab" in summary:
+            self.cellrangercount_antibody_data[s_name] = antibody_data
+        self.cellrangercount_general_data[s_name] = data_general_stats
+        if len(warnings) > 0:
+            self.cellrangercount_warnings[s_name] = warnings
+        self.cellrangercount_plots_conf.update(plots)
+        for k in plots_data.keys():
+            if k not in self.cellrangercount_plots_data.keys():
+                self.cellrangercount_plots_data[k] = dict()
+            self.cellrangercount_plots_data[k].update(plots_data[k])