MultiQC · vladsavelyev · Dec 17, 2023 · Jul 7, 2021 · Jul 7, 2021 · Jul 7, 2021
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -30,6 +30,7 @@
 - **Kraken**: fix `UnboundLocalError` ([#2230](https://github.com/ewels/MultiQC/pull/2230))
 - **kraken**: fixed column keys in genstats ([#2205](https://github.com/ewels/MultiQC/pull/2205))
 - **QualiMap**: BamQC: fix for global-only stats ([#2207](https://github.com/ewels/MultiQC/pull/2207))
+- **Salmon**: add `library_types`, `compatible_fragment_ratio`, `strand_mapping_bias` to the general stats table ([#1485](https://github.com/ewels/MultiQC/pull/1485))
 
 ## [MultiQC v1.18](https://github.com/ewels/MultiQC/releases/tag/v1.18) - 2023-11-17
 

diff --git a/multiqc/modules/salmon/salmon.py b/multiqc/modules/salmon/salmon.py
@@ -1,6 +1,5 @@
 """ MultiQC module to parse output from Salmon """
 
-
 import json
 import logging
 import os
@@ -27,10 +26,11 @@ def __init__(self):
         self.salmon_meta = dict()
         for f in self.find_log_files("salmon/meta"):
             # Get the s_name from the parent directory
-            s_name = os.path.basename(os.path.dirname(f["root"]))
-            s_name = self.clean_s_name(s_name, f)
-            self.salmon_meta[s_name] = json.loads(f["f"])
-            self.add_software_version(self.salmon_meta[s_name]["salmon_version"], s_name)
+            if os.path.basename(f["root"]) in ["aux_info", "aux"]:
+                s_name = os.path.basename(os.path.dirname(f["root"]))
+                s_name = self.clean_s_name(s_name, f)
+                self.salmon_meta[s_name] = json.loads(f["f"])
+                self.add_software_version(self.salmon_meta[s_name]["salmon_version"], s_name)
 
         # Parse Fragment Length Distribution logs
         self.salmon_fld = dict()
@@ -48,41 +48,84 @@ def __init__(self):
                     self.add_data_source(f, s_name)
                     self.salmon_fld[s_name] = parsed
 
+        # Parse library format counts reports (lib_format_counts.json), which are expected to be JSON
+        self.salmon_lfc = dict()
+        for f in self.find_log_files("salmon/lfc"):
+            s_name = os.path.basename(f["root"])  # sample name is the name of the folder holding the lfc file
+            s_name = self.clean_s_name(s_name, f)
+            self.salmon_lfc[s_name] = json.loads(f["f"])
+
         # Filter to strip out ignored sample names
         self.salmon_meta = self.ignore_samples(self.salmon_meta)
         self.salmon_fld = self.ignore_samples(self.salmon_fld)
+        self.salmon_lfc = self.ignore_samples(self.salmon_lfc)
 
-        if len(self.salmon_meta) == 0 and len(self.salmon_fld) == 0:
+        if len(self.salmon_meta) == 0 and len(self.salmon_fld) == 0 and len(self.salmon_lfc) == 0:
             raise ModuleNoSamplesFound
 
         if len(self.salmon_meta) > 0:
             log.info(f"Found {len(self.salmon_meta)} meta reports")
             self.write_data_file(self.salmon_meta, "multiqc_salmon")
         if len(self.salmon_fld) > 0:
             log.info(f"Found {len(self.salmon_fld)} fragment length distributions")
+        if len(self.salmon_lfc) > 0:
+            log.info(f"Found {len(self.salmon_lfc)} library format counts reports")
 
-        # Add alignment rate to the general stats table
-        headers = {
-            "percent_mapped": {
-                "title": "% Aligned",
-                "description": "% Mapped reads",
-                "max": 100,
-                "min": 0,
-                "suffix": "%",
-                "scale": "YlGn",
-            },
-            "num_mapped": {
-                "title": "M Aligned",
-                "description": "Mapped reads (millions)",
-                "min": 0,
-                "scale": "PuRd",
-                "modify": lambda x: float(x) / 1000000,
-                "shared_key": "read_count",
-            },
-        }
-        self.general_stats_addcols(self.salmon_meta, headers)
+        if self.salmon_meta:
+            # Add alignment rate and library types to the general stats table.
+            # First join each sample's library types into a single comma-separated string:
+            for d in self.salmon_meta.values():
+                if "library_types" in d:
+                    d["library_types"] = ", ".join(d["library_types"])
 
-        if len(self.salmon_fld) > 0:
+            headers = {
+                "percent_mapped": {
+                    "title": "% Aligned",
+                    "description": "% Mapped reads",
+                    "max": 100,
+                    "min": 0,
+                    "suffix": "%",
+                    "scale": "YlGn",
+                },
+                "num_mapped": {
+                    "title": "M Aligned",
+                    "description": "Mapped reads (millions)",
+                    "min": 0,
+                    "scale": "PuRd",
+                    "modify": lambda x: float(x) / 1000000,
+                    "shared_key": "read_count",
+                },
+                "library_types": {
+                    "title": "Library types",
+                    "description": "Library types",
+                    "scale": False,
+                    # Hide if all samples have the same value
+                    "hidden": len(set(d.get("library_types") for d in self.salmon_meta.values())) == 1,
+                },
+            }
+            self.general_stats_addcols(self.salmon_meta, headers)
+
+        if self.salmon_lfc:
+            # Compatible fragment ratio and strand mapping bias columns
+            lfc_headers = {
+                "compatible_fragment_ratio": {
+                    "title": "CFR",
+                    "description": "Compatible fragment ratio",
+                    "min": 0.0,
+                    "max": 1.0,
+                    "scale": "YlGn",
+                },
+                "strand_mapping_bias": {
+                    "title": "M Bias",
+                    "description": "Strand mapping bias",
+                    "scale": "BuGn",
+                    "max": 1.0,
+                },
+            }
+            # Add the library format counts columns to the general stats table
+            self.general_stats_addcols(self.salmon_lfc, lfc_headers)
+
+        if self.salmon_fld:
             # Fragment length distribution plot
             pconfig = {
                 "smooth_points": 500,

diff --git a/multiqc/utils/search_patterns.yaml b/multiqc/utils/search_patterns.yaml
@@ -733,6 +733,10 @@ rseqc/tin:
 salmon/meta:
   fn: "meta_info.json"
   contents: "salmon_version"
+  num_lines: 10
+  max_filesize: 50000
+salmon/lfc:
+  fn: "lib_format_counts.json"
 salmon/fld:
   fn: "flenDist.txt"
 sambamba/markdup: