-
Notifications
You must be signed in to change notification settings - Fork 583
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Qualimap: address NBSP as thousand separators (#2282)
* Qualimap: address NBSP as thousand separators in rnaseqqc
* [automated] Update CHANGELOG.md
* Qualimap: address NBSP in BamQC as well
* Fix changelog
* Fix changelog
* Determine decimal format once per file. Reuse function between BamQC and rnaseqqc submodules. More efficient parsing, only call re once per line
* Handle error of mixed formats
* Update multiqc/modules/qualimap/__init__.py

Co-authored-by: Phil Ewels <phil.ewels@seqera.io>

---------

Co-authored-by: MultiQC Bot <multiqc-bot@seqera.io>
Co-authored-by: Phil Ewels <phil.ewels@seqera.io>
- Loading branch information
1 parent
a33c6eb
commit 41c0228
Showing
4 changed files
with
190 additions
and
86 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,94 @@ | ||
import logging | ||
from typing import Dict, Union | ||
|
||
from .qualimap import MultiqcModule | ||
|
||
__all__ = ["MultiqcModule"] | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
|
||
class _AmbiguousSeparatorError(ValueError):
    """Raised internally when "," and "." cannot be told apart for a file."""


def _separator_in_use(
    preparsed_d: Dict[str, str],
    metrics: Dict[str, str],
    fpath: str,
) -> Union[str, None]:
    """
    Return the one separator character ("," or ".") that occurs in the raw
    values of the given metrics, or None if none of those values contains one.

    Logs an error and raises _AmbiguousSeparatorError when a single value
    contains both characters, or when different values use different ones.
    """
    found = None
    for k in metrics:
        if k not in preparsed_d:
            continue
        val = preparsed_d[k]
        has_comma = "," in val
        has_dot = "." in val
        if has_comma and has_dot:
            log.error(
                f"Couldn't determine decimal separator for file {fpath}, as both . and , are "
                f"found in a rational value: {val}"
            )
            raise _AmbiguousSeparatorError(fpath)
        if not (has_comma or has_dot):
            continue
        sep = "," if has_comma else "."
        if found is not None and sep != found:
            log.error(
                f"Couldn't determine decimal separator for file {fpath}, as differently formatted "
                f"rational values are found"
            )
            raise _AmbiguousSeparatorError(fpath)
        found = sep
    return found


def parse_numerals(
    preparsed_d: Dict[str, str],
    float_metrics: Dict[str, str],
    int_metrics: Dict[str, str],
    rate_metrics: Dict[str, str],
    fpath: str,
) -> Dict[str, Union[int, float, str]]:
    """
    Take pre-parsed Qualimap report (keys to string values), and properly parse
    numeral values, taking regional formats into account.

    Each ``*_metrics`` argument maps a key of ``preparsed_d`` to the key under
    which the parsed value is stored in the result. Keys of ``preparsed_d``
    that appear in none of the three mappings are dropped. ``fpath`` is only
    used in log messages.

    The decimal separator is determined once per file: first from the rate
    metrics (a "," or "." there is taken to be the decimal separator), and if
    none of them contains a separator, from the int metrics (a "," or "."
    there is taken to be the thousands separator, so the decimal separator is
    the other one). Whitespace inside a value, including no-break spaces used
    as thousands separators, is removed before conversion.

    Returns an empty dict if the separators in the file are contradictory.
    Raises ValueError if a value still cannot be converted with int()/float().
    """
    # None means "unknown"; in that case values are parsed as if "." were the
    # decimal separator, exactly like the decimalcomma=False case.
    decimalcomma = None
    try:
        decimal_sep = _separator_in_use(preparsed_d, rate_metrics, fpath)
        if decimal_sep is not None:
            decimalcomma = decimal_sep == ","
        else:
            # All expected float numbers are integer, so attempt to instead
            # determine the thousands separator from large int values.
            thousands_sep = _separator_in_use(preparsed_d, int_metrics, fpath)
            if thousands_sep is not None:
                decimalcomma = thousands_sep == "."
    except _AmbiguousSeparatorError:
        return {}
    if decimalcomma is None:
        log.debug(f"Couldn't determine decimal separator for file {fpath}")

    d: Dict[str, Union[int, float, str]] = {}
    for k, v in preparsed_d.items():
        # Same precedence as before: int first, then float, then rate.
        if k in int_metrics:
            out_key, convert = int_metrics[k], int
        elif k in float_metrics:
            out_key, convert = float_metrics[k], float
        elif k in rate_metrics:
            out_key, convert = rate_metrics[k], float
        else:
            continue

        # str.split() with no argument splits on any Unicode whitespace, so
        # this drops regular spaces as well as NBSP (U+00A0) and narrow NBSP
        # (U+202F) grouping separators that int()/float() would reject.
        v = "".join(v.split())
        # Drop unit suffixes such as the "X" of coverage values and "%".
        v = v.strip("X").strip("%")
        if decimalcomma:
            # "." groups thousands, "," is the decimal mark.
            v = v.replace(".", "").replace(",", ".")
        else:
            # "," groups thousands, "." is already the decimal mark.
            v = v.replace(",", "")
        d[out_key] = convert(v)

    return d