Skip to content
This repository has been archived by the owner on Aug 22, 2023. It is now read-only.

Commit

Permalink
Merge 0e6be84 into 7094e9e
Browse files Browse the repository at this point in the history
  • Loading branch information
karlnyr committed Feb 15, 2021
2 parents 7094e9e + 0e6be84 commit 424e07b
Show file tree
Hide file tree
Showing 23 changed files with 48,408 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ dist/
demux.egg-info/

DEVELOP/
.idea
.idea/
# Distribution / packaging
*.egg-info/
*.egg
Expand Down
2 changes: 1 addition & 1 deletion .idea/demultiplexing.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions demux/cli/base.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
"""Demultiplexing base demux command"""

import logging

import click
import coloredlogs
import logging
import yaml

from .basemask import basemask
from demux import __version__
from .indexreport import indexreport
from .samplesheet import sheet

from .basemask import basemask

LOG = logging.getLogger(__name__)
LEVELS = ["DEBUG", "INFO", "WARNING", "ERROR"]

Expand All @@ -36,3 +35,4 @@ def demux(context, log_level, config):

demux.add_command(sheet)
demux.add_command(basemask)
demux.add_command(indexreport)
70 changes: 70 additions & 0 deletions demux/cli/indexreport.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import click
import logging

from pathlib import Path

from demux.constants import REFERENCE_REPORT_HEADER, REPORT_TABLES_INDEX
from demux.utils.indexreport import IndexReport


LOG = logging.getLogger(__name__)


# Click group that collects the index report sub-commands. It is attached to
# the top-level CLI through `demux.add_command(indexreport)` in demux/cli/base.py.
# NOTE: click uses the docstring below as the group's --help text.
@click.group()
def indexreport():
    """Index report commands"""


@indexreport.command()
@click.option(
    "--cluster-counts",
    type=int,
    default=1000000,
    help=(
        "Cluster count cut-off, any samples (besides indexcheck ones) with lower cluster counts are"
        " included in the summary"
    ),
)
@click.option(
    "--dry-run",
    help="Dry run of the function, will not write any report",
    is_flag=True,
)
@click.option(
    "--index-report-path",
    type=str,
    required=True,
    help="Path to bcl2fastq indexcheck report (laneBarcode.html)",
)
@click.option(
    "--out-dir",
    type=str,
    required=True,
    help="Path of outdirectory for summary report",
)
@click.option(
    "--run-parameters-path",
    type=str,
    required=True,
    help="Path to RunParameters.xml file for the flowcell",
)
def summary(
    cluster_counts: int,
    dry_run: bool,
    index_report_path: str,
    out_dir: str,
    run_parameters_path: str,
) -> None:
    """Create a summary of the indexcheck report, extracting information on samples with low number of clusters
    and the topmost common unknown indexes"""
    # The docstring above doubles as the command's --help text, so the
    # parameter notes live here instead:
    #   cluster_counts      -- cut-off forwarded to IndexReport
    #   dry_run             -- when set, the report is validated but not written
    #   index_report_path   -- laneBarcode.html produced by bcl2fastq
    #   out_dir             -- directory handed to IndexReport for the summary
    #   run_parameters_path -- RunParameters.xml of the flowcell
    # The CLI receives plain strings; IndexReport is given Path objects.
    index_report = IndexReport(
        cluster_counts=cluster_counts,
        index_report_path=Path(index_report_path),
        out_dir=Path(out_dir),
        report_tables_index=REPORT_TABLES_INDEX,
        run_parameters_path=Path(run_parameters_path),
    )
    # Lazy %-style arguments: the message is only built if INFO is enabled.
    LOG.info(
        "Creating summary of laneBarcode.html for FC: %s", index_report.flowcell_id
    )

    # Validation runs before the dry-run check, so it also happens on a dry run.
    index_report.validate(reference_report_header=REFERENCE_REPORT_HEADER)

    if dry_run:
        LOG.info("This is a dry-run, will not write a summary report")
        return

    index_report.write_summary(report_tables_index=REPORT_TABLES_INDEX)
5 changes: 5 additions & 0 deletions demux/constants/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from .indexreport import (
REFERENCE_REPORT_HEADER,
REPORT_TABLES_INDEX,
FLOWCELL_VERSION_LANE_COUNT,
)
23 changes: 23 additions & 0 deletions demux/constants/indexreport.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""
Constants for indexcheck report
"""


# Column headers, in order, that the index report is validated against
# (passed as `reference_report_header` to IndexReport.validate).
REFERENCE_REPORT_HEADER = [
    # sample identification
    "Lane",
    "Project",
    "Sample",
    "Barcode sequence",
    # cluster and barcode statistics
    "PF Clusters",
    "% of the lane",
    "% Perfect barcode",
    "% One mismatch barcode",
    # yield and quality statistics
    "Yield (Mbases)",
    "% PF Clusters",
    "% >= Q30 bases",
    "Mean Quality Score",
]

# Index assigned to each named report table.
REPORT_TABLES_INDEX = {
    "cluster_count_table": 1,
    "top_unknown_barcode_table": 2,
}

# Lane count keyed by flowcell version.
FLOWCELL_VERSION_LANE_COUNT = {
    "S1": 2,
    "S4": 4,
}
18 changes: 18 additions & 0 deletions demux/exc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""
Demultiplexing exceptions
"""


class DemuxError(Exception):
    """
    Base exception for the package

    Accepts the same positional arguments as ``Exception`` (typically a single
    message), so subclasses can be raised with a description of what went
    wrong. Calling it without arguments keeps working.
    """

    def __init__(self, *args: object) -> None:
        # Forward everything to Exception so str(error) and error.args carry
        # the message; a zero-argument __init__ would reject any message.
        super().__init__(*args)


class IndexReportError(DemuxError):
    """Raised for errors in the index report module; a DemuxError subclass"""
39 changes: 39 additions & 0 deletions demux/utils/html.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import bs4
import re


from pathlib import Path
from typing import Tuple


def parse_html_header(html_column_header: bs4.element.Tag) -> str:
    """Return the text of an html column header with the markup removed

    The header is rendered with ``str()``; every ``<br/>`` becomes a single
    space and all remaining tags are dropped.
    """
    rendered_markup = str(html_column_header)
    # Replace line breaks first so words on either side stay separated.
    markup_with_spaces = rendered_markup.replace("<br/>", " ")
    return re.sub("<.*?>", "", markup_with_spaces)


def parse_html_project_cluster_counts(
    header_index: dict, project_row: bs4.element.Tag
) -> Tuple[str, int]:
    """Purify a html project cluster count row from html syntax

    Args:
        header_index: maps column header names to their position in the row;
            the "Project" and "PF Clusters" entries are read.
        project_row: table row whose ``td`` cells hold the values.

    Returns:
        The project text with tags removed, and the cluster count as an int.

    Raises:
        KeyError: if ``header_index`` lacks "Project" or "PF Clusters".
        IndexError: if the row has fewer cells than an index requires.
        ValueError: if the cluster count cell is not an integer once the
            commas are removed.
    """
    # Collect the cells once; both lookups index into the same list.
    cells = project_row.find_all("td")

    project = re.sub(r"<.*?>", "", str(cells[header_index["Project"]]))
    cluster_count_text = re.sub(
        r"<.*?>", "", str(cells[header_index["PF Clusters"]])
    )
    # Drop "," (presumably thousands separators) before converting to int.
    cluster_count = int(cluster_count_text.replace(",", ""))

    return project, cluster_count


def get_html_content(index_report_path: Path) -> bs4.BeautifulSoup:
    """Parse the report at ``index_report_path`` and return it as a BeautifulSoup

    The file is opened in text mode and parsed with the "html.parser" backend.
    """

    with index_report_path.open() as report_handle:
        return bs4.BeautifulSoup(report_handle, "html.parser")

0 comments on commit 424e07b

Please sign in to comment.