In [24]:
import multiqc
from collections import defaultdict
from multiqc.plots import table

In [25]:
# Presumably clears any previously loaded MultiQC session state so the cells
# below start from scratch on re-run — confirm against the MultiQC docs.
multiqc.reset()

In [26]:
# Directories scanned in this first pass.
search_dirs = [
    "/Users/vlad/tmp/nextflow/assembly",
    "/Users/vlad/tmp/nextflow/fastp",
    "/Users/vlad/tmp/nextflow/variants",
]
multiqc.parse_logs(search_dirs, force=True)

# Print the modules that are loaded into the session so far.
loaded_modules = multiqc.list_modules()
print(loaded_modules)

/// https://multiqc.info 🔍 | v1.22.dev0 (ab20fed)
[34m|       init_config[0m | Using log filenames for sample names
[34m|       file_search[0m | Search path : /Users/vlad/tmp/nextflow/assembly
[34m|       file_search[0m | Search path : /Users/vlad/tmp/nextflow/fastp
[34m|       file_search[0m | Search path : /Users/vlad/tmp/nextflow/variants
[34m|         searching [0m| ████████████████████████████████████████ 100% 10/10
[34m|          cutadapt[0m | Found 1 reports
[34m|             fastp[0m | Found 3 reports
['Cutadapt', 'fastp']


In [27]:
# Display the input files that have been picked up so far.
multiqc.list_data_sources()

['/Users/vlad/tmp/nextflow/assembly/cutadapt/log/SAMPLE2_PE.primer_trim.cutadapt.log',
 '/Users/vlad/tmp/nextflow/fastp/SAMPLE3_SE.fastp.json',
 '/Users/vlad/tmp/nextflow/fastp/SAMPLE1_PE.fastp.json',
 '/Users/vlad/tmp/nextflow/fastp/SAMPLE2_PE.fastp.json']

In [28]:
# Load the FastQC results as two separately named report sections; each
# entry restricts the module to one sub-directory through `path_filters`.
fastqc_trimmed = {
    "name": "FastQC (trimmed)",
    "anchor": "fastqc_trimmed",
    "info": "This section of the report shows FastQC results after adapter trimming.",
    "target": "",
    "path_filters": ["*/trim/*"],
}
fastqc_raw = {
    "name": "FastQC (raw)",
    "anchor": "fastqc_raw",
    "path_filters": ["*/raw/*"],
}
multiqc.parse_logs(
    "/Users/vlad/tmp/nextflow/fastqc",
    module_order=[{"fastqc": fastqc_trimmed}, {"fastqc": fastqc_raw}],
)

# Print the modules that are loaded into the session so far.
print(multiqc.list_modules())

/// https://multiqc.info 🔍 | v1.22.dev0 (ab20fed)
[34m|       init_config[0m | Using log filenames for sample names
[34m|       file_search[0m | Search path : /Users/vlad/tmp/nextflow/fastqc
[34m|         searching [0m| ████████████████████████████████████████ 100% 16/16
[34m|            fastqc[0m | Found 3 reports
[34m|            fastqc[0m | Found 5 reports
['Cutadapt', 'fastp', 'FastQC (trimmed)', 'FastQC (raw)']


In [29]:
# Display the input files again, now including the FastQC archives.
multiqc.list_data_sources()

['/Users/vlad/tmp/nextflow/assembly/cutadapt/log/SAMPLE2_PE.primer_trim.cutadapt.log',
 '/Users/vlad/tmp/nextflow/fastp/SAMPLE3_SE.fastp.json',
 '/Users/vlad/tmp/nextflow/fastp/SAMPLE1_PE.fastp.json',
 '/Users/vlad/tmp/nextflow/fastp/SAMPLE2_PE.fastp.json',
 '/Users/vlad/tmp/nextflow/fastqc/trim/SAMPLE3_SE_fastqc.zip',
 '/Users/vlad/tmp/nextflow/fastqc/trim/SAMPLE2_PE_2_fastqc.zip',
 '/Users/vlad/tmp/nextflow/fastqc/trim/SAMPLE2_PE_1_fastqc.zip',
 '/Users/vlad/tmp/nextflow/fastqc/raw/SAMPLE1_PE_1_fastqc.zip',
 '/Users/vlad/tmp/nextflow/fastqc/raw/SAMPLE3_SE_fastqc.zip',
 '/Users/vlad/tmp/nextflow/fastqc/raw/SAMPLE2_PE_2_fastqc.zip',
 '/Users/vlad/tmp/nextflow/fastqc/raw/SAMPLE2_PE_1_fastqc.zip',
 '/Users/vlad/tmp/nextflow/fastqc/raw/SAMPLE1_PE_2_fastqc.zip']

In [30]:
# Add the Kraken 2 directory to the session with debug-level logging enabled.
kraken2_dir = "/Users/vlad/tmp/nextflow/kraken2"
multiqc.parse_logs(kraken2_dir, verbose=True)

# Print the modules that are loaded into the session so far.
modules_after_kraken2 = multiqc.list_modules()
print(modules_after_kraken2)

/// https://multiqc.info 🔍 | v1.22.dev0 (ab20fed)
[32m[2024-04-24 21:12:44][0m [34mmultiqc.core.init_config                          [0m [1;30m[DEBUG  ][0m  [2mThis is MultiQC v1.22.dev0 (ab20fed)[0m
[32m[2024-04-24 21:12:44][0m [34mmultiqc.core.init_config                          [0m [1;30m[DEBUG  ][0m  [2mUsing temporary directory: /var/folders/zn/9rrn2wv97lncdpqrp79p5rpm0000gn/T/tmppp5q1xx7[0m
[32m[2024-04-24 21:12:44][0m [34mmultiqc.core.init_config                          [0m [1;30m[DEBUG  ][0m  [2mCommand used: /Users/vlad/git/MultiQC/venv/lib/python3.12/site-packages/ipykernel_launcher.py -f /Users/vlad/Library/Jupyter/runtime/kernel-40eae667-fed6-4535-b6da-c1458161cf5a.json[0m
[32m[2024-04-24 21:12:45][0m [34mmultiqc.core.init_config                          [0m [1;30m[DEBUG  ][0m  [2mLatest MultiQC version is v1.21, released 2024-02-28[0m
[32m[2024-04-24 21:12:45][0m [34mmultiqc.core.init_config                          [0m [1;30m[INFO   

Now add an additional table to replace the general stats table.

In [31]:
# Build one row per sample for the custom summary table. A metric is left out
# of a sample's row when the corresponding source has no value for it, so a
# single incomplete record cannot abort the whole cell with a KeyError.
#
# NOTE(review): the recorded output of this cell only contains "input_reads";
# "trimmed_reads" and "non_host_reads" were never populated. Confirm that the
# "r_written" / "pct_unclassified" keys and the sample names match what the
# Cutadapt and Kraken 2 modules actually report.
summarized_data = defaultdict(dict)
for sample in multiqc.list_samples():
    # fastp: total read count before filtering, nested inside the summary.
    fastp_data = multiqc.get_module_data(sample=sample, module="fastp") or {}
    total_reads = (
        fastp_data.get("summary", {})
        .get("before_filtering", {})
        .get("total_reads")
    )
    if total_reads is not None:
        summarized_data[sample]["input_reads"] = total_reads

    # Cutadapt: number of reads written after trimming.
    cutadapt_data = multiqc.get_module_data(sample=sample, module="Cutadapt") or {}
    if "r_written" in cutadapt_data:
        summarized_data[sample]["trimmed_reads"] = cutadapt_data["r_written"]

    # General stats: percentage of unclassified reads, reported as non-host.
    general_stats = multiqc.get_general_stats_data(sample=sample) or {}
    if "pct_unclassified" in general_stats:
        summarized_data[sample]["non_host_reads"] = general_stats["pct_unclassified"]

# Convert to a plain dict so the printed form omits the defaultdict wrapper.
print(dict(summarized_data))

{'SAMPLE1_PE': {'input_reads': 55442}, 'SAMPLE2_PE': {'input_reads': 42962}, 'SAMPLE3_SE': {'input_reads': 49202}}


In [32]:
# Settings shared by the two read-count columns: no colour scale, and
# thousands separators with no decimals.
read_count_style = {"scale": False, "format": "{:,.0f}"}

# Column definitions for the summary table, keyed by the metric names used
# in `summarized_data`.
summary_headers = {
    "input_reads": {
        "title": "# Input reads",
        "description": "Number of reads before filtering",
        **read_count_style,
    },
    "trimmed_reads": {
        "title": "# Trimmed reads (Cutadapt)",
        "description": "Number of reads after adapter trimming",
        **read_count_style,
    },
    "non_host_reads": {
        "title": "% Non-host reads (Kraken 2)",
        "description": "Percentage of reads classified as non-host",
        "scale": False,
        "format": "{:.2f}",
    },
}

# Table-level configuration; the "id" is the name the plot is looked up by.
summary_pconfig = {
    "id": "summary_assembly_metrics",
    "title": "Summary Assembly Metrics",
    "description": "Summary of input reads, trimmed reads, and non-host reads.",
}

summary_table = table.plot(
    data=summarized_data,
    headers=summary_headers,
    pconfig=summary_pconfig,
)

# Register the table as its own report section.
multiqc.add_custom_content_section(
    plot=summary_table,
    name="Summary Assembly Metrics",
    anchor="summary_assembly",
    description="Summary of input reads, trimmed reads, and non-host reads.",
)

[32m[2024-04-24 21:12:46][0m [34mmultiqc.utils.util_functions                      [0m [1;30m[DEBUG  ][0m  [2mWrote data file summary_assembly_metrics.txt[0m


In [34]:
# Generate the report from the data loaded in this session plus the custom
# summary table built from `summarized_data`.
# NOTE(review): confirm that `excluded_modules` is the keyword name the
# installed MultiQC version expects for leaving out the general stats table.
report_options = {
    "excluded_modules": ["general_stats"],
    "force": True,
    "verbose": True,
}
multiqc.write_report(**report_options)

/// https://multiqc.info 🔍 | v1.22.dev0 (ab20fed)
[32m[2024-04-24 21:12:53][0m [34mmultiqc.core.init_config                          [0m [1;30m[DEBUG  ][0m  [2mThis is MultiQC v1.22.dev0 (ab20fed)[0m
[32m[2024-04-24 21:12:53][0m [34mmultiqc.core.init_config                          [0m [1;30m[DEBUG  ][0m  [2mUsing temporary directory: /var/folders/zn/9rrn2wv97lncdpqrp79p5rpm0000gn/T/tmppp5q1xx7[0m
[32m[2024-04-24 21:12:53][0m [34mmultiqc.core.init_config                          [0m [1;30m[DEBUG  ][0m  [2mCommand used: /Users/vlad/git/MultiQC/venv/lib/python3.12/site-packages/ipykernel_launcher.py -f /Users/vlad/Library/Jupyter/runtime/kernel-40eae667-fed6-4535-b6da-c1458161cf5a.json[0m
[32m[2024-04-24 21:12:53][0m [34mmultiqc.core.init_config                          [0m [1;30m[DEBUG  ][0m  [2mLatest MultiQC version is v1.21, released 2024-02-28[0m
[32m[2024-04-24 21:12:53][0m [34mmultiqc.core.init_config                          [0m [1;30m[INFO   

In [35]:
# Display the identifiers of the plots available in the session.
multiqc.list_plots()

['cutadapt_filtered_reads_plot',
 'cutadapt_trimmed_sequences_plot_default (2 datasets)',
 'fastp_filtered_reads_plot',
 'fastp-insert-size-plot',
 'fastp-seq-quality-plot (4 datasets)',
 'fastp-seq-content-gc-plot (4 datasets)',
 'fastp-seq-content-n-plot (4 datasets)',
 'fastqc_sequence_counts_plot',
 'fastqc_per_base_sequence_quality_plot',
 'fastqc_per_sequence_quality_scores_plot',
 'fastqc_per_sequence_gc_content_plot (2 datasets)',
 'fastqc_per_base_n_content_plot',
 'fastqc_sequence_length_distribution_plot',
 'fastqc_sequence_duplication_levels_plot',
 'fastqc_overrepresented_sequences_plot',
 'fastqc_top_overrepresented_sequences_table',
 'fastqc-status-check-heatmap',
 'fastqc_sequence_counts_plot-1',
 'fastqc_per_base_sequence_quality_plot-1',
 'fastqc_per_sequence_quality_scores_plot-1',
 'fastqc_per_sequence_gc_content_plot-1 (2 datasets)',
 'fastqc_per_base_n_content_plot-1',
 'fastqc_sequence_length_distribution_plot-1',
 'fastqc_sequence_duplication_levels_plot-1',
 'f

In [36]:
# Display the custom summary table, looked up by the id set in its pconfig.
multiqc.show_plot("summary_assembly_metrics")

Sample Name,# Input reads
SAMPLE1_PE,55442
SAMPLE2_PE,42962
SAMPLE3_SE,49202
