In [None]:
# Ch05-1-qc-data

In [None]:
# Get data
! wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR390/SRR390728/SRR390728_1.fastq.gz 
! wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR390/SRR390728/SRR390728_2.fastq.gz 
! mv SRR390728_1.fastq.gz data/
! mv SRR390728_2.fastq.gz data/

In [None]:
# If you want to install FastQC from within your notebook use this
! yes | conda install -c bioconda fastqc

In [None]:
# Install MultiQC if desired
! yes | conda install -c bioconda multiqc

In [None]:
# Import libraries
import os
import subprocess
import webbrowser
from pathlib import Path

In [None]:
def run_fastqc(input_dir, output_dir):
    """
    Run FastQC on every FASTQ file found directly inside ``input_dir``.

    Files are selected by their ``.fastq`` / ``.fastq.gz`` suffix and passed
    to a single ``fastqc`` invocation. The outcome is reported on stdout; the
    completion message is printed only when FastQC exits with status 0.

    Args:
        input_dir: Directory scanned (non-recursively) for FASTQ files.
        output_dir: Directory passed to ``fastqc -o``; created if missing.

    Returns:
        None.

    Raises:
        FileNotFoundError: If ``input_dir`` does not exist or the ``fastqc``
            executable cannot be found.
    """
    # Create an output directory for the FastQC reports
    os.makedirs(output_dir, exist_ok=True)
    # os.listdir returns entries in arbitrary order; sort so that reruns
    # build exactly the same command line
    fastq_files = sorted(
        f for f in os.listdir(input_dir) if f.endswith((".fastq", ".fastq.gz"))
    )
    if not fastq_files:
        print("Could not find any FASTQ files in the input directory.")
        return
    print("Running FastQC...")
    # Build the FastQC command (argument list, no shell involved)
    fastqc_command = ["fastqc", "-o", output_dir] + [
        os.path.join(input_dir, f) for f in fastq_files
    ]
    # Execute FastQC and inspect its exit status instead of assuming success
    result = subprocess.run(fastqc_command)
    if result.returncode != 0:
        print(f"FastQC failed with exit code {result.returncode}.")
        return
    print("FastQC analysis Completed.")

In [None]:
def run_multiqc(input_dir, output_dir):
    """
    Run MultiQC to aggregate the reports found in ``input_dir``.

    The outcome is reported on stdout; the success message is printed only
    when MultiQC exits with status 0.

    Args:
        input_dir: Directory passed to ``multiqc`` as the location to scan.
        output_dir: Directory passed to ``multiqc -o``; created if missing.

    Returns:
        None.

    Raises:
        FileNotFoundError: If the ``multiqc`` executable cannot be found.
    """
    # Create output directory for the reports
    os.makedirs(output_dir, exist_ok=True)
    print("Running MultiQC...")
    # Build the MultiQC command (argument list, no shell involved)
    multiqc_command = ["multiqc", input_dir, "-o", output_dir]
    # Execute MultiQC and inspect its exit status instead of assuming success
    result = subprocess.run(multiqc_command)
    if result.returncode != 0:
        print(f"MultiQC failed with exit code {result.returncode}.")
        return
    print("Finished...MultiQC report(s) generated.")

In [None]:
def main():
    """
    Drive the quality-control workflow: FastQC first, then MultiQC on its output.
    """
    # FASTQ inputs live in the data subdirectory; each tool writes to its own folder
    reads_dir = "./data"
    fastqc_dir, multiqc_dir = "fastqc_output", "multiqc_output"

    # Per-file QC reports
    run_fastqc(reads_dir, fastqc_dir)

    # Aggregate the FastQC reports into a single MultiQC report
    run_multiqc(fastqc_dir, multiqc_dir)

    report_location = os.path.abspath(multiqc_dir)
    print(f"MultiQC report saved in: {report_location}")


if __name__ == "__main__":
    main()

In [16]:
# Review the report in the default browser.
# The `open` shell command exists only on macOS; webbrowser with a file URI is portable.
report_path = Path("multiqc_output/multiqc_report.html").resolve()
if report_path.is_file():
    webbrowser.open(report_path.as_uri())
else:
    print(f"MultiQC report not found: {report_path}")

In [None]:
# End of Notebook #