<a href="https://colab.research.google.com/github/HV28/binfotron/blob/master/fastqc_practicalCourse.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
################################# fastqc ###############################
# Install required tools
!apt-get update -qq
!apt-get install -y sra-toolkit fastqc unzip

# Install SRA toolkit
!wget --output-document sratoolkit.tar.gz https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/current/sratoolkit.current-ubuntu64.tar.gz
!tar -xzf sratoolkit.tar.gz
!rm sratoolkit.tar.gz
!mv sratoolkit.* sratoolkit

# Add SRA toolkit to PATH
import os
os.environ['PATH'] = os.environ['PATH'] + ':' + os.getcwd() + '/sratoolkit/bin'
!echo $PATH

# Create a folder to store the FASTQ files
!mkdir -p mhanna/
!mkdir -p mhanna/data

# SRA run accession -> human-readable sample label used to name its FASTQ file.
# The pairs are listed in the order in which the runs are processed.
samples = dict(
    [
        ("SRR28356705", "tripod-94-2882"),
        ("SRR28356707", "tripod-94-2880"),
        ("SRR28356706", "tripod-94-2881"),
        ("SRR28356708", "tripod-94-2879"),
    ]
)

# Download and convert directly to uncompressed FASTQ with meaningful names
for accession, sample_name in samples.items():
    print(f"\n=== Processing {accession} -> {sample_name} ===")

    # Download SRA file
    print(f"Downloading {accession}...")
    !prefetch {accession}

    # Convert directly to uncompressed FASTQ with custom name
    print(f"Converting to FASTQ as {sample_name}.fastq...")
    !fastq-dump --outdir final_fastq --stdout {accession} > mhanna/data/{sample_name}.fastq

    # Clean up SRA file
    !rm -rf ~/ncbi/public/sra/{accession}.sra

    print(f"✓ Created mhanna/data/{sample_name}.fastq")

# Verify results
print("\n=== Final Results ===")
!ls -lh mhanna/data/


W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
unzip is already the newest version (6.0-26ubuntu3.2).
The following additional packages will be installed:
  blends-common default-jre default-jre-headless fonts-dejavu-core
  fonts-dejavu-extra libapache-pom-java libargs4j-java libatk-wrapper-java
  libatk-wrapper-java-jni libcommons-compress-java libcommons-io-java
  libcommons-jexl2-java libcommons-lang3-java libcommons-logging-java
  libcommons-math3-java libcommons-parent-java libfindbin-libs-perl
  libhtsjdk-java libjbzip2-java libjson-simple-java libkdf5-2 libncbi-vdb2
  libncbi-wvdb2 libngs-java libngs-sdk-dev libngs-sdk2 libsis-base-java
  libsis-base-jni libsis-jhdf5-java libsis-jhdf5-jni libsnappy-java
  libsnappy-jni libxtst6 libxxf86dga1 med

In [None]:
# Run FastQC directly on the uncompressed files
print("\n=== Running FastQC ===")
!mkdir -p mhanna/fastqc_results
!fastqc mhanna/data/*.fastq -o mhanna/fastqc_results/

print("\n=== FastQC Complete ===")
!ls -lh mhanna/fastqc_results/


=== Running FastQC ===
Started analysis of tripod-94-2879.fastq
Approx 5% complete for tripod-94-2879.fastq
Approx 10% complete for tripod-94-2879.fastq
Approx 15% complete for tripod-94-2879.fastq
Approx 20% complete for tripod-94-2879.fastq
Approx 25% complete for tripod-94-2879.fastq
Approx 30% complete for tripod-94-2879.fastq
Approx 35% complete for tripod-94-2879.fastq
Approx 40% complete for tripod-94-2879.fastq
Approx 45% complete for tripod-94-2879.fastq
Approx 50% complete for tripod-94-2879.fastq
Approx 55% complete for tripod-94-2879.fastq
Approx 60% complete for tripod-94-2879.fastq
Approx 65% complete for tripod-94-2879.fastq
Approx 70% complete for tripod-94-2879.fastq
Approx 75% complete for tripod-94-2879.fastq
Approx 80% complete for tripod-94-2879.fastq
Approx 85% complete for tripod-94-2879.fastq
Approx 90% complete for tripod-94-2879.fastq
Approx 95% complete for tripod-94-2879.fastq
Analysis complete for tripod-94-2879.fastq
Started analysis of tripod-94-2880.fas

In [None]:
import os
import glob
import zipfile
from google.colab import files

# Download all HTML files as a zip
def download_all_reports(results_dir="mhanna/fastqc_results", zip_filename="fastqc_reports.zip"):
    """Bundle the HTML reports of a results folder into a zip and download it.

    Args:
        results_dir: Directory searched (non-recursively) for ``*.html`` files.
        zip_filename: Path of the archive to create; an existing file at that
            path is overwritten.

    Returns:
        None. When no HTML file is found, a message is printed and neither an
        archive nor a download is produced.
    """
    # Sorted so the archive members have a stable order across runs
    # (glob returns matches in arbitrary order).
    html_files = sorted(glob.glob(os.path.join(results_dir, "*.html")))

    if not html_files:
        print("No HTML files found!")
        return

    # Create zip file; DEFLATE is requested explicitly because ZipFile stores
    # members uncompressed by default.
    with zipfile.ZipFile(zip_filename, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
        for html_file in html_files:
            # Store each report under its bare file name, without directories.
            zipf.write(html_file, os.path.basename(html_file))

    print(f"Created {zip_filename} with {len(html_files)} HTML reports")
    # Hand the archive to google.colab's `files.download`.
    files.download(zip_filename)

download_all_reports()