#### Title: Differential Gene Expression using RNA-Seq (Workflow)
* Created by: Abhishek Shrestha
* Created on: 09.01.2024

In [3]:
# Import necessary libraries
from Bio import SeqIO
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os
import subprocess

#### Step 0: Initial setup of folders and subfolders

In [13]:
import os
import subprocess

# Define root and output folders
root_folder = "RNA_seq_workflow"
fastq_dir = os.path.join(root_folder, "input")
output_folder = os.path.join(root_folder, "output")
fastqc_dir = os.path.join(output_folder, "1_initial_qc")
trimmed_dir = os.path.join(output_folder, "2_trimmed_output")
aligned_sequences_dir = os.path.join(output_folder, "3_aligned_sequences")
aligned_bam_dir = os.path.join(aligned_sequences_dir, "aligned_bam")
aligned_logs_dir = os.path.join(aligned_sequences_dir, "aligned_logs")
sorted_bam_dir = os.path.join(aligned_sequences_dir, "sorted_bam")
sorted_bam_logs = os.path.join(aligned_sequences_dir, "sorted_logs")
final_counts_dir = os.path.join(output_folder, "4_final_counts")
multiQC_dir = os.path.join(output_folder, "5_multiQC")
hisat2_index_dir = os.path.join(root_folder, "hisat2_index")
genome_dir = os.path.join(root_folder, "genome")
annotation_dir = os.path.join(root_folder, "annotation")
adapter_file = os.path.join(root_folder, "adapters", "TruSeq3-PE.fa")

# Ensure directories exist
os.makedirs(fastq_dir, exist_ok=True)
os.makedirs(output_folder, exist_ok=True)
os.makedirs(fastqc_dir, exist_ok=True)
os.makedirs(trimmed_dir, exist_ok=True)
os.makedirs(aligned_sequences_dir, exist_ok=True)
os.makedirs(aligned_bam_dir, exist_ok= True)
os.makedirs(aligned_logs_dir, exist_ok= True)
os.makedirs(sorted_bam_dir, exist_ok= True)
os.makedirs(sorted_bam_logs, exist_ok= True)
os.makedirs(final_counts_dir, exist_ok=True)
os.makedirs(multiQC_dir, exist_ok=True)
os.makedirs(hisat2_index_dir, exist_ok=True)
os.makedirs(genome_dir, exist_ok=True)
os.makedirs(annotation_dir, exist_ok=True)
os.makedirs(adapter_file, exist_ok=True)


#### Step 1: Download required files

##### Step 1.1: Download genome and annotation file

In [3]:
import os
import subprocess

# Define the output directories
root_folder = "RNA_seq_workflow"
genome_dir = os.path.join(root_folder, "genome")
annotation_dir = os.path.join(root_folder, "annotation")
os.makedirs(genome_dir, exist_ok=True)
os.makedirs(annotation_dir, exist_ok=True)

# URLs for the files
urls = {
    "genome": "https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-60/fasta/hordeum_vulgare/dna/Hordeum_vulgare.MorexV3_pseudomolecules_assembly.dna.toplevel.fa.gz",
    "annotation": "https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-60/gff3/hordeum_vulgare/Hordeum_vulgare.MorexV3_pseudomolecules_assembly.60.chr.gff3.gz",
}

# File paths
output_files = {
    "genome": os.path.join(genome_dir, "Hordeum_vulgare.MorexV3_pseudomolecules_assembly.dna.toplevel.fa.gz"),
    "annotation": os.path.join(annotation_dir, "Hordeum_vulgare.MorexV3_pseudomolecules_assembly.60.chr.gff3.gz"),
}

# Function to download and decompress files using wget and gunzip
def download_and_decompress_wget(url, output_path):
    # Download file using wget
    try:
        print(f"Downloading {output_path}...")
        subprocess.run(["wget", "-q", "-O", output_path, url], check=True)
        print(f"Downloaded: {output_path}")
    except subprocess.CalledProcessError as e:
        print(f"Failed to download {output_path}. Error: {e}")
        return

    # Decompress file using gunzip
    try:
        print(f"Decompressing {output_path}...")
        subprocess.run(["gunzip", "-f", output_path], check=True)
        print(f"Decompressed to: {output_path[:-3]}")
    except subprocess.CalledProcessError as e:
        print(f"Failed to decompress {output_path}. Error: {e}")

# Download and decompress files
for file_type, url in urls.items():
    download_and_decompress_wget(url, output_files[file_type])

print("All files downloaded and decompressed successfully!")

Downloading RNA_seq_workflow/genome/Hordeum_vulgare.MorexV3_pseudomolecules_assembly.dna.toplevel.fa.gz...
Downloaded: RNA_seq_workflow/genome/Hordeum_vulgare.MorexV3_pseudomolecules_assembly.dna.toplevel.fa.gz
Decompressing RNA_seq_workflow/genome/Hordeum_vulgare.MorexV3_pseudomolecules_assembly.dna.toplevel.fa.gz...
Decompressed to: RNA_seq_workflow/genome/Hordeum_vulgare.MorexV3_pseudomolecules_assembly.dna.toplevel.fa
Downloading RNA_seq_workflow/annotation/Hordeum_vulgare.MorexV3_pseudomolecules_assembly.60.chr.gff3.gz...
Downloaded: RNA_seq_workflow/annotation/Hordeum_vulgare.MorexV3_pseudomolecules_assembly.60.chr.gff3.gz
Decompressing RNA_seq_workflow/annotation/Hordeum_vulgare.MorexV3_pseudomolecules_assembly.60.chr.gff3.gz...
Decompressed to: RNA_seq_workflow/annotation/Hordeum_vulgare.MorexV3_pseudomolecules_assembly.60.chr.gff3
All files downloaded and decompressed successfully!


##### Step 1.2: Download fastq files of control and pathogen treated barley plants

In [7]:
# Step 1.2 Download fastq files 
import os
import subprocess

# Define output directory for FASTQ files
root_folder = "RNA_seq_workflow"
fastq_dir = os.path.join(root_folder, "input")
os.makedirs(fastq_dir, exist_ok=True)  # Ensure the output directory exists

# Download FASTQ files using SRA Toolkit
def download_fastq(srr_id, fastq_dir):
    print(f"Downloading FASTQ files for {srr_id}...")
    # Use prefetch to download the SRA file
    subprocess.run(["prefetch", srr_id], check=True)
    
    # Use fasterq-dump to extract FASTQ files to the specified directory
    subprocess.run([
        "fasterq-dump", "--split-files", "--outdir", fastq_dir, srr_id
    ], check=True)
    
    # Compress the resulting FASTQ files
    for fq_file in os.listdir(fastq_dir):
        if fq_file.startswith(srr_id) and fq_file.endswith(".fastq"):
            fq_path = os.path.join(fastq_dir, fq_file)
            subprocess.run(["gzip", fq_path], check=True)
            print(f"Compressed: {fq_path}.gz")
    print(f"Downloaded and saved FASTQ files for {srr_id} to {fastq_dir}")

# List of SRR IDs
srr_ids = ["SRR8437484","SRR8437485", "SRR8437482", "SRR8437483", "SRR8437480", "SRR8437481","SRR8437478", "SRR8437479"]

# Download each SRR ID
for srr_id in srr_ids:
    download_fastq(srr_id, fastq_dir)

print(f"All FASTQ files are saved in: {fastq_dir}")

Downloading FASTQ files for SRR8437478...
2025-01-10T18:51:32 prefetch.3.1.1: 1) Resolving 'SRR8437478'...
2025-01-10T18:51:33 prefetch.3.1.1: Current preference is set to retrieve SRA Normalized Format files with full base quality scores
2025-01-10T18:51:33 prefetch.3.1.1: 1) 'SRR8437478' is found locally 
2025-01-10T18:51:33 prefetch.3.1.1: 'SRR8437478' has 0 unresolved dependencies


spots read      : 23,939,223
reads read      : 47,878,446
reads written   : 47,878,446


Compressed: RNA_seq_workflow/input/SRR8437478_1.fastq.gz
Compressed: RNA_seq_workflow/input/SRR8437478_2.fastq.gz
Downloaded and saved FASTQ files for SRR8437478 to RNA_seq_workflow/input
Downloading FASTQ files for SRR8437479...
2025-01-10T19:14:12 prefetch.3.1.1: 1) Resolving 'SRR8437479'...
2025-01-10T19:14:13 prefetch.3.1.1: Current preference is set to retrieve SRA Normalized Format files with full base quality scores
2025-01-10T19:14:14 prefetch.3.1.1: 1) Downloading 'SRR8437479'...
2025-01-10T19:14:14 prefetch.3.1.1:  SRA Normalized Format file is being retrieved
2025-01-10T19:14:14 prefetch.3.1.1:  Downloading via HTTPS...
2025-01-10T19:18:07 prefetch.3.1.1:  HTTPS download succeed
2025-01-10T19:18:22 prefetch.3.1.1:  'SRR8437479' is valid: 3700880202 bytes were streamed from 3700873763
2025-01-10T19:18:22 prefetch.3.1.1: 1) 'SRR8437479' was downloaded successfully
2025-01-10T19:18:23 prefetch.3.1.1: 'SRR8437479' has 0 unresolved dependencies


spots read      : 26,772,466
reads read      : 53,544,932
reads written   : 53,544,932


Compressed: RNA_seq_workflow/input/SRR8437479_2.fastq.gz
Compressed: RNA_seq_workflow/input/SRR8437479_1.fastq.gz
Downloaded and saved FASTQ files for SRR8437479 to RNA_seq_workflow/input
All FASTQ files are saved in: RNA_seq_workflow/input


##### Step 2: Perform quality control using fastQC and trim reads using trimmomatic

In [17]:
import os
import subprocess

def run_fastqc(fastq_file, output_dir):
    """Run quality control using FastQC."""
    print(f"Running FastQC on {fastq_file}...")
    subprocess.run(["fastqc", fastq_file, "--outdir", output_dir], check=True)
    print(f"FastQC complete for {fastq_file}. Reports saved in {output_dir}")

def trim_reads(fastq1, fastq2, output_fastq1, output_fastq2, adapter_file):
    """Trim low-quality reads and adapters using Trimmomatic."""
    if not os.path.exists(adapter_file):
        raise FileNotFoundError(f"Adapter file not found: {adapter_file}")
    print(f"Trimming reads: {fastq1} and {fastq2} using adapters from {adapter_file}")
    subprocess.run([
        "trimmomatic", "PE", "-threads", "8",
        fastq1, fastq2,  # Input files
        output_fastq1, "unpaired_1.fastq.gz",  # Paired and unpaired outputs
        output_fastq2, "unpaired_2.fastq.gz",
        f"ILLUMINACLIP:{adapter_file}:2:30:10", "LEADING:3", "TRAILING:3", "SLIDINGWINDOW:4:20", "MINLEN:50"
    ], check=True)
    print(f"Trimming complete. Trimmed files: {output_fastq1}, {output_fastq2}")

# Set up directories
output_folder = "RNA_seq_workflow/output"
fastqc_dir = os.path.join(output_folder, "1_initial_qc")
os.makedirs(fastqc_dir, exist_ok=True)

trimmed_dir = os.path.join(output_folder,"2_trimmed_output")
os.makedirs(trimmed_dir, exist_ok=True)

# Define adapter file path
adapter_file = "RNA_seq_workflow/adapters/TruSeq3-PE.fa"

# Define input FASTQ files for each sample
samples = {
    "SRR8437484": ["RNA_seq_workflow/input/SRR8437484_1.fastq.gz", "RNA_seq_workflow/input/SRR8437484_2.fastq.gz"], # control 1
    "SRR8437485": ["RNA_seq_workflow/input/SRR8437485_1.fastq.gz", "RNA_seq_workflow/input/SRR8437485_2.fastq.gz"], # control 2
    "SRR8437482": ["RNA_seq_workflow/input/SRR8437482_1.fastq.gz", "RNA_seq_workflow/input/SRR8437482_2.fastq.gz"], # control 3
    "SRR8437483": ["RNA_seq_workflow/input/SRR8437483_1.fastq.gz", "RNA_seq_workflow/input/SRR8437483_2.fastq.gz"], # control 4
    "SRR8437480": ["RNA_seq_workflow/input/SRR8437480_1.fastq.gz", "RNA_seq_workflow/input/SRR8437480_2.fastq.gz"], # Pst 1
    "SRR8437481": ["RNA_seq_workflow/input/SRR8437481_1.fastq.gz", "RNA_seq_workflow/input/SRR8437481_2.fastq.gz"], # Pst 2
    "SRR8437478": ["RNA_seq_workflow/input/SRR8437478_1.fastq.gz", "RNA_seq_workflow/input/SRR8437478_2.fastq.gz"], # Pst 3
    "SRR8437479": ["RNA_seq_workflow/input/SRR8437479_1.fastq.gz", "RNA_seq_workflow/input/SRR8437479_2.fastq.gz"], # Pst 4
}

# Perform QC and trimming for each sample
for sample, files in samples.items():
    fastq1, fastq2 = files

    # Run QC on raw reads
    run_fastqc(fastq1, fastqc_dir)
    run_fastqc(fastq2, fastqc_dir)

    # Prepare paths for trimmed outputs
    output_fastq1 = os.path.join(trimmed_dir, f"{sample}_1.trimmed.fastq.gz")
    output_fastq2 = os.path.join(trimmed_dir, f"{sample}_2.trimmed.fastq.gz")

    # Trim reads
    trim_reads(fastq1, fastq2, output_fastq1, output_fastq2, adapter_file)

    # Run QC on trimmed reads
    run_fastqc(output_fastq1, fastqc_dir)
    run_fastqc(output_fastq2, fastqc_dir)

print("Quality control and trimming complete for all samples.")

Running FastQC on RNA_seq_workflow/input/SRR8437484_1.fastq.gz...
application/gzip


Started analysis of SRR8437484_1.fastq.gz
Approx 5% complete for SRR8437484_1.fastq.gz
Approx 10% complete for SRR8437484_1.fastq.gz
Approx 15% complete for SRR8437484_1.fastq.gz
Approx 20% complete for SRR8437484_1.fastq.gz
Approx 25% complete for SRR8437484_1.fastq.gz
Approx 30% complete for SRR8437484_1.fastq.gz
Approx 35% complete for SRR8437484_1.fastq.gz
Approx 40% complete for SRR8437484_1.fastq.gz
Approx 45% complete for SRR8437484_1.fastq.gz
Approx 50% complete for SRR8437484_1.fastq.gz
Approx 55% complete for SRR8437484_1.fastq.gz
Approx 60% complete for SRR8437484_1.fastq.gz
Approx 65% complete for SRR8437484_1.fastq.gz
Approx 70% complete for SRR8437484_1.fastq.gz
Approx 75% complete for SRR8437484_1.fastq.gz
Approx 80% complete for SRR8437484_1.fastq.gz
Approx 85% complete for SRR8437484_1.fastq.gz
Approx 90% complete for SRR8437484_1.fastq.gz
Approx 95% complete for SRR8437484_1.fastq.gz


Analysis complete for SRR8437484_1.fastq.gz
FastQC complete for RNA_seq_workflow/input/SRR8437484_1.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/input/SRR8437484_2.fastq.gz...
application/gzip


Started analysis of SRR8437484_2.fastq.gz
Approx 5% complete for SRR8437484_2.fastq.gz
Approx 10% complete for SRR8437484_2.fastq.gz
Approx 15% complete for SRR8437484_2.fastq.gz
Approx 20% complete for SRR8437484_2.fastq.gz
Approx 25% complete for SRR8437484_2.fastq.gz
Approx 30% complete for SRR8437484_2.fastq.gz
Approx 35% complete for SRR8437484_2.fastq.gz
Approx 40% complete for SRR8437484_2.fastq.gz
Approx 45% complete for SRR8437484_2.fastq.gz
Approx 50% complete for SRR8437484_2.fastq.gz
Approx 55% complete for SRR8437484_2.fastq.gz
Approx 60% complete for SRR8437484_2.fastq.gz
Approx 65% complete for SRR8437484_2.fastq.gz
Approx 70% complete for SRR8437484_2.fastq.gz
Approx 75% complete for SRR8437484_2.fastq.gz
Approx 80% complete for SRR8437484_2.fastq.gz
Approx 85% complete for SRR8437484_2.fastq.gz
Approx 90% complete for SRR8437484_2.fastq.gz
Approx 95% complete for SRR8437484_2.fastq.gz


Analysis complete for SRR8437484_2.fastq.gz
FastQC complete for RNA_seq_workflow/input/SRR8437484_2.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Trimming reads: RNA_seq_workflow/input/SRR8437484_1.fastq.gz and RNA_seq_workflow/input/SRR8437484_2.fastq.gz using adapters from RNA_seq_workflow/adapters/TruSeq3-PE.fa


TrimmomaticPE: Started with arguments:
 -threads 8 RNA_seq_workflow/input/SRR8437484_1.fastq.gz RNA_seq_workflow/input/SRR8437484_2.fastq.gz RNA_seq_workflow/output/2_trimmed_output/SRR8437484_1.trimmed.fastq.gz unpaired_1.fastq.gz RNA_seq_workflow/output/2_trimmed_output/SRR8437484_2.trimmed.fastq.gz unpaired_2.fastq.gz ILLUMINACLIP:RNA_seq_workflow/adapters/TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:20 MINLEN:50
Using PrefixPair: 'TACACTCTTTCCCTACACGACGCTCTTCCGATCT' and 'GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT'
ILLUMINACLIP: Using 1 prefix pairs, 0 forward/reverse sequences, 0 forward only sequences, 0 reverse only sequences
Quality encoding detected as phred33
Input Read Pairs: 23659826 Both Surviving: 20851928 (88.13%) Forward Only Surviving: 2097457 (8.87%) Reverse Only Surviving: 289495 (1.22%) Dropped: 420946 (1.78%)
TrimmomaticPE: Completed successfully
Started analysis of SRR8437484_1.trimmed.fastq.gz


Trimming complete. Trimmed files: RNA_seq_workflow/output/2_trimmed_output/SRR8437484_1.trimmed.fastq.gz, RNA_seq_workflow/output/2_trimmed_output/SRR8437484_2.trimmed.fastq.gz
Running FastQC on RNA_seq_workflow/output/2_trimmed_output/SRR8437484_1.trimmed.fastq.gz...
application/gzip


Approx 5% complete for SRR8437484_1.trimmed.fastq.gz
Approx 10% complete for SRR8437484_1.trimmed.fastq.gz
Approx 15% complete for SRR8437484_1.trimmed.fastq.gz
Approx 20% complete for SRR8437484_1.trimmed.fastq.gz
Approx 25% complete for SRR8437484_1.trimmed.fastq.gz
Approx 30% complete for SRR8437484_1.trimmed.fastq.gz
Approx 35% complete for SRR8437484_1.trimmed.fastq.gz
Approx 40% complete for SRR8437484_1.trimmed.fastq.gz
Approx 45% complete for SRR8437484_1.trimmed.fastq.gz
Approx 50% complete for SRR8437484_1.trimmed.fastq.gz
Approx 55% complete for SRR8437484_1.trimmed.fastq.gz
Approx 60% complete for SRR8437484_1.trimmed.fastq.gz
Approx 65% complete for SRR8437484_1.trimmed.fastq.gz
Approx 70% complete for SRR8437484_1.trimmed.fastq.gz
Approx 75% complete for SRR8437484_1.trimmed.fastq.gz
Approx 80% complete for SRR8437484_1.trimmed.fastq.gz
Approx 85% complete for SRR8437484_1.trimmed.fastq.gz
Approx 90% complete for SRR8437484_1.trimmed.fastq.gz
Approx 95% complete for SRR84

Analysis complete for SRR8437484_1.trimmed.fastq.gz
FastQC complete for RNA_seq_workflow/output/2_trimmed_output/SRR8437484_1.trimmed.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/output/2_trimmed_output/SRR8437484_2.trimmed.fastq.gz...
application/gzip


Started analysis of SRR8437484_2.trimmed.fastq.gz
Approx 5% complete for SRR8437484_2.trimmed.fastq.gz
Approx 10% complete for SRR8437484_2.trimmed.fastq.gz
Approx 15% complete for SRR8437484_2.trimmed.fastq.gz
Approx 20% complete for SRR8437484_2.trimmed.fastq.gz
Approx 25% complete for SRR8437484_2.trimmed.fastq.gz
Approx 30% complete for SRR8437484_2.trimmed.fastq.gz
Approx 35% complete for SRR8437484_2.trimmed.fastq.gz
Approx 40% complete for SRR8437484_2.trimmed.fastq.gz
Approx 45% complete for SRR8437484_2.trimmed.fastq.gz
Approx 50% complete for SRR8437484_2.trimmed.fastq.gz
Approx 55% complete for SRR8437484_2.trimmed.fastq.gz
Approx 60% complete for SRR8437484_2.trimmed.fastq.gz
Approx 65% complete for SRR8437484_2.trimmed.fastq.gz
Approx 70% complete for SRR8437484_2.trimmed.fastq.gz
Approx 75% complete for SRR8437484_2.trimmed.fastq.gz
Approx 80% complete for SRR8437484_2.trimmed.fastq.gz
Approx 85% complete for SRR8437484_2.trimmed.fastq.gz
Approx 90% complete for SRR843748

Analysis complete for SRR8437484_2.trimmed.fastq.gz
FastQC complete for RNA_seq_workflow/output/2_trimmed_output/SRR8437484_2.trimmed.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/input/SRR8437485_1.fastq.gz...
application/gzip


Started analysis of SRR8437485_1.fastq.gz
Approx 5% complete for SRR8437485_1.fastq.gz
Approx 10% complete for SRR8437485_1.fastq.gz
Approx 15% complete for SRR8437485_1.fastq.gz
Approx 20% complete for SRR8437485_1.fastq.gz
Approx 25% complete for SRR8437485_1.fastq.gz
Approx 30% complete for SRR8437485_1.fastq.gz
Approx 35% complete for SRR8437485_1.fastq.gz
Approx 40% complete for SRR8437485_1.fastq.gz
Approx 45% complete for SRR8437485_1.fastq.gz
Approx 50% complete for SRR8437485_1.fastq.gz
Approx 55% complete for SRR8437485_1.fastq.gz
Approx 60% complete for SRR8437485_1.fastq.gz
Approx 65% complete for SRR8437485_1.fastq.gz
Approx 70% complete for SRR8437485_1.fastq.gz
Approx 75% complete for SRR8437485_1.fastq.gz
Approx 80% complete for SRR8437485_1.fastq.gz
Approx 85% complete for SRR8437485_1.fastq.gz
Approx 90% complete for SRR8437485_1.fastq.gz
Approx 95% complete for SRR8437485_1.fastq.gz


Analysis complete for SRR8437485_1.fastq.gz
FastQC complete for RNA_seq_workflow/input/SRR8437485_1.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/input/SRR8437485_2.fastq.gz...
application/gzip


Started analysis of SRR8437485_2.fastq.gz
Approx 5% complete for SRR8437485_2.fastq.gz
Approx 10% complete for SRR8437485_2.fastq.gz
Approx 15% complete for SRR8437485_2.fastq.gz
Approx 20% complete for SRR8437485_2.fastq.gz
Approx 25% complete for SRR8437485_2.fastq.gz
Approx 30% complete for SRR8437485_2.fastq.gz
Approx 35% complete for SRR8437485_2.fastq.gz
Approx 40% complete for SRR8437485_2.fastq.gz
Approx 45% complete for SRR8437485_2.fastq.gz
Approx 50% complete for SRR8437485_2.fastq.gz
Approx 55% complete for SRR8437485_2.fastq.gz
Approx 60% complete for SRR8437485_2.fastq.gz
Approx 65% complete for SRR8437485_2.fastq.gz
Approx 70% complete for SRR8437485_2.fastq.gz
Approx 75% complete for SRR8437485_2.fastq.gz
Approx 80% complete for SRR8437485_2.fastq.gz
Approx 85% complete for SRR8437485_2.fastq.gz
Approx 90% complete for SRR8437485_2.fastq.gz
Approx 95% complete for SRR8437485_2.fastq.gz


Analysis complete for SRR8437485_2.fastq.gz
FastQC complete for RNA_seq_workflow/input/SRR8437485_2.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Trimming reads: RNA_seq_workflow/input/SRR8437485_1.fastq.gz and RNA_seq_workflow/input/SRR8437485_2.fastq.gz using adapters from RNA_seq_workflow/adapters/TruSeq3-PE.fa


TrimmomaticPE: Started with arguments:
 -threads 8 RNA_seq_workflow/input/SRR8437485_1.fastq.gz RNA_seq_workflow/input/SRR8437485_2.fastq.gz RNA_seq_workflow/output/2_trimmed_output/SRR8437485_1.trimmed.fastq.gz unpaired_1.fastq.gz RNA_seq_workflow/output/2_trimmed_output/SRR8437485_2.trimmed.fastq.gz unpaired_2.fastq.gz ILLUMINACLIP:RNA_seq_workflow/adapters/TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:20 MINLEN:50
Using PrefixPair: 'TACACTCTTTCCCTACACGACGCTCTTCCGATCT' and 'GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT'
ILLUMINACLIP: Using 1 prefix pairs, 0 forward/reverse sequences, 0 forward only sequences, 0 reverse only sequences
Quality encoding detected as phred33
Input Read Pairs: 22759949 Both Surviving: 20199517 (88.75%) Forward Only Surviving: 1896475 (8.33%) Reverse Only Surviving: 293494 (1.29%) Dropped: 370463 (1.63%)
TrimmomaticPE: Completed successfully
Started analysis of SRR8437485_1.trimmed.fastq.gz


Trimming complete. Trimmed files: RNA_seq_workflow/output/2_trimmed_output/SRR8437485_1.trimmed.fastq.gz, RNA_seq_workflow/output/2_trimmed_output/SRR8437485_2.trimmed.fastq.gz
Running FastQC on RNA_seq_workflow/output/2_trimmed_output/SRR8437485_1.trimmed.fastq.gz...
application/gzip


Approx 5% complete for SRR8437485_1.trimmed.fastq.gz
Approx 10% complete for SRR8437485_1.trimmed.fastq.gz
Approx 15% complete for SRR8437485_1.trimmed.fastq.gz
Approx 20% complete for SRR8437485_1.trimmed.fastq.gz
Approx 25% complete for SRR8437485_1.trimmed.fastq.gz
Approx 30% complete for SRR8437485_1.trimmed.fastq.gz
Approx 35% complete for SRR8437485_1.trimmed.fastq.gz
Approx 40% complete for SRR8437485_1.trimmed.fastq.gz
Approx 45% complete for SRR8437485_1.trimmed.fastq.gz
Approx 50% complete for SRR8437485_1.trimmed.fastq.gz
Approx 55% complete for SRR8437485_1.trimmed.fastq.gz
Approx 60% complete for SRR8437485_1.trimmed.fastq.gz
Approx 65% complete for SRR8437485_1.trimmed.fastq.gz
Approx 70% complete for SRR8437485_1.trimmed.fastq.gz
Approx 75% complete for SRR8437485_1.trimmed.fastq.gz
Approx 80% complete for SRR8437485_1.trimmed.fastq.gz
Approx 85% complete for SRR8437485_1.trimmed.fastq.gz
Approx 90% complete for SRR8437485_1.trimmed.fastq.gz
Approx 95% complete for SRR84

Analysis complete for SRR8437485_1.trimmed.fastq.gz
FastQC complete for RNA_seq_workflow/output/2_trimmed_output/SRR8437485_1.trimmed.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/output/2_trimmed_output/SRR8437485_2.trimmed.fastq.gz...
application/gzip


Started analysis of SRR8437485_2.trimmed.fastq.gz
Approx 5% complete for SRR8437485_2.trimmed.fastq.gz
Approx 10% complete for SRR8437485_2.trimmed.fastq.gz
Approx 15% complete for SRR8437485_2.trimmed.fastq.gz
Approx 20% complete for SRR8437485_2.trimmed.fastq.gz
Approx 25% complete for SRR8437485_2.trimmed.fastq.gz
Approx 30% complete for SRR8437485_2.trimmed.fastq.gz
Approx 35% complete for SRR8437485_2.trimmed.fastq.gz
Approx 40% complete for SRR8437485_2.trimmed.fastq.gz
Approx 45% complete for SRR8437485_2.trimmed.fastq.gz
Approx 50% complete for SRR8437485_2.trimmed.fastq.gz
Approx 55% complete for SRR8437485_2.trimmed.fastq.gz
Approx 60% complete for SRR8437485_2.trimmed.fastq.gz
Approx 65% complete for SRR8437485_2.trimmed.fastq.gz
Approx 70% complete for SRR8437485_2.trimmed.fastq.gz
Approx 75% complete for SRR8437485_2.trimmed.fastq.gz
Approx 80% complete for SRR8437485_2.trimmed.fastq.gz
Approx 85% complete for SRR8437485_2.trimmed.fastq.gz
Approx 90% complete for SRR843748

Analysis complete for SRR8437485_2.trimmed.fastq.gz
FastQC complete for RNA_seq_workflow/output/2_trimmed_output/SRR8437485_2.trimmed.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/input/SRR8437482_1.fastq.gz...
application/gzip


Started analysis of SRR8437482_1.fastq.gz
Approx 5% complete for SRR8437482_1.fastq.gz
Approx 10% complete for SRR8437482_1.fastq.gz
Approx 15% complete for SRR8437482_1.fastq.gz
Approx 20% complete for SRR8437482_1.fastq.gz
Approx 25% complete for SRR8437482_1.fastq.gz
Approx 30% complete for SRR8437482_1.fastq.gz
Approx 35% complete for SRR8437482_1.fastq.gz
Approx 40% complete for SRR8437482_1.fastq.gz
Approx 45% complete for SRR8437482_1.fastq.gz
Approx 50% complete for SRR8437482_1.fastq.gz
Approx 55% complete for SRR8437482_1.fastq.gz
Approx 60% complete for SRR8437482_1.fastq.gz
Approx 65% complete for SRR8437482_1.fastq.gz
Approx 70% complete for SRR8437482_1.fastq.gz
Approx 75% complete for SRR8437482_1.fastq.gz
Approx 80% complete for SRR8437482_1.fastq.gz
Approx 85% complete for SRR8437482_1.fastq.gz
Approx 90% complete for SRR8437482_1.fastq.gz
Approx 95% complete for SRR8437482_1.fastq.gz


Analysis complete for SRR8437482_1.fastq.gz
FastQC complete for RNA_seq_workflow/input/SRR8437482_1.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/input/SRR8437482_2.fastq.gz...
application/gzip


Started analysis of SRR8437482_2.fastq.gz
Approx 5% complete for SRR8437482_2.fastq.gz
Approx 10% complete for SRR8437482_2.fastq.gz
Approx 15% complete for SRR8437482_2.fastq.gz
Approx 20% complete for SRR8437482_2.fastq.gz
Approx 25% complete for SRR8437482_2.fastq.gz
Approx 30% complete for SRR8437482_2.fastq.gz
Approx 35% complete for SRR8437482_2.fastq.gz
Approx 40% complete for SRR8437482_2.fastq.gz
Approx 45% complete for SRR8437482_2.fastq.gz
Approx 50% complete for SRR8437482_2.fastq.gz
Approx 55% complete for SRR8437482_2.fastq.gz
Approx 60% complete for SRR8437482_2.fastq.gz
Approx 65% complete for SRR8437482_2.fastq.gz
Approx 70% complete for SRR8437482_2.fastq.gz
Approx 75% complete for SRR8437482_2.fastq.gz
Approx 80% complete for SRR8437482_2.fastq.gz
Approx 85% complete for SRR8437482_2.fastq.gz
Approx 90% complete for SRR8437482_2.fastq.gz
Approx 95% complete for SRR8437482_2.fastq.gz


Analysis complete for SRR8437482_2.fastq.gz
FastQC complete for RNA_seq_workflow/input/SRR8437482_2.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Trimming reads: RNA_seq_workflow/input/SRR8437482_1.fastq.gz and RNA_seq_workflow/input/SRR8437482_2.fastq.gz using adapters from RNA_seq_workflow/adapters/TruSeq3-PE.fa


TrimmomaticPE: Started with arguments:
 -threads 8 RNA_seq_workflow/input/SRR8437482_1.fastq.gz RNA_seq_workflow/input/SRR8437482_2.fastq.gz RNA_seq_workflow/output/2_trimmed_output/SRR8437482_1.trimmed.fastq.gz unpaired_1.fastq.gz RNA_seq_workflow/output/2_trimmed_output/SRR8437482_2.trimmed.fastq.gz unpaired_2.fastq.gz ILLUMINACLIP:RNA_seq_workflow/adapters/TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:20 MINLEN:50
Using PrefixPair: 'TACACTCTTTCCCTACACGACGCTCTTCCGATCT' and 'GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT'
ILLUMINACLIP: Using 1 prefix pairs, 0 forward/reverse sequences, 0 forward only sequences, 0 reverse only sequences
Quality encoding detected as phred33
Input Read Pairs: 29462794 Both Surviving: 25824523 (87.65%) Forward Only Surviving: 2770787 (9.40%) Reverse Only Surviving: 317255 (1.08%) Dropped: 550229 (1.87%)
TrimmomaticPE: Completed successfully
Started analysis of SRR8437482_1.trimmed.fastq.gz


Trimming complete. Trimmed files: RNA_seq_workflow/output/2_trimmed_output/SRR8437482_1.trimmed.fastq.gz, RNA_seq_workflow/output/2_trimmed_output/SRR8437482_2.trimmed.fastq.gz
Running FastQC on RNA_seq_workflow/output/2_trimmed_output/SRR8437482_1.trimmed.fastq.gz...
application/gzip


Approx 5% complete for SRR8437482_1.trimmed.fastq.gz
Approx 10% complete for SRR8437482_1.trimmed.fastq.gz
Approx 15% complete for SRR8437482_1.trimmed.fastq.gz
Approx 20% complete for SRR8437482_1.trimmed.fastq.gz
Approx 25% complete for SRR8437482_1.trimmed.fastq.gz
Approx 30% complete for SRR8437482_1.trimmed.fastq.gz
Approx 35% complete for SRR8437482_1.trimmed.fastq.gz
Approx 40% complete for SRR8437482_1.trimmed.fastq.gz
Approx 45% complete for SRR8437482_1.trimmed.fastq.gz
Approx 50% complete for SRR8437482_1.trimmed.fastq.gz
Approx 55% complete for SRR8437482_1.trimmed.fastq.gz
Approx 60% complete for SRR8437482_1.trimmed.fastq.gz
Approx 65% complete for SRR8437482_1.trimmed.fastq.gz
Approx 70% complete for SRR8437482_1.trimmed.fastq.gz
Approx 75% complete for SRR8437482_1.trimmed.fastq.gz
Approx 80% complete for SRR8437482_1.trimmed.fastq.gz
Approx 85% complete for SRR8437482_1.trimmed.fastq.gz
Approx 90% complete for SRR8437482_1.trimmed.fastq.gz
Approx 95% complete for SRR84

Analysis complete for SRR8437482_1.trimmed.fastq.gz
FastQC complete for RNA_seq_workflow/output/2_trimmed_output/SRR8437482_1.trimmed.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/output/2_trimmed_output/SRR8437482_2.trimmed.fastq.gz...
application/gzip


Started analysis of SRR8437482_2.trimmed.fastq.gz
Approx 5% complete for SRR8437482_2.trimmed.fastq.gz
Approx 10% complete for SRR8437482_2.trimmed.fastq.gz
Approx 15% complete for SRR8437482_2.trimmed.fastq.gz
Approx 20% complete for SRR8437482_2.trimmed.fastq.gz
Approx 25% complete for SRR8437482_2.trimmed.fastq.gz
Approx 30% complete for SRR8437482_2.trimmed.fastq.gz
Approx 35% complete for SRR8437482_2.trimmed.fastq.gz
Approx 40% complete for SRR8437482_2.trimmed.fastq.gz
Approx 45% complete for SRR8437482_2.trimmed.fastq.gz
Approx 50% complete for SRR8437482_2.trimmed.fastq.gz
Approx 55% complete for SRR8437482_2.trimmed.fastq.gz
Approx 60% complete for SRR8437482_2.trimmed.fastq.gz
Approx 65% complete for SRR8437482_2.trimmed.fastq.gz
Approx 70% complete for SRR8437482_2.trimmed.fastq.gz
Approx 75% complete for SRR8437482_2.trimmed.fastq.gz
Approx 80% complete for SRR8437482_2.trimmed.fastq.gz
Approx 85% complete for SRR8437482_2.trimmed.fastq.gz
Approx 90% complete for SRR843748

Analysis complete for SRR8437482_2.trimmed.fastq.gz
FastQC complete for RNA_seq_workflow/output/2_trimmed_output/SRR8437482_2.trimmed.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/input/SRR8437483_1.fastq.gz...
application/gzip


Started analysis of SRR8437483_1.fastq.gz
Approx 5% complete for SRR8437483_1.fastq.gz
Approx 10% complete for SRR8437483_1.fastq.gz
Approx 15% complete for SRR8437483_1.fastq.gz
Approx 20% complete for SRR8437483_1.fastq.gz
Approx 25% complete for SRR8437483_1.fastq.gz
Approx 30% complete for SRR8437483_1.fastq.gz
Approx 35% complete for SRR8437483_1.fastq.gz
Approx 40% complete for SRR8437483_1.fastq.gz
Approx 45% complete for SRR8437483_1.fastq.gz
Approx 50% complete for SRR8437483_1.fastq.gz
Approx 55% complete for SRR8437483_1.fastq.gz
Approx 60% complete for SRR8437483_1.fastq.gz
Approx 65% complete for SRR8437483_1.fastq.gz
Approx 70% complete for SRR8437483_1.fastq.gz
Approx 75% complete for SRR8437483_1.fastq.gz
Approx 80% complete for SRR8437483_1.fastq.gz
Approx 85% complete for SRR8437483_1.fastq.gz
Approx 90% complete for SRR8437483_1.fastq.gz
Approx 95% complete for SRR8437483_1.fastq.gz


Analysis complete for SRR8437483_1.fastq.gz
FastQC complete for RNA_seq_workflow/input/SRR8437483_1.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/input/SRR8437483_2.fastq.gz...
application/gzip


Started analysis of SRR8437483_2.fastq.gz
Approx 5% complete for SRR8437483_2.fastq.gz
Approx 10% complete for SRR8437483_2.fastq.gz
Approx 15% complete for SRR8437483_2.fastq.gz
Approx 20% complete for SRR8437483_2.fastq.gz
Approx 25% complete for SRR8437483_2.fastq.gz
Approx 30% complete for SRR8437483_2.fastq.gz
Approx 35% complete for SRR8437483_2.fastq.gz
Approx 40% complete for SRR8437483_2.fastq.gz
Approx 45% complete for SRR8437483_2.fastq.gz
Approx 50% complete for SRR8437483_2.fastq.gz
Approx 55% complete for SRR8437483_2.fastq.gz
Approx 60% complete for SRR8437483_2.fastq.gz
Approx 65% complete for SRR8437483_2.fastq.gz
Approx 70% complete for SRR8437483_2.fastq.gz
Approx 75% complete for SRR8437483_2.fastq.gz
Approx 80% complete for SRR8437483_2.fastq.gz
Approx 85% complete for SRR8437483_2.fastq.gz
Approx 90% complete for SRR8437483_2.fastq.gz
Approx 95% complete for SRR8437483_2.fastq.gz


Analysis complete for SRR8437483_2.fastq.gz
FastQC complete for RNA_seq_workflow/input/SRR8437483_2.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Trimming reads: RNA_seq_workflow/input/SRR8437483_1.fastq.gz and RNA_seq_workflow/input/SRR8437483_2.fastq.gz using adapters from RNA_seq_workflow/adapters/TruSeq3-PE.fa


TrimmomaticPE: Started with arguments:
 -threads 8 RNA_seq_workflow/input/SRR8437483_1.fastq.gz RNA_seq_workflow/input/SRR8437483_2.fastq.gz RNA_seq_workflow/output/2_trimmed_output/SRR8437483_1.trimmed.fastq.gz unpaired_1.fastq.gz RNA_seq_workflow/output/2_trimmed_output/SRR8437483_2.trimmed.fastq.gz unpaired_2.fastq.gz ILLUMINACLIP:RNA_seq_workflow/adapters/TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:20 MINLEN:50
Using PrefixPair: 'TACACTCTTTCCCTACACGACGCTCTTCCGATCT' and 'GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT'
ILLUMINACLIP: Using 1 prefix pairs, 0 forward/reverse sequences, 0 forward only sequences, 0 reverse only sequences
Quality encoding detected as phred33
Input Read Pairs: 21815089 Both Surviving: 19203613 (88.03%) Forward Only Surviving: 1948496 (8.93%) Reverse Only Surviving: 259321 (1.19%) Dropped: 403659 (1.85%)
TrimmomaticPE: Completed successfully
Started analysis of SRR8437483_1.trimmed.fastq.gz


Trimming complete. Trimmed files: RNA_seq_workflow/output/2_trimmed_output/SRR8437483_1.trimmed.fastq.gz, RNA_seq_workflow/output/2_trimmed_output/SRR8437483_2.trimmed.fastq.gz
Running FastQC on RNA_seq_workflow/output/2_trimmed_output/SRR8437483_1.trimmed.fastq.gz...
application/gzip


Approx 5% complete for SRR8437483_1.trimmed.fastq.gz
Approx 10% complete for SRR8437483_1.trimmed.fastq.gz
Approx 15% complete for SRR8437483_1.trimmed.fastq.gz
Approx 20% complete for SRR8437483_1.trimmed.fastq.gz
Approx 25% complete for SRR8437483_1.trimmed.fastq.gz
Approx 30% complete for SRR8437483_1.trimmed.fastq.gz
Approx 35% complete for SRR8437483_1.trimmed.fastq.gz
Approx 40% complete for SRR8437483_1.trimmed.fastq.gz
Approx 45% complete for SRR8437483_1.trimmed.fastq.gz
Approx 50% complete for SRR8437483_1.trimmed.fastq.gz
Approx 55% complete for SRR8437483_1.trimmed.fastq.gz
Approx 60% complete for SRR8437483_1.trimmed.fastq.gz
Approx 65% complete for SRR8437483_1.trimmed.fastq.gz
Approx 70% complete for SRR8437483_1.trimmed.fastq.gz
Approx 75% complete for SRR8437483_1.trimmed.fastq.gz
Approx 80% complete for SRR8437483_1.trimmed.fastq.gz
Approx 85% complete for SRR8437483_1.trimmed.fastq.gz
Approx 90% complete for SRR8437483_1.trimmed.fastq.gz
Approx 95% complete for SRR84

Analysis complete for SRR8437483_1.trimmed.fastq.gz
FastQC complete for RNA_seq_workflow/output/2_trimmed_output/SRR8437483_1.trimmed.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/output/2_trimmed_output/SRR8437483_2.trimmed.fastq.gz...
application/gzip


Started analysis of SRR8437483_2.trimmed.fastq.gz
Approx 5% complete for SRR8437483_2.trimmed.fastq.gz
Approx 10% complete for SRR8437483_2.trimmed.fastq.gz
Approx 15% complete for SRR8437483_2.trimmed.fastq.gz
Approx 20% complete for SRR8437483_2.trimmed.fastq.gz
Approx 25% complete for SRR8437483_2.trimmed.fastq.gz
Approx 30% complete for SRR8437483_2.trimmed.fastq.gz
Approx 35% complete for SRR8437483_2.trimmed.fastq.gz
Approx 40% complete for SRR8437483_2.trimmed.fastq.gz
Approx 45% complete for SRR8437483_2.trimmed.fastq.gz
Approx 50% complete for SRR8437483_2.trimmed.fastq.gz
Approx 55% complete for SRR8437483_2.trimmed.fastq.gz
Approx 60% complete for SRR8437483_2.trimmed.fastq.gz
Approx 65% complete for SRR8437483_2.trimmed.fastq.gz
Approx 70% complete for SRR8437483_2.trimmed.fastq.gz
Approx 75% complete for SRR8437483_2.trimmed.fastq.gz
Approx 80% complete for SRR8437483_2.trimmed.fastq.gz
Approx 85% complete for SRR8437483_2.trimmed.fastq.gz
Approx 90% complete for SRR843748

Analysis complete for SRR8437483_2.trimmed.fastq.gz
FastQC complete for RNA_seq_workflow/output/2_trimmed_output/SRR8437483_2.trimmed.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/input/SRR8437480_1.fastq.gz...
application/gzip


Started analysis of SRR8437480_1.fastq.gz
Approx 5% complete for SRR8437480_1.fastq.gz
Approx 10% complete for SRR8437480_1.fastq.gz
Approx 15% complete for SRR8437480_1.fastq.gz
Approx 20% complete for SRR8437480_1.fastq.gz
Approx 25% complete for SRR8437480_1.fastq.gz
Approx 30% complete for SRR8437480_1.fastq.gz
Approx 35% complete for SRR8437480_1.fastq.gz
Approx 40% complete for SRR8437480_1.fastq.gz
Approx 45% complete for SRR8437480_1.fastq.gz
Approx 50% complete for SRR8437480_1.fastq.gz
Approx 55% complete for SRR8437480_1.fastq.gz
Approx 60% complete for SRR8437480_1.fastq.gz
Approx 65% complete for SRR8437480_1.fastq.gz
Approx 70% complete for SRR8437480_1.fastq.gz
Approx 75% complete for SRR8437480_1.fastq.gz
Approx 80% complete for SRR8437480_1.fastq.gz
Approx 85% complete for SRR8437480_1.fastq.gz
Approx 90% complete for SRR8437480_1.fastq.gz
Approx 95% complete for SRR8437480_1.fastq.gz


Analysis complete for SRR8437480_1.fastq.gz
FastQC complete for RNA_seq_workflow/input/SRR8437480_1.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/input/SRR8437480_2.fastq.gz...
application/gzip


Started analysis of SRR8437480_2.fastq.gz
Approx 5% complete for SRR8437480_2.fastq.gz
Approx 10% complete for SRR8437480_2.fastq.gz
Approx 15% complete for SRR8437480_2.fastq.gz
Approx 20% complete for SRR8437480_2.fastq.gz
Approx 25% complete for SRR8437480_2.fastq.gz
Approx 30% complete for SRR8437480_2.fastq.gz
Approx 35% complete for SRR8437480_2.fastq.gz
Approx 40% complete for SRR8437480_2.fastq.gz
Approx 45% complete for SRR8437480_2.fastq.gz
Approx 50% complete for SRR8437480_2.fastq.gz
Approx 55% complete for SRR8437480_2.fastq.gz
Approx 60% complete for SRR8437480_2.fastq.gz
Approx 65% complete for SRR8437480_2.fastq.gz
Approx 70% complete for SRR8437480_2.fastq.gz
Approx 75% complete for SRR8437480_2.fastq.gz
Approx 80% complete for SRR8437480_2.fastq.gz
Approx 85% complete for SRR8437480_2.fastq.gz
Approx 90% complete for SRR8437480_2.fastq.gz
Approx 95% complete for SRR8437480_2.fastq.gz


Analysis complete for SRR8437480_2.fastq.gz
FastQC complete for RNA_seq_workflow/input/SRR8437480_2.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Trimming reads: RNA_seq_workflow/input/SRR8437480_1.fastq.gz and RNA_seq_workflow/input/SRR8437480_2.fastq.gz using adapters from RNA_seq_workflow/adapters/TruSeq3-PE.fa


TrimmomaticPE: Started with arguments:
 -threads 8 RNA_seq_workflow/input/SRR8437480_1.fastq.gz RNA_seq_workflow/input/SRR8437480_2.fastq.gz RNA_seq_workflow/output/2_trimmed_output/SRR8437480_1.trimmed.fastq.gz unpaired_1.fastq.gz RNA_seq_workflow/output/2_trimmed_output/SRR8437480_2.trimmed.fastq.gz unpaired_2.fastq.gz ILLUMINACLIP:RNA_seq_workflow/adapters/TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:20 MINLEN:50
Using PrefixPair: 'TACACTCTTTCCCTACACGACGCTCTTCCGATCT' and 'GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT'
ILLUMINACLIP: Using 1 prefix pairs, 0 forward/reverse sequences, 0 forward only sequences, 0 reverse only sequences
Quality encoding detected as phred33
Input Read Pairs: 22080043 Both Surviving: 18380335 (83.24%) Forward Only Surviving: 2623619 (11.88%) Reverse Only Surviving: 464017 (2.10%) Dropped: 612072 (2.77%)
TrimmomaticPE: Completed successfully
Started analysis of SRR8437480_1.trimmed.fastq.gz


Trimming complete. Trimmed files: RNA_seq_workflow/output/2_trimmed_output/SRR8437480_1.trimmed.fastq.gz, RNA_seq_workflow/output/2_trimmed_output/SRR8437480_2.trimmed.fastq.gz
Running FastQC on RNA_seq_workflow/output/2_trimmed_output/SRR8437480_1.trimmed.fastq.gz...
application/gzip


Approx 5% complete for SRR8437480_1.trimmed.fastq.gz
Approx 10% complete for SRR8437480_1.trimmed.fastq.gz
Approx 15% complete for SRR8437480_1.trimmed.fastq.gz
Approx 20% complete for SRR8437480_1.trimmed.fastq.gz
Approx 25% complete for SRR8437480_1.trimmed.fastq.gz
Approx 30% complete for SRR8437480_1.trimmed.fastq.gz
Approx 35% complete for SRR8437480_1.trimmed.fastq.gz
Approx 40% complete for SRR8437480_1.trimmed.fastq.gz
Approx 45% complete for SRR8437480_1.trimmed.fastq.gz
Approx 50% complete for SRR8437480_1.trimmed.fastq.gz
Approx 55% complete for SRR8437480_1.trimmed.fastq.gz
Approx 60% complete for SRR8437480_1.trimmed.fastq.gz
Approx 65% complete for SRR8437480_1.trimmed.fastq.gz
Approx 70% complete for SRR8437480_1.trimmed.fastq.gz
Approx 75% complete for SRR8437480_1.trimmed.fastq.gz
Approx 80% complete for SRR8437480_1.trimmed.fastq.gz
Approx 85% complete for SRR8437480_1.trimmed.fastq.gz
Approx 90% complete for SRR8437480_1.trimmed.fastq.gz
Approx 95% complete for SRR84

Analysis complete for SRR8437480_1.trimmed.fastq.gz
FastQC complete for RNA_seq_workflow/output/2_trimmed_output/SRR8437480_1.trimmed.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/output/2_trimmed_output/SRR8437480_2.trimmed.fastq.gz...
application/gzip


Started analysis of SRR8437480_2.trimmed.fastq.gz
Approx 5% complete for SRR8437480_2.trimmed.fastq.gz
Approx 10% complete for SRR8437480_2.trimmed.fastq.gz
Approx 15% complete for SRR8437480_2.trimmed.fastq.gz
Approx 20% complete for SRR8437480_2.trimmed.fastq.gz
Approx 25% complete for SRR8437480_2.trimmed.fastq.gz
Approx 30% complete for SRR8437480_2.trimmed.fastq.gz
Approx 35% complete for SRR8437480_2.trimmed.fastq.gz
Approx 40% complete for SRR8437480_2.trimmed.fastq.gz
Approx 45% complete for SRR8437480_2.trimmed.fastq.gz
Approx 50% complete for SRR8437480_2.trimmed.fastq.gz
Approx 55% complete for SRR8437480_2.trimmed.fastq.gz
Approx 60% complete for SRR8437480_2.trimmed.fastq.gz
Approx 65% complete for SRR8437480_2.trimmed.fastq.gz
Approx 70% complete for SRR8437480_2.trimmed.fastq.gz
Approx 75% complete for SRR8437480_2.trimmed.fastq.gz
Approx 80% complete for SRR8437480_2.trimmed.fastq.gz
Approx 85% complete for SRR8437480_2.trimmed.fastq.gz
Approx 90% complete for SRR843748

Analysis complete for SRR8437480_2.trimmed.fastq.gz
FastQC complete for RNA_seq_workflow/output/2_trimmed_output/SRR8437480_2.trimmed.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/input/SRR8437481_1.fastq.gz...
application/gzip


Started analysis of SRR8437481_1.fastq.gz
Approx 5% complete for SRR8437481_1.fastq.gz
Approx 10% complete for SRR8437481_1.fastq.gz
Approx 15% complete for SRR8437481_1.fastq.gz
Approx 20% complete for SRR8437481_1.fastq.gz
Approx 25% complete for SRR8437481_1.fastq.gz
Approx 30% complete for SRR8437481_1.fastq.gz
Approx 35% complete for SRR8437481_1.fastq.gz
Approx 40% complete for SRR8437481_1.fastq.gz
Approx 45% complete for SRR8437481_1.fastq.gz
Approx 50% complete for SRR8437481_1.fastq.gz
Approx 55% complete for SRR8437481_1.fastq.gz
Approx 60% complete for SRR8437481_1.fastq.gz
Approx 65% complete for SRR8437481_1.fastq.gz
Approx 70% complete for SRR8437481_1.fastq.gz
Approx 75% complete for SRR8437481_1.fastq.gz
Approx 80% complete for SRR8437481_1.fastq.gz
Approx 85% complete for SRR8437481_1.fastq.gz
Approx 90% complete for SRR8437481_1.fastq.gz
Approx 95% complete for SRR8437481_1.fastq.gz


Analysis complete for SRR8437481_1.fastq.gz
FastQC complete for RNA_seq_workflow/input/SRR8437481_1.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/input/SRR8437481_2.fastq.gz...
application/gzip


Started analysis of SRR8437481_2.fastq.gz
Approx 5% complete for SRR8437481_2.fastq.gz
Approx 10% complete for SRR8437481_2.fastq.gz
Approx 15% complete for SRR8437481_2.fastq.gz
Approx 20% complete for SRR8437481_2.fastq.gz
Approx 25% complete for SRR8437481_2.fastq.gz
Approx 30% complete for SRR8437481_2.fastq.gz
Approx 35% complete for SRR8437481_2.fastq.gz
Approx 40% complete for SRR8437481_2.fastq.gz
Approx 45% complete for SRR8437481_2.fastq.gz
Approx 50% complete for SRR8437481_2.fastq.gz
Approx 55% complete for SRR8437481_2.fastq.gz
Approx 60% complete for SRR8437481_2.fastq.gz
Approx 65% complete for SRR8437481_2.fastq.gz
Approx 70% complete for SRR8437481_2.fastq.gz
Approx 75% complete for SRR8437481_2.fastq.gz
Approx 80% complete for SRR8437481_2.fastq.gz
Approx 85% complete for SRR8437481_2.fastq.gz
Approx 90% complete for SRR8437481_2.fastq.gz
Approx 95% complete for SRR8437481_2.fastq.gz


Analysis complete for SRR8437481_2.fastq.gz
FastQC complete for RNA_seq_workflow/input/SRR8437481_2.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Trimming reads: RNA_seq_workflow/input/SRR8437481_1.fastq.gz and RNA_seq_workflow/input/SRR8437481_2.fastq.gz using adapters from RNA_seq_workflow/adapters/TruSeq3-PE.fa


TrimmomaticPE: Started with arguments:
 -threads 8 RNA_seq_workflow/input/SRR8437481_1.fastq.gz RNA_seq_workflow/input/SRR8437481_2.fastq.gz RNA_seq_workflow/output/2_trimmed_output/SRR8437481_1.trimmed.fastq.gz unpaired_1.fastq.gz RNA_seq_workflow/output/2_trimmed_output/SRR8437481_2.trimmed.fastq.gz unpaired_2.fastq.gz ILLUMINACLIP:RNA_seq_workflow/adapters/TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:20 MINLEN:50
Using PrefixPair: 'TACACTCTTTCCCTACACGACGCTCTTCCGATCT' and 'GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT'
ILLUMINACLIP: Using 1 prefix pairs, 0 forward/reverse sequences, 0 forward only sequences, 0 reverse only sequences
Quality encoding detected as phred33
Input Read Pairs: 28694901 Both Surviving: 25544038 (89.02%) Forward Only Surviving: 2397921 (8.36%) Reverse Only Surviving: 281674 (0.98%) Dropped: 471268 (1.64%)
TrimmomaticPE: Completed successfully
Started analysis of SRR8437481_1.trimmed.fastq.gz


Trimming complete. Trimmed files: RNA_seq_workflow/output/2_trimmed_output/SRR8437481_1.trimmed.fastq.gz, RNA_seq_workflow/output/2_trimmed_output/SRR8437481_2.trimmed.fastq.gz
Running FastQC on RNA_seq_workflow/output/2_trimmed_output/SRR8437481_1.trimmed.fastq.gz...
application/gzip


Approx 5% complete for SRR8437481_1.trimmed.fastq.gz
Approx 10% complete for SRR8437481_1.trimmed.fastq.gz
Approx 15% complete for SRR8437481_1.trimmed.fastq.gz
Approx 20% complete for SRR8437481_1.trimmed.fastq.gz
Approx 25% complete for SRR8437481_1.trimmed.fastq.gz
Approx 30% complete for SRR8437481_1.trimmed.fastq.gz
Approx 35% complete for SRR8437481_1.trimmed.fastq.gz
Approx 40% complete for SRR8437481_1.trimmed.fastq.gz
Approx 45% complete for SRR8437481_1.trimmed.fastq.gz
Approx 50% complete for SRR8437481_1.trimmed.fastq.gz
Approx 55% complete for SRR8437481_1.trimmed.fastq.gz
Approx 60% complete for SRR8437481_1.trimmed.fastq.gz
Approx 65% complete for SRR8437481_1.trimmed.fastq.gz
Approx 70% complete for SRR8437481_1.trimmed.fastq.gz
Approx 75% complete for SRR8437481_1.trimmed.fastq.gz
Approx 80% complete for SRR8437481_1.trimmed.fastq.gz
Approx 85% complete for SRR8437481_1.trimmed.fastq.gz
Approx 90% complete for SRR8437481_1.trimmed.fastq.gz
Approx 95% complete for SRR84

Analysis complete for SRR8437481_1.trimmed.fastq.gz
FastQC complete for RNA_seq_workflow/output/2_trimmed_output/SRR8437481_1.trimmed.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/output/2_trimmed_output/SRR8437481_2.trimmed.fastq.gz...
application/gzip


Started analysis of SRR8437481_2.trimmed.fastq.gz
Approx 5% complete for SRR8437481_2.trimmed.fastq.gz
Approx 10% complete for SRR8437481_2.trimmed.fastq.gz
Approx 15% complete for SRR8437481_2.trimmed.fastq.gz
Approx 20% complete for SRR8437481_2.trimmed.fastq.gz
Approx 25% complete for SRR8437481_2.trimmed.fastq.gz
Approx 30% complete for SRR8437481_2.trimmed.fastq.gz
Approx 35% complete for SRR8437481_2.trimmed.fastq.gz
Approx 40% complete for SRR8437481_2.trimmed.fastq.gz
Approx 45% complete for SRR8437481_2.trimmed.fastq.gz
Approx 50% complete for SRR8437481_2.trimmed.fastq.gz
Approx 55% complete for SRR8437481_2.trimmed.fastq.gz
Approx 60% complete for SRR8437481_2.trimmed.fastq.gz
Approx 65% complete for SRR8437481_2.trimmed.fastq.gz
Approx 70% complete for SRR8437481_2.trimmed.fastq.gz
Approx 75% complete for SRR8437481_2.trimmed.fastq.gz
Approx 80% complete for SRR8437481_2.trimmed.fastq.gz
Approx 85% complete for SRR8437481_2.trimmed.fastq.gz
Approx 90% complete for SRR843748

Analysis complete for SRR8437481_2.trimmed.fastq.gz
FastQC complete for RNA_seq_workflow/output/2_trimmed_output/SRR8437481_2.trimmed.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/input/SRR8437478_1.fastq.gz...
application/gzip


Started analysis of SRR8437478_1.fastq.gz
Approx 5% complete for SRR8437478_1.fastq.gz
Approx 10% complete for SRR8437478_1.fastq.gz
Approx 15% complete for SRR8437478_1.fastq.gz
Approx 20% complete for SRR8437478_1.fastq.gz
Approx 25% complete for SRR8437478_1.fastq.gz
Approx 30% complete for SRR8437478_1.fastq.gz
Approx 35% complete for SRR8437478_1.fastq.gz
Approx 40% complete for SRR8437478_1.fastq.gz
Approx 45% complete for SRR8437478_1.fastq.gz
Approx 50% complete for SRR8437478_1.fastq.gz
Approx 55% complete for SRR8437478_1.fastq.gz
Approx 60% complete for SRR8437478_1.fastq.gz
Approx 65% complete for SRR8437478_1.fastq.gz
Approx 70% complete for SRR8437478_1.fastq.gz
Approx 75% complete for SRR8437478_1.fastq.gz
Approx 80% complete for SRR8437478_1.fastq.gz
Approx 85% complete for SRR8437478_1.fastq.gz
Approx 90% complete for SRR8437478_1.fastq.gz
Approx 95% complete for SRR8437478_1.fastq.gz


Analysis complete for SRR8437478_1.fastq.gz
FastQC complete for RNA_seq_workflow/input/SRR8437478_1.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/input/SRR8437478_2.fastq.gz...
application/gzip


Started analysis of SRR8437478_2.fastq.gz
Approx 5% complete for SRR8437478_2.fastq.gz
Approx 10% complete for SRR8437478_2.fastq.gz
Approx 15% complete for SRR8437478_2.fastq.gz
Approx 20% complete for SRR8437478_2.fastq.gz
Approx 25% complete for SRR8437478_2.fastq.gz
Approx 30% complete for SRR8437478_2.fastq.gz
Approx 35% complete for SRR8437478_2.fastq.gz
Approx 40% complete for SRR8437478_2.fastq.gz
Approx 45% complete for SRR8437478_2.fastq.gz
Approx 50% complete for SRR8437478_2.fastq.gz
Approx 55% complete for SRR8437478_2.fastq.gz
Approx 60% complete for SRR8437478_2.fastq.gz
Approx 65% complete for SRR8437478_2.fastq.gz
Approx 70% complete for SRR8437478_2.fastq.gz
Approx 75% complete for SRR8437478_2.fastq.gz
Approx 80% complete for SRR8437478_2.fastq.gz
Approx 85% complete for SRR8437478_2.fastq.gz
Approx 90% complete for SRR8437478_2.fastq.gz
Approx 95% complete for SRR8437478_2.fastq.gz


Analysis complete for SRR8437478_2.fastq.gz
FastQC complete for RNA_seq_workflow/input/SRR8437478_2.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Trimming reads: RNA_seq_workflow/input/SRR8437478_1.fastq.gz and RNA_seq_workflow/input/SRR8437478_2.fastq.gz using adapters from RNA_seq_workflow/adapters/TruSeq3-PE.fa


TrimmomaticPE: Started with arguments:
 -threads 8 RNA_seq_workflow/input/SRR8437478_1.fastq.gz RNA_seq_workflow/input/SRR8437478_2.fastq.gz RNA_seq_workflow/output/2_trimmed_output/SRR8437478_1.trimmed.fastq.gz unpaired_1.fastq.gz RNA_seq_workflow/output/2_trimmed_output/SRR8437478_2.trimmed.fastq.gz unpaired_2.fastq.gz ILLUMINACLIP:RNA_seq_workflow/adapters/TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:20 MINLEN:50
Using PrefixPair: 'TACACTCTTTCCCTACACGACGCTCTTCCGATCT' and 'GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT'
ILLUMINACLIP: Using 1 prefix pairs, 0 forward/reverse sequences, 0 forward only sequences, 0 reverse only sequences
Quality encoding detected as phred33
Input Read Pairs: 23939223 Both Surviving: 20140139 (84.13%) Forward Only Surviving: 2717732 (11.35%) Reverse Only Surviving: 477646 (2.00%) Dropped: 603706 (2.52%)
TrimmomaticPE: Completed successfully
Started analysis of SRR8437478_1.trimmed.fastq.gz


Trimming complete. Trimmed files: RNA_seq_workflow/output/2_trimmed_output/SRR8437478_1.trimmed.fastq.gz, RNA_seq_workflow/output/2_trimmed_output/SRR8437478_2.trimmed.fastq.gz
Running FastQC on RNA_seq_workflow/output/2_trimmed_output/SRR8437478_1.trimmed.fastq.gz...
application/gzip


Approx 5% complete for SRR8437478_1.trimmed.fastq.gz
Approx 10% complete for SRR8437478_1.trimmed.fastq.gz
Approx 15% complete for SRR8437478_1.trimmed.fastq.gz
Approx 20% complete for SRR8437478_1.trimmed.fastq.gz
Approx 25% complete for SRR8437478_1.trimmed.fastq.gz
Approx 30% complete for SRR8437478_1.trimmed.fastq.gz
Approx 35% complete for SRR8437478_1.trimmed.fastq.gz
Approx 40% complete for SRR8437478_1.trimmed.fastq.gz
Approx 45% complete for SRR8437478_1.trimmed.fastq.gz
Approx 50% complete for SRR8437478_1.trimmed.fastq.gz
Approx 55% complete for SRR8437478_1.trimmed.fastq.gz
Approx 60% complete for SRR8437478_1.trimmed.fastq.gz
Approx 65% complete for SRR8437478_1.trimmed.fastq.gz
Approx 70% complete for SRR8437478_1.trimmed.fastq.gz
Approx 75% complete for SRR8437478_1.trimmed.fastq.gz
Approx 80% complete for SRR8437478_1.trimmed.fastq.gz
Approx 85% complete for SRR8437478_1.trimmed.fastq.gz
Approx 90% complete for SRR8437478_1.trimmed.fastq.gz
Approx 95% complete for SRR84

Analysis complete for SRR8437478_1.trimmed.fastq.gz
FastQC complete for RNA_seq_workflow/output/2_trimmed_output/SRR8437478_1.trimmed.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/output/2_trimmed_output/SRR8437478_2.trimmed.fastq.gz...
application/gzip


Started analysis of SRR8437478_2.trimmed.fastq.gz
Approx 5% complete for SRR8437478_2.trimmed.fastq.gz
Approx 10% complete for SRR8437478_2.trimmed.fastq.gz
Approx 15% complete for SRR8437478_2.trimmed.fastq.gz
Approx 20% complete for SRR8437478_2.trimmed.fastq.gz
Approx 25% complete for SRR8437478_2.trimmed.fastq.gz
Approx 30% complete for SRR8437478_2.trimmed.fastq.gz
Approx 35% complete for SRR8437478_2.trimmed.fastq.gz
Approx 40% complete for SRR8437478_2.trimmed.fastq.gz
Approx 45% complete for SRR8437478_2.trimmed.fastq.gz
Approx 50% complete for SRR8437478_2.trimmed.fastq.gz
Approx 55% complete for SRR8437478_2.trimmed.fastq.gz
Approx 60% complete for SRR8437478_2.trimmed.fastq.gz
Approx 65% complete for SRR8437478_2.trimmed.fastq.gz
Approx 70% complete for SRR8437478_2.trimmed.fastq.gz
Approx 75% complete for SRR8437478_2.trimmed.fastq.gz
Approx 80% complete for SRR8437478_2.trimmed.fastq.gz
Approx 85% complete for SRR8437478_2.trimmed.fastq.gz
Approx 90% complete for SRR843747

Analysis complete for SRR8437478_2.trimmed.fastq.gz
FastQC complete for RNA_seq_workflow/output/2_trimmed_output/SRR8437478_2.trimmed.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/input/SRR8437479_1.fastq.gz...
application/gzip


Started analysis of SRR8437479_1.fastq.gz
Approx 5% complete for SRR8437479_1.fastq.gz
Approx 10% complete for SRR8437479_1.fastq.gz
Approx 15% complete for SRR8437479_1.fastq.gz
Approx 20% complete for SRR8437479_1.fastq.gz
Approx 25% complete for SRR8437479_1.fastq.gz
Approx 30% complete for SRR8437479_1.fastq.gz
Approx 35% complete for SRR8437479_1.fastq.gz
Approx 40% complete for SRR8437479_1.fastq.gz
Approx 45% complete for SRR8437479_1.fastq.gz
Approx 50% complete for SRR8437479_1.fastq.gz
Approx 55% complete for SRR8437479_1.fastq.gz
Approx 60% complete for SRR8437479_1.fastq.gz
Approx 65% complete for SRR8437479_1.fastq.gz
Approx 70% complete for SRR8437479_1.fastq.gz
Approx 75% complete for SRR8437479_1.fastq.gz
Approx 80% complete for SRR8437479_1.fastq.gz
Approx 85% complete for SRR8437479_1.fastq.gz
Approx 90% complete for SRR8437479_1.fastq.gz
Approx 95% complete for SRR8437479_1.fastq.gz


Analysis complete for SRR8437479_1.fastq.gz
FastQC complete for RNA_seq_workflow/input/SRR8437479_1.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/input/SRR8437479_2.fastq.gz...
application/gzip


Started analysis of SRR8437479_2.fastq.gz
Approx 5% complete for SRR8437479_2.fastq.gz
Approx 10% complete for SRR8437479_2.fastq.gz
Approx 15% complete for SRR8437479_2.fastq.gz
Approx 20% complete for SRR8437479_2.fastq.gz
Approx 25% complete for SRR8437479_2.fastq.gz
Approx 30% complete for SRR8437479_2.fastq.gz
Approx 35% complete for SRR8437479_2.fastq.gz
Approx 40% complete for SRR8437479_2.fastq.gz
Approx 45% complete for SRR8437479_2.fastq.gz
Approx 50% complete for SRR8437479_2.fastq.gz
Approx 55% complete for SRR8437479_2.fastq.gz
Approx 60% complete for SRR8437479_2.fastq.gz
Approx 65% complete for SRR8437479_2.fastq.gz
Approx 70% complete for SRR8437479_2.fastq.gz
Approx 75% complete for SRR8437479_2.fastq.gz
Approx 80% complete for SRR8437479_2.fastq.gz
Approx 85% complete for SRR8437479_2.fastq.gz
Approx 90% complete for SRR8437479_2.fastq.gz
Approx 95% complete for SRR8437479_2.fastq.gz


Analysis complete for SRR8437479_2.fastq.gz
FastQC complete for RNA_seq_workflow/input/SRR8437479_2.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Trimming reads: RNA_seq_workflow/input/SRR8437479_1.fastq.gz and RNA_seq_workflow/input/SRR8437479_2.fastq.gz using adapters from RNA_seq_workflow/adapters/TruSeq3-PE.fa


TrimmomaticPE: Started with arguments:
 -threads 8 RNA_seq_workflow/input/SRR8437479_1.fastq.gz RNA_seq_workflow/input/SRR8437479_2.fastq.gz RNA_seq_workflow/output/2_trimmed_output/SRR8437479_1.trimmed.fastq.gz unpaired_1.fastq.gz RNA_seq_workflow/output/2_trimmed_output/SRR8437479_2.trimmed.fastq.gz unpaired_2.fastq.gz ILLUMINACLIP:RNA_seq_workflow/adapters/TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:20 MINLEN:50
Using PrefixPair: 'TACACTCTTTCCCTACACGACGCTCTTCCGATCT' and 'GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT'
ILLUMINACLIP: Using 1 prefix pairs, 0 forward/reverse sequences, 0 forward only sequences, 0 reverse only sequences
Quality encoding detected as phred33
Input Read Pairs: 26772466 Both Surviving: 23016090 (85.97%) Forward Only Surviving: 2735806 (10.22%) Reverse Only Surviving: 429579 (1.60%) Dropped: 590991 (2.21%)
TrimmomaticPE: Completed successfully
Started analysis of SRR8437479_1.trimmed.fastq.gz


Trimming complete. Trimmed files: RNA_seq_workflow/output/2_trimmed_output/SRR8437479_1.trimmed.fastq.gz, RNA_seq_workflow/output/2_trimmed_output/SRR8437479_2.trimmed.fastq.gz
Running FastQC on RNA_seq_workflow/output/2_trimmed_output/SRR8437479_1.trimmed.fastq.gz...
application/gzip


Approx 5% complete for SRR8437479_1.trimmed.fastq.gz
Approx 10% complete for SRR8437479_1.trimmed.fastq.gz
Approx 15% complete for SRR8437479_1.trimmed.fastq.gz
Approx 20% complete for SRR8437479_1.trimmed.fastq.gz
Approx 25% complete for SRR8437479_1.trimmed.fastq.gz
Approx 30% complete for SRR8437479_1.trimmed.fastq.gz
Approx 35% complete for SRR8437479_1.trimmed.fastq.gz
Approx 40% complete for SRR8437479_1.trimmed.fastq.gz
Approx 45% complete for SRR8437479_1.trimmed.fastq.gz
Approx 50% complete for SRR8437479_1.trimmed.fastq.gz
Approx 55% complete for SRR8437479_1.trimmed.fastq.gz
Approx 60% complete for SRR8437479_1.trimmed.fastq.gz
Approx 65% complete for SRR8437479_1.trimmed.fastq.gz
Approx 70% complete for SRR8437479_1.trimmed.fastq.gz
Approx 75% complete for SRR8437479_1.trimmed.fastq.gz
Approx 80% complete for SRR8437479_1.trimmed.fastq.gz
Approx 85% complete for SRR8437479_1.trimmed.fastq.gz
Approx 90% complete for SRR8437479_1.trimmed.fastq.gz
Approx 95% complete for SRR84

Analysis complete for SRR8437479_1.trimmed.fastq.gz
FastQC complete for RNA_seq_workflow/output/2_trimmed_output/SRR8437479_1.trimmed.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Running FastQC on RNA_seq_workflow/output/2_trimmed_output/SRR8437479_2.trimmed.fastq.gz...
application/gzip


Started analysis of SRR8437479_2.trimmed.fastq.gz
Approx 5% complete for SRR8437479_2.trimmed.fastq.gz
Approx 10% complete for SRR8437479_2.trimmed.fastq.gz
Approx 15% complete for SRR8437479_2.trimmed.fastq.gz
Approx 20% complete for SRR8437479_2.trimmed.fastq.gz
Approx 25% complete for SRR8437479_2.trimmed.fastq.gz
Approx 30% complete for SRR8437479_2.trimmed.fastq.gz
Approx 35% complete for SRR8437479_2.trimmed.fastq.gz
Approx 40% complete for SRR8437479_2.trimmed.fastq.gz
Approx 45% complete for SRR8437479_2.trimmed.fastq.gz
Approx 50% complete for SRR8437479_2.trimmed.fastq.gz
Approx 55% complete for SRR8437479_2.trimmed.fastq.gz
Approx 60% complete for SRR8437479_2.trimmed.fastq.gz
Approx 65% complete for SRR8437479_2.trimmed.fastq.gz
Approx 70% complete for SRR8437479_2.trimmed.fastq.gz
Approx 75% complete for SRR8437479_2.trimmed.fastq.gz
Approx 80% complete for SRR8437479_2.trimmed.fastq.gz
Approx 85% complete for SRR8437479_2.trimmed.fastq.gz
Approx 90% complete for SRR843747

Analysis complete for SRR8437479_2.trimmed.fastq.gz
FastQC complete for RNA_seq_workflow/output/2_trimmed_output/SRR8437479_2.trimmed.fastq.gz. Reports saved in RNA_seq_workflow/output/1_initial_qc
Quality control and trimming complete for all samples.


#### Step 3: Index the reference genome and align reads to the reference genome
##### 3.1 Index the reference genome

In [5]:
def index_reference_genome_hisat2(reference_genome, hisat2_index_dir):
    """
    Index the reference genome using HISAT2.

    Parameters:
        reference_genome (str): Path to the reference genome FASTA file.
        hisat2_index_dir (str): Directory to save HISAT2 index files.
    """
    os.makedirs(hisat2_index_dir, exist_ok=True)  # Create index directory if it doesn't exist
    print(f"Indexing reference genome with HISAT2: {reference_genome}...")
    
    # HISAT2 index files will be prefixed with the directory path
    hisat2_index_prefix = os.path.join(hisat2_index_dir, "hisat2_index")
    
    subprocess.run([
        "hisat2-build",
        reference_genome,
        hisat2_index_prefix
    ], check=True)
    
    print(f"Indexing complete. Index saved in: {hisat2_index_dir}")

# Define reference genome and index directory
root_folder = "RNA_seq_workflow"
reference_genome = os.path.join(root_folder, "genome", "Hordeum_vulgare.MorexV3_pseudomolecules_assembly.dna.toplevel.fa")
hisat2_index_dir = os.path.join(root_folder, "hisat2_index")

# Index the reference genome
index_reference_genome_hisat2(reference_genome, hisat2_index_dir)

Indexing reference genome with HISAT2: RNA_seq_workflow/genome/Hordeum_vulgare.MorexV3_pseudomolecules_assembly.dna.toplevel.fa...


Settings:
  Output files: "RNA_seq_workflow/hisat2_index/hisat2_index.*.ht2l"
  Line rate: 7 (line is 128 bytes)
  Lines per side: 1 (side is 128 bytes)
  Offset rate: 4 (one in 16)
  FTable chars: 10
  Strings: unpacked
  Local offset rate: 3 (one in 8)
  Local fTable chars: 6
  Local sequence length: 57344
  Local sequence overlap between two consecutive indexes: 1024
  Endianness: little
  Actual local endianness: little
  Sanity checking: disabled
  Assertions: disabled
  Random seed: 0
  Sizeofs: void*:8, int:4, long:8, size_t:8
Input files DNA, FASTA:
  RNA_seq_workflow/genome/Hordeum_vulgare.MorexV3_pseudomolecules_assembly.dna.toplevel.fa
Reading reference sizes
  Time reading reference sizes: 00:00:16
Calculating joined length
Writing header
Reserving space for joined string
Joining reference sequences
  Time to join reference sequences: 00:00:06
  Time to read SNPs and splice sites: 00:00:00
Using parameters --bmax 792047199 --dcv 1024
  Doing ahead-of-time memory usage test


Building DifferenceCoverSample
  Building sPrime
  Building sPrimeOrder
  V-Sorting samples
  V-Sorting samples time: 00:01:08
  Allocating rank array
  Ranking v-sort output
  Ranking v-sort output time: 00:00:29
  Invoking Larsson-Sadakane on ranks
  Invoking Larsson-Sadakane on ranks time: 00:00:32
  Sanity-checking and returning
Building samples
Reserving space for 12 sample suffixes
Generating random suffixes
QSorting 12 sample offsets, eliminating duplicates
QSorting sample offsets, eliminating duplicates time: 00:00:00
Multikey QSorting 12 samples
  (Using difference cover)
  Multikey QSorting samples time: 00:00:00
Calculating bucket sizes
Splitting and merging
  Splitting and merging time: 00:00:00
Split 1, merged 7; iterating...
Splitting and merging
  Splitting and merging time: 00:00:00
Avg bucket size: 6.03465e+08 (target: 792047198)
Getting block 1 of 7
  Reserving size (792047199) for bucket 1
  Calculating Z arrays for bucket 1
  Entering block accumulator loop for buck

Converting suffix-array elements to index image
Allocating ftab, absorbFtab
Entering GFM loop


  bucket 1: 10%
  bucket 1: 20%
  bucket 1: 30%
  bucket 1: 40%
  bucket 1: 50%
  bucket 1: 60%
  bucket 1: 70%
  bucket 1: 80%
  bucket 1: 90%
  bucket 1: 100%
  Sorting block of length 691135630 for bucket 1
  (Using difference cover)
  Sorting block time: 00:03:18
Returning block of 691135631 for bucket 1
Getting block 2 of 7
  Reserving size (792047199) for bucket 2
  Calculating Z arrays for bucket 2
  Entering block accumulator loop for bucket 2:
  bucket 2: 10%
  bucket 2: 20%
  bucket 2: 30%
  bucket 2: 40%
  bucket 2: 50%
  bucket 2: 60%
  bucket 2: 70%
  bucket 2: 80%
  bucket 2: 90%
  bucket 2: 100%
  Sorting block of length 707380320 for bucket 2
  (Using difference cover)
  Sorting block time: 00:03:24
Returning block of 707380321 for bucket 2
Getting block 3 of 7
  Reserving size (792047199) for bucket 3
  Calculating Z arrays for bucket 3
  Entering block accumulator loop for bucket 3:
  bucket 3: 10%
  bucket 3: 20%
  bucket 3: 30%
  bucket 3: 40%
  bucket 3: 50%
  buck

Exited GFM loop
fchr[A]: 0
fchr[C]: 1171821760
fchr[G]: 2111002342
fchr[T]: 3050122814
fchr[$]: 4224251725
Exiting GFM::buildToDisk()
Returning from initFromVector
Wrote 1416521038 bytes to primary GFM file: RNA_seq_workflow/hisat2_index/hisat2_index.1.ht2l
Wrote 2112125868 bytes to secondary GFM file: RNA_seq_workflow/hisat2_index/hisat2_index.2.ht2l
Re-opening _in1 and _in2 as input streams
Returning from GFM constructor


Indexing complete. Index saved in: RNA_seq_workflow/hisat2_index


Returning from initFromVector
Wrote 1855673013 bytes to primary GFM file: RNA_seq_workflow/hisat2_index/hisat2_index.5.ht2l
Wrote 1075374496 bytes to secondary GFM file: RNA_seq_workflow/hisat2_index/hisat2_index.6.ht2l
Re-opening _in5 and _in5 as input streams
Returning from HGFM constructor
Headers:
    len: 4224251725
    gbwtLen: 4224251726
    nodes: 4224251726
    sz: 1056062932
    gbwtSz: 1056062932
    lineRate: 7
    offRate: 4
    offMask: 0xfffffffffffffff0
    ftabChars: 10
    eftabLen: 0
    eftabSz: 0
    ftabLen: 1048577
    ftabSz: 8388616
    offsLen: 264015733
    offsSz: 2112125864
    lineSz: 128
    sideSz: 128
    sideGbwtSz: 96
    sideGbwtLen: 384
    numSides: 11000656
    numLines: 11000656
    gbwtTotLen: 1408083968
    gbwtTotSz: 1408083968
    reverse: 0
    linearFM: Yes
Total time for call to driver() for forward index: 00:36:11


##### Step 3.2: Align reads to the reference genome using HISAT2 and sort SAM files into BAM files


In [None]:
# Step 3.2: Align reads to the reference genome using HISAT2
def align_reads_hisat2(hisat2_index_base, fastq1, fastq2, aligned_bam_dir, sample_id):
    """
    Align reads using HISAT2 and save as sorted BAM files.

    Parameters:
    - hisat2_index_base: Base name for HISAT2 index.
    - fastq1: Path to the first FASTQ file (read 1).
    - fastq2: Path to the second FASTQ file (read 2).
    - aligned_bam_dir: Directory to save aligned BAM files.
    - sample_id: Identifier for the sample, used in output file naming.

    Returns:
    None
    """
    os.makedirs(aligned_bam_dir, exist_ok=True)  # Create output directory if it doesn't exist
    sam_file = os.path.join(aligned_bam_dir, f"{sample_id}.sam")
    output_bam = os.path.join(aligned_bam_dir, f"{sample_id}.sorted.bam")
    summary_file = os.path.join(aligned_bam_dir, f"{sample_id}_summary.txt")
    
    print(f"Aligning reads with HISAT2 for sample {sample_id}: {fastq1} and {fastq2}...")
    
   # Run HISAT2
    hisat2_cmd = [
        "hisat2",
        "-x", hisat2_index_base,
        "-1", fastq1,
        "-2", fastq2,
        "-p", "8",
        "--summary-file", summary_file,
        "-S", sam_file
    ]
    subprocess.run(hisat2_cmd, check=True)

    # Run SAMTOOLS (converting SAM files to sorted BAM files)
    samtools_cmd = ["samtools", "sort", "-@", "4", "-o", output_bam, sam_file]
    subprocess.run(samtools_cmd, check=True)

    print(f"Alignment complete for sample {sample_id}. BAM file saved at: {output_bam}")
    os.remove(sam_file)  # Clean up intermediate SAM file
    
    #subprocess.run([
        #"hisat2",
        #"-x", hisat2_index_base,
        #"-1", fastq1,
        #"-2", fastq2,
        #"-p", "8",  # Number of threads
        #"--summary-file", os.path.join(aligned_bam_dir, f"{sample_id}_summary.txt"),
        #"|", "samtools", "sort", "-@", "4", "-o", output_bam
    #], shell=True, check=True)
    
    #print(f"Alignment complete for sample {sample_id}. Aligned BAM saved as: {output_bam}")

# Step 2: Align reads
output_folder = os.path.join(root_folder, "output")
aligned_sequences_dir = os.path.join(output_folder, "3_aligned_sequences")
aligned_bam_dir = os.path.join(aligned_sequences_dir, "aligned_bam")
aligned_logs_dir = os.path.join(aligned_logs_dir, "aligned_logs")

# Define samples and their FASTQ files
samples = {
    "SRR8437484": ["output/2_trimmed_output/SRR8437484_1.trimmed.fastq.gz", "output/2_trimmed_output/SRR8437484_2.trimmed.fastq.gz"],  # control 1
    "SRR8437485": ["output/2_trimmed_output/SRR8437485_1.trimmed.fastq.gz", "output/2_trimmed_output/SRR8437485_2.trimmed.fastq.gz"],  # control 2
    "SRR8437482": ["output/2_trimmed_output/SRR8437482_1.trimmed.fastq.gz", "output/2_trimmed_output/SRR8437482_2.trimmed.fastq.gz"],  # control 3
    "SRR8437483": ["output/2_trimmed_output/SRR8437483_1.trimmed.fastq.gz", "output/2_trimmed_output/SRR8437483_2.trimmed.fastq.gz"],  # control 4
    "SRR8437480": ["output/2_trimmed_output/SRR8437480_1.trimmed.fastq.gz", "output/2_trimmed_output/SRR8437480_2.trimmed.fastq.gz"],  # Pst 1
    "SRR8437481": ["output/2_trimmed_output/SRR8437481_1.trimmed.fastq.gz", "output/2_trimmed_output/SRR8437481_2.trimmed.fastq.gz"],  # Pst 2
    "SRR8437478": ["output/2_trimmed_output/SRR8437478_1.trimmed.fastq.gz", "output/2_trimmed_output/SRR8437478_2.trimmed.fastq.gz"],  # Pst 3
    "SRR8437479": ["output/2_trimmed_output/SRR8437479_1.trimmed.fastq.gz", "output/2_trimmed_output/SRR8437479_2.trimmed.fastq.gz"],  # Pst 4
}

# HISAT2 index base path
hisat2_index_dir = os.path.join(root_folder, "hisat2_index")
hisat2_index_base = os.path.join(hisat2_index_dir, "hisat2_index")

# Align each sample using HISAT2
for sample_id, files in samples.items():
    fastq1, fastq2 = [os.path.join(root_folder, file_path) for file_path in files]
    align_reads_hisat2(hisat2_index_base, fastq1, fastq2, aligned_bam_dir, sample_id)

print(f"Alignment complete. Results are saved in: {aligned_bam_dir}")

Aligning reads with HISAT2 for sample SRR8437484: RNA_seq_workflow/output/2_trimmed_output/SRR8437484_1.trimmed.fastq.gz and RNA_seq_workflow/output/2_trimmed_output/SRR8437484_2.trimmed.fastq.gz...


20851928 reads; of these:
  20851928 (100.00%) were paired; of these:
    1980122 (9.50%) aligned concordantly 0 times
    17803690 (85.38%) aligned concordantly exactly 1 time
    1068116 (5.12%) aligned concordantly >1 times
    ----
    1980122 pairs aligned concordantly 0 times; of these:
      111565 (5.63%) aligned discordantly 1 time
    ----
    1868557 pairs aligned 0 times concordantly or discordantly; of these:
      3737114 mates make up the pairs; of these:
        2406937 (64.41%) aligned 0 times
        1209045 (32.35%) aligned exactly 1 time
        121132 (3.24%) aligned >1 times
94.23% overall alignment rate
[bam_sort_core] merging from 5 files and 4 in-memory blocks...


Alignment complete for sample SRR8437484. BAM file saved at: RNA_seq_workflow/output/3_aligned_sequences/aligned_bam/SRR8437484.sorted.bam
Aligning reads with HISAT2 for sample SRR8437485: RNA_seq_workflow/output/2_trimmed_output/SRR8437485_1.trimmed.fastq.gz and RNA_seq_workflow/output/2_trimmed_output/SRR8437485_2.trimmed.fastq.gz...


20199517 reads; of these:
  20199517 (100.00%) were paired; of these:
    2570192 (12.72%) aligned concordantly 0 times
    16838011 (83.36%) aligned concordantly exactly 1 time
    791314 (3.92%) aligned concordantly >1 times
    ----
    2570192 pairs aligned concordantly 0 times; of these:
      120575 (4.69%) aligned discordantly 1 time
    ----
    2449617 pairs aligned 0 times concordantly or discordantly; of these:
      4899234 mates make up the pairs; of these:
        3534113 (72.14%) aligned 0 times
        1292765 (26.39%) aligned exactly 1 time
        72356 (1.48%) aligned >1 times
91.25% overall alignment rate
[bam_sort_core] merging from 5 files and 4 in-memory blocks...


Alignment complete for sample SRR8437485. BAM file saved at: RNA_seq_workflow/output/3_aligned_sequences/aligned_bam/SRR8437485.sorted.bam
Aligning reads with HISAT2 for sample SRR8437482: RNA_seq_workflow/output/2_trimmed_output/SRR8437482_1.trimmed.fastq.gz and RNA_seq_workflow/output/2_trimmed_output/SRR8437482_2.trimmed.fastq.gz...


25824523 reads; of these:
  25824523 (100.00%) were paired; of these:
    2435790 (9.43%) aligned concordantly 0 times
    21827781 (84.52%) aligned concordantly exactly 1 time
    1560952 (6.04%) aligned concordantly >1 times
    ----
    2435790 pairs aligned concordantly 0 times; of these:
      156385 (6.42%) aligned discordantly 1 time
    ----
    2279405 pairs aligned 0 times concordantly or discordantly; of these:
      4558810 mates make up the pairs; of these:
        2675687 (58.69%) aligned 0 times
        1759921 (38.60%) aligned exactly 1 time
        123202 (2.70%) aligned >1 times
94.82% overall alignment rate
[bam_sort_core] merging from 6 files and 4 in-memory blocks...


Alignment complete for sample SRR8437482. BAM file saved at: RNA_seq_workflow/output/3_aligned_sequences/aligned_bam/SRR8437482.sorted.bam
Aligning reads with HISAT2 for sample SRR8437483: RNA_seq_workflow/output/2_trimmed_output/SRR8437483_1.trimmed.fastq.gz and RNA_seq_workflow/output/2_trimmed_output/SRR8437483_2.trimmed.fastq.gz...


19203613 reads; of these:
  19203613 (100.00%) were paired; of these:
    1919614 (10.00%) aligned concordantly 0 times
    16230636 (84.52%) aligned concordantly exactly 1 time
    1053363 (5.49%) aligned concordantly >1 times
    ----
    1919614 pairs aligned concordantly 0 times; of these:
      110289 (5.75%) aligned discordantly 1 time
    ----
    1809325 pairs aligned 0 times concordantly or discordantly; of these:
      3618650 mates make up the pairs; of these:
        2203110 (60.88%) aligned 0 times
        1304755 (36.06%) aligned exactly 1 time
        110785 (3.06%) aligned >1 times
94.26% overall alignment rate
[bam_sort_core] merging from 5 files and 4 in-memory blocks...


Alignment complete for sample SRR8437483. BAM file saved at: RNA_seq_workflow/output/3_aligned_sequences/aligned_bam/SRR8437483.sorted.bam
Aligning reads with HISAT2 for sample SRR8437480: RNA_seq_workflow/output/2_trimmed_output/SRR8437480_1.trimmed.fastq.gz and RNA_seq_workflow/output/2_trimmed_output/SRR8437480_2.trimmed.fastq.gz...


18380335 reads; of these:
  18380335 (100.00%) were paired; of these:
    1917840 (10.43%) aligned concordantly 0 times
    15617799 (84.97%) aligned concordantly exactly 1 time
    844696 (4.60%) aligned concordantly >1 times
    ----
    1917840 pairs aligned concordantly 0 times; of these:
      130963 (6.83%) aligned discordantly 1 time
    ----
    1786877 pairs aligned 0 times concordantly or discordantly; of these:
      3573754 mates make up the pairs; of these:
        2152505 (60.23%) aligned 0 times
        1339146 (37.47%) aligned exactly 1 time
        82103 (2.30%) aligned >1 times
94.14% overall alignment rate
[bam_sort_core] merging from 4 files and 4 in-memory blocks...


Alignment complete for sample SRR8437480. BAM file saved at: RNA_seq_workflow/output/3_aligned_sequences/aligned_bam/SRR8437480.sorted.bam
Aligning reads with HISAT2 for sample SRR8437481: RNA_seq_workflow/output/2_trimmed_output/SRR8437481_1.trimmed.fastq.gz and RNA_seq_workflow/output/2_trimmed_output/SRR8437481_2.trimmed.fastq.gz...


25544038 reads; of these:
  25544038 (100.00%) were paired; of these:
    6569077 (25.72%) aligned concordantly 0 times
    18200941 (71.25%) aligned concordantly exactly 1 time
    774020 (3.03%) aligned concordantly >1 times
    ----
    6569077 pairs aligned concordantly 0 times; of these:
      94766 (1.44%) aligned discordantly 1 time
    ----
    6474311 pairs aligned 0 times concordantly or discordantly; of these:
      12948622 mates make up the pairs; of these:
        11848666 (91.51%) aligned 0 times
        1044342 (8.07%) aligned exactly 1 time
        55614 (0.43%) aligned >1 times
76.81% overall alignment rate
[bam_sort_core] merging from 6 files and 4 in-memory blocks...


Alignment complete for sample SRR8437481. BAM file saved at: RNA_seq_workflow/output/3_aligned_sequences/aligned_bam/SRR8437481.sorted.bam
Aligning reads with HISAT2 for sample SRR8437478: RNA_seq_workflow/output/2_trimmed_output/SRR8437478_1.trimmed.fastq.gz and RNA_seq_workflow/output/2_trimmed_output/SRR8437478_2.trimmed.fastq.gz...


20140139 reads; of these:
  20140139 (100.00%) were paired; of these:
    2096939 (10.41%) aligned concordantly 0 times
    17212149 (85.46%) aligned concordantly exactly 1 time
    831051 (4.13%) aligned concordantly >1 times
    ----
    2096939 pairs aligned concordantly 0 times; of these:
      144561 (6.89%) aligned discordantly 1 time
    ----
    1952378 pairs aligned 0 times concordantly or discordantly; of these:
      3904756 mates make up the pairs; of these:
        2363861 (60.54%) aligned 0 times
        1454705 (37.25%) aligned exactly 1 time
        86190 (2.21%) aligned >1 times
94.13% overall alignment rate
[bam_sort_core] merging from 5 files and 4 in-memory blocks...


Alignment complete for sample SRR8437478. BAM file saved at: RNA_seq_workflow/output/3_aligned_sequences/aligned_bam/SRR8437478.sorted.bam
Aligning reads with HISAT2 for sample SRR8437479: RNA_seq_workflow/output/2_trimmed_output/SRR8437479_1.trimmed.fastq.gz and RNA_seq_workflow/output/2_trimmed_output/SRR8437479_2.trimmed.fastq.gz...


23016090 reads; of these:
  23016090 (100.00%) were paired; of these:
    2523936 (10.97%) aligned concordantly 0 times
    19599384 (85.16%) aligned concordantly exactly 1 time
    892770 (3.88%) aligned concordantly >1 times
    ----
    2523936 pairs aligned concordantly 0 times; of these:
      191235 (7.58%) aligned discordantly 1 time
    ----
    2332701 pairs aligned 0 times concordantly or discordantly; of these:
      4665402 mates make up the pairs; of these:
        2799421 (60.00%) aligned 0 times
        1765452 (37.84%) aligned exactly 1 time
        100529 (2.15%) aligned >1 times
93.92% overall alignment rate
[bam_sort_core] merging from 5 files and 4 in-memory blocks...


Alignment complete for sample SRR8437479. BAM file saved at: RNA_seq_workflow/output/3_aligned_sequences/aligned_bam/SRR8437479.sorted.bam
Alignment complete. Results are saved in: RNA_seq_workflow/output/3_aligned_sequences/aligned_bam


#### Step 4. Perform gene quantification

In [20]:
# Perform gene quantification with FeatureCounts
def run_feature_counts(annotation_file, bam_files, output_file):
    print(f"Running FeatureCounts on BAM files...")
    subprocess.run([
        "featureCounts",
        "-T", "8",  # Number of threads
        "-a", annotation_file,  # Annotation file (GTF/GFF)
        "-o", output_file,  # Output file
        "-t", "gene", "-g", "gene_id",  # Count at the gene level by gene ID,
        "-p" # Specify paired-end reads
    ] + bam_files, check=True)
    print(f"Gene counts saved at: {output_file}")

# Annotation file
annotation_file = os.path.join(annotation_dir, "Hordeum_vulgare.MorexV3_pseudomolecules_assembly.60.chr.gff3")

# Output file for counts
gene_counts_file = os.path.join(final_counts_dir, "gene_counts.txt")

# List of BAM files for quantification
sorted_bam_files = [
    os.path.join(aligned_bam_dir, f"{sample_id}.sorted.bam") for sample_id in samples.keys()
]

# Run FeatureCounts
run_feature_counts(annotation_file, sorted_bam_files, gene_counts_file)

Running FeatureCounts on BAM files...



        =====         / ____| |  | |  _ \|  __ \|  ____|   /\   |  __ \ 
          =====      | (___ | |  | | |_) | |__) | |__     /  \  | |  | |
            ====      \___ \| |  | |  _ <|  _  /|  __|   / /\ \ | |  | |
              ====    ____) | |__| | |_) | | \ \| |____ / ____ \| |__| |
	  v2.0.6

||                                                                            ||
||             Input files : 8 BAM files                                      ||
||                                                                            ||
||                           SRR8437484.sorted.bam                            ||
||                           SRR8437485.sorted.bam                            ||
||                           SRR8437482.sorted.bam                            ||
||                           SRR8437483.sorted.bam                            ||
||                           SRR8437480.sorted.bam                            ||
||                           SRR8437481.sorted.b

Gene counts saved at: RNA_seq_workflow/output/4_final_counts/gene_counts.txt


||    Paired-end reads are included.                                          ||
||    The reads are assigned on the single-end mode.                          ||
||    Total alignments : 50241298                                             ||
||    Successfully assigned alignments : 33457556 (66.6%)                     ||
||    Running time : 0.08 minutes                                             ||
||                                                                            ||
|| Write the final count table.                                               ||
|| Write the read assignment summary.                                         ||
||                                                                            ||
|| Summary of counting results can be found in file "RNA_seq_workflow/output  ||
|| /4_final_counts/gene_counts.txt.summary"                                   ||
||                                                                            ||



Further analysis (Differential gene expression analysis) is performed in R