# In [1]: (notebook cell marker kept from the original Jupyter export)
import os
import subprocess

import numpy as np
import pandas as pd
import qiime2
from qiime2.plugins.dada2.methods import denoise_paired
from qiime2.plugins.feature_classifier.methods import classify_sklearn
from qiime2.plugins.feature_table.methods import merge
from qiime2.plugins.feature_table.methods import merge_seqs

# Input/output locations for run DI18R24: the demultiplexed reads live in
# `input_path`; a "filtered" subdirectory is created beneath it (a no-op when
# the directory already exists).
filtered_subdir = 'filtered'
input_path = '/FASTQ/demultiplexed_run_DI18R24/'
filtered_path = os.path.join(input_path, filtered_subdir)
os.makedirs(name=filtered_path, exist_ok=True)

# Seed NumPy's global random number generator for reproducibility.
# NOTE(review): this only affects code drawing from `np.random` in this Python
# process; whether it influences the DADA2 step run through QIIME 2 is not
# visible from this script -- confirm before relying on it.
RANDOM_SEED = 531
np.random.seed(RANDOM_SEED)

# List all the FASTQ files and separate forward (".1") and reverse (".2") reads.
# Files are classified by their full suffix rather than by substring, so a
# sample name that itself contains ".1" or ".2" is neither misclassified nor
# truncated.
forward_suffix = '.1.fq.gz'
reverse_suffix = '.2.fq.gz'
fastq_files = sorted(f for f in os.listdir(input_path) if f.endswith('.fq.gz'))
fnFs = [f for f in fastq_files if f.endswith(forward_suffix)]
fnRs = [f for f in fastq_files if f.endswith(reverse_suffix)]

# Sample name = forward file name with the read-direction suffix removed.
sample_names = [f[:-len(forward_suffix)] for f in fnFs]

# Prepare file paths for filtered outputs (one forward and one reverse file
# per sample, inside the "filtered" directory).
filtFs = [os.path.join(filtered_path, f"{sample}_F_filt.fastq.gz") for sample in sample_names]
filtRs = [os.path.join(filtered_path, f"{sample}_R_filt.fastq.gz") for sample in sample_names]

# Quality filtering, trimming and denoising with DADA2 through QIIME 2.
# Parameters are similar to filterAndTrim in DADA2.
#
# `denoise_paired` expects a SampleData[PairedEndSequencesWithQuality]
# artifact, not a directory path, so the reads are first imported through a
# paired-end manifest that maps each sample to its forward/reverse files.
if len(fnFs) != len(fnRs):
    raise ValueError(
        f"Unpaired reads in {input_path}: {len(fnFs)} forward vs "
        f"{len(fnRs)} reverse FASTQ files"
    )

manifest_path = os.path.abspath("DI18R24_manifest.tsv")
with open(manifest_path, "w", encoding="utf-8") as manifest:
    manifest.write(
        "sample-id\tforward-absolute-filepath\treverse-absolute-filepath\n"
    )
    for sample, fwd, rev in zip(sample_names, fnFs, fnRs):
        # The manifest format requires absolute file paths.
        fwd_path = os.path.abspath(os.path.join(input_path, fwd))
        rev_path = os.path.abspath(os.path.join(input_path, rev))
        manifest.write(f"{sample}\t{fwd_path}\t{rev_path}\n")

# NOTE(review): assumes Phred+33 quality scores -- confirm for this sequencer.
demux_seqs = qiime2.Artifact.import_data(
    "SampleData[PairedEndSequencesWithQuality]",
    manifest_path,
    view_type="PairedEndFastqManifestPhred33V2",
)

denoise_results = denoise_paired(
    demultiplexed_seqs=demux_seqs,
    trunc_len_f=130,
    trunc_len_r=200,
    trim_left_f=30,
    trim_left_r=30,
    # The expected-error threshold is set per read direction; there is no
    # combined `max_ee` parameter.
    max_ee_f=2.0,
    max_ee_r=2.0,
    trunc_q=11,
    chimera_method="consensus",
    n_threads=0,  # 0 = use all available cores
)

# Keep only samples with more than 50 reads after denoising.
# Viewed as a DataFrame, the feature table has one row per sample and one
# column per feature (there is no 'read_count' column), so a sample's read
# count is the sum across its row.
min_reads = 50
df_out = denoise_results.table.view(pd.DataFrame)
read_counts = df_out.sum(axis=1)
filtered_samples = df_out[read_counts > min_reads]
filtered_sample_names = filtered_samples.index.tolist()

# Error model building and dereplication
# This step is integrated into the `denoise_paired` function in QIIME 2 DADA2 plugin.

# Save the sequence table of this sequencing run, together with its
# representative sequences: the taxonomic classifier works on sequences, not
# on the count table.
denoise_results.table.save("DI18R24_seqtab.qza")
denoise_results.representative_sequences.save("DI18R24_repseqs.qza")

# Additional sequencing runs
# Repeat the above denoise_paired steps for each sequencing run and save each
# run's table and representative sequences under the run's own prefix,
# e.g. DI18R36_seqtab.qza / DI18R36_repseqs.qza, etc.

# Merging sequence tables (and representative sequences) across runs
run_ids = ["DI18R24", "DI18R36", "DI18R37",
           "DI19R04", "DI19R05", "DI19R06"]
seqtab_paths = [f"{run_id}_seqtab.qza" for run_id in run_ids]
repseq_paths = [f"{run_id}_repseqs.qza" for run_id in run_ids]
seqtables = [qiime2.Artifact.load(path) for path in seqtab_paths]
repseqs = [qiime2.Artifact.load(path) for path in repseq_paths]

# `merge` takes the tables as one list argument and exposes its output as
# `merged_table`. With the default overlap method it raises if the same sample
# ID occurs in more than one run.
merged_seqtab = merge(tables=seqtables)
merged_seqtab.merged_table.save("seqtab_all.qza")

merged_repseqs = merge_seqs(data=repseqs).merged_data
merged_repseqs.save("repseqs_all.qza")

# Chimera removal was already done per run (chimera_method="consensus" in
# `denoise_paired`), so the merged table is the chimera-free table of all runs.
merged_seqtab_no_chimeras = merged_seqtab.merged_table
merged_seqtab_no_chimeras.save("seqtab_all_no_chimeras.qza")

# Taxonomic assignment using a pre-trained SILVA classifier in QIIME 2.
# `reads` must be the representative sequences (FeatureData[Sequence]).
classifier = qiime2.Artifact.load("silva_nr99_v138_1_classifier.qza")
taxonomic_results = classify_sklearn(reads=merged_repseqs, classifier=classifier)
taxonomic_results.classification.save("seqtab_all_no_chimeras_tax_SLV138.1.qza")

# Export the taxonomy artifact to plain files (taxonomy.tsv inside
# exported_taxonomy/). check=True makes a failing QIIME 2 command raise
# CalledProcessError instead of being silently ignored.
subprocess.run(
    ["qiime", "tools", "export",
     "--input-path", "seqtab_all_no_chimeras_tax_SLV138.1.qza",
     "--output-path", "exported_taxonomy"],
    check=True,
)

# Convert the taxonomy into a readable format (optional).
# `qiime tools view` only opens .qzv visualizations, not TSV files, so the
# taxonomy is tabulated into a visualization first. Open the result with
# `qiime tools view seqtab_all_no_chimeras_tax_SLV138.1.qzv` or at
# https://view.qiime2.org.
subprocess.run(
    ["qiime", "metadata", "tabulate",
     "--m-input-file", "seqtab_all_no_chimeras_tax_SLV138.1.qza",
     "--o-visualization", "seqtab_all_no_chimeras_tax_SLV138.1.qzv"],
    check=True,
)


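# Output of the original notebook run:
#   ModuleNotFoundError: No module named 'qiime2'
# The script must be run from an environment in which QIIME 2 is installed and activated.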