# QC

In [None]:
%%bash

# PAIRED READS QC
#
# For every library directory under 2_demultiplexed/, run fastp on each
# <sample>.R1.fastq / <sample>.R2.fastq pair and build one FASTA per sample.
#
# Outputs:
#   3_qc/<library>/            fastp paired, unpaired and failed read files
#   4_merged_fastq/<library>/  fastp merged reads with the R1 reads appended
#   5_merged_fasta/<library>/  FASTA conversion of the merged FASTQ (seqkit)
#   fastp_out/                 fastp HTML and JSON reports
#
# NOTE: report names contain only the sample name, so two libraries holding a
# sample with the same name write to the same report files.

run_paired_qc() {
    local library_dir library r1_path sample
    local in_dir qc_dir merged_fastq merged_fasta
    local -a fastp_args

    # -p lets the cell be re-run without "File exists" errors.
    # fastp is pointed at ./fastp_out/ for its reports, so create it up front.
    mkdir -p 3_qc 4_merged_fastq 5_merged_fasta fastp_out

    for library_dir in 2_demultiplexed/*/; do
        # An unmatched glob stays literal; skip it instead of processing "*".
        [[ -d "$library_dir" ]] || continue
        library=${library_dir%/}
        library=${library##*/}

        in_dir="2_demultiplexed/${library}"
        qc_dir="3_qc/${library}"
        mkdir -p "$qc_dir" "4_merged_fastq/${library}" "5_merged_fasta/${library}"

        # Samples are discovered from their forward-read file.
        for r1_path in "${in_dir}/"*.R1.fastq; do
            [[ -e "$r1_path" ]] || continue
            sample=${r1_path##*/}
            sample=${sample%.R1.fastq}

            # Files with "invalid" in their name are not processed.
            if [[ "$sample" == *invalid* ]]; then
                continue
            fi

            merged_fastq="4_merged_fastq/${library}/${sample}_merged.fastq"
            merged_fasta="5_merged_fasta/${library}/${sample}_merged.fasta"

            fastp_args=(
                -i "${in_dir}/${sample}.R1.fastq"
                -I "${in_dir}/${sample}.R2.fastq"
                -o "${qc_dir}/${sample}.R1.fastq"
                -O "${qc_dir}/${sample}.R2.fastq"
                --unpaired1 "${qc_dir}/${sample}_unpaired.R1.fastq"
                --unpaired2 "${qc_dir}/${sample}_unpaired.R2.fastq"
                --failed_out "${qc_dir}/${sample}_failed.fastq"
                # Quality filtering and trimming settings.
                -q 30
                --cut_tail
                --trim_front1 20
                --trim_front2 20
                --max_len1 106
                --max_len2 106
                -l 90
                # Merge overlapping pairs into a single read file.
                --merge
                --overlap_len_require 90
                --correction
                --merged_out "$merged_fastq"
                -w 6
                -h "./fastp_out/${sample}_fastp.html"
                -j "./fastp_out/${sample}_fastp.json"
            )

            # Do not build a FASTA from partial or stale files when fastp fails.
            if ! fastp "${fastp_args[@]}"; then
                printf 'fastp failed for %s/%s; skipping sample\n' \
                    "$library" "$sample" >&2
                continue
            fi

            # Append the R1 reads fastp left in the paired and unpaired outputs
            # to the merged reads.
            cat "${qc_dir}/${sample}.R1.fastq" \
                "${qc_dir}/${sample}_unpaired.R1.fastq" >> "$merged_fastq"

            seqkit fq2fa "$merged_fastq" -o "$merged_fasta"
        done
    done
}

run_paired_qc


# Dereplicate - denoise - chimera detection - rereplicate

In [None]:
%%bash

# DEREPLICATE, DENOISE, CHIMERA DETECTION AND REREPLICATE
#
# For every <sample>_merged.fasta under 5_merged_fasta/<library>/, run four
# vsearch steps in sequence; each step reads the previous step's output:
#   6_denoise_uc/derep/<library>/<sample>_derep.fasta      --derep_fulllength
#   6_denoise_uc/denoise/<library>/<sample>_denoise.fasta  --cluster_unoise
#   6_denoise_uc/uchime/<library>/<sample>_nc.fasta        --uchime_ref
#   6_denoise_uc/rerep/<library>/<sample>_rerep.fasta      --rereplicate
# Chimera detection uses blast_db/12s_full.fasta as the reference database.

run_denoise_pipeline() {
    local library_dir library fasta_path sample
    local derep_fa denoise_fa chimera_fa borderline_fa nonchimera_fa rerep_fa

    # -p creates the parent directory too and tolerates re-running the cell.
    mkdir -p 6_denoise_uc/derep 6_denoise_uc/denoise \
        6_denoise_uc/uchime 6_denoise_uc/rerep

    for library_dir in 5_merged_fasta/*/; do
        # An unmatched glob stays literal; skip it instead of processing "*".
        [[ -d "$library_dir" ]] || continue
        library=${library_dir%/}
        library=${library##*/}

        mkdir -p "6_denoise_uc/derep/${library}" \
            "6_denoise_uc/denoise/${library}" \
            "6_denoise_uc/uchime/${library}" \
            "6_denoise_uc/rerep/${library}"

        for fasta_path in "5_merged_fasta/${library}/"*_merged.fasta; do
            [[ -e "$fasta_path" ]] || continue
            sample=${fasta_path##*/}
            sample=${sample%_merged.fasta}

            # Files with "invalid" in their name are not processed.
            if [[ "$sample" == *invalid* ]]; then
                continue
            fi

            derep_fa="6_denoise_uc/derep/${library}/${sample}_derep.fasta"
            denoise_fa="6_denoise_uc/denoise/${library}/${sample}_denoise.fasta"
            chimera_fa="6_denoise_uc/uchime/${library}/${sample}_chimera.fasta"
            # Borderline sequences get their own file: pointing --chimeras and
            # --borderline at one path makes two output streams write to the
            # same file.
            borderline_fa="6_denoise_uc/uchime/${library}/${sample}_borderline.fasta"
            nonchimera_fa="6_denoise_uc/uchime/${library}/${sample}_nc.fasta"
            rerep_fa="6_denoise_uc/rerep/${library}/${sample}_rerep.fasta"

            # Stop at the first failing step so later steps never run on
            # missing or stale input from an earlier run.
            if ! vsearch --derep_fulllength "$fasta_path" --sizeout \
                --output "$derep_fa" --minuniquesize 3; then
                printf 'vsearch derep failed for %s/%s; skipping sample\n' \
                    "$library" "$sample" >&2
                continue
            fi

            if ! vsearch --cluster_unoise "$derep_fa" --minsize 3 \
                --unoise_alpha 0.5 --centroids "$denoise_fa"; then
                printf 'vsearch denoise failed for %s/%s; skipping sample\n' \
                    "$library" "$sample" >&2
                continue
            fi

            if ! vsearch --uchime_ref "$denoise_fa" --db blast_db/12s_full.fasta \
                --chimeras "$chimera_fa" \
                --borderline "$borderline_fa" \
                --mindiffs 1 --mindiv 0.8 --nonchimeras "$nonchimera_fa"; then
                printf 'vsearch uchime failed for %s/%s; skipping sample\n' \
                    "$library" "$sample" >&2
                continue
            fi

            if ! vsearch --rereplicate "$nonchimera_fa" --output "$rerep_fa"; then
                printf 'vsearch rereplicate failed for %s/%s\n' \
                    "$library" "$sample" >&2
            fi
        done
    done
}

run_denoise_pipeline


# BLAST

In [None]:
%%bash

# Search one sample's sequences against the blast_db/12s_full database with
# blastn and write a tab-separated hit table to blast_out/.
#
# NOTE(review): the query is named *_uc.fasta, but the uchime step in this
# file writes *_nc.fasta and *_chimera.fasta into 6_denoise_uc/uchime/ --
# confirm that this file exists or point -query at the intended output.

# -p lets the cell be re-run without a "File exists" error.
mkdir -p blast_out

# -outfmt 6 with the listed field names selects the columns of the table.
blastn \
    -query 6_denoise_uc/uchime/EA01/BLEL01_uc.fasta \
    -db blast_db/12s_full \
    -out blast_out/BLEL01_blast.tsv \
    -perc_identity 100 \
    -outfmt '6 qseqid stitle sacc staxids pident qcovs evalue bitscore' \
    -num_alignments 20


# Taxonomy to Blast

In [None]:
%%bash

# Run tax_to_blast.py on a BLAST hit table, passing the NCBI-style
# rankedlineage.dmp file via -lin, and write the result next to the input.
# Presumably this adds lineage columns to each hit -- confirm in the script.
#
# NOTE(review): this reads BLEL04_blast.tsv, while the BLAST cell in this file
# writes BLEL01_blast.tsv -- confirm the sample name before running.
python tax_to_blast.py \
    -i blast_out/BLEL04_blast.tsv \
    -o blast_out/BLEL04_tax.tsv \
    -lin new_taxdump/rankedlineage.dmp

# LCA

In [None]:
%%bash

# Run mlca.py on the taxonomy-annotated BLAST table and write its result to
# lca_out/. The meaning of -b, -id, -cov, -m and -hits is defined by mlca.py;
# check the script's help before changing these thresholds.

# -p lets the cell be re-run without a "File exists" error.
mkdir -p lca_out

python mlca.py \
    -i blast_out/BLEL04_tax.tsv \
    -o lca_out/BLEL04_lca3.tsv \
    -b 10 \
    -id 100 \
    -cov 60 \
    -m 100 \
    -hits 1