In [None]:
# Step 0: activate the environment named "qiime1" before running any later step
# (presumably the conda environment that provides the QIIME 1 scripts used below -- confirm locally)

source activate qiime1

In [None]:
# Step 1: split the raw FASTQ reads into a FASTA file plus a matching quality-score file

convert_fastaqual_fastq.py -f SRR1778214.fastq -c fastq_to_fastaqual -o fastaqual/

# Output (written into fastaqual/):
#   SRR1778214.fna  - sequences in FASTA format
#   SRR1778214.qual - quality scores

In [None]:
# Step 2: check the quality of the sequence file

# -q must point at the .qual file itself; Step 1 wrote it into fastaqual/.
quality_scores_plot.py -q fastaqual/SRR1778214.qual -o quality_histogram/

# Output: quality_histogram/quality_scores_plot.pdf (histogram of the quality scores)

# Truncate the sequences (and their quality scores) at base position 250
truncate_fasta_qual_files.py -f fastaqual/SRR1778214.fna -q fastaqual/SRR1778214.qual -b 250 -o filtered250

# However, after examining the quality score histogram, we determined that
# the quality of the sequence file is decent; truncation had most likely already
# been applied before we downloaded the data, so the following steps keep using
# the untruncated files rather than the contents of filtered250.

In [None]:
# Step 3: Generate the map file

# Step 3.1: Extract the barcodes (12 bases per read, single-end layout)
extract_barcodes.py -f SRR1778214.fastq -c barcode_single_end --bc1_len 12 -o processed_seqs
# Output: processed_seqs/barcodes.fastq (fastq file containing all the barcodes)

# Convert the barcode fastq file to a fasta file.
# The input is read from processed_seqs/, where the previous command wrote it.
convert_fastaqual_fastq.py -c fastq_to_fastaqual -f processed_seqs/barcodes.fastq -o barcodes/
# Output: barcodes/barcodes.fna (barcodes in fasta format)

# The map file supported by QIIME is a tab-delimited txt file.
# Excel was used to generate the map file (manual step, not reproducible from this notebook).
# Duplicated barcodes make QIIME report errors, so duplicated barcodes were removed.

# Output: map_no_dup.txt

# Check whether the map file is valid
validate_mapping_file.py -m map_no_dup.txt -o validate_mapping_file_output

# No error messages were reported, so we continue with this map file

In [None]:
# Step 4: Split the reads into samples based on their barcodes
# The .fna/.qual inputs are read from fastaqual/, where Step 1 wrote them.
split_libraries.py -m map_no_dup.txt -f fastaqual/SRR1778214.fna -q fastaqual/SRR1778214.qual -o split_output

# Ideally, the next step should use the split sequence file.
# After checking the output, no split sequence file was generated, thus we drew the conclusion that
# it is okay to continue the analysis using the original file.
# NOTE(review): an empty result can also mean that the barcodes/primers in the map file did not
# match the reads -- check the log written to split_output/ before relying on this conclusion.

In [None]:
# Step 5: OTU picking (run on the unsplit FASTA file written to fastaqual/ in Step 1)
pick_otus.py -i fastaqual/SRR1778214.fna -o OTU_result

# Output (written into OTU_result/):
#   SRR1778214_otus.txt    - OTU map (OTU id followed by its member sequence ids); used in Step 7
#   SRR1778214_clusters.uc - tab-delimited file containing the clustering information

In [None]:
# Step 6: Taxonomy assignment (input is the FASTA file written to fastaqual/ in Step 1)
# NOTE(review): taxonomy is assigned to every read here rather than to a representative
# set of the OTUs from Step 5 (e.g. built with pick_rep_set.py) -- verify that the ids in
# the assignment table match the OTU ids expected by make_otu_table.py in Step 7.
assign_taxonomy.py -i fastaqual/SRR1778214.fna -o taxo_output

# Output: taxo_output/SRR1778214_tax_assignments.txt (table of the taxonomy assignments)

In [None]:
# Step 7: Generate the OTU table and summarize it by taxonomic level
# Inputs are read from the output directories of Step 5 (OTU_result/) and Step 6 (taxo_output/).
make_otu_table.py -i OTU_result/SRR1778214_otus.txt -t taxo_output/SRR1778214_tax_assignments.txt -o otu_table.biom
summarize_taxa.py -i otu_table.biom -o tax_summary

# Output: otu_table.biom, plus tax_summary/otu_table_L2 ... otu_table_L6
#         (one summary table per taxonomic level, L2 through L6)

In [None]:
# Step 8: plotting using ggplot2 in R

# The OTU tables were opened and cleaned up manually in Excel.
# Resulting file: taxa_assignment_cleaned.csv

# Run the R script taxa_plot.R to plot the cleaned OTU table.