In [1]:
import subprocess
import make_plots
import data_prep_stack_barplots as prep
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from qiime2.plugins import feature_table
from qiime2 import Artifact

In [3]:
def auto_qiime(directory,trimlength):
    """Run the ``auto_qiime.sh`` bash script for one directory of reads.

    The script (not visible from this notebook) is expected to write the
    data files that the later cells read for plotting.

    Parameters
    ----------
    directory : str or os.PathLike
        Directory of .fastq files, forwarded to the script as its first
        argument. A leading ``~`` and ``$VAR`` references are expanded here,
        because the command is no longer interpreted by a shell.
    trimlength : str or int
        Sequencing trim length, forwarded as the second argument. It is
        converted with ``str()``, so an ``int`` works as well as the string
        returned by ``input()``.

    Returns
    -------
    None
        The exit status of the script is not checked.
    """
    import os

    # Expand ~ and environment variables ourselves: the previous implementation
    # got this for free from `bash -c`, and callers may rely on it.
    script_dir_arg = os.path.expandvars(os.path.expanduser(directory))

    # Pass an argument vector instead of a concatenated `bash -c` string so that
    # spaces or shell metacharacters in the directory name are treated as data,
    # never as additional words or commands.
    subprocess.run(['bash', 'auto_qiime.sh', script_dir_arg, str(trimlength)])

"""takes inputs of directory of .fastq files, trim length, and sampling depth for running the auto qiime script and creating quality plots"""
directory = input('Directory of .fastq files:')

trimlength = input('Sequencing trim length:')
# str.isdecimal() only accepts characters that int() can parse (str.isdigit()
# also lets through e.g. superscript digits), and the > 0 test rejects "0" so
# the value really is a positive integer, as the error message promises.
# trimlength deliberately stays a string: auto_qiime() receives it as text.
if not (trimlength.isdecimal() and int(trimlength) > 0):
    raise TypeError('trim length input must be a positive integer')

samp_depth = input("Desired sampling depth:")
if not (samp_depth.isdecimal() and int(samp_depth) > 0):
    raise TypeError('sampling depth must be a positive integer')
samp_depth = int(samp_depth)

# Run the auto qiime bash script on the chosen directory.
auto_qiime(directory,trimlength)

# Read the newly created taxonomy file and split the ';'-separated Taxon string
# into one column per rank, indexed by feature id.
taxonomy = pd.read_csv("data/taxonomy.tsv", sep='\t')
taxonomy[['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species']] = taxonomy['Taxon'].str.split(';', expand=True)
taxonomy.set_index('Feature ID', inplace=True)
# Bare expression: Jupyter only echoes it when it is the last line of a cell.
taxonomy.shape

# Load the qiime2 feature table and rarefy it to the requested depth.
# The artifact is read from outputs/, the same location the later table
# re-load cell in this notebook uses; the bare 'table.qza' path disagreed
# with it.
unrarefied_table = Artifact.load('outputs/table.qza')
rarefy_result = feature_table.methods.rarefy(table=unrarefied_table, sampling_depth=samp_depth)
rarefied_table = rarefy_result.rarefied_table
table = rarefied_table.view(pd.DataFrame)

# add in any other data structures that need to be read in


# Pre-process the data for the relative abundance stacked bar plots: one frame
# per taxonomic rank.
kingdom_df, phylum_df, class_df, order_df, family_df, genus_df, species_df = prep.prepare_data_stacked_barplots(table, taxonomy)

# Create one plotly figure per rank.
king_plot = make_plots.plotly_stacked_barplot(kingdom_df, 'Kingdom Relative Abundances')
phy_plot = make_plots.plotly_stacked_barplot(phylum_df, 'Phylum Relative Abundances')
class_plot = make_plots.plotly_stacked_barplot(class_df, 'Class Relative Abundances')
ord_plot = make_plots.plotly_stacked_barplot(order_df, 'Order Relative Abundances')
fam_plot = make_plots.plotly_stacked_barplot(family_df, 'Family Relative Abundances')
gen_plot = make_plots.plotly_stacked_barplot(genus_df, 'Genus Relative Abundances')
spec_plot = make_plots.plotly_stacked_barplot(species_df, 'Species Relative Abundances')

# Quality plot for the same input directory and sampling depth.
qual_plot = make_plots.plot_qualities(directory, samp_depth)

Directory of .fastq files:emp-single-end-sequences
Sequencing trim length:120
Desired sampling depth:100


(770, 9)

In [6]:
"""reads in table.qza file from qiime2 into DataFrame"""
# Load the feature table artifact from outputs/, rarefy it at samp_depth
# (set in an earlier cell), and expose the result as the DataFrame `tabl`
# that the plotting cell consumes.
unrarefied_table = Artifact.load('outputs/table.qza')
rarefy_result = feature_table.methods.rarefy(
    table=unrarefied_table,
    sampling_depth=samp_depth,
)
rarefied_table = rarefy_result.rarefied_table
tabl = rarefied_table.view(pd.DataFrame)


In [7]:
# Inputs come from earlier cells: `taxonomy` (taxonomy.tsv indexed by feature
# id) and `tabl` (the rarefied feature table as a DataFrame), plus the
# `directory` and `samp_depth` values entered at the prompts.

# Pre-process the data for the relative abundance stacked bar plots: one frame
# per taxonomic rank, kingdom first, species last.
(
    kingdom_df,
    phylum_df,
    class_df,
    order_df,
    family_df,
    genus_df,
    species_df,
) = prep.prepare_data_stacked_barplots(tabl, taxonomy)

# Build the plotly figures in rank order from a single (frame, title) list so
# every rank goes through exactly the same call.
_rank_frames_and_titles = [
    (kingdom_df, 'Kingdom Relative Abundances'),
    (phylum_df, 'Phylum Relative Abundances'),
    (class_df, 'Class Relative Abundances'),
    (order_df, 'Order Relative Abundances'),
    (family_df, 'Family Relative Abundances'),
    (genus_df, 'Genus Relative Abundances'),
    (species_df, 'Species Relative Abundances'),
]
king_plot, phy_plot, class_plot, ord_plot, fam_plot, gen_plot, spec_plot = [
    make_plots.plotly_stacked_barplot(rank_frame, plot_title)
    for rank_frame, plot_title in _rank_frames_and_titles
]

# Quality plot for the same input directory and sampling depth.
qual_plot = make_plots.plot_qualities(directory, samp_depth)

In [10]:
# Display the figure that an earlier cell assigned to qual_plot
# (the return value of make_plots.plot_qualities).
qual_plot.show()

In [36]:
# Parse a second taxonomy table, read from an extracted artifact directory,
# with the same steps used for `taxonomy`: split the ';'-separated Taxon
# string into one column per rank and index the rows by feature id.
tax2_path = "872a0592-bb99-4296-941e-37dbbdff0d7a/data/taxonomy.tsv"
rank_columns = ['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species']

tax2 = pd.read_csv(tax2_path, sep='\t')
tax2[rank_columns] = tax2['Taxon'].str.split(';', expand=True)
tax2 = tax2.set_index('Feature ID')

# Last expression of the cell: preview the first rows.
tax2.head()

Unnamed: 0_level_0,Taxon,Confidence,kingdom,phylum,class,order,family,genus,species
Feature ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
4b5eeb300368260019c1fbc7a3c718fc,k__Bacteria; p__Bacteroidetes; c__Bacteroidia;...,0.991607,k__Bacteria,p__Bacteroidetes,c__Bacteroidia,o__Bacteroidales,f__Bacteroidaceae,g__Bacteroides,s__
fe30ff0f71a38a39cf1717ec2be3a2fc,k__Bacteria; p__Proteobacteria; c__Betaproteob...,0.982863,k__Bacteria,p__Proteobacteria,c__Betaproteobacteria,o__Neisseriales,f__Neisseriaceae,g__Neisseria,
d29fe3c70564fc0f69f2c03e0d1e5561,k__Bacteria; p__Firmicutes; c__Bacilli; o__Lac...,1.0,k__Bacteria,p__Firmicutes,c__Bacilli,o__Lactobacillales,f__Streptococcaceae,g__Streptococcus,
868528ca947bc57b69ffdf83e6b73bae,k__Bacteria; p__Bacteroidetes; c__Bacteroidia;...,0.985898,k__Bacteria,p__Bacteroidetes,c__Bacteroidia,o__Bacteroidales,f__Bacteroidaceae,g__Bacteroides,s__
154709e160e8cada6bfb21115acc80f5,k__Bacteria; p__Bacteroidetes; c__Bacteroidia;...,1.0,k__Bacteria,p__Bacteroidetes,c__Bacteroidia,o__Bacteroidales,f__Bacteroidaceae,g__Bacteroides,
