# Time Series: Microbiome and RNASeq data
   
    1. For each time point, find classifications whose z-score deviates from those at all other time points.
    2. Treating each time point in turn as the last time point, find linear trends.
    3. Do changepoint analysis based on differences between the population and a sliding window.

In [178]:
import sys

# User Libraries
import tanner.stats.timeseries as ts
import tanner.stats.helpers as shelp
import tanner.analysis.rnaseq as rs
import tanner.analysis.microbiome as mb
import tanner.visual.timeseries as vts
import tanner.analysis.helpers as ah

# Python Libraries
import pandas as pd
from pandas import ExcelWriter
import os 
import seaborn as sns
import scipy.stats as stats

# Ipython Configuration
%pylab inline
%load_ext autoreload
%autoreload 2


Populating the interactive namespace from numpy and matplotlib
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [198]:
# Data and analysis paths
# NOTE(review): these are absolute paths on a specific mounted filesystem, so
# the notebook only runs where /mounts/tscc is available.
# Directory that is joined with an abundance file name and handed to mb.load_aggregated.
microbiome_path = "/mounts/tscc/projects/Li-Fraumeni/data/family3/microbiome/14009b/aggregated/"
# Directory handed to rs.load_deseq.
rnaseq_path = "/mounts/tscc/projects/Li-Fraumeni/data/family3/rna-seq/updated_runs/11292015_Tanner_RNASeq/"
# Output directory for the Excel workbook written near the end of the notebook
# (no trailing slash; it is only used through os.path.join).
analysis_path = "/mounts/tscc/projects/Li-Fraumeni/analysis/feb_2016_spreadsheets"

In [226]:
# Load the RNA-Seq data found under rnaseq_path for individual '002'.
# NOTE(review): presumably DESeq output, judging by the loader name — confirm
# against tanner.analysis.rnaseq.
rnaseq_data = rs.load_deseq(rnaseq_path, individual='002')

In [227]:
# Build the time-series summary first: create_timeseries_df receives the data
# in its loaded orientation, so the transpose has to come after this call.
rnaseq_ts = ah.create_timeseries_df(rnaseq_data, pvalue=0.001)

# Swap rows and columns of the loaded data.
# NOTE(review): re-running this cell transposes the frame back again — run it
# exactly once per load.
rnaseq_data = rnaseq_data.transpose()

In [228]:
# Render the column labels as YYYY-MM-DD strings. Labels that have no strftime
# method (for example strings left over from an earlier run of this cell) are
# kept unchanged, so the cell can be executed more than once.
rnaseq_data.columns = rnaseq_data.columns.map(
    lambda label: label.strftime('%Y-%m-%d') if hasattr(label, 'strftime') else label
)

# Place the values and their time-series statistics side by side, aligned on
# the shared row index.
rnaseq_df = pd.concat([rnaseq_data, rnaseq_ts], axis=1)

# DataFrame.sort was deprecated in pandas 0.17 and removed in 0.20;
# sort_values(by=...) is the equivalent call with the same ordering semantics.
rnaseq_df = rnaseq_df.sort_values(
    by=['outliers', 'changepoints', 'linear-trend', 'linear-pvalue'],
    ascending=[True, True, False, True]
)

In [230]:
# Display the last 50 rows of the sorted RNA-Seq table, i.e. the rows the
# multi-column sort placed at the bottom.
rnaseq_df.tail(50)

Unnamed: 0_level_0,2014-09-25,2014-10-29,2014-11-25,2014-12-19,2015-02-02,2015-03-17,2015-04-06,2015-05-11,2015-06-10,2015-07-02,2015-08-10,zscores,outliers,changepoints,linear-trend,linear-pvalue
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
CT476828.1,0,0,0,0,0,0,0,0,0,0,0,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0",,,False,1
CT476828.2,0,0,0,0,0,0,0,0,0,0,0,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0",,,False,1
CT476828.10,0,0,0,0,0,0,0,0,0,0,0,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0",,,False,1
CT476828.13,0,0,0,0,0,0,0,0,0,0,0,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0",,,False,1
CT476828.8,0,0,0,0,0,0,0,0,0,0,0,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0",,,False,1
CT476828.11,0,0,0,0,0,0,0,0,0,0,0,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0",,,False,1
CT476828.4,0,0,0,0,0,0,0,0,0,0,0,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0",,,False,1
CT476828.7,0,0,0,0,0,0,0,0,0,0,0,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0",,,False,1
CT476828.6,0,0,0,0,0,0,0,0,0,0,0,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0",,,False,1
CT476828.15,0,0,0,0,0,0,0,0,0,0,0,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0",,,False,1


In [210]:
# Load the bacteria species abundance file from the aggregated microbiome directory.
species_abundance_file = os.path.join(microbiome_path, 'Bacteria-species-abundance.txt')
microbiome_data = mb.load_aggregated(species_abundance_file)

In [211]:
# The time-series summary is computed from the data as loaded, before the
# frame is transposed below.
microbiome_ts = ah.create_timeseries_df(microbiome_data, pvalue=0.005)

# Swap rows and columns, then turn the column labels into YYYY-MM-DD strings.
# NOTE(review): both steps change microbiome_data in place, so this cell is
# not safe to run twice without reloading the data.
microbiome_data = microbiome_data.transpose()
microbiome_data.columns = microbiome_data.columns.map(
    lambda stamp: stamp.strftime('%Y-%m-%d')
)

In [219]:
# Place the abundance values and their time-series statistics side by side,
# aligned on the shared row index.
microbiome_df = pd.concat([microbiome_data, microbiome_ts], axis=1)

# DataFrame.sort was deprecated in pandas 0.17 and removed in 0.20;
# sort_values(by=...) is the equivalent call with the same ordering semantics.
microbiome_df = microbiome_df.sort_values(
    by=['outliers', 'changepoints', 'linear-trend', 'linear-pvalue'],
    ascending=[True, True, False, True]
)

In [220]:
# Display the first 50 rows of the sorted microbiome table, i.e. the rows the
# multi-column sort placed at the top.
microbiome_df.head(50)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,2014-07-12,2014-08-08,2014-08-20,2014-09-16,2014-09-24,2014-10-03,2014-10-28,2014-11-13,2014-11-24,2014-12-13,2014-12-18,zscores,outliers,changepoints,linear-trend,linear-pvalue
name,family,class,phylum,genus,order,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Dehalobacter_sp_UNSWDHB,Peptococcaceae,Clostridia,Firmicutes,Dehalobacter,Clostridiales,0.0002840101,4.514938e-05,4.407837e-05,3.997605e-05,3.131942e-05,0.000109921,6.357787e-05,7.138139e-06,1.885671e-05,2.166934e-05,3.918445e-05,"2.96334308337,-0.255072813419,-0.269503579397,...",2014-07-12,,False,0.044705
Haloferax_volcanii,Halobacteriaceae,Halobacteria,Euryarchaeota,Haloferax,Halobacteriales,7.608749e-09,7.377447e-10,0.0,3.176695e-09,2.592304e-11,3.924689e-09,4.765128e-11,0.0,2.978934e-12,4.953175e-13,3.427278e-13,"2.60992416509,-0.283694643856,-0.594384586909,...",2014-07-12,,False,0.045359
Leptospira_sp_serovar_Kenya,Leptospiraceae,Spirochaetia,Spirochaetes,Leptospira,Spirochaetales,3.363272e-08,2.636515e-10,3.387418e-09,3.971041e-10,2.877407e-10,2.224558e-13,0.0,2.848647e-13,5.463573e-18,1.577877e-16,1.044738e-13,"3.14676384476,-0.332401235837,-0.0067074905007...",2014-07-12,,False,0.053671
Pediococcus_claussenii,Lactobacillaceae,Bacilli,Firmicutes,Pediococcus,Lactobacillales,9.941818e-07,5.179631e-14,3.318851e-07,4.058369e-07,1.818402e-07,1.026951e-07,1.358797e-26,0.0,2.711141e-21,2.559518e-07,1.134132e-07,"2.7674274505,-0.772208966678,0.409418272755,0....",2014-07-12,,False,0.058594
Lactobacillus_florum,Lactobacillaceae,Bacilli,Firmicutes,Lactobacillus,Lactobacillales,3.624001e-07,1.4175030000000002e-23,3.324731e-14,1.677149e-07,1.173905e-19,4.362614e-16,2.577899e-13,1.101734e-29,3.994276e-14,1.613548e-11,2.370456e-17,"2.8477363414,-0.436793048811,-0.436792747481,1...",2014-07-12,,False,0.059686
Leptospira_kirschneri,Leptospiraceae,Spirochaetia,Spirochaetes,Leptospira,Spirochaetales,6.687385e-07,1.077797e-08,3.925766e-08,3.086546e-07,4.262587e-08,2.144315e-07,1.142015e-10,6.187677e-09,2.12635e-16,1.182908e-07,2.352747e-09,"2.74983417024,-0.598044220856,-0.453131973635,...",2014-07-12,,False,0.068279
Streptomyces_sp_SirexAA_E,Streptomycetaceae,Actinobacteria,Actinobacteria,Streptomyces,Actinomycetales,8.165761e-07,5.932986e-08,2.02212e-07,1.445061e-07,1.331417e-07,2.853291e-07,4.938031e-08,1.945561e-07,3.582312e-08,8.53805e-08,1.581472e-07,"2.96950546899,-0.658432135127,0.0261105207196,...",2014-07-12,,False,0.069007
Natrinema_versiforme,Halobacteriaceae,Halobacteria,Euryarchaeota,Natrinema,Halobacteriales,4.018835e-07,2.880279e-23,1.543429e-07,1.549291e-07,8.417104e-08,1.522243e-07,0.0,0.0,0.0,9.511606e-08,9.700956e-08,"2.64420696386,-0.918470593154,0.449772178116,0...",2014-07-12,,False,0.070861
Xanthomonas_alfalfae,Xanthomonadaceae,Gammaproteobacteria,Proteobacteria,Xanthomonas,Xanthomonadales,2.952228e-07,7.009937e-23,1.436032e-07,4.344898e-08,4.881643e-08,4.233025e-08,2.838867e-13,0.0,8.652642e-16,1.168566e-07,1.853021e-12,"2.65741910332,-0.717341345222,0.924220200804,-...",2014-07-12,,False,0.072026
Acinetobacter_sp_NIPH_713,Moraxellaceae,Gammaproteobacteria,Proteobacteria,Acinetobacter,Pseudomonadales,2.10803e-07,0.0,1.557969e-37,6.586107e-08,0.0,5.348232e-09,0.0,0.0,2.527072e-40,0.0,1.626643e-08,"3.00943585027,-0.444259665622,-0.444259665622,...",2014-07-12,,False,0.073712


In [231]:
# Write both result tables into a single workbook, one sheet per data set.
# The with-block closes the writer even when one of the to_excel calls raises,
# so the file handle is never left open.
workbook_path = os.path.join(analysis_path, "rnaseq_and_microbiome.xlsx")
with ExcelWriter(workbook_path) as writer:
    microbiome_df.to_excel(writer, sheet_name="Microbiome")
    rnaseq_df.to_excel(writer, sheet_name="RNA-Seq")

In [196]:
# Standard-normal quantile for a lower-tail probability of 0.001.
# NOTE(review): 0.001 is the same value passed as pvalue for the RNA-Seq
# time series; presumably this is the matching z-score cut-off — confirm.
lower_tail_p = 0.001
stats.norm.ppf(lower_tail_p)

-3.0902323061678132