In [None]:
import math
import matplotlib as mpl
import matplotlib
matplotlib.use('Agg')
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import sys

In [None]:
# Put the project's code directory on the import path (the next cell imports
# from `utils`).  The membership check keeps the cell idempotent: re-running
# it no longer appends a duplicate entry to sys.path.
if '../code/' not in sys.path:
    sys.path.append('../code/')

In [None]:
from utils import split_df, aggregate_df

In [None]:
## Load the gene expression table and the curated genome table.
## Both files are parsed as tab-separated text (the expression file too,
## despite its .xls extension).
GENE_EXP_DATA = "../data/raw_data/3_summary_rpkm.xls"
GENOMES_DATA = '../data/genomes_curated.tsv'

meta4_exp = pd.read_csv(GENE_EXP_DATA, sep='\t')
genomes = pd.read_csv(GENOMES_DATA, sep='\t')

In [None]:
# Attach the curated genome table to the expression rows.
# NOTE(review): no `on=` is given, so pandas joins on every column name the
# two frames share, and the default inner join drops rows without a match.
# Confirm the intended key column and that no rows are lost unexpectedly.
meta4_exp = meta4_exp.merge(genomes)
meta4_exp.head(3)

In [None]:
# split_df comes from utils; presumably it partitions the frame by the values
# of its 'type' column (the result is looked up by 'm' and 'nmm' below).
# TODO confirm against utils.split_df.
split_by_type = split_df(meta4_exp, 'type')
split_by_type.keys()

In [None]:
# Size of the 'm' subset (aggregated below as methanotroph_abundances).
split_by_type['m'].shape

In [None]:
# Size of the 'nmm' subset (aggregated below as methylotroph_abundances).
split_by_type['nmm'].shape

In [None]:
# Aggregate the 'm' subset by 'genome'.
# NOTE(review): aggregate_df is defined in utils; colnorm=True presumably
# normalizes each sample column -- confirm there.
methanotroph_abundances = aggregate_df(split_by_type['m'], 'genome', colnorm=True)
methanotroph_abundances.shape

In [None]:
# Same aggregation for the 'nmm' subset.
methylotroph_abundances = aggregate_df(split_by_type['nmm'], 'genome', colnorm=True)
methylotroph_abundances.shape

In [None]:
# Preview the aggregated methanotroph table.
methanotroph_abundances.head()

In [None]:
# Per-sample metadata.  prepare_df (defined below) merges this table in and
# uses its 'ID', 'project', 'oxy', 'rep' and 'week' columns.
sample_info = pd.read_csv('../data/sample_info.tsv', sep='\t')
sample_info.head(3)

In [None]:
def prepare_df(df, sample_info):
    """Reshape an abundance table to one row per sample, indexed by (oxy, rep, week).

    Parameters
    ----------
    df : pandas.DataFrame
        Abundance table whose column labels are sample names such as
        'LakWasM130_HOW14_2_rpkm'.  The caller's frame is not modified.
    sample_info : pandas.DataFrame
        Sample metadata containing at least the columns 'ID', 'project',
        'oxy', 'rep' and 'week'.  'ID' holds the short sample id
        (e.g. '130_HOW14') that is extracted from the column labels of ``df``.

    Returns
    -------
    pandas.DataFrame
        One row per sample whose extracted id has a match in
        ``sample_info['ID']`` (inner join), the row labels of ``df`` as
        columns, and a MultiIndex with levels ('oxy', 'rep', 'week').

    Raises
    ------
    KeyError
        If ``sample_info`` lacks one of the required columns.
    """
    df = df.copy()  # relabel a copy so the caller's frame keeps its columns
    # 'LakWasM130_HOW14_2_rpkm' --> '130_HOW14'
    # Letters are matched with [A-Za-z].  The range [A-z] also covers the
    # ASCII characters between 'Z' and 'a' (including '_'), which lets the
    # match run past the separator for ids that have no trailing digits.
    df.columns = df.columns.str.extract(r'([0-9]*_[A-Za-z]*[0-9]*)_', expand=False)
    # The samples are columns.  Transpose and reset the index so the sample id
    # becomes an ordinary 'ID' column that can be merged on.
    orgs_as_cols = df.T.reset_index().rename(columns={'index': 'ID'})
    # Join on the sample id explicitly instead of on "all shared column names".
    merged = pd.merge(orgs_as_cols, sample_info, on='ID')
    # 'ID' and 'project' are not needed once oxy/rep/week are attached.
    merged = merged.drop(['ID', 'project'], axis=1)
    return merged.set_index(['oxy', 'rep', 'week'])
    

In [None]:
# Methanotroph abundances reshaped to one row per sample, indexed by
# (oxy, rep, week).
methane = prepare_df(methanotroph_abundances, sample_info)

In [None]:
methane.head(2)

In [None]:
# Overview heatmap of the whole table: one row per (oxy, rep, week) sample.
sns.heatmap(methane, cmap="YlGnBu")

In [None]:
# Row selection with a boolean mask built from the 'oxy' index level:
# keeps only the rows whose oxy value is 'Low'.
methane.iloc[methane.index.get_level_values('oxy') == 'Low']

In [None]:
# Keep only replicate 1 of the 'Low' oxygen condition: build one boolean mask
# per index level and combine them.
is_low_oxy = methane.index.get_level_values('oxy') == 'Low'
is_first_rep = methane.index.get_level_values('rep') == 1
plot_data = methane.iloc[is_low_oxy & is_first_rep]

In [None]:
plot_data.head(2)

In [None]:
# Label-based alternative to the boolean masks above.
# NOTE(review): on a DataFrame, `.loc[a, b]` can also be read as
# (row label, column label); this relies on pandas resolving ('Low', 1)
# against the (oxy, rep, week) row index.  `methane.loc[('Low', 1), :]`
# would state the intent unambiguously -- confirm before changing.
methane.loc['Low', 1]

In [None]:
# Same reshaping for the methylotroph abundances.
methanol = prepare_df(methylotroph_abundances, sample_info)

In [None]:
# Still the methanotroph selection built above (plot_data is not reassigned
# in between).
plot_data.head(3)

In [None]:
# Same ('Low', 1) selection as above, kept for the heatmap in the next cell.
x = methane.loc['Low', 1]

In [None]:
# Transposed so the columns of `x` become the heatmap rows.
sns.heatmap(x.T)

In [None]:
methanol.head(2)

In [None]:
# Small toy table used to prototype the per-group heatmap loop below, which
# expects 'oxy', 'rep' and 'week' columns.
toy = pd.read_csv('../data/toy/toy_oxy_rep_week_org.csv')
toy.head(2)

In [None]:
# Prototype on the toy data: one small heatmap per (oxy, rep) group, with the
# weeks along the x axis and the remaining columns as rows.
for tup, df in toy.groupby(['oxy', 'rep']):
    print(tup)
    # The grouping keys are constant within a group; drop them and index the
    # remaining columns by week.
    df = df.drop(['oxy', 'rep'], axis=1).set_index(['week'])
    print(df.head(2))
    fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(2, 3))
    sns.heatmap(df.T, ax=axs)

In [None]:
! mkdir ../figures/methylotroph_abundances

In [None]:
# Scratch figure: an empty single-axes figure, used in the next cell to look
# at the Figure API.
fig, axs = plt.subplots(1, 1, figsize=(3, 5))

In [None]:
# Bare attribute lookup (no call): the cell only displays the bound method
# and does not change the figure layout.
axs.figure.subplots_adjust

In [None]:
# One heatmap per (oxy, rep) group of the methylotroph table, saved as a PDF
# in ../figures/methylotroph_abundances/ (that directory must already exist).
for tup, df in methanol.reset_index().groupby(['oxy', 'rep']):
    oxy_level, replicate = tup
    title = 'Organism abundances: {} oxygen, replicate {}'.format(oxy_level, replicate)

    # The grouping keys are constant within a group; drop them and index the
    # remaining columns by week so weeks run along the x axis after transposing.
    df = df.drop(['oxy', 'rep'], axis=1).set_index(['week'])

    fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(6, 5))
    sns.heatmap(df.T, ax=axs)
    fig.suptitle(title, size=12)

    # Margins are set by hand: top leaves room for the suptitle, and the wide
    # left margin is presumably there for long row labels.
    # plt.tight_layout(pad=1) was tried here and marked BAD by the author.
    fig.subplots_adjust(top=0.92, left=0.5)

    fig.savefig('../figures/methylotroph_abundances/' + title.replace(' ', '_') + '.pdf')