# Comparison of glucose gradient microplate tests with bioreactor test #

### Notebook to generate Figs. 6b and 6c and Supplementary Figs. S8 and S9 ###

In [970]:
#Import necessary modules

import impact as                             impt
import impact.plotting as                    implot
from impact.parsers import Parser as parser
from sklearn.manifold import TSNE
import pandas as pd
from scipy.stats import linregress, ttest_ind
import numpy as np
import sys
from plotly import tools, subplots
import plotly.graph_objs as go
import pickle
import plotly.io as pio
from plotly.subplots import make_subplots
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
pio.templates.default = "none"
import plotly
if 'ipykernel' in sys.modules:
    from plotly.offline import init_notebook_mode
    from plotly.offline import iplot as plot
    from IPython.display import HTML
    HTML("""
         <script>
          var waitForPlotly = setInterval( function() {
          if( typeof(window.Plotly) !== "undefined" ){
          MathJax.Hub.Config({ SVG: { font: "STIX-Web" }, displayAlign: "center" });
          MathJax.Hub.Queue(["setRenderer", MathJax.Hub, "SVG"]);
          clearInterval(waitForPlotly);}}, 250 );
        </script>
        """
    )
    init_notebook_mode(connected=True)

In [971]:
# Package-wide options on `impt.settings`, set before any data is parsed in the
# cells below.
# NOTE(review): the exact effect of each flag is defined inside the `impact`
# package and is not visible in this notebook -- confirm against its docs.
impt.settings.perform_curve_fit = False
impt.settings.fit_type = 'gompertz'
impt.settings.outlier_cleaning_flag = True
impt.settings.max_fraction_replicates_to_remove = 0.5
impt.settings.std_deviation_cutoff=0.1
impt.settings.verbose = False
impt.settings.live_calculations = False
impt.settings.use_filtered_data = True

# Presumably smoothing / phase-detection hyperparameters -- TODO confirm.
impt.settings.death_phase_hyperparameter = 4
impt.settings.savgolFilterWindowSize = 9

### Load raw data ###

In [972]:
# Microplate OD600 readings -> `glc_od`.
# `data_format`, `plate_type` and `file_name` are re-assigned before every
# parse call below, so only the most recent values are in effect at each call.
data_format='spectromax_OD'
plate_type = '96 Wells'
file_name = 'GlucoseGradient_02022021.xlsx'
glc_od = parser.parse_raw_data(data_format=data_format,file_name=file_name,plate_type=plate_type)
glc_od.calculate()


# Microplate titers -> `glc_titers`.
data_format='default_titers'
plate_type = '96 Wells'
file_name = 'GlucoseGradient_Titers_02022021.xlsx'
glc_titers = parser.parse_raw_data(data_format=data_format,file_name=file_name,plate_type=plate_type,id_type='traverse')
glc_titers.calculate()


# Bioreactor data -> `reactor`.
# NOTE(review): `plate_type` is set to None here but is not passed to
# parse_raw_data for this file, so the assignment has no effect on the call.
data_format='default_titers'
plate_type = None
file_name = 'Bioreactor_Data.xlsx'
reactor = parser.parse_raw_data(data_format=data_format,file_name=file_name,id_type='traverse')
reactor.calculate()


Importing data from GlucoseGradient_02022021.xlsx...0.1s
Parsing time point list...Parsed 4896 time points in 0.5s
Parsing analyte list...Parsed 96 single trials in 324.1ms
Parsing single trial list...Parsed 30 replicates in 0.1s
Analyzing data...Ran analysis in 1.7s


Importing data from GlucoseGradient_Titers_02022021.xlsx...0.0s
Parsed 546 timeCourseObjects in 0.668s...Number of lines skipped:  4
Parsing time point list...Parsed 546 time points in 0.2s
Parsing analyte list...Parsed 91 single trials in 1538.1ms
Parsing single trial list...Parsed 30 replicates in 0.1s
Analyzing data...


invalid value encountered in true_divide



Ran analysis in 2.9s


Importing data from Bioreactor_Data.xlsx...0.0s
Parsed 189 timeCourseObjects in 0.068s...Number of lines skipped:  0
Parsing time point list...Parsed 189 time points in 0.0s
Parsing analyte list...Parsed 4 single trials in 42.6ms
Parsing single trial list...Parsed 4 replicates in 0.0s
Analyzing data...No blanks were indicated. Blank subtraction will not be done.Ran analysis in 0.1s



### Write functions to extract required analytes (yields of acetate, formate, lactate, pyruvate, succinate, growth rate and biomass yield) ###

In [973]:
def _exponential_phase_end_index(rates, search_start=18, gradient_cutoff=-0.005,
                                 low_rate=0.05, high_rate=0.2):
    """Locate the index that closes the exponential-growth window.

    Parameters
    ----------
    rates : array-like
        Specific-productivity values of one OD600 time course (the object
        stored in ``analyte_dict['OD600'].specific_productivity.data``).
    search_start : int
        Index from which the search begins (earlier points are ignored).
    gradient_cutoff : float
        The window ends at the first point (from ``search_start``) whose
        gradient of ``rates`` is at or above this value.
    low_rate, high_rate : float
        If the rate at that point is <= ``low_rate`` or >= ``high_rate``, the
        window instead ends at the first point (from ``search_start``) whose
        rate is <= ``low_rate``.

    Returns
    -------
    int
        Index into ``rates``. ``np.argmax`` returns 0 when no point satisfies
        a condition, so the result is ``search_start`` in that case.
    """
    rate_gradient = np.gradient(rates)
    end_index = search_start + np.argmax(rate_gradient[search_start:] >= gradient_cutoff)
    if rates[end_index] <= low_rate or rates[end_index] >= high_rate:
        end_index = search_start + np.argmax(rates[search_start:] <= low_rate)
    return end_index


def _exponential_growth_rate(single_trial, peak_search_start=3):
    """Average specific productivity between its peak and the end of the exponential window.

    The peak is searched from index ``peak_search_start`` onwards. If the peak
    lies at or after the end index the averaged slice is empty and
    ``np.average`` yields NaN.
    """
    rates = single_trial.analyte_dict['OD600'].specific_productivity.data
    end_index = _exponential_phase_end_index(rates)
    start_index = peak_search_start + np.argmax(rates[peak_search_start:])
    return np.average(rates[start_index:end_index])


def _end_point_od(single_trial):
    """OD600 at the end of the exponential window minus the first OD600 reading."""
    od600 = single_trial.analyte_dict['OD600']
    end_index = _exponential_phase_end_index(od600.specific_productivity.data)
    return od600.pd_series.iloc[end_index] - od600.pd_series.iloc[0]


def average_exponential_specific_productivity(replicate_trial):
    """Return a list with the exponential-phase growth rate of every single trial.

    The list follows the order of ``replicate_trial.single_trials``.
    """
    return [_exponential_growth_rate(single_trial)
            for single_trial in replicate_trial.single_trials]


def average_exponential_specific_productivity_singletrial(replicate_trial):
    """Return ``{str(replicate_id): growth rate}`` for every single trial."""
    return {str(single_trial.trial_identifier.replicate_id): _exponential_growth_rate(single_trial)
            for single_trial in replicate_trial.single_trials}



def end_point_biomass(replicate_trial):
    """Return a list with the OD600 gain (end of exponential window minus start) of every single trial."""
    return [_end_point_od(single_trial)
            for single_trial in replicate_trial.single_trials]



def end_point_biomass_singletrial(replicate_trial):
    """Return ``{str(replicate_id): OD600 gain}`` for every single trial."""
    return {str(single_trial.trial_identifier.replicate_id): _end_point_od(single_trial)
            for single_trial in replicate_trial.single_trials}




def yield_calculator(reps, hplc_timepoint=-1, od_timepoint=-1, analytes=None):
    """Compute one yield (or growth rate) and its error per requested analyte.

    Parameters
    ----------
    reps : sequence of two replicate trials
        ``(od_rep, hplc_rep)``: the replicate holding OD600 data and the one
        holding titers. The same object may be passed twice.
    hplc_timepoint : int
        Time point used for titer-based yields. In the plate branch it is a
        position (``.iloc``); in the bioreactor branch it is the stop of a
        ``pd_series[:hplc_timepoint]`` slice.
    od_timepoint : int
        Same as ``hplc_timepoint`` but for the biomass / growth-rate entries.
    analytes : list of str, optional
        Names to evaluate, in output order. ``'Growth Rate'`` and ``'Biomass'``
        are handled specially; every other name is looked up in
        ``analyte_dict``. When omitted, the notebook-level ``analyte_list`` is
        used, which is the original behaviour. Passing it explicitly avoids
        depending on which cell last reassigned that global.

    Returns
    -------
    (list, list)
        ``y_vals`` and ``y_errs``, aligned with ``analytes``. Errors are 0 for
        every bioreactor entry.

    Notes
    -----
    A replicate is treated as a plate experiment when its ``blank`` attribute
    is truthy, otherwise as a bioreactor experiment.
    NOTE(review): the bioreactor branch uses plain ``[]`` slicing/indexing on
    ``pd_series``; whether that is label- or position-based depends on the
    index dtype and the pandas version, and the default time point of -1 may
    give an empty slice there -- confirm before relying on the defaults.
    """
    od_rep, hplc_rep = reps
    if analytes is None:
        # Backward-compatible fallback to the notebook global.
        analytes = analyte_list

    # Presumably molar masses (g/mol) used to turn a mass ratio into a molar ratio -- TODO confirm.
    mass_dict = {'Lactate':89.078,
             'Acetate':59.052,
             'Formate':45.025,
             'Succinate':116.088,
             'Pyruvate':87.06,
             'Glucose':180.156}
    dry_weight_per_od = 0.3  # 0.3 g/L dry weight of cells in 1 unit of OD600

    y_vals = []
    y_errs = []
    for analyte in analytes:

        #For analytes
        if analyte not in ['Growth Rate', 'Biomass']:
            #For plate expt
            if hplc_rep.blank:
                analyte_avg = hplc_rep.avg.analyte_dict[analyte].pd_series.iloc[hplc_timepoint]
                glucose_avg = hplc_rep.avg.analyte_dict['Glucose'].pd_series.iloc[hplc_timepoint]
                analyte_std = hplc_rep.std.analyte_dict[analyte].pd_series.iloc[hplc_timepoint]
                glucose_std = hplc_rep.std.analyte_dict['Glucose'].pd_series.iloc[hplc_timepoint]
                y = -analyte_avg/glucose_avg * (mass_dict['Glucose']/mass_dict[analyte])
                # Relative errors of numerator and denominator combined in quadrature.
                y_err = y*np.sqrt((analyte_std/analyte_avg)**2 + (glucose_std/glucose_avg)**2)
            #For bioreactor expt
            else:
                analyte_series = hplc_rep.avg.analyte_dict[analyte].pd_series
                glucose_series = hplc_rep.avg.analyte_dict['Glucose'].pd_series
                glucose_consumed = glucose_series[0] - glucose_series[:hplc_timepoint].iloc[-1]
                y = analyte_series[:hplc_timepoint].iloc[-1]/glucose_consumed * (mass_dict['Glucose']/mass_dict[analyte])
                y_err = 0

        #For growth rate
        elif analyte=='Growth Rate':
            #For plate expt
            if od_rep.blank:
                specific_prod = average_exponential_specific_productivity(od_rep)
                y = np.average(specific_prod)
                y_err = np.std(specific_prod)

            #For bioreactor expt
            else:
                od_window = od_rep.avg.analyte_dict['OD600'].pd_series[:od_timepoint]
                # (dOD/dt)/OD at every point of the window, then averaged.
                y = np.average(np.gradient(od_window.values)/np.gradient(od_window.index.values)/od_window.values)
                y_err = 0

        #For biomass yield
        elif analyte=='Biomass':
            #For plate expt
            if od_rep.blank:
                end_point_ods = end_point_biomass(od_rep)
                glucose_avg = hplc_rep.avg.analyte_dict['Glucose'].pd_series.iloc[od_timepoint]
                glucose_std = hplc_rep.std.analyte_dict['Glucose'].pd_series.iloc[od_timepoint]
                y = -np.average(end_point_ods)/glucose_avg * dry_weight_per_od
                y_err = np.std(end_point_ods)
                y_err = y*np.sqrt((y_err/np.average(end_point_ods))**2 +
                                  (glucose_std/glucose_avg)**2)
            #For bioreactor expt
            else:
                od_series = od_rep.avg.analyte_dict['OD600'].pd_series
                glucose_series = hplc_rep.avg.analyte_dict['Glucose'].pd_series
                od_gain = od_series[:od_timepoint].iloc[-1] - od_series.iloc[0]
                glucose_consumed = glucose_series[0] - glucose_series[:od_timepoint].iloc[-1]
                y = od_gain/glucose_consumed * dry_weight_per_od
                y_err = 0
        y_vals.append(y)
        y_errs.append(y_err)
    return y_vals, y_errs


## Loading individual replicate data for all experimental conditions to perform PCA ##

In [974]:
# Build `st_df`: one row per plate replicate, for every strain x glucose condition.
# The names defined here (strain_list, condition_list, analyte_list, mass_dict)
# are notebook-level globals that later cells and functions also read.
strain_list = ['WT', 'Δ(adh,pta)-D1', 'Δ(adh,pta)-D28', 'Δ(adh,pta)-D59']
condition_list = ['RDM + 1.5 g/L glc', 'RDM + 3.0 g/L glc', 'RDM + 6.0 g/L glc', 'RDM + 9.0 g/L glc', 'RDM + 12.0 g/L glc', 'RDM + 18.0 g/L glc']
analyte_list = ['Acetate', 'Formate', 'Lactate', 'Pyruvate', 'Succinate', 'Biomass', 'Growth Rate', 'OD0', 'OD2', 'OD4', 'OD6', 'OD8']
# Presumably molar masses in g/mol -- TODO confirm.
mass_dict = {'Lactate':89.078,
         'Acetate':59.052,
         'Formate':45.025,
         'Succinate':116.088,
         'Pyruvate':87.06,
         'Glucose':180.156}
st_df = pd.DataFrame()
for strain in strain_list:
    # No entry of strain_list is 'blank', so this guard is always true here.
    if strain.lower() != 'blank':
        for condition in condition_list:
            temp_df = pd.DataFrame()
            # First replicate trial whose strain name and media string match.
            od_rep = [rep for rep in glc_od.replicate_trials if rep.trial_identifier.strain.name==strain and str(rep.trial_identifier.media)==condition][0]
            hplc_rep = [rep for rep in glc_titers.replicate_trials if rep.trial_identifier.strain.name==strain and str(rep.trial_identifier.media)==condition][0] 
            # Each analyte contributes one column to temp_df, concatenated side by side.
            for analyte in ['Glucose']+analyte_list:
                if analyte not in ['Biomass', 'Growth Rate', 'OD0', 'OD2', 'OD4', 'OD6', 'OD8']:
                    # Titer column: the column labelled -1 is renamed to the lower-case analyte name.
                    # NOTE(review): presumably -1 is the time label of the end-point sample -- confirm.
                    temp_df = pd.concat([temp_df,hplc_rep.replicate_df[analyte].transpose().rename(columns={-1:analyte.lower()})], axis=1)

                elif analyte=='Growth Rate':
                    # Per-replicate growth rate keyed by replicate id; the unnamed column 0 becomes 'growth_rate'.
                    temp_df = pd.concat([temp_df, pd.Series(average_exponential_specific_productivity_singletrial(od_rep))],axis=1).rename(columns={0:'growth_rate'})
                    
                elif analyte=='Biomass':
                    # OD600 gain scaled by 0.3 (the dry-weight factor also used in yield_calculator).
                    temp_df = pd.concat([temp_df, pd.Series(end_point_biomass_singletrial(od_rep))*0.3],axis=1).rename(columns={0:'biomass'})
                elif 'OD' in analyte:
                    # 'OD4' -> 4: lstrip removes the leading 'O'/'D' characters; that number selects the
                    # column of the transposed OD600 frame and the column is renamed to e.g. 'od4'.
                    temp_df = pd.concat([temp_df, pd.DataFrame(od_rep.replicate_df['OD600'].transpose()[int(analyte.lstrip('OD'))]).rename(columns={int(analyte.lstrip('OD')):analyte.lower()})], axis=1)
            # Move the index into a regular 'replicate' column.
            temp_df.reset_index(level=0, inplace=True)
            temp_df = temp_df.rename(columns={'index':'replicate'})
            # Missing values are replaced by the column mean over this condition's replicates.
            temp_df = temp_df.fillna(temp_df.mean())
            # Yields are the negated ratio to the 'glucose' column; metabolite yields are additionally
            # scaled by the glucose/analyte mass_dict ratio.
            temp_df['biomass_yield'] = -temp_df['biomass']/temp_df['glucose']
            temp_df['acetate_yield'] = -temp_df['acetate']/temp_df['glucose']*mass_dict['Glucose']/mass_dict['Acetate']
            temp_df['formate_yield'] = -temp_df['formate']/temp_df['glucose']*mass_dict['Glucose']/mass_dict['Formate']
            temp_df['lactate_yield'] = -temp_df['lactate']/temp_df['glucose']*mass_dict['Glucose']/mass_dict['Lactate']
            temp_df['pyruvate_yield'] = -temp_df['pyruvate']/temp_df['glucose']*mass_dict['Glucose']/mass_dict['Pyruvate']
            temp_df['succinate_yield'] = -temp_df['succinate']/temp_df['glucose']*mass_dict['Glucose']/mass_dict['Succinate']
            temp_df['strain'] = strain
            temp_df['condition'] = condition
            # NOTE(review): `pd_series[-1]` is plain [] indexing; whether -1 is a label or a position
            # depends on the index and pandas version -- confirm.
            temp_df['starting_glucose'] = hplc_rep.blank.avg.analyte_dict['Glucose'].pd_series[-1]
            # Only the yield is kept; the raw 'biomass' column is dropped.
            temp_df = temp_df.drop(columns=['biomass'])
            temp_df = temp_df.sort_values(by=['strain', 'condition', 'replicate'])
            # Accumulates into st_df, so re-running this cell rebuilds it from the empty frame above.
            st_df = pd.concat([st_df,temp_df], axis=0)

# Add one row per strain for the bioreactor experiment to `st_df`.
# `analyte_list` is narrowed here because yield_calculator reads it as a global.
analyte_list = ['Acetate', 'Formate', 'Lactate', 'Pyruvate', 'Succinate', 'Biomass', 'Growth Rate']
reactor_frames = []
for strain in strain_list:
    rep = [rep for rep in reactor.replicate_trials if rep.trial_identifier.strain.name==strain][0]
    # The reactor replicate supplies both OD600 and titers; errors (`temp`) are not used.
    y,temp = yield_calculator([rep,rep],od_timepoint=8, hplc_timepoint=8)
    entry = {analyte.lower()+'_yield': y[i] for i,analyte in enumerate(analyte_list)}
    entry['growth_rate'] = entry.pop('growth rate_yield')
    # Build the one-row frame directly: DataFrame.append is deprecated and no longer
    # exists in pandas 2.x. The key is already 'biomass_yield', so the former
    # rename of a 'biomass' column was a no-op and is dropped.
    temp_df = pd.DataFrame([entry])
    temp_df['strain'] = strain
    temp_df['condition'] = 'reactor'
    temp_df['replicate'] = '1'
    for analyte in ['OD0', 'OD2', 'OD4', 'OD6', 'OD8']:
        # 'OD4' -> row labelled 4 of the OD600 replicate frame; first value of that row.
        temp_df[analyte.lower()] = rep.replicate_df['OD600'].loc[int(analyte.lstrip('OD'))].values[0]

    # Glucose change between the rows labelled 0 and 8 (negative when glucose is consumed).
    temp_df['glucose'] = rep.avg.analyte_dict['Glucose'].pd_series.loc[8] - rep.avg.analyte_dict['Glucose'].pd_series.loc[0]
    # Back-calculate titers from the yields so the columns match the plate rows.
    for analyte in ['acetate', 'lactate',  'formate', 'pyruvate', 'succinate']:
        temp_df[analyte] = -temp_df[analyte+'_yield']*mass_dict[analyte.title()]/mass_dict['Glucose']*temp_df['glucose']
    temp_df['starting_glucose'] = rep.avg.analyte_dict['Glucose'].pd_series.loc[0]

    reactor_frames.append(temp_df)

# Single concat instead of growing st_df inside the loop.
st_df = pd.concat([st_df] + reactor_frames, axis=0)


# Give st_df a fresh 0..n-1 index; the plotting functions concatenate on it with axis=1.
st_df = st_df.reset_index(level=0, drop=True)


### Write plotting functions for tsne and pca ###

#### Use Standard Scaler to scale data before tsne and pca ####
#### t-SNE can be run with the perplexity and learning rate supplied as input parameters ####

In [975]:
# Shared figure layout: pca_plot and tsne_plot both read this global `layout`
# at call time, so reassigning `layout` in a later cell changes their output.
layout = go.Layout(height=475, width=430, legend_x= 0.1, legend_y=-0.15, legend_font=dict(family='Myriad Pro', size=12, color='black'), legend_orientation='h',
                   xaxis=dict(title='Component 1',
                              title_standoff=0.5,
                              titlefont=dict(family='Myriad Pro', size=16, color='black'),
                              showline=True, linewidth=1, linecolor='black', mirror=True, side='bottom',
                              ticks='outside', ticklen=4, tickangle=0, nticks=8,
                              tickfont=dict(size=16, family='Myriad Pro', color='black'), tickcolor='black',
                              showgrid=True,zeroline=True),
                  
                  yaxis=dict(title='Component 2',
                            titlefont=dict(family='Myriad Pro', size=16, color='black'),

                              anchor='x', side='left', showgrid=True, zeroline=True,
                            tickfont=dict(family='Myriad Pro',size=16, color='black'), tickcolor='black', 

                              showline=True, linewidth=1, linecolor='black', mirror=True,
                              ticks='outside', ticklen=4, tickangle=0))


# Six condition colours: three steps from blue towards grey (the grey end point is dropped)
# followed by three steps from grey towards red (the grey start point is dropped).
colors = ['rgb'+str(color) for color in plotly.colors.n_colors((33,100,175), (234,234,234), 4)][:-1]
colors = colors + ['rgb'+str(color) for color in plotly.colors.n_colors((234,234,234), (217,72,42), 4)][1:]

# One marker symbol per strain, indexed by the strain's position in strain_list.
symbols = ['circle', 'x', 'diamond', 'square']
def pca_plot(features, n_components=5):
    """Scatter the first two principal components of ``st_df[features]``.

    Parameters
    ----------
    features : list of str
        Columns of the notebook-level ``st_df`` used as PCA inputs. They are
        standardised with ``StandardScaler`` before fitting.
    n_components : int
        Number of components to fit (at least 2, because PC1 and PC2 are
        plotted). The default of 5 matches the PCA fitted elsewhere in this
        notebook.

    Returns
    -------
    plotly.graph_objs.Figure
        The figure, which is also displayed via ``plot``.

    Notes
    -----
    The PCA is fitted on a local estimator. The function previously used a
    global ``pca`` that is only created in a later cell, so it failed with
    ``NameError`` on a fresh kernel. Reads the notebook globals ``st_df``,
    ``strain_list``, ``condition_list``, ``symbols``, ``colors`` and
    ``layout``; ``st_df`` must carry a 0..n-1 index so the component columns
    align row by row.
    """
    scaled = StandardScaler().fit_transform(st_df[features].values)

    pca_model = PCA(n_components=n_components)
    pcs = pca_model.fit_transform(scaled)
    n_kept = min(4, pcs.shape[1])
    principal_df = pd.DataFrame(data=pcs[:, :n_kept],
                                columns=['PC'+str(k+1) for k in range(n_kept)])

    pca_df = pd.concat([st_df.copy(deep=True), principal_df], axis=1)

    trace_list = []
    for i, strain in enumerate(strain_list):
        # 'reactor' is placed first so plate condition j maps to colors[j-1].
        for j, condition in enumerate(['reactor'] + condition_list):
            subset = pca_df[(pca_df['strain']==strain) & (pca_df['condition']==condition)]
            if condition != 'reactor':
                trace_list.append(go.Scatter(x=subset['PC1'],
                                             y=subset['PC2'],
                                             mode='markers', showlegend=False,
                                             marker=dict(symbol=symbols[i], size=8, color=colors[j-1],
                                                         line=dict(color='black', width=0.5))))
            else:
                # Reactor points: larger green markers, the only traces shown in the legend.
                trace_list.append(go.Scatter(x=subset['PC1'],
                                             y=subset['PC2'],
                                             mode='markers', name=strain,
                                             marker=dict(symbol=symbols[i], size=12, color='rgb(54, 146, 68)',
                                                         line=dict(color='black', width=0.5))))
    print("PCA")
    fig = go.Figure(data=trace_list, layout=layout)
    plot(fig)
    return fig

def tsne_plot(features,perplexity=10,learning_rate=250,random_state=None):
    """Embed ``st_df[features]`` with t-SNE and scatter the two embedding axes.

    Parameters
    ----------
    features : list of str
        Columns of the notebook-level ``st_df`` used as inputs. They are
        standardised with ``StandardScaler`` first.
    perplexity, learning_rate
        Passed straight to ``sklearn.manifold.TSNE``.
    random_state : int or None
        Passed to ``TSNE``. The default ``None`` keeps the original unseeded
        behaviour; pass an integer to make the embedding reproducible.

    Returns
    -------
    plotly.graph_objs.Figure
        The figure, which is also displayed via ``plot``.

    Notes
    -----
    Reads the notebook globals ``st_df``, ``strain_list``, ``condition_list``,
    ``symbols``, ``colors`` and ``layout``; ``st_df`` must carry a 0..n-1
    index so the embedding columns align row by row. The reactor trace
    previously filtered on an undefined ``final_df``; it now filters on
    ``tsne_df`` like every other trace.
    """
    scaled = StandardScaler().fit_transform(st_df[features].values)

    tsn = TSNE(perplexity=perplexity, learning_rate = learning_rate, init='pca', n_iter=20000,
               n_iter_without_progress=2500, verbose=0, random_state=random_state).fit_transform(scaled)
    tsn_df = pd.DataFrame(data=tsn, columns = ['tsn1', 'tsn2'])
    tsne_df = pd.concat([st_df.copy(deep=True), tsn_df], axis=1)
    print("TSNE")
    print("Learning Rate: ", learning_rate)
    print("Perplexity: ",perplexity)

    trace_list = []
    for i, strain in enumerate(strain_list):
        # 'reactor' is placed first so plate condition j maps to colors[j-1].
        for j,condition in enumerate(['reactor'] + condition_list):
            subset = tsne_df[(tsne_df['strain']==strain) & (tsne_df['condition']==condition)]
            if condition!='reactor':
                trace_list.append(go.Scatter(x=subset['tsn1'],
                                             y=subset['tsn2'],
                                             mode='markers', showlegend=False,
                                             marker=dict(symbol=symbols[i], size=8, color=colors[j-1],
                                                         line=dict(color='black', width=0.5))))
            else:
                # Reactor points: larger green markers, the only traces shown in the legend.
                trace_list.append(go.Scatter(x=subset['tsn1'],
                                             y=subset['tsn2'],
                                             mode='markers', name=strain,
                                             marker=dict(symbol=symbols[i], size=12, color='rgb(54, 146, 68)',
                                                         line=dict(color='black', width=0.5))))
    fig = go.Figure(data=trace_list, layout=layout)
    plot(fig)
    return fig


### Determine effect of changing learning rate with perplexity fixed ###

### Perplexity was varied in earlier runs, and 15 was chosen because it gave robust convergence. Too small a perplexity resulted in very small clusters. A value of 15 is a reasonable choice here since we have about 76 trials and expect between 3 and 5 clusters ###

In [964]:
# Sweep the t-SNE learning rate at a fixed perplexity of 15 and record the final
# KL divergence of each fit in `kl_dict` (learning rate -> KL divergence).
features = [ 'acetate_yield', 
             'formate_yield',
             'lactate_yield',
             'pyruvate_yield',
             'succinate_yield',
            'growth_rate']
x = st_df[features].values
# `y` is assigned but not used in this cell.
y = st_df[['strain','condition']].values
kl_dict={}
# Standardise once; every fit below uses the same scaled matrix.
x = StandardScaler().fit_transform(x)
# NOTE(review): no random_state is passed to TSNE, so the reported values may
# differ between runs -- confirm whether that matters for the comparison.
for learning_rate in [1,2,3,4,5,6,7,8,9,10,15,20,25,30,35,40,45,50,60,75,80,90,100,125,150,175,200]:
    tsn = TSNE(perplexity=15, learning_rate = learning_rate, init='pca', n_iter=20000, n_iter_without_progress=2500, verbose=0).fit(x)
    kl_dict[learning_rate]=tsn.kl_divergence_
    print("The KL Divergence for a learning rate of ", learning_rate," is ", tsn.kl_divergence_)

The KL Divergence for a learning rate of  1  is  0.05194332078099251
The KL Divergence for a learning rate of  2  is  0.05256853997707367
The KL Divergence for a learning rate of  3  is  0.05318477749824524
The KL Divergence for a learning rate of  4  is  0.05202179029583931
The KL Divergence for a learning rate of  5  is  0.052479542791843414
The KL Divergence for a learning rate of  6  is  0.052035413682460785
The KL Divergence for a learning rate of  7  is  0.05231994390487671
The KL Divergence for a learning rate of  8  is  0.05220714956521988
The KL Divergence for a learning rate of  9  is  0.05684839189052582
The KL Divergence for a learning rate of  10  is  0.0528097003698349
The KL Divergence for a learning rate of  15  is  0.05403079092502594
The KL Divergence for a learning rate of  20  is  0.05633304640650749
The KL Divergence for a learning rate of  25  is  0.054994020611047745
The KL Divergence for a learning rate of  30  is  0.05260952189564705
The KL Divergence for a lea

### Final TSNE - Fig6c in article ###

In [976]:
# Final embedding: the five metabolite yields plus growth rate,
# with perplexity=15 and learning_rate=10 (positional arguments of tsne_plot).
features = [ 'acetate_yield', 
             'formate_yield',
             'lactate_yield',
             'pyruvate_yield',
             'succinate_yield',
            'growth_rate']
fig = tsne_plot(features,15,10)

# NOTE(review): this disabled export uses the same file name as the PCA loadings
# export at the end of the notebook; it looks like a copy-paste -- confirm the
# intended name before re-enabling.
#pio.write_image(fig,"Figures/si_fig_glc_pca_loadings.svg",format='svg')


TSNE
Learning Rate:  10
Perplexity:  15


### Effect of adding other features to t-SNE: the extra features only hindered the calculation. It is reasonable to use only the features whose differences we want the algorithm to learn ###

In the following cells, a metabolite name on its own denotes its titer, and a metabolite name followed by "\_yield" denotes its yield.
In some cases the cell density, measured as OD600, is used as a feature; the number after "OD" is the time (in h) at which it was recorded, i.e. OD2 is the OD600 recorded 2 h after the start of fermentation, OD4 is the OD600 recorded 4 h after the start of fermentation, and so on.

In [914]:
# Feature set: the five metabolite yields, biomass yield and growth rate
# (perplexity=15, learning_rate=10).
features = [ 'acetate_yield',
             'formate_yield',
             'lactate_yield', 'biomass_yield',
             'growth_rate', 'pyruvate_yield', 'succinate_yield']
tsne_plot(features,15,10)


TSNE
Learning Rate:  10
Perplexity:  15


In [977]:
# Feature set: metabolite titers, glucose, biomass yield and growth rate.
# Unlike the neighbouring cells this one uses perplexity=10 and learning_rate=300.
features = [ 'acetate', 
             'formate',
             'lactate',
             'pyruvate',
             'succinate','glucose',
            'biomass_yield',
            'growth_rate']
tsne_plot(features, 10, 300)

TSNE
Learning Rate:  300
Perplexity:  10


In [905]:
# Feature set: metabolite titers, glucose and growth rate
# (perplexity=15, learning_rate=10).
features = ['acetate', 
            'formate', 
            'lactate', 
            'pyruvate', 'growth_rate',
            'succinate', 'glucose']
tsne_plot(features,15,10)

TSNE
Learning Rate:  10
Perplexity:  15


In [907]:
# Feature set: metabolite titers, glucose and the OD600 readings od0..od8
# (perplexity=15, learning_rate=10).
features = ['acetate', 
            'formate', 
            'lactate', 
            'pyruvate',
            'succinate', 'glucose','od0','od2','od4','od6','od8']
tsne_plot(features,15,10)

TSNE
Learning Rate:  10
Perplexity:  15


In [909]:
# Feature set: metabolite titers and yields, glucose, growth rate and the
# OD600 readings od0..od8 (perplexity=15, learning_rate=10).
features = ['acetate', 'acetate_yield',
            'formate', 'formate_yield',
            'lactate', 'lactate_yield',
            'pyruvate', 'growth_rate', 'pyruvate_yield', 'succinate_yield',
            'succinate', 'glucose','od0','od2','od4','od6','od8']
tsne_plot(features,15,10)

TSNE
Learning Rate:  10
Perplexity:  15


In [913]:
# Feature set: the five metabolite yields, biomass yield and growth rate
# (perplexity=15, learning_rate=10). Same features and parameters as the first
# cell of this section.
features = [ 'acetate_yield',
             'formate_yield',
             'lactate_yield', 'biomass_yield',
             'growth_rate', 'pyruvate_yield', 'succinate_yield']
tsne_plot(features,15,10)

TSNE
Learning Rate:  10
Perplexity:  15


In [911]:
# Feature set: every available column -- titers, yields (including biomass
# yield), glucose, growth rate and od0..od8 (perplexity=15, learning_rate=10).
features = ['acetate', 'acetate_yield',
            'formate', 'formate_yield',
            'lactate', 'lactate_yield', 'biomass_yield', 'od0', 'od2','od4','od6','od8',
            'pyruvate', 'growth_rate', 'pyruvate_yield', 'succinate_yield',
            'succinate', 'glucose']
tsne_plot(features,15,10)

TSNE
Learning Rate:  10
Perplexity:  15


### PCA Scores for first two PCs ###

In [934]:
features = [ 'acetate_yield', 
             'formate_yield',
             'lactate_yield',
             'pyruvate_yield',
             'succinate_yield',
            'growth_rate']

# Fit the notebook-level `pca` BEFORE calling pca_plot: pca_plot was originally
# called first, while `pca` did not exist yet on a fresh kernel.
# The fitted `pca` is reused by the explained-variance and loadings cells below.
x = st_df[features].values
y = st_df[['strain','condition']].values

x = StandardScaler().fit_transform(x)
pca=PCA(n_components=5)
pcs = pca.fit_transform(x)
principal_df = pd.DataFrame(data=pcs[:,:4], columns = ['PC1', 'PC2','PC3','PC4'])

# st_df with the first four component scores attached column-wise.
pca_df = st_df.copy(deep=True)
pca_df = pd.concat([pca_df,principal_df], axis=1)

fig = pca_plot(features)
#pio.write_image(fig,"Figures/si_fig_glc_pca_scores.svg",format='svg')


PCA


### Plot cumulative variance represented by each component ###

In [936]:
# Cumulative explained variance of the fitted notebook-level `pca`.
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)

trace_list = []
# x is derived from the number of fitted components. The former hard-coded
# np.arange(1,7) had six points for a five-component PCA, and the trace name
# came from a loop variable (`analyte`) leaked by an earlier cell.
trace_list.append(go.Scatter(x=np.arange(1, len(cumulative_variance) + 1),
                             y=cumulative_variance,
                             mode='lines+markers',
                             line=dict(width=1.5, color='rgb(33,100,175)'),
                             name='Cumulative variance explained'))


# NOTE: this reassigns the global `layout` that pca_plot and tsne_plot read at call time.
layout = go.Layout(height=425, width=405, legend_x= 0.1, legend_y=-0.15, legend_font=dict(family='Myriad Pro', size=12, color='black'), legend_orientation='h',
                   xaxis=dict(title='Number of Components',
                              title_standoff=0.5,
                              titlefont=dict(family='Myriad Pro', size=16, color='black'),
                              showline=True, linewidth=1, linecolor='black', mirror=True, side='bottom',
                              ticks='outside', ticklen=4, tickangle=0, nticks=8,
                              tickfont=dict(size=16, family='Myriad Pro', color='black'), tickcolor='black',
                              showgrid=True,zeroline=True),

                  yaxis=dict(title='Cumulative Variance Explained',
                            titlefont=dict(family='Myriad Pro', size=16, color='black'),

                              anchor='x', side='left', showgrid=True, zeroline=True,
                            tickfont=dict(family='Myriad Pro',size=16, color='black'), tickcolor='black',

                              showline=True, linewidth=1, linecolor='black', mirror=True,
                              ticks='outside', ticklen=4, tickangle=0))


fig = go.Figure(data = trace_list, layout=layout)
plot(fig)
#pio.write_image(fig,"Figures/si_fig_glc_pca_exp_var.svg",format='svg')
#pio.write_image(fig,"Figures/si_fig_glc_pca_exp_var.svg",format='svg')


### Plot loadings of first two PCs ###

In [937]:
# Loadings plot: one line per feature from the origin to its
# (PC1 loading, PC2 loading) taken from the fitted notebook-level `pca`.
trace_list=[]

# `features` must be the same list, in the same order, that `pca` was fitted on,
# because column i of pca.components_ is paired with features[i].
# Note that `i` and `analyte` remain defined at notebook level after this loop.
for i, analyte in enumerate(features):
    trace_list.append(go.Scatter(x=[0]+[pca.components_[0,i]],
                                 y=[0]+[pca.components_[1,i]],
                                 mode='lines', showlegend=False,
                                 line=dict(width=1.5, color='rgb(54, 146, 68)'),
                                 name=analyte))


# NOTE: this reassigns the global `layout` that pca_plot and tsne_plot read at call time.
layout = go.Layout(height=425, width=405, legend_x= 0.1, legend_y=-0.15, legend_font=dict(family='Myriad Pro', size=12, color='black'), legend_orientation='h',
                   xaxis=dict(title='Principal Component 1',
                              title_standoff=0.5,
                              titlefont=dict(family='Myriad Pro', size=16, color='black'),
                              showline=True, linewidth=1, linecolor='black', mirror=True, side='bottom',
                              ticks='outside', ticklen=4, tickangle=0, nticks=8,
                              tickfont=dict(size=16, family='Myriad Pro', color='black'), tickcolor='black',
                              showgrid=True,zeroline=True),
                  
                  yaxis=dict(title='Principal Component 2',
                            titlefont=dict(family='Myriad Pro', size=16, color='black'),

                              anchor='x', side='left', showgrid=True, zeroline=True,
                            tickfont=dict(family='Myriad Pro',size=16, color='black'), tickcolor='black', 

                              showline=True, linewidth=1, linecolor='black', mirror=True,
                              ticks='outside', ticklen=4, tickangle=0))
fig = go.Figure(data=trace_list, layout=layout)
plot(fig)
#pio.write_image(fig,"Figures/si_fig_glc_pca_loadings.svg",format='svg')
#pio.write_image(fig,"Figures/si_fig_glc_pca_loadings.svg",format='svg')


### Plot analyte data for all strains ###

In [963]:
# Bar charts comparing WT yields in the bioreactor against the six microplate
# glucose concentrations. Two figures are drawn:
#   1. Acetate / Formate / Lactate on the left axis plus biomass yield on a
#      secondary right-hand axis (y2).
#   2. Pyruvate / Succinate on their own, tighter y-range.
strain_list = ['WT']
# NOTE(review): 'Growth Rate' appears twice. Only the first occurrence (index 3)
# is ever plotted; the trailing entry is excluded by every slice below. It is
# left untouched because yield_calculator is defined elsewhere and may depend
# on this list's length/order -- confirm before removing.
analyte_list = ['Acetate', 'Formate', 'Lactate', 'Growth Rate', 'Pyruvate', 'Succinate', 'Growth Rate']
condition_list = ['RDM + 1.5 g/L glc', 'RDM + 3.0 g/L glc', 'RDM + 6.0 g/L glc', 'RDM + 9.0 g/L glc', 'RDM + 12.0 g/L glc', 'RDM + 18.0 g/L glc']
condition_titles = ['1.5', '3', '6', '9', '12', '18']

# Six-step diverging ramp: three blue-to-grey shades followed by three
# grey-to-red shades (the shared grey endpoint is dropped from both halves).
colors = ['rgb'+str(color) for color in plotly.colors.n_colors((33,100,175), (234,234,234), 4)][:-1]
colors = colors + ['rgb'+str(color) for color in plotly.colors.n_colors((234,234,234), (217,72,42), 4)][1:]

od_timepoint = 8
hplc_timepoint = 8
# Not referenced in this cell; kept because helpers defined elsewhere may read
# it as a notebook global -- TODO confirm.
pts_per_hour = 6
# Per-strain axis ranges: [0] product-yield axis, [2] biomass-yield axis.
# Entry [1] is not used below (the second figure hard-codes (0, 0.5)).
ranges= [[(0,3), (0,0.35), (0,1.5)]]

# Index windows into analyte_list. The same windows are applied to the yield
# and error vectors, which are presumed to follow analyte_list's order
# (yield_calculator is not visible here -- verify).
main_sel = slice(None, -4)    # Acetate, Formate, Lactate   -> figure 1, left axis
biomass_sel = slice(-4, -3)   # first 'Growth Rate' entry   -> figure 1, right axis
minor_sel = slice(-3, -1)     # Pyruvate, Succinate         -> figure 2

reactor_color = 'rgb(54, 146, 68)'
title_font = dict(family='Myriad Pro', size=18, color='black')
tick_font = dict(family='Myriad Pro', size=16, color='black')
legend_font = dict(family='Myriad Pro', color='black', size=14)
frame = dict(showline=True, linewidth=1, linecolor='black', mirror=True)

# Axis definitions shared by both figures (only the y-range differs).
product_xaxis = dict(frame, title='Product', type='category',
                     titlefont=title_font, side='bottom',
                     ticks='', ticklen=4, tickangle=0, nticks=8,
                     tickfont=tick_font, tickcolor='black',
                     showgrid=False, zeroline=False)
yield_yaxis = dict(frame, title='Product Yield<br>(mmol Product/mmol Glucose)',
                   titlefont=title_font,
                   ticks='outside', ticklen=4, tickangle=0, side='left',
                   tickfont=tick_font, tickcolor='black',
                   showgrid=True, zeroline=False)

for j, strain in enumerate(strain_list):
    if strain != 'WT':
        continue

    print(strain)
    x = list(analyte_list)

    # Bioreactor reference: computed once and reused for both figures (the
    # identical lookup + yield_calculator call used to be repeated per figure).
    # The bioreactor bars are drawn without error bars, so yerrs is unused.
    rep = [trial for trial in reactor.replicate_trials
           if trial.trial_identifier.strain.name == strain][0]
    y, yerrs = yield_calculator((rep, rep), od_timepoint=od_timepoint, hplc_timepoint=hplc_timepoint)

    main_traces = [
        go.Bar(x=x[main_sel],
               y=y[main_sel],
               marker=dict(color=reactor_color, line=dict(width=0, color=reactor_color)),
               name='Bioreactor'),
        # Biomass yield goes on the secondary axis; hidden from the legend so
        # 'Bioreactor' is listed only once.
        go.Bar(x=x[biomass_sel],
               y=y[biomass_sel],
               marker=dict(color=reactor_color, line=dict(width=0, color='black')), showlegend=False,
               name='Bioreactor', yaxis='y2'),
    ]
    minor_traces = [
        go.Bar(x=x[minor_sel],
               y=y[minor_sel],
               marker=dict(color=reactor_color, line=dict(width=0, color='black')), showlegend=True,
               name='Bioreactor'),
    ]

    # Microplate conditions: one yield calculation per condition feeds both
    # figures. Timepoint -1 is passed through to yield_calculator unchanged
    # (presumably "last sample" -- confirm against its definition).
    for i, condition in enumerate(condition_list):
        od_rep = [trial for trial in glc_od.replicate_trials
                  if str(trial.trial_identifier.media) == condition
                  and trial.trial_identifier.strain.name == strain][0]
        hplc_rep = [trial for trial in glc_titers.replicate_trials
                    if str(trial.trial_identifier.media) == condition
                    and trial.trial_identifier.strain.name == strain][0]
        y, y_errs = yield_calculator((od_rep, hplc_rep), od_timepoint=-1, hplc_timepoint=-1)

        main_traces.append(go.Bar(x=x[main_sel],
                                  y=y[main_sel],
                                  # Fixed: error window now matches the x/y window
                                  # (was y_errs[:-3], one element too long).
                                  error_y=dict(type='data', array=y_errs[main_sel], thickness=1.5, width=3, color='black'),
                                  marker=dict(color=colors[i], line=dict(width=0, color=reactor_color)),
                                  name=condition_titles[i]))
        main_traces.append(go.Bar(x=x[biomass_sel],
                                  y=y[biomass_sel],
                                  error_y=dict(type='data', array=y_errs[biomass_sel], thickness=1.5, width=3, color='black'),
                                  marker=dict(color=colors[i], line=dict(width=0, color='black')), showlegend=False,
                                  yaxis='y2',
                                  name=condition_titles[i]))
        minor_traces.append(go.Bar(x=x[minor_sel],
                                   y=y[minor_sel],
                                   error_y=dict(type='data', array=y_errs[minor_sel], thickness=1.5, width=3, color='black'),
                                   marker=dict(color=colors[i], line=dict(width=0, color='black')), showlegend=True,
                                   name=condition_titles[i]))

    # ---- Figure 1: major products + biomass yield on a secondary axis ----
    trace_list = main_traces
    layout = go.Layout(height=450, width=1000, legend_x=1.15, bargroupgap=0.1, bargap=0.4,
                       legend_orientation='v', legend_y=0, legend_font=legend_font,
                       xaxis=product_xaxis,
                       yaxis=dict(yield_yaxis, range=ranges[j][0]),
                       yaxis2=dict(frame, title='Biomass Yield<br>(g Biomass/g Glucose)',
                                   titlefont=title_font, range=ranges[j][2],
                                   ticks='outside', ticklen=4, tickangle=0,
                                   side='right', anchor='x', overlaying='y', nticks=6,
                                   tickfont=tick_font, tickcolor='black',
                                   showgrid=True, zeroline=False))
    fig = go.Figure(data=trace_list, layout=layout)
    plot(fig)#, image='svg')
    #pio.write_image(fig,"Figures/si_fig_6_"+strain+".svg",format='svg')

    # ---- Figure 2: minor products (legend hidden at the layout level) ----
    trace_list = minor_traces
    layout = go.Layout(height=400, width=475, legend_x=1.05, bargroupgap=0.1, bargap=0.4, showlegend=False,
                       legend_orientation='v', legend_y=0, legend_font=legend_font,
                       xaxis=product_xaxis,
                       yaxis=dict(yield_yaxis, range=(0, 0.5)))
    fig = go.Figure(data=trace_list, layout=layout)
    plot(fig)
    # NOTE: same target path as figure 1 above -- enabling both exports as-is
    # would overwrite the first file.
    #pio.write_image(fig,"Figures/si_fig_6_"+strain+".svg",format='svg')


WT



invalid value encountered in double_scalars



In [949]:
# One grouped bar chart per deletion strain: bioreactor yields next to the six
# microplate glucose concentrations. Product yields use the left axis; the last
# analyte_list entry ('Growth Rate') is drawn against a secondary right axis
# titled as biomass yield.
strain_list = ['Δ(adh,pta)-D1', 'Δ(adh,pta)-D28', 'Δ(adh,pta)-D59']
analyte_list = ['Acetate', 'Formate', 'Lactate', 'Pyruvate', 'Succinate', 'Growth Rate']
condition_list = ['RDM + 1.5 g/L glc', 'RDM + 3.0 g/L glc', 'RDM + 6.0 g/L glc', 'RDM + 9.0 g/L glc', 'RDM + 12.0 g/L glc', 'RDM + 18.0 g/L glc']
condition_titles = ['1.5', '3', '6', '9', '12', '18']

# Diverging palette: three blue-to-grey steps then three grey-to-red steps,
# with the shared grey endpoint excluded from both halves.
blue_to_grey = plotly.colors.n_colors((33,100,175), (234,234,234), 4)
grey_to_red = plotly.colors.n_colors((234,234,234), (217,72,42), 4)
colors = (['rgb'+str(color) for color in blue_to_grey[:-1]]
          + ['rgb'+str(color) for color in grey_to_red[1:]])

od_timepoint = 8
hplc_timepoint = 8
pts_per_hour = 6
ranges= [(0,2.5), (0,1)]   # [0] product-yield axis, [1] biomass-yield axis

# Index windows applied identically to labels, yields and error values.
products_sel = slice(None, -1)   # everything except the last analyte
biomass_sel = slice(-1, None)    # last analyte only

reactor_color = 'rgb(54, 146, 68)'
title_font = dict(family='Myriad Pro', size=18, color='black')
tick_font = dict(size=16, family='Myriad Pro', color='black')
# Frame/tick styling common to all three axes.
axis_base = dict(titlefont=title_font, tickfont=tick_font, tickcolor='black',
                 showline=True, linewidth=1, linecolor='black', mirror=True,
                 ticks='outside', ticklen=4, tickangle=0, zeroline=False)

for strain in strain_list:
    print(strain)
    x = list(analyte_list)

    # Bioreactor reference bars (no error bars are attached to these).
    rep = [trial for trial in reactor.replicate_trials
           if trial.trial_identifier.strain.name == strain][0]
    y, yerrs = yield_calculator((rep,rep), od_timepoint=od_timepoint, hplc_timepoint=hplc_timepoint)

    trace_list = [
        go.Bar(x=x[products_sel],
               y=y[products_sel],
               marker=dict(color=reactor_color, line=dict(width=0, color=reactor_color)),
               name='Bioreactor'),
        go.Bar(x=x[biomass_sel],
               y=y[biomass_sel],
               marker=dict(color=reactor_color, line=dict(width=0, color='black')),
               showlegend=False,
               name='Bioreactor', yaxis='y2'),
    ]

    # Microplate bars: one pair of traces (products, biomass) per glucose level.
    for i, condition in enumerate(condition_list):
        od_rep = [trial for trial in glc_od.replicate_trials
                  if str(trial.trial_identifier.media) == condition
                  and trial.trial_identifier.strain.name == strain][0]
        hplc_rep = [trial for trial in glc_titers.replicate_trials
                    if str(trial.trial_identifier.media) == condition
                    and trial.trial_identifier.strain.name == strain][0]
        y, y_errs = yield_calculator((od_rep,hplc_rep), od_timepoint=-1, hplc_timepoint=-1)

        shade = colors[i]
        label = condition_titles[i]
        trace_list.extend([
            go.Bar(x=x[products_sel],
                   y=y[products_sel],
                   error_y=dict(type='data', array=y_errs[products_sel], thickness=1.5, width=3, color='black'),
                   marker=dict(color=shade, line=dict(width=0, color=reactor_color)),
                   name=label),
            # Secondary-axis twin; hidden from the legend to avoid duplicates.
            go.Bar(x=x[biomass_sel],
                   y=y[biomass_sel],
                   error_y=dict(type='data', array=y_errs[biomass_sel], thickness=1.5, width=3, color='black'),
                   marker=dict(color=shade, line=dict(width=0, color='black')),
                   showlegend=False,
                   yaxis='y2',
                   name=label),
        ])

    layout = go.Layout(
        height=425, width=1000, bargroupgap=0.1, bargap=0.4,
        legend_x=0.2, legend_y=-0.2, legend_orientation='h',
        legend_font=dict(family='Myriad Pro', color='black', size=14),
        xaxis=dict(axis_base, title='Product', title_standoff=5,
                   side='bottom', nticks=8, showgrid=False),
        yaxis=dict(axis_base, title='Product Yield<br>(mmol Product/mmol Glucose)',
                   range=ranges[0], side='left', showgrid=True),
        yaxis2=dict(axis_base, title='Biomass Yield<br>(g Biomass/g Glucose)',
                    range=ranges[1], side='right', anchor='x', overlaying='y',
                    showgrid=True),
    )

    if trace_list:
        fig = go.Figure(data=trace_list, layout=layout)
        plot(fig)
        #pio.write_image(fig,"Figures/si_fig_6_"+strain+".svg",format='svg')

Δ(adh,pta)-D1



invalid value encountered in double_scalars



Δ(adh,pta)-D28


Δ(adh,pta)-D59
