# Comparison of reducing-agent tests with the bioreactor test

### Notebook to generate Supplementary Figs. S5 and S6 ###

In [26]:
#Import necessary modules

import impact as                             impt
import impact.plotting as                    implot
from impact.parsers import Parser as parser
import pandas as pd
from openpyxl import load_workbook
import numpy as np
import sys
from plotly import tools, subplots
import plotly.graph_objs as go
from sklearn.manifold import TSNE
import pickle
import plotly.io as pio
from plotly.subplots import make_subplots
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Use plotly's "none" template as the default for every figure built below.
pio.templates.default = "none"
import plotly
# The offline helpers are imported only when running under an IPython kernel.
# NOTE: `plot` (an alias of `plotly.offline.iplot`) is therefore bound only inside
# a notebook; every later `plot(fig)` call raises NameError in a plain script.
if 'ipykernel' in sys.modules:
    from plotly.offline import init_notebook_mode
    from plotly.offline import iplot as plot
    from IPython.display import HTML
    # JavaScript that polls every 250 ms until `window.Plotly` exists, then switches
    # MathJax to the SVG renderer with the STIX-Web font.
    # NOTE(review): the HTML object is built but never passed to `display()` and is
    # not the last expression of the cell, so this script is presumably never
    # injected into the page -- confirm whether `display(HTML(...))` was intended.
    HTML("""
         <script>
          var waitForPlotly = setInterval( function() {
          if( typeof(window.Plotly) !== "undefined" ){
          MathJax.Hub.Config({ SVG: { font: "STIX-Web" }, displayAlign: "center" });
          MathJax.Hub.Queue(["setRenderer", MathJax.Hub, "SVG"]);
          clearInterval(waitForPlotly);}}, 250 );
        </script>
        """
    )
    init_notebook_mode(connected=True)

In [2]:
# Global configuration of the `impact` package. These attributes are set before any
# data is parsed; presumably they are read by `parse_raw_data()` / `calculate()`
# below -- the exact semantics live in the `impact` library, not in this notebook.
impt.settings.perform_curve_fit = False
impt.settings.fit_type = 'gompertz'
impt.settings.outlier_cleaning_flag = False
impt.settings.max_fraction_replicates_to_remove = 0.5
impt.settings.std_deviation_cutoff=0.05
impt.settings.verbose = False
impt.settings.live_calculations = False
impt.settings.use_filtered_data = True

# NOTE(review): the two values below look like smoothing / phase-detection
# hyperparameters (Savitzky-Golay window size, death-phase detection) -- confirm
# against the `impact` documentation before changing them.
impt.settings.death_phase_hyperparameter = 4
impt.settings.savgolFilterWindowSize = 9

### Load raw data ###

In [3]:
# Plate-reader OD data from the reducing-agent plate experiment.
# Result: `red_od`, whose `replicate_trials` are used by the analysis cells below.
data_format='spectromax_OD'
plate_type = '96 Wells'
file_name = 'ReducingAgentTest_02022021.xlsx'
red_od = parser.parse_raw_data(data_format=data_format,file_name=file_name,plate_type=plate_type)
red_od.calculate()

# Titer data for the same plate experiment, parsed with the 'traverse' identifier scheme.
# Result: `red_titers`.
data_format='default_titers'
plate_type = '96 Wells'
file_name = 'ReducingAgentTest_Titers_02022021.xlsx'
red_titers = parser.parse_raw_data(data_format=data_format,file_name=file_name,plate_type=plate_type,id_type='traverse')
red_titers.calculate()

# Bioreactor reference data. Result: `reactor`.
# NOTE: `plate_type` is reset to None here but is NOT passed to `parse_raw_data`,
# so the parser falls back to whatever default it defines for that argument.
data_format='default_titers'
plate_type = None
file_name = 'Bioreactor_Data.xlsx'
reactor = parser.parse_raw_data(data_format=data_format,file_name=file_name,id_type='traverse')
reactor.calculate()



Importing data from ReducingAgentTest_02022021.xlsx...0.2s
Parsing time point list...Parsed 8160 time points in 1.0s
Parsing analyte list...Parsed 96 single trials in 462.3ms
Parsing single trial list...Parsed 25 replicates in 0.1s
Analyzing data...Ran analysis in 2.0s


Importing data from ReducingAgentTest_Titers_02022021.xlsx...0.0s
Parsed 552 timeCourseObjects in 0.247s...Number of lines skipped:  0
Parsing time point list...Parsed 552 time points in 0.4s
Parsing analyte list...Parsed 92 single trials in 2824.8ms
Parsing single trial list...Parsed 24 replicates in 0.2s
Analyzing data...


invalid value encountered in true_divide



Ran analysis in 2.6s


Importing data from Bioreactor_Data.xlsx...0.0s
Parsed 189 timeCourseObjects in 0.077s...Number of lines skipped:  0
Parsing time point list...Parsed 189 time points in 0.0s
Parsing analyte list...Parsed 4 single trials in 47.8ms
Parsing single trial list...Parsed 4 replicates in 0.0s
Analyzing data...No blanks were indicated. Blank subtraction will not be done.Ran analysis in 0.2s



### Write functions to extract required analytes (yields of acetate, formate, lactate, pyruvate, succinate, growth rate and biomass yield) ###

In [4]:
def _exponential_phase_end_index(specific_productivity, search_start=18, gradient_cutoff=-0.005,
                                 rate_floor=0.05, rate_ceiling=0.2):
    """Locate the sample index at which the exponential growth phase is taken to end.

    Parameters
    ----------
    specific_productivity : numpy.ndarray
        Specific growth-rate trace of one trial (``...specific_productivity.data``).
        Must be a numpy array because element-wise comparisons are applied to it.
    search_start : int
        First sample considered; earlier samples are ignored.
    gradient_cutoff : float
        The phase ends at the first sample (from ``search_start``) whose rate
        gradient is no longer below this value, i.e. the rate stopped dropping.
    rate_floor, rate_ceiling : float
        If the rate at that candidate is ``<= rate_floor`` or ``>= rate_ceiling``
        the candidate is rejected and the first sample whose rate is
        ``<= rate_floor`` is used instead.

    Returns
    -------
    int
        Index into ``specific_productivity``. ``np.argmax`` of an all-False mask
        is 0, so when no sample satisfies a criterion the result is ``search_start``.
    """
    rate_gradient = np.gradient(specific_productivity)
    end_index = search_start + np.argmax(rate_gradient[search_start:] >= gradient_cutoff)
    rate_at_end = specific_productivity[end_index]
    if rate_at_end <= rate_floor or rate_at_end >= rate_ceiling:
        end_index = search_start + np.argmax(specific_productivity[search_start:] <= rate_floor)
    return end_index


def _exponential_growth_rate(single_trial, skip_initial=3):
    """Mean OD600 specific productivity between its peak and the end of exponential phase.

    The peak is searched from sample ``skip_initial`` onwards. If the peak lies at
    or after the end index the slice is empty and numpy returns ``nan``.
    """
    rates = single_trial.analyte_dict['OD600'].specific_productivity.data
    end_index = _exponential_phase_end_index(rates)
    start_index = skip_initial + np.argmax(rates[skip_initial:])
    return np.average(rates[start_index:end_index])


def _end_point_od_gain(single_trial):
    """OD600 at the end of exponential phase minus OD600 of the first sample."""
    od600 = single_trial.analyte_dict['OD600']
    end_index = _exponential_phase_end_index(od600.specific_productivity.data)
    return od600.pd_series.iloc[end_index] - od600.pd_series.iloc[0]


def average_exponential_specific_productivity(replicate_trial):
    """Return a list with the exponential-phase growth rate of every single trial.

    The list follows the order of ``replicate_trial.single_trials``.
    """
    return [_exponential_growth_rate(single_trial)
            for single_trial in replicate_trial.single_trials]


def average_exponential_specific_productivity_singletrial(replicate_trial):
    """Return ``{str(replicate_id): growth rate}`` for every single trial."""
    return {str(single_trial.trial_identifier.replicate_id): _exponential_growth_rate(single_trial)
            for single_trial in replicate_trial.single_trials}


def end_point_biomass(replicate_trial):
    """Return a list with the OD600 gain (end of exponential phase - start) per single trial."""
    return [_end_point_od_gain(single_trial)
            for single_trial in replicate_trial.single_trials]


def end_point_biomass_singletrial(replicate_trial):
    """Return ``{str(replicate_id): OD600 gain}`` for every single trial."""
    return {str(single_trial.trial_identifier.replicate_id): _end_point_od_gain(single_trial)
            for single_trial in replicate_trial.single_trials}




def yield_calculator(reps, hplc_timepoint=-1, od_timepoint=-1, analytes=None):
    """Compute yields / growth rate for a pair of replicate trials.

    Parameters
    ----------
    reps : sequence of two replicate trials
        ``(od_rep, hplc_rep)``: the trial carrying OD600 data and the trial carrying
        titer data. For the bioreactor the same object is passed twice.
    hplc_timepoint : int
        Plate experiments: positional index (``.iloc``) into the titer series.
        Bioreactor: upper bound of the slice ``pd_series[:hplc_timepoint]`` whose
        last element is used.
    od_timepoint : int
        Same convention as ``hplc_timepoint``. Used for the OD600 series and for the
        glucose series in the biomass-yield calculation.
    analytes : list of str, optional
        Quantities to compute, in output order. Entries other than 'Growth Rate'
        and 'Biomass' must be keys of the molar-mass table below. When omitted, the
        notebook-level ``analyte_list`` is used, which was the only behaviour before.

    Returns
    -------
    (list, list)
        ``y_vals`` and ``y_errs``, aligned with ``analytes``.

    Notes
    -----
    A trial is treated as a plate experiment when its ``blank`` attribute is truthy,
    and as a bioreactor run otherwise. Plate titers are used as-is with a leading
    minus sign on the glucose ratio (presumably blank-subtracted, so glucose is
    negative -- TODO confirm). Bioreactor errors are reported as 0. A zero mean in
    the plate branch yields ``nan`` errors via division by zero; this is left as-is.
    """
    od_rep, hplc_rep = reps
    if analytes is None:
        # Backward-compatible fallback: read the list defined at notebook level.
        analytes = analyte_list

    y_vals = []
    y_errs = []
    # Molar masses used to convert g/g ratios into mol/mol ratios.
    mass_dict = {'Lactate':89.078,
             'Acetate':59.052,
             'Formate':45.025,
             'Succinate':116.088,
             'Pyruvate':87.06,
             'Glucose':180.156}
    od_to_dry_weight = 0.3  # 0.3 g/L dry weight of cells in 1 unit of OD600

    for analyte in analytes:

        #For growth rate
        if analyte == 'Growth Rate':
            #For plate expt
            if od_rep.blank:
                specific_prod = average_exponential_specific_productivity(od_rep)
                y = np.average(specific_prod)
                y_err = np.std(specific_prod)
            #For bioreactor expt
            else:
                od = od_rep.avg.analyte_dict['OD600'].pd_series[:od_timepoint]
                # Mean of (dOD/dt)/OD over the selected window.
                y = np.average(np.gradient(od.values)/np.gradient(od.index.values)/od.values)
                y_err = 0

        #For biomass yield
        elif analyte == 'Biomass':
            #For plate expt
            if od_rep.blank:
                end_point_ods = end_point_biomass(od_rep)
                glucose = hplc_rep.avg.analyte_dict['Glucose'].pd_series.iloc[od_timepoint]
                glucose_std = hplc_rep.std.analyte_dict['Glucose'].pd_series.iloc[od_timepoint]
                y = -np.average(end_point_ods)/glucose * (od_to_dry_weight)
                # Relative errors of OD gain and glucose combined in quadrature.
                y_err = np.std(end_point_ods)
                y_err = y*np.sqrt((y_err/np.average(end_point_ods))**2 +
                                  (glucose_std/glucose)**2)
            #For bioreactor expt
            else:
                od = od_rep.avg.analyte_dict['OD600'].pd_series
                glucose = hplc_rep.avg.analyte_dict['Glucose'].pd_series
                y = (od[:od_timepoint].iloc[-1] - od.iloc[0])/(glucose[0] - glucose[:od_timepoint].iloc[-1]) * (od_to_dry_weight)
                y_err = 0

        #For analytes
        else:
            product_avg = hplc_rep.avg.analyte_dict[analyte].pd_series
            glucose_avg = hplc_rep.avg.analyte_dict['Glucose'].pd_series
            mass_ratio = mass_dict['Glucose']/mass_dict[analyte]
            #For plate expt
            if hplc_rep.blank:
                product = product_avg.iloc[hplc_timepoint]
                glucose = glucose_avg.iloc[hplc_timepoint]
                product_std = hplc_rep.std.analyte_dict[analyte].pd_series.iloc[hplc_timepoint]
                glucose_std = hplc_rep.std.analyte_dict['Glucose'].pd_series.iloc[hplc_timepoint]
                y = -product/glucose * (mass_ratio)
                # Relative errors of product and glucose combined in quadrature.
                y_err = y*np.sqrt((product_std/product)**2 + (glucose_std/glucose)**2)
            #For bioreactor expt
            else:
                y = product_avg[:hplc_timepoint].iloc[-1]/(glucose_avg[0] - glucose_avg[:hplc_timepoint].iloc[-1]) * (mass_ratio)
                y_err = 0

        y_vals.append(y)
        y_errs.append(y_err)
    return y_vals, y_errs


## Loading individual replicate data for all experimental conditions to perform PCA ##

In [17]:
# Assemble `st_df`: one row per plate replicate (strain x condition) plus one row per
# bioreactor run, holding titers, yields, growth rate and OD600 at selected time points.
strain_list = ['WT', 'Δ(adh,pta)-D1', 'Δ(adh,pta)-D28', 'Δ(adh,pta)-D59']
condition_list = ['RDM + 7.0 a.u. pH', 'RDM + 1.0 mM Cysteine', 'RDM + 1.0 mM DTT', 'RDM + 8.0 mM Na2S', 'RDM + 8.0 a.u. pH']
analyte_list = ['Acetate', 'Formate', 'Lactate', 'Pyruvate', 'Succinate', 'Biomass', 'Growth Rate', 'OD0', 'OD2', 'OD4', 'OD6', 'OD8']
# Molar masses used to convert g/g ratios into mol/mol yields.
mass_dict = {'Lactate':89.078,
         'Acetate':59.052,
         'Formate':45.025,
         'Succinate':116.088,
         'Pyruvate':87.06,
         'Glucose':180.156}

# Frames are collected in a list and concatenated once at the end instead of
# growing `st_df` with a concat per iteration.
st_frames = []

# ---- Plate experiment: one row per replicate --------------------------------------
for strain in strain_list:
    if strain.lower() != 'blank':
        for condition in condition_list:
            temp_df = pd.DataFrame()
            od_rep = [rep for rep in red_od.replicate_trials if rep.trial_identifier.strain.name==strain and str(rep.trial_identifier.media)==condition][0]
            hplc_rep = [rep for rep in red_titers.replicate_trials if rep.trial_identifier.strain.name==strain and str(rep.trial_identifier.media)==condition][0]
            for analyte in ['Glucose']+analyte_list:
                if analyte not in ['Biomass', 'Growth Rate', 'OD0', 'OD2', 'OD4', 'OD6', 'OD8']:
                    # Titers: the column labelled 24 (presumably the 24 h sample -- TODO
                    # confirm) is renamed to the lower-cased analyte name.
                    temp_df = pd.concat([temp_df,hplc_rep.replicate_df[analyte].transpose().rename(columns={24:analyte.lower()})], axis=1)

                elif analyte=='Growth Rate':
                    temp_df = pd.concat([temp_df, pd.Series(average_exponential_specific_productivity_singletrial(od_rep))],axis=1).rename(columns={0:'growth_rate'})

                elif analyte=='Biomass':
                    # 0.3 g/L dry cell weight per unit of OD600.
                    temp_df = pd.concat([temp_df, pd.Series(end_point_biomass_singletrial(od_rep))*0.3],axis=1).rename(columns={0:'biomass'})
                elif 'OD' in analyte:
                    # 'OD<t>': OD600 of every replicate at the time label <t>.
                    od_time = int(analyte.lstrip('OD'))
                    temp_df = pd.concat([temp_df, pd.DataFrame(od_rep.replicate_df['OD600'].transpose()[od_time]).rename(columns={od_time:analyte.lower()})], axis=1)
            temp_df = temp_df.reset_index(level=0)
            temp_df = temp_df.rename(columns={'index':'replicate'})
            # Missing values are replaced by the column mean over the replicates.
            # numeric_only=True keeps this working on pandas >= 2.0, where a frame
            # holding a non-numeric column (e.g. string replicate ids) would raise.
            temp_df = temp_df.fillna(temp_df.mean(numeric_only=True))
            temp_df['biomass_yield'] = -temp_df['biomass']/temp_df['glucose']
            temp_df['acetate_yield'] = -temp_df['acetate']/temp_df['glucose']*mass_dict['Glucose']/mass_dict['Acetate']
            temp_df['formate_yield'] = -temp_df['formate']/temp_df['glucose']*mass_dict['Glucose']/mass_dict['Formate']
            temp_df['lactate_yield'] = -temp_df['lactate']/temp_df['glucose']*mass_dict['Glucose']/mass_dict['Lactate']
            temp_df['pyruvate_yield'] = -temp_df['pyruvate']/temp_df['glucose']*mass_dict['Glucose']/mass_dict['Pyruvate']
            temp_df['succinate_yield'] = -temp_df['succinate']/temp_df['glucose']*mass_dict['Glucose']/mass_dict['Succinate']
            temp_df['strain'] = strain
            temp_df['condition'] = condition
            temp_df['starting_glucose'] = hplc_rep.blank.avg.analyte_dict['Glucose'].pd_series[24]
            temp_df = temp_df.drop(columns=['biomass'])
            temp_df = temp_df.sort_values(by=['strain', 'condition', 'replicate'])
            st_frames.append(temp_df)

# ---- Bioreactor: one row per strain -----------------------------------------------
# `yield_calculator` reads the notebook-level `analyte_list`, so it is narrowed to the
# quantities that function can compute before the calls below.
analyte_list = ['Acetate', 'Formate', 'Lactate', 'Pyruvate', 'Succinate', 'Biomass', 'Growth Rate']
for strain in strain_list:
    rep = [rep for rep in reactor.replicate_trials if rep.trial_identifier.strain.name==strain][0]
    y, _ = yield_calculator([rep,rep],od_timepoint=8, hplc_timepoint=8)
    entry = {analyte.lower()+'_yield': y[i] for i,analyte in enumerate(analyte_list)}
    entry['growth_rate'] = entry.pop('growth rate_yield')
    # `DataFrame.append` was removed in pandas 2.0; build the one-row frame directly.
    temp_df = pd.DataFrame([entry])
    temp_df['strain'] = strain
    temp_df['condition'] = 'reactor'
    temp_df['replicate'] = '1'
    for analyte in ['OD0', 'OD2', 'OD4', 'OD6', 'OD8']:
        temp_df[analyte.lower()] = rep.replicate_df['OD600'].loc[int(analyte.lstrip('OD'))].values[0]

    # Glucose change between time labels 0 and 8 (negative when glucose is consumed).
    temp_df['glucose'] = rep.avg.analyte_dict['Glucose'].pd_series.loc[8] - rep.avg.analyte_dict['Glucose'].pd_series.loc[0]
    # Back-calculate titers from the molar yields so the columns match the plate rows.
    for analyte in ['acetate', 'lactate',  'formate', 'pyruvate', 'succinate']:
        temp_df[analyte] = -temp_df[analyte+'_yield']*mass_dict[analyte.title()]/mass_dict['Glucose']*temp_df['glucose']
    temp_df['starting_glucose'] = rep.avg.analyte_dict['Glucose'].pd_series.loc[0]

    st_frames.append(temp_df)


st_df = pd.concat(st_frames, axis=0)
st_df = st_df.reset_index(level=0, drop=True)


### PCA with 6 components on standard scaled phenotype data ###

In [20]:
# Columns of `st_df` that enter the PCA.
features = ['acetate', 'formate', 'lactate', 'pyruvate', 'succinate', 'growth_rate', 'biomass_yield']

# Labels (strain, condition) and standard-scaled feature matrix.
y = st_df[['strain','condition']].values
x = StandardScaler().fit_transform(st_df[features].values)

# Fit a 6-component PCA; only the first four score columns are kept for plotting.
pca = PCA(n_components=6)
pcs = pca.fit_transform(x)
principal_df = pd.DataFrame(data=pcs[:,:4], columns=['PC' + str(n) for n in range(1, 5)])

# Metadata and remaining columns of `st_df`, joined with the PC scores by row position.
final_df = st_df.copy(deep=True).drop(columns=features+['replicate','glucose','starting_glucose'])
final_df = pd.concat([final_df,principal_df], axis=1)

### Plot cumulative variance represented by each component ###

In [30]:
# Cumulative fraction of variance explained as a function of the number of PCs.
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)

# The x values follow the number of fitted components instead of a hard-coded 1..6,
# and the trace gets its own name instead of reading the loop variable `analyte`
# that merely leaked from an earlier cell.
trace_list = [go.Scatter(x=np.arange(1, len(cumulative_variance) + 1),
                         y=cumulative_variance,
                         mode='lines+markers',
                         line=dict(width=1.5, color='rgb(33,100,175)'),
                         name='Cumulative variance')]


# Axis titles use the nested `title=dict(text=..., font=...)` form; the flat
# `titlefont` attribute is deprecated in plotly.
layout = go.Layout(height=425, width=405, legend_x= 0.1, legend_y=-0.15, legend_font=dict(family='Myriad Pro', size=12, color='black'), legend_orientation='h',
                   xaxis=dict(title=dict(text='Number of Components',
                                         standoff=0.5,
                                         font=dict(family='Myriad Pro', size=16, color='black')),
                              showline=True, linewidth=1, linecolor='black', mirror=True, side='bottom',
                              ticks='outside', ticklen=4, tickangle=0, nticks=8,
                              tickfont=dict(size=16, family='Myriad Pro', color='black'), tickcolor='black',
                              showgrid=True,zeroline=True),

                   yaxis=dict(title=dict(text='Cumulative Variance Explained',
                                         font=dict(family='Myriad Pro', size=16, color='black')),
                              anchor='x', side='left', showgrid=True, zeroline=True,
                              tickfont=dict(family='Myriad Pro',size=16, color='black'), tickcolor='black',
                              showline=True, linewidth=1, linecolor='black', mirror=True,
                              ticks='outside', ticklen=4, tickangle=0))

fig = go.Figure(data = trace_list, layout=layout)
plot(fig)
#pio.write_image(fig,"/Figures/si_fig_red_pca_variance.svg",format='svg')


### Plot scores of first 3 PCs ###

In [31]:
# 3x3 scatter matrix of the first three PC scores. Panel (row k, column l) shows PC(l+1)
# on x and PC(k+1) on y; the diagonal is left empty. Marker symbol encodes the strain,
# marker colour the condition; bioreactor points are larger and green.
strain_list = ['WT', 'Δ(adh,pta)-D1', 'Δ(adh,pta)-D28', 'Δ(adh,pta)-D59']
condition_list = ['RDM + 7.0 a.u. pH', 'RDM + 1.0 mM Cysteine', 'RDM + 1.0 mM DTT', 'RDM + 8.0 mM Na2S', 'RDM + 8.0 a.u. pH']
analyte_list = ['Acetate', 'Formate', 'Lactate', 'Pyruvate', 'Succinate', 'Biomass', 'Growth Rate']

symbols = ['circle', 'x', 'diamond', 'square']

colors = ['rgb(33, 100, 175)',
          'rgb(217, 72, 42)',
          'rgb(146, 54, 132)',
          'rgb(175, 127, 91)',
         'rgb(166.0, 193, 223)']

pc_title_font = dict(family='Myriad Pro', size=16, color='black')

fig = make_subplots(rows=3, cols=3, vertical_spacing=0.05, shared_xaxes=False, shared_yaxes=False, horizontal_spacing=0.05)

for k in range(3):
    for l in range(3):
        if k!=l:
            for i, strain in enumerate(strain_list):
                # 'reactor' sits at j == 0, so plate condition j maps to colors[j-1].
                for j,condition in enumerate(['reactor'] + condition_list):
                    is_reactor = condition=='reactor'
                    scores = final_df[(final_df['strain']==strain) & (final_df['condition']==condition)]
                    fig.add_trace(go.Scatter(x=scores['PC'+str(l+1)],
                                             y=scores['PC'+str(k+1)],
                                             mode='markers', showlegend=False,
                                             marker=dict(symbol=symbols[i],
                                                         size=12 if is_reactor else 8,
                                                         color='rgb(54, 146, 68)' if is_reactor else colors[j-1],
                                                         line=dict(color='black', width=0.5))),
                                  row=k+1, col=l+1)

        # Axis titles only on the outer edge of the grid: bottom row and left column.
        # The nested `title=dict(...)` form replaces the deprecated `titlefont`.
        if k==2:
            fig.update_xaxes(title=dict(text='Principal Component '+str(l+1), standoff=0.5, font=pc_title_font),
                             ticks='outside', ticklen=4, tickangle=0, nticks=8,
                             row=k+1, col=l+1)
        if l==0:
            fig.update_yaxes(title=dict(text='Principal Component '+str(k+1), standoff=0, font=pc_title_font),
                             ticks='outside', ticklen=4, tickangle=0,
                             row=k+1, col=l+1)

# The former `fig.update_xaxes(ticks='')` inside the loop was dropped: it targeted
# every x axis and was overwritten by the `ticks='outside'` update below, so it had
# no effect on the final figure.
fig.update_layout(height=800, width=800, legend_x= 0.1, legend_y=-0.15, legend_font=dict(family='Myriad Pro', size=12, color='black'), legend_orientation='h')
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True, side='bottom',
                 ticks='outside', ticklen=4, tickangle=0, nticks=8,
                 tickfont=dict(size=16, family='Myriad Pro', color='black'), tickcolor='black',
                 showgrid=True,zeroline=True)
fig.update_yaxes(showgrid=True, zeroline=True,
                 tickfont=dict(family='Myriad Pro',size=16, color='black'), tickcolor='black',
                 showline=True, linewidth=1, linecolor='black', mirror=True,
                 ticks='outside', ticklen=4, tickangle=0)
plot(fig)
#pio.write_image(fig,"/Figures/si_fig_red_pca_scores.svg",format='svg')


### Plot loadings of first 3 PCs ###

In [32]:
# 3x3 matrix of PCA loadings. In panel (row k, column l) every feature is drawn as a
# line from the origin to (components_[l, i], components_[k, i]); the diagonal is empty.
loading_title_font = dict(family='Myriad Pro', size=16, color='black')

fig = make_subplots(rows=3, cols=3, vertical_spacing=0.075, shared_xaxes=False, shared_yaxes=False, horizontal_spacing=0.075)
for k in range(3):
    for l in range(3):
        if k!=l:
            for i, analyte in enumerate(features):
                fig.add_trace(go.Scatter(x=[0]+[pca.components_[l,i]],
                                         y=[0]+[pca.components_[k,i]],
                                         mode='lines', showlegend=False,
                                         line=dict(width=1.5, color='rgb(54, 146, 68)'),
                                         name=analyte), row=k+1, col=l+1)

        # Axis titles are set once per panel rather than once per feature, and for the
        # whole bottom row / left column (diagonal panels included) so that every row
        # and column of the grid is labelled, matching the PC-scores figure.
        # The nested `title=dict(...)` form replaces the deprecated `titlefont`.
        if k==2:
            fig.update_xaxes(title=dict(text='Principal Component '+str(l+1), standoff=0.5, font=loading_title_font),
                             ticks='outside', ticklen=4, tickangle=0, nticks=8,
                             row=k+1, col=l+1)
        if l==0:
            fig.update_yaxes(title=dict(text='Principal Component '+str(k+1), standoff=0, font=loading_title_font),
                             ticks='outside', ticklen=4, tickangle=0,
                             row=k+1, col=l+1)

# The former `fig.update_xaxes(ticks='')` inside the loop was dropped: it was
# overwritten by the `ticks='outside'` update below and never affected the figure.
fig.update_layout(height=800, width=800, legend_x= 0.1, legend_y=-0.15, legend_font=dict(family='Myriad Pro', size=12, color='black'), legend_orientation='h')
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True, side='bottom',
                 ticks='outside', ticklen=4, tickangle=0, nticks=8,
                 tickfont=dict(size=16, family='Myriad Pro', color='black'), tickcolor='black',
                 showgrid=False,zeroline=True)
fig.update_yaxes(showgrid=False, zeroline=True,
                 tickfont=dict(family='Myriad Pro',size=16, color='black'), tickcolor='black',
                 showline=True, linewidth=1, linecolor='black', mirror=True,
                 ticks='outside', ticklen=4, tickangle=0)

plot(fig)
#pio.write_image(fig,"Figures/si_fig_red_pca_loadings.svg",format='svg')


### Plotting function for t-SNE ###

In [33]:
# Notebook-level layout, colour palette and marker symbols read by `tsne_plot` below.
# Axis titles use the nested `title=dict(text=..., font=...)` form; the flat
# `titlefont` attribute is deprecated in plotly.
layout = go.Layout(height=475, width=430, legend_x= 0.1, legend_y=-0.15, legend_font=dict(family='Myriad Pro', size=12, color='black'), legend_orientation='h',
                   xaxis=dict(title=dict(text='Component 1',
                                         standoff=0.5,
                                         font=dict(family='Myriad Pro', size=16, color='black')),
                              showline=True, linewidth=1, linecolor='black', mirror=True, side='bottom',
                              ticks='outside', ticklen=4, tickangle=0, nticks=8,
                              tickfont=dict(size=16, family='Myriad Pro', color='black'), tickcolor='black',
                              showgrid=True,zeroline=True),

                   yaxis=dict(title=dict(text='Component 2',
                                         font=dict(family='Myriad Pro', size=16, color='black')),
                              anchor='x', side='left', showgrid=True, zeroline=True,
                              tickfont=dict(family='Myriad Pro',size=16, color='black'), tickcolor='black',
                              showline=True, linewidth=1, linecolor='black', mirror=True,
                              ticks='outside', ticklen=4, tickangle=0))


# One colour per plate condition, in the order of `condition_list`.
colors = ['rgb(33, 100, 175)',
          'rgb(217, 72, 42)',
          'rgb(146, 54, 132)',
          'rgb(175, 127, 91)',
         'rgb(166.0, 193, 223)']

# One marker symbol per strain, in the order of `strain_list`.
symbols = ['circle', 'x', 'diamond', 'square']

def tsne_plot(features,perplexity=10,learning_rate=250,random_state=None):
    """Embed the standard-scaled phenotype data with t-SNE and plot the 2-D map.

    Parameters
    ----------
    features : list of str
        Columns of the notebook-level ``st_df`` used as input features.
    perplexity, learning_rate : float
        Passed straight to ``sklearn.manifold.TSNE``.
    random_state : int or None
        Seed forwarded to ``TSNE``. The default ``None`` keeps the previous unseeded
        behaviour; pass an integer to make the embedding reproducible.

    Returns
    -------
    plotly.graph_objs.Figure
        The figure, which is also displayed through ``plot``.

    Notes
    -----
    Reads the notebook-level ``st_df``, ``strain_list``, ``condition_list``,
    ``colors``, ``symbols`` and ``layout``. Only the bioreactor traces carry a name
    and appear in the legend.
    NOTE(review): ``n_iter`` is the parameter name this notebook was written against;
    recent scikit-learn releases reportedly call it ``max_iter`` -- confirm against the
    installed version.
    """
    x = StandardScaler().fit_transform(st_df[features].values)

    tsn = TSNE(perplexity=perplexity, learning_rate = learning_rate, init='pca', n_iter=20000,
               n_iter_without_progress=2500, verbose=0, random_state=random_state).fit_transform(x)
    tsn_df = pd.DataFrame(data=tsn, columns = ['tsn1', 'tsn2'])
    # Row-position join: relies on `st_df` carrying a 0..n-1 index.
    tsne_df = pd.concat([st_df.copy(deep=True),tsn_df], axis=1)
    print("TSNE")
    print("Learning Rate: ", learning_rate)
    print("Perplexity: ",perplexity)
    trace_list = []

    for i, strain in enumerate(strain_list):
        # 'reactor' sits at j == 0, so plate condition j maps to colors[j-1].
        for j,condition in enumerate(['reactor'] + condition_list):
            # Both masks are built from `tsne_df`; the reactor branch previously mixed
            # in a mask from the unrelated global `final_df`.
            points = tsne_df[(tsne_df['strain']==strain) & (tsne_df['condition']==condition)]
            if condition!='reactor':
                trace_list.append(go.Scatter(x=points['tsn1'],
                                             y=points['tsn2'],
                                             mode='markers', showlegend=False,
                                             marker=dict(symbol=symbols[i], size=8, color=colors[j-1],
                                                         line=dict(color='black', width=0.5))))
            else:
                trace_list.append(go.Scatter(x=points['tsn1'],
                                             y=points['tsn2'],
                                             mode='markers', name=strain,
                                             marker=dict(symbol=symbols[i], size=12, color='rgb(54, 146, 68)',
                                                         line=dict(color='black', width=0.5))))
    fig = go.Figure(data=trace_list, layout=layout)
    plot(fig)
    return fig


In [34]:
# Features for the t-SNE map: the five molar product yields plus the growth rate.
# NOTE: this rebinds the notebook-level `features` that the PCA cells defined earlier.
features = [product + '_yield' for product in ('acetate', 'formate', 'lactate', 'pyruvate', 'succinate')]
features.append('growth_rate')

fig = tsne_plot(features, perplexity=15, learning_rate=10)
#pio.write_image(fig,"/Figures/si_fig_red_tsne.svg",format='svg')


TSNE
Learning Rate:  10
Perplexity:  15


### Plot analyte data for all strains ###

In [65]:
# One grouped bar chart per strain: bioreactor values next to the five plate conditions.
# All entries of `analyte_list` except the last go on the primary y axis; the last
# entry is drawn against the secondary axis (y2).
strain_list = ['WT','Δ(adh,pta)-D1', 'Δ(adh,pta)-D28', 'Δ(adh,pta)-D59']
# `yield_calculator` reads this notebook-level list; it also fixes the bar order.
analyte_list = ['Acetate', 'Formate', 'Lactate', 'Pyruvate', 'Succinate', 'Growth Rate']
condition_list = ['RDM + 7.0 a.u. pH', 'RDM + 1.0 mM Cysteine', 'RDM + 1.0 mM DTT', 'RDM + 8.0 mM Na2S', 'RDM + 8.0 a.u. pH']
condition_titles = ['Base Media', 'Cysteine', 'DTT', 'Sulfide', 'pH 8']

colors = ['rgb(33, 100, 175)',
          'rgb(217, 72, 42)',
          'rgb(146, 54, 132)',
          'rgb(175, 127, 91)',
         'rgb(166.0, 193, 223)']

# Time points used for the bioreactor series (see `yield_calculator`).
od_timepoint = 8
hplc_timepoint = 8

# The layout does not depend on the strain, so it is built once.
# Axis titles use the nested `title=dict(text=..., font=...)` form; the flat
# `titlefont` attribute is deprecated in plotly.
layout = go.Layout(height=400, width=925, legend_x= 0.1, bargroupgap=0.1, bargap=0.4,
                   legend_orientation='h',legend_y=-0.2, legend_font=dict(family='Myriad Pro', color='black', size=14),

                   xaxis=dict(title=dict(text='Product', standoff=5,
                                         font=dict(family='Myriad Pro', size=18, color='black')),
                              showline=True, linewidth=1, linecolor='black', mirror=True, side='bottom',
                              ticks='outside', ticklen=4, tickangle=0, nticks=8,
                              tickfont=dict(size=16, family='Myriad Pro', color='black'), tickcolor='black',
                              showgrid=False,zeroline=False),

                   yaxis=dict(title=dict(text='Product Yield<br>(mmol Product/mmol Glucose)',
                                         font=dict(family='Myriad Pro', size=18, color='black')),
                              showline=True, linewidth=1, linecolor='black', mirror=True, range=(0,1.8),
                              ticks='outside', ticklen=4, tickangle=0, side='left', nticks=5,
                              tickfont=dict(family='Myriad Pro',size=16, color='black'), tickcolor='black',
                              showgrid=True,zeroline=False),

                   # NOTE(review): the quantity drawn on y2 is the LAST entry of
                   # `analyte_list`, which is 'Growth Rate' here, yet the axis is titled
                   # "Biomass Yield". Either the title or the analyte list looks wrong --
                   # confirm which one the figure is meant to show.
                   yaxis2=dict(title=dict(text='Biomass Yield<br>(g Biomass/g Glucose)',
                                          font=dict(family='Myriad Pro', size=18, color='black')),
                               showline=True, linewidth=1, linecolor='black', mirror=True, range=(0,0.9),nticks=4,dtick=0.25,
                               ticks='outside', ticklen=4, tickangle=0, side='right', anchor='x', overlaying='y',
                               tickfont=dict(family='Myriad Pro',size=16, color='black'), tickcolor='black',
                               showgrid=True,zeroline=False))

for strain in strain_list:
    print(strain)
    trace_list = []
    x = list(analyte_list)

    # Bioreactor reference: the same trial provides both OD and titer data.
    reactor_rep = [rep for rep in reactor.replicate_trials if rep.trial_identifier.strain.name == strain][0]
    y, _ = yield_calculator((reactor_rep,reactor_rep), od_timepoint=od_timepoint, hplc_timepoint=hplc_timepoint)

    trace_list.append(go.Bar(x=x[:-1],
                             y=y[:-1],
                             marker=dict(color='rgb(54, 146, 68)', line=dict(width=0, color='rgb(54, 146, 68)')),
                             name='Bioreactor'))

    trace_list.append(go.Bar(x=x[-1:],
                             y=y[-1:],
                             marker=dict(color='rgb(54, 146, 68)', line=dict(width=0, color='black')),showlegend=False,
                             name='Bioreactor', yaxis='y2'))

    # Plate conditions: last time point of both series, with propagated error bars.
    for i, condition in enumerate(condition_list):
        od_rep = [rep for rep in red_od.replicate_trials if str(rep.trial_identifier.media)==condition and rep.trial_identifier.strain.name==strain][0]
        hplc_rep = [rep for rep in red_titers.replicate_trials if str(rep.trial_identifier.media)==condition and rep.trial_identifier.strain.name==strain][0]
        y, y_errs = yield_calculator((od_rep,hplc_rep), od_timepoint=-1, hplc_timepoint=-1)

        trace_list.append(go.Bar(x=x[:-1],
                                 y=y[:-1],
                                 error_y=dict(type='data', array=y_errs[:-1], thickness=1.5, width=3, color='black'),
                                 marker=dict(color=colors[i], line=dict(width=0, color='rgb(54, 146, 68)')), width=0.09,
                                 name=condition_titles[i]))

        trace_list.append(go.Bar(x=x[-1:],
                                 y=y[-1:],
                                 error_y=dict(type='data', array=y_errs[-1:], thickness=1.5, width=3, color='black'),
                                 marker=dict(color=colors[i], line=dict(width=0, color='black')),showlegend=False, width=0.09,
                                 yaxis='y2',
                                 name=condition_titles[i]))

    fig = go.Figure(data=trace_list, layout=layout)
    plot(fig)
    #pio.write_image(fig,"Figures/si_fig_5_"+strain+".svg",format='svg')


WT



invalid value encountered in double_scalars



Δ(adh,pta)-D1


Δ(adh,pta)-D28


Δ(adh,pta)-D59
