In [8]:
import pandas as pd
import numpy as np
from pathlib import Path

import sys
sys.path.append('../../code/')
import leakage

# This notebook is used to map intracellular data to extracelluar conditions
This is necessary to get the most relevant intracelluar concentrations from literature values

In [5]:
data_folder = Path('../../data')
figure_folder = Path('../../Figures/')
div_folder = Path('../../../data/this_project/5_div/')


## Get extracellular data

In [96]:
sintef_fn = data_folder / 'this_project/1_e_coli_batch_cultures/1ABE_merged_metabolomics_data.csv'
paczia_fn = data_folder / 'paczia_2012' / 'e_coli'/'e_coli_exometabolites.csv'
vila_fn = data_folder / 'vila_2023' / 'Targeted_LCMS.csv'


In [97]:
sintef_df = pd.read_csv(sintef_fn, index_col=0)
# Only intrerested in the absolute quantification
sintef_df = sintef_df.loc[sintef_df['Concentration [uM]'].notna()]

In [34]:
paczia_df, paczia_df_std = leakage.get_concentrations(paczia_fn.parent, 'e_coli')

In [110]:
vila_df = pd.read_csv(vila_fn)
vila_df = vila_df.loc[vila_df.Strain == 'Ecoli']

## Mapping


In [107]:
mapping_df = pd.read_csv(data_folder / 'this_project/5_div/5B_id_mapping.csv', index_col=0)
met_abrv_to_id = mapping_df['Ecoli metabolite'].to_dict()
met_abrv_to_name = mapping_df['Metabolite name'].to_dict()
met_name_to_abrv_paczia = {value:key for key, value in met_abrv_to_name.items()}


## Find conditions

In [188]:
conditions_metabolites = []
for cs in sintef_df['Carbon source'].unique():
    df_cs = sintef_df.loc[sintef_df['Carbon source']==cs]
    for m in df_cs.Metabolite.unique():
        for phase in ['Stationary', 'Exponential']:
            conditions_metabolites.append([cs, phase, m])

for m in paczia_df.columns:
    m_name = met_abrv_to_name[m]
    for phase in ['Stationary', 'Exponential']:
        conditions_metabolites.append(['Glucose', phase, m_name])



In [189]:
vila_temp = vila_df.groupby(['Carbon_Source', 'Metabolite', 'Timepoint']).agg({'Strain':'first'}).reset_index()
vila_temp['Phase'] = 'Unknown'
vila_list = vila_temp[['Carbon_Source','Phase', 'Metabolite']].values.tolist()

In [210]:
conditions_df = pd.DataFrame(conditions_metabolites + vila_list, columns= ['Carbon source', 'Phase', 'Metabolite'])



# Get intracellular data

In [296]:
all_conc_fn = data_folder / 'this_project' / '5_div' / '5E_intracellular_concentrations.csv'
intra_df = pd.read_csv(all_conc_fn, index_col=0)


# Map the different conditions/extracellular metabolites to intracellular values
We don't differ between shake flask and bioreactor, as the difference seems small (Thorfinnsdottir et al., https://www.mdpi.com/2218-1989/13/2/150)

In [299]:

for i, row in conditions_df.iterrows():
    met = row['Metabolite']
    # print(i, row)
    intra_i = intra_df.loc[intra_df.Metabolite==met]
    if not len(intra_i):
        conditions_df.at[i, 'Intracellular concentration [uM]'] = np.nan
        continue
    minimal_idx = intra_i['Minimal/complex']=='Minimal'
    if np.sum(minimal_idx):
        idx = minimal_idx
        if row['Phase'] == 'Exponential':
            log_phase_idx = intra_i.loc[idx].growth_status.str.lower().str.contains('log')
            if np.sum(log_phase_idx):
                idx = idx & log_phase_idx
        elif row['Phase'] == 'Stationary':
            stat_phase_idx = intra_i.loc[idx].growth_status.str.lower().str.contains('stationary')
            if np.sum(stat_phase_idx):
                idx = idx & stat_phase_idx

        # Carbon source
        cs_idx = intra_i.loc[idx, 'Carbon source'].str.lower() == row['Carbon source'].lower()
        if np.sum(cs_idx):
            idx = idx & cs_idx
    else:
        idx = np.ones(len(intra_i)).astype(bool)
        
    concentration = intra_i.loc[idx].concentration.mean()
    n = len(intra_i.loc[idx])
    matched_conc_idx = ",".join([str(x) for x in intra_i.loc[idx].index])
    if n == 1:
        error = intra_i.loc[idx].error.mean()
        sem = error/1.96
    else:
        sem = intra_i.loc[idx, 'concentration'].sem()
        error = 1.96*sem
    
    conditions_df.at[i,'Intracellular concentration [uM]'] = concentration
    conditions_df.at[i,'Error [uM]'] = error
    conditions_df.at[i,'SEM [uM]'] = sem
    conditions_df.at[i,'# values'] = n
    conditions_df.at[i,'idx concentration sheet'] = matched_conc_idx
    conditions_df.at[i,'Min'] = intra_i.loc[idx].concentration.min()
    conditions_df.at[i,'Max'] = intra_i.loc[idx].concentration.max()


In [300]:
conditions_fn = data_folder / 'this_project/4_paired_metabolomics_live_dead/4F_mapped_intracellular_conc.csv'

conditions_df.to_csv()

Unnamed: 0,Carbon source,Phase,Metabolite,Intracellular concentration [uM],Error [uM],# values,idx concentration sheet,SEM [uM],Min,Max
0,L-malate,Stationary,Alanine,358.0,0.000000,1.0,10,0.000000,358.000000,358.0
1,L-malate,Exponential,Alanine,1197.9,825.752376,5.0,1615131214,421.302233,349.166667,2550.0
2,L-malate,Stationary,Alpha-aminoadipate,,,,,,,
3,L-malate,Exponential,Alpha-aminoadipate,,,,,,,
4,L-malate,Stationary,Asparagine,103.0,0.000000,1.0,34,0.000000,103.000000,103.0
...,...,...,...,...,...,...,...,...,...,...
691,Pyruvate,Unknown,alpha-Ketoglutaric acid,,,,,,,
692,Pyruvate,Unknown,alpha-Ketoglutaric acid,,,,,,,
693,Pyruvate,Unknown,beta-Hydroxybutyric acid,,,,,,,
694,Pyruvate,Unknown,beta-Hydroxybutyric acid,,,,,,,
