# Compute heavy fractions

In [1]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
from scipy.stats import binom

In [2]:
# Data directories, relative to the notebook's working directory.
input_data_path = Path('00_input_data')  # standard-concentration CSVs are read from here
mid_data_path = Path('01_mid_data')  # MID tables are read from here
heavy_fractions_path = Path('02_heavy_fractions')  # heavy-fraction CSVs are written here

In [3]:
def _index_levels_to_int(df: pd.DataFrame, levels: list, axis: int) -> None:
    for level in levels:
        df.rename(int, level=level, axis=axis, inplace=True)

In [4]:
def import_mids(file_name: Path) -> pd.DataFrame:
    """Read a mass isotopomer distribution (MID) table from a CSV file.

    The file is parsed with two header rows and five index columns. Index
    levels 2-4 and column level 1 are then converted to ``int`` labels.

    Args:
        file_name: path of the CSV file to read.

    Returns:
        The table with its row index sorted.
    """
    table = pd.read_csv(file_name, header=[0, 1], index_col=[0, 1, 2, 3, 4])
    # (axis, levels) pairs whose labels must become integers: rows first, then columns.
    for axis, levels in ((0, [2, 3, 4]), (1, [1])):
        _index_levels_to_int(table, levels, axis=axis)
    return table.sort_index()

In [5]:
# move to simpleflux package
def binomial_mid(n: int, p_heavy: float = 0.0107) -> np.ndarray:
    """Return the binomial mass isotopomer distribution over ``n`` atoms.

    Args:
        n: number of atoms; the result has ``n + 1`` entries (M+0 .. M+n).
        p_heavy: probability that a single atom is heavy. The default of
            0.0107 is presumably the natural 13C abundance (TODO confirm).

    Returns:
        1-D float array whose k-th entry is the Binomial(n, p_heavy)
        probability of exactly k heavy atoms.
    """
    # One vectorized pmf evaluation over k = 0..n instead of a Python-level loop.
    return binom.pmf(np.arange(n + 1), n, p_heavy)

## Import MID data
We use only cell extract without standards, and only well-measured metabolites

### U-13C-methionine

In [6]:
# Load the U-13C-methionine MID table and preview its first rows.
met_mids = import_mids(mid_data_path / 'U13C-met_mids.csv')
met_mids.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,metabolite,hcys,hcys,cyst,cyst,cys,met,met,met,sam,sam,sam,sah,sah
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mi,0,4,0,4,0,0,4,5,0,4,5,0,4
matrix,cell_type,time_minutes,with_standards,replicate_nr,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
Cells,BJ-RAS,5,0,1,,,1.0,0.0,1.0,0.049983,0.038196,0.911821,0.869159,0.0,0.130841,1.0,0.0
Cells,BJ-RAS,5,0,2,,,1.0,0.0,1.0,0.039748,0.037279,0.922972,0.843118,0.0,0.156882,1.0,0.0
Cells,BJ-RAS,15,0,1,,,1.0,0.0,1.0,0.025675,0.039879,0.934446,0.510197,0.019301,0.470503,0.0,1.0
Cells,BJ-RAS,15,0,2,,,1.0,0.0,1.0,0.028325,0.042716,0.928959,0.504703,0.021287,0.47401,0.0,1.0
Cells,BJ-RAS,30,0,1,,,1.0,0.0,1.0,0.027776,0.043632,0.928592,0.252247,0.033658,0.714094,0.0,1.0


### D4-homocysteine

In [7]:
# Load the D4-homocysteine MID table and preview its first rows.
hcys_mids = import_mids(mid_data_path / 'D4-hcys_mids.csv')
hcys_mids.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,metabolite,hcys,hcys,cyst,cyst,cys,met,met,sam,sam,sah,sah
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mi,0,4,0,4,0,0,4,0,4,0,4
matrix,cell_type,time_minutes,with_standards,replicate_nr,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
Cells,BJ-RAS,5,0,1,0.025623,0.974377,1.0,0.0,1.0,0.834521,0.165479,0.938673,0.061327,0.906442,0.093558
Cells,BJ-RAS,5,0,2,0.022464,0.977536,1.0,0.0,1.0,0.80732,0.19268,0.939687,0.060313,0.898653,0.101347
Cells,BJ-RAS,15,0,1,0.02217,0.97783,1.0,0.0,1.0,0.83522,0.16478,0.893253,0.106747,0.850411,0.149589
Cells,BJ-RAS,15,0,2,0.015744,0.984256,1.0,0.0,1.0,0.812176,0.187824,0.878791,0.121209,0.828471,0.171529
Cells,BJ-RAS,30,0,1,0.026149,0.973851,0.966616,0.033384,1.0,0.86272,0.13728,0.800938,0.199062,0.769718,0.230282


## Estimate tracer purity

The expected tracer MID is $Bin(n, p_t)$ where $p_t$ is the purity. For the $n$'th MI the expected isotopomer fraction is $x_n = p_t^n$, so a simple estimate of the tracer purity is $p_t = (x_n)^{1/n}$.

### U-13C-methionine

In [8]:
# Fresh-medium reference sample: matrix 'Medium', cell_type 'none', time 0, no standards.
# Keep the 'met' columns of the first matching row.
met_fresh_medium_mi_fractions = met_mids.loc[('Medium', 'none', 0, 0)]['met'].iloc[0]
met_fresh_medium_mi_fractions

mi
0    0.000996
4    0.037897
5    0.961107
Name: 1, dtype: float64

In [9]:
# Purity estimate p_t = x_5 ** (1/5), taken from the measured MI 5 fraction.
met_tracer_purity = np.power(met_fresh_medium_mi_fractions.loc[5], 1/5)
met_tracer_purity

0.9920975111953164

Check residuals

In [10]:
# Binomial(5, purity) prediction at each measured MI, subtracted from the observation.
# The MI 5 residual is ~0 by construction because the purity was derived from that fraction.
met_expected_mid = [
    binom.pmf(mi, 5, met_tracer_purity)
    for mi in met_fresh_medium_mi_fractions.index
]
met_fresh_medium_mi_fractions - met_expected_mid

mi
0    9.957899e-04
4   -3.811084e-04
5   -2.220446e-16
Name: 1, dtype: float64

### D4-homocysteine

In [11]:
# Fresh-medium reference sample: matrix 'Medium', cell_type 'none', time 0, no standards.
# Keep the 'hcys' columns of the first matching row.
hcys_fresh_medium_mi_fractions = hcys_mids.loc[('Medium', 'none', 0, 0)]['hcys'].iloc[0]
hcys_fresh_medium_mi_fractions

mi
0    0.0013
4    0.9987
Name: 1, dtype: float64

In [12]:
# Purity estimate p_t = x_4 ** (1/4), taken from the measured MI 4 fraction.
hcys_tracer_purity = np.power(hcys_fresh_medium_mi_fractions.loc[4], 1/4)
hcys_tracer_purity

0.9996748734645691

Check residuals

In [13]:
# Binomial(4, purity) prediction at each measured MI, subtracted from the observation.
# The MI 4 residual is ~0 by construction because the purity was derived from that fraction.
hcys_expected_mid = [
    binom.pmf(mi, 4, hcys_tracer_purity)
    for mi in hcys_fresh_medium_mi_fractions.index
]
hcys_fresh_medium_mi_fractions - hcys_expected_mid

mi
0    0.0013
4    0.0000
Name: 1, dtype: float64

## Mixture model

We have a mixture model
$$y = x_0 y^0 + x_1 y^1$$
where $y$ is the observed MID, $y^0$ is the natural MID and $y^1$ is the tracer MID. Solve for coefficients $x$ such that $x_0 + x_1 = 1$. Since we do not measure all mass isotopomers of $y$, we can only determine the measured components up to an unknown scale factor $c$, so that we observe $y' = c y$. Multiplying the above with $c$,
$$y' = c y = c x_0 y^0 + c x_1 y^1$$
Fitting this model to data yields the coefficients $x' = c x$. To recover the fractions $x$, we normalize $x'$ to sum to 1.

For larger metabolites that contain the tracer as a moiety (e.g. SAM) the distribution $y^1$ is a convolution between the tracer distribution and the natural distribution over the remaining carbons.

In [14]:
def convolution_matrix(x_mid: np.array, n_y: int) -> np.array:
    """Build the matrix that convolves ``x_mid`` with any MID of ``n_y + 1`` entries.

    Column ``j`` holds ``x_mid`` shifted down by ``j`` rows, so multiplying the
    matrix by a vector of length ``n_y + 1`` gives their discrete convolution.

    Args:
        x_mid: distribution to convolve with, of length ``n_x + 1``.
        n_y: highest index of the other distribution.

    Returns:
        Float array of shape ``(n_x + n_y + 1, n_y + 1)``.
    """
    x_len = len(x_mid)
    shifted = np.zeros(shape=[x_len + n_y, n_y + 1])
    for col in range(n_y + 1):
        # Place a copy of x_mid starting on the diagonal element of this column.
        shifted[col:col + x_len, col] = x_mid
    return shifted

In [15]:
def convolute(x_mid: np.array, y_mid: np.array) -> np.array:
    """Convolve two mass isotopomer distributions.

    Args:
        x_mid: first distribution.
        y_mid: second distribution.

    Returns:
        The product of the convolution matrix of ``x_mid`` with ``y_mid``,
        of length ``len(x_mid) + len(y_mid) - 1``.
    """
    y_order = len(y_mid) - 1
    conv_mat = convolution_matrix(x_mid, y_order)
    return conv_mat @ y_mid

In [16]:
def heavy_mid(n: int, n_tracer: int, tracer_purity: float) -> np.array:
    """Return the MID of a metabolite that carries the tracer moiety.

    Args:
        n: total number of atoms in the metabolite.
        n_tracer: number of atoms that come from the tracer.
        tracer_purity: per-atom heavy probability within the tracer moiety.

    Returns:
        The tracer's binomial MID when ``n <= n_tracer``; otherwise that MID
        convolved with the default-abundance binomial MID of the remaining
        ``n - n_tracer`` atoms.
    """
    tracer_part = binomial_mid(n_tracer, tracer_purity)
    if n <= n_tracer:
        return tracer_part
    # Larger metabolite: combine the tracer moiety with the rest of the molecule.
    return convolute(tracer_part, binomial_mid(n - n_tracer))

In matrix form, the system to solve is
$$ (y^0 \ y^1) cx = cy $$
where $y, y^0, y^1$ contain the observed MIs only.

In [17]:
def estimate_heavy_fraction(mids: np.array, n_carbons: int, n_tracer_carbons: int, measured_mi: list, tracer_purity: float) -> np.array:
    """Estimate the tracer-derived (heavy) fraction of each observed MID.

    Each row of ``mids`` is fitted by least squares as a mixture of the natural
    MID and the heavy MID, both restricted to the measured mass isotopomers.
    The heavy coefficient is then divided by the sum of both coefficients.

    Args:
        mids: observations, one row per sample and one column per measured MI.
        n_carbons: total number of atoms in the metabolite.
        n_tracer_carbons: number of atoms that come from the tracer.
        measured_mi: indices of the measured mass isotopomers, in column order.
        tracer_purity: per-atom heavy probability of the tracer.

    Returns:
        One heavy fraction per row of ``mids``; all zeros when
        ``n_tracer_carbons`` is 0.
    """
    if n_tracer_carbons == 0:
        return np.zeros(len(mids))

    natural_column = binomial_mid(n_carbons)[measured_mi]
    heavy_column = heavy_mid(n_carbons, n_tracer_carbons, tracer_purity)[measured_mi]
    basis = np.array([natural_column, heavy_column]).T

    # Solve basis @ x' = mids.T for all samples at once; row 0 is natural, row 1 is heavy.
    solution = np.linalg.lstsq(basis, mids.T, rcond=-1)[0]
    # Clip the normalising sum so a near-zero total cannot blow up the ratio.
    total = np.clip(solution.sum(axis=0), 1e-6, 1e+6)
    return solution[1] / total

In [18]:
def estimate_heavy_fraction_pd(mids: pd.DataFrame, n_carbons: int, n_tracer_carbons: int, tracer_purity: float) -> pd.Series:
    """Pandas wrapper around ``estimate_heavy_fraction``.

    Args:
        mids: observations for one metabolite; its columns must have a level
            named 'mi' holding the measured mass isotopomer indices.
        n_carbons: total number of atoms in the metabolite.
        n_tracer_carbons: number of atoms that come from the tracer.
        tracer_purity: per-atom heavy probability of the tracer.

    Returns:
        Series of heavy fractions indexed like ``mids``.
    """
    mi_labels = mids.columns.get_level_values('mi')
    fractions = estimate_heavy_fraction(
        mids, n_carbons, n_tracer_carbons, mi_labels, tracer_purity)
    return pd.Series(fractions, index=mids.index)

## Estimate heavy fractions

In [19]:
# Atom counts per metabolite, keyed by the metabolite labels used in the MID tables.
# n_carbons is passed as the total atom count; the *_n_tracer_carbons dicts give the
# number of tracer-derived atoms for each tracer experiment (0 = cannot be labelled).
# NOTE(review): for the D4-homocysteine tracer the label is presumably deuterium, not
# carbon, although the dict is named "..._carbons" — confirm.
n_carbons = {'cys': 3, 'cyst': 7, 'hcys': 4, 'met': 5, 'sah': 14, 'sam': 15}
met_n_tracer_carbons = {'cys': 0, 'cyst': 4, 'hcys': 4, 'met': 5, 'sah': 4, 'sam': 5}
hcys_n_tracer_carbons = {'cys': 0, 'cyst': 4, 'hcys': 4, 'met': 4, 'sah': 4, 'sam': 4}

### U-13C-methionine

In [20]:
# Restrict to cell extracts without added standards and to the selected metabolites.
met_metabolites_to_use = ['cyst', 'met', 'sam']
met_mids_to_use = met_mids.loc['Cells'].xs(0, level='with_standards')[met_metabolites_to_use]
met_mids_to_use.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,metabolite,cyst,cyst,met,met,met,sam,sam,sam
Unnamed: 0_level_1,Unnamed: 1_level_1,mi,0,4,0,4,5,0,4,5
cell_type,time_minutes,replicate_nr,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
BJ-RAS,5,1,1.0,0.0,0.049983,0.038196,0.911821,0.869159,0.0,0.130841
BJ-RAS,5,2,1.0,0.0,0.039748,0.037279,0.922972,0.843118,0.0,0.156882
BJ-RAS,15,1,1.0,0.0,0.025675,0.039879,0.934446,0.510197,0.019301,0.470503
BJ-RAS,15,2,1.0,0.0,0.028325,0.042716,0.928959,0.504703,0.021287,0.47401
BJ-RAS,30,1,1.0,0.0,0.027776,0.043632,0.928592,0.252247,0.033658,0.714094


In [21]:
# One heavy-fraction column per metabolite, one row per sample.
# Tiny negative values can appear because the least-squares fit is unconstrained.
met_heavy_fractions = pd.DataFrame(
    {
        metabolite: estimate_heavy_fraction_pd(
            met_mids_to_use[metabolite],
            n_carbons=n_carbons[metabolite],
            n_tracer_carbons=met_n_tracer_carbons[metabolite],
            tracer_purity=met_tracer_purity
        )
        for metabolite in met_metabolites_to_use
    },
    index=met_mids_to_use.index
)
met_heavy_fractions.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,cyst,met,sam
cell_type,time_minutes,replicate_nr,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
BJ-RAS,5,1,-4.730827e-07,0.947336,0.128583
BJ-RAS,5,2,-4.730827e-07,0.958151,0.154254
BJ-RAS,15,1,-4.730827e-07,0.972891,0.475179
BJ-RAS,15,2,-4.730827e-07,0.97001,0.479771
BJ-RAS,30,1,-4.730827e-07,0.970564,0.735455


In [22]:
# DataFrame.to_csv does not create missing directories, so make sure the output
# folder exists before writing (keeps Restart & Run All working on a fresh checkout).
heavy_fractions_path.mkdir(parents=True, exist_ok=True)
met_heavy_fractions.to_csv(heavy_fractions_path / '13C-met_heavy_fractions.csv')

### D4-homocysteine

In [23]:
# Restrict to cell extracts without added standards and to the selected metabolites.
hcys_metabolites_to_use = ['cyst', 'hcys', 'met', 'sah', 'sam']
hcys_mids_to_use = hcys_mids.loc['Cells'].xs(0, level='with_standards')[hcys_metabolites_to_use]
hcys_mids_to_use.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,metabolite,cyst,cyst,hcys,hcys,met,met,sah,sah,sam,sam
Unnamed: 0_level_1,Unnamed: 1_level_1,mi,0,4,0,4,0,4,0,4,0,4
cell_type,time_minutes,replicate_nr,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
BJ-RAS,5,1,1.0,0.0,0.025623,0.974377,0.834521,0.165479,0.906442,0.093558,0.938673,0.061327
BJ-RAS,5,2,1.0,0.0,0.022464,0.977536,0.80732,0.19268,0.898653,0.101347,0.939687,0.060313
BJ-RAS,15,1,1.0,0.0,0.02217,0.97783,0.83522,0.16478,0.850411,0.149589,0.893253,0.106747
BJ-RAS,15,2,1.0,0.0,0.015744,0.984256,0.812176,0.187824,0.828471,0.171529,0.878791,0.121209
BJ-RAS,30,1,0.966616,0.033384,0.026149,0.973851,0.86272,0.13728,0.769718,0.230282,0.800938,0.199062


In [24]:
# One heavy-fraction column per metabolite, one row per sample.
# Tiny negative values can appear because the least-squares fit is unconstrained.
hcys_heavy_fractions = pd.DataFrame(
    {
        metabolite: estimate_heavy_fraction_pd(
            hcys_mids_to_use[metabolite],
            n_carbons=n_carbons[metabolite],
            n_tracer_carbons=hcys_n_tracer_carbons[metabolite],
            tracer_purity=hcys_tracer_purity
        )
        for metabolite in hcys_metabolites_to_use
    },
    index=hcys_mids_to_use.index
)
hcys_heavy_fractions

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,cyst,hcys,met,sah,sam
cell_type,time_minutes,replicate_nr,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BJ-RAS,5,1,-4.593566e-07,0.973315,0.159794,0.090056,0.058944
BJ-RAS,5,2,-4.593566e-07,0.976601,0.18627,0.097586,0.057966
BJ-RAS,15,1,-4.593566e-07,0.976907,0.159115,0.144331,0.102804
BJ-RAS,15,2,-4.593566e-07,0.983596,0.181539,0.165651,0.116804
BJ-RAS,30,1,0.03206148,0.972768,0.132411,0.222937,0.192457
BJ-RAS,30,2,0.01740887,0.976848,0.156405,0.29122,0.213659
BJ-RAS,60,1,0.05102135,0.964485,0.230089,0.338568,0.320273
BJ-RAS,60,2,0.05314853,0.968196,0.198892,0.386517,0.31693
BJ-RAS,300,1,0.4291194,0.963461,0.403578,0.558061,0.520881
BJ-RAS,300,2,0.4404467,0.9703,0.501229,0.531719,0.53329


In [25]:
# DataFrame.to_csv does not create missing directories, so make sure the output
# folder exists before writing (keeps Restart & Run All working on a fresh checkout).
heavy_fractions_path.mkdir(parents=True, exist_ok=True)
hcys_heavy_fractions.to_csv(heavy_fractions_path / 'D4-hcys_heavy_fractions.csv')

## Estimate concentrations

Here we use the isotope dilution method to estimate concentration. As the natural MI fraction is virtually zero, the concentration estimate is
$$c = c_\text{std} * \frac{x_\text{mix}}{x_\text{sample} - x_\text{mix}} $$
We here use the effective standard concentration in cell extracts and media, respectively. To obtain cytosolic concentrations from cell extract values, we must multiply by the dilution factor.

In [26]:
# pd.set_eng_float_format(accuracy=2)
# Global pandas display option: render floats in scientific notation with 3 decimals.
pd.set_option('display.float_format', '{:.3E}'.format)

In [27]:
# Per-cell-type volume and cell count, used to convert extract concentrations
# into cellular concentrations.
# NOTE(review): units are not stated here; cell_volume is presumably litres per cell
# and extract_volume litres — confirm.
cell_dilution = pd.DataFrame(
    {
        'cell_type': ['BJ-TERT', 'BJ-RAS'],
        'cell_volume': [2.03e-12, 2.54e-12],
        'cell_number': [380_000, 314_000]
    },
).set_index('cell_type')
cell_dilution['total_cell_volume'] = cell_dilution['cell_volume'] * cell_dilution['cell_number']

# Dilution factor = extract volume / total volume of the extracted cells.
extract_volume = 1e-3
cell_dilution['dilution_factor'] = extract_volume / cell_dilution['total_cell_volume']
cell_dilution

Unnamed: 0_level_0,cell_volume,cell_number,total_cell_volume,dilution_factor
cell_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BJ-TERT,2.03e-12,380000,7.714e-07,1296.0
BJ-RAS,2.54e-12,314000,7.976e-07,1254.0


### U-13C methionine

In [28]:
# known standard concentrations
met_std_concentrations = pd.read_csv(
    input_data_path / 'U13C-met_standard_concentrations.csv',
    sep=';', index_col=[0,1]
)
met_std_concentrations

Unnamed: 0_level_0,Unnamed: 1_level_0,concentration_m
matrix,metabolite,Unnamed: 2_level_1
Cells,hcys,9e-09
Cells,met,3.6e-07
Cells,cyst,9e-08
Cells,sam,9e-09
Cells,sah,9e-09
Medium,met,0.0001
Medium,hcys,1e-05


In [29]:
# select samples that have internal standards added
met_samples_std = met_mids.xs(1, level='with_standards', drop_level=False).index.remove_unused_levels()
met_samples_no_std = met_samples_std.set_levels([0], level='with_standards')

met_mids_for_estimation = pd.concat(
    [
        met_mids.loc[met_samples_std],
        met_mids.loc[met_samples_no_std]
    ]
).droplevel('time_minutes').sort_index()
met_mids_for_estimation

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,metabolite,hcys,hcys,cyst,cyst,cys,met,met,met,sam,sam,sam,sah,sah
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mi,0,4,0,4,0,0,4,5,0,4,5,0,4
matrix,cell_type,with_standards,replicate_nr,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
Cells,BJ-RAS,0,1,,,0.1949,0.8051,1.0,0.02342,0.04568,0.9309,0.02526,0.05264,0.9221,0.0,1.0
Cells,BJ-RAS,0,2,,,0.2449,0.7551,1.0,0.02133,0.04189,0.9368,0.024,0.05385,0.9221,0.0,1.0
Cells,BJ-RAS,1,1,1.0,0.0,0.9548,0.0452,1.0,0.525,0.02325,0.4518,0.1076,0.05782,0.8346,0.8632,0.1368
Cells,BJ-RAS,1,2,1.0,0.0,0.9561,0.04386,1.0,0.4891,0.02167,0.4893,0.1024,0.04864,0.849,0.8585,0.1415
Cells,BJ-TERT,0,1,,,0.2627,0.7373,1.0,0.02364,0.04494,0.9314,0.02506,0.05234,0.9226,0.0,1.0
Cells,BJ-TERT,0,2,,,0.2502,0.7498,1.0,0.02626,0.04464,0.9291,0.02284,0.04511,0.932,0.0,1.0
Cells,BJ-TERT,1,1,1.0,0.0,0.9259,0.07411,1.0,0.4439,0.02527,0.5308,0.1693,0.0502,0.7805,0.849,0.151
Cells,BJ-TERT,1,2,1.0,0.0,0.9192,0.08079,1.0,0.4474,0.02605,0.5265,0.1854,0.04299,0.7716,0.8438,0.1562
Medium,BJ-RAS,0,1,0.1012,0.8988,1.0,0.0,1.0,0.02031,0.04363,0.9361,0.0,0.0,1.0,,
Medium,BJ-RAS,0,2,0.1312,0.8688,1.0,0.0,1.0,0.02083,0.04347,0.9357,0.0,0.0,1.0,,


In [30]:
# reshape and take highest MI only,
# add standard concentration column
met_conc_estimation = met_mids_for_estimation\
    .T.groupby(level='metabolite').tail(1).T\
    .melt(ignore_index=False)\
    .reset_index().set_index(['metabolite', 'matrix', 'cell_type', 'replicate_nr'])\
    .pivot(columns='with_standards', values='value')\
    .rename({0: 'sample', 1: 'mix'}, axis=1)\
    .join(
        met_std_concentrations.rename({'concentration_m': 'std_conc_m'}, axis=1),
        how='inner'
    )
# estimate concentrations
met_conc_estimation['est_conc_m'] = (
    met_conc_estimation['std_conc_m'] * met_conc_estimation['mix'] / 
    (met_conc_estimation['sample'] - met_conc_estimation['mix'])
)

Medium

In [31]:
# Show the concentration estimates for the medium samples only.
met_conc_estimation.xs('Medium', level='matrix')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,sample,mix,std_conc_m,est_conc_m
metabolite,cell_type,replicate_nr,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
hcys,BJ-RAS,1,0.8988,0.1228,1e-05,1.582e-06
hcys,BJ-RAS,2,0.8688,0.1227,1e-05,1.644e-06
hcys,BJ-TERT,2,0.7965,0.1174,1e-05,1.729e-06
hcys,none,1,1.0,0.03347,1e-05,3.463e-07
met,BJ-RAS,1,0.9361,0.4876,0.0001,0.0001087
met,BJ-RAS,2,0.9357,0.4907,0.0001,0.0001103
met,BJ-TERT,2,0.937,0.5149,0.0001,0.000122
met,none,1,0.9602,0.5203,0.0001,0.0001183


Cells

In [32]:
# Scale the extract concentrations by the per-cell-type dilution factor.
met_cell_concentration = (
    met_conc_estimation
    .xs('Cells', level='matrix')
    .join(cell_dilution['dilution_factor'])
    .assign(cell_conc=lambda d: d['est_conc_m'] * d['dilution_factor'])
)

# Mean and standard deviation over replicates.
met_cell_concentration_averaged = (
    met_cell_concentration
    .groupby(['metabolite', 'cell_type'])
    .agg({'cell_conc': ['mean', 'std']})
)
met_cell_concentration_averaged

Unnamed: 0_level_0,Unnamed: 1_level_0,cell_conc,cell_conc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std
metabolite,cell_type,Unnamed: 2_level_2,Unnamed: 3_level_2
cyst,BJ-RAS,6.835e-06,1.744e-07
cyst,BJ-TERT,1.356e-05,7.452e-07
hcys,BJ-RAS,,
hcys,BJ-TERT,,
met,BJ-RAS,0.0004596,4.799e-05
met,BJ-TERT,0.0006144,5.606e-06
sah,BJ-RAS,1.824e-06,5.06e-08
sah,BJ-TERT,2.118e-06,6.069e-08
sam,BJ-RAS,0.0001192,1.647e-05
sam,BJ-TERT,6.01e-05,5.649e-06


SAM / SAH ratio

In [33]:
# Per-replicate SAM / SAH concentration ratio; the two frames align on their
# shared (cell_type, replicate_nr) index.
met_sam_sah = met_cell_concentration.loc['sam'][['cell_conc']] / met_cell_concentration.loc['sah'][['cell_conc']]
# Group once by cell type: repeating the key only duplicated the index level in the result.
met_sam_sah.groupby(level='cell_type').agg({'cell_conc': ['mean', 'std']})

Unnamed: 0_level_0,Unnamed: 1_level_0,cell_conc,cell_conc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std
cell_type,cell_type,Unnamed: 2_level_2,Unnamed: 3_level_2
BJ-RAS,BJ-RAS,65.27,7.221
BJ-TERT,BJ-TERT,28.43,3.482


### D4-homocysteine
Here the internal standard used is L-homocysteine, while cells were cultured in DL-D4-homocysteine. Because D- and L-homocysteine are indistinguishable by LCMS, the observed concentrations reflect the total of D- and L-homocysteine in samples. In medium, we should have about 50:50 D- and L-forms. D-hcys may be taken up by cells (?) but is likely not metabolized further.  

In [34]:
# known standard concentrations
hcys_std_concentrations = pd.read_csv(
    input_data_path / 'D4-hcys_standard_concentrations.csv',
    sep=';', index_col=[0,1]
)
hcys_std_concentrations

Unnamed: 0_level_0,Unnamed: 1_level_0,concentration_m
matrix,metabolite,Unnamed: 2_level_1
Cells,hcys,9e-08
Cells,met,9e-09
Cells,sam,9e-09
Cells,sah,9e-09
Cells,cyst,9e-08
Medium,hcys,5e-05
Medium,met,1.25e-05


In [35]:
# select samples that have internal standards added
hcys_samples_std = hcys_mids.xs(1, level='with_standards', drop_level=False).index.remove_unused_levels()
hycs_samples_no_std = hcys_samples_std.set_levels([0], level='with_standards')

hcys_mids_for_estimation = pd.concat(
    [
        hcys_mids.loc[hcys_samples_std],
        hcys_mids.loc[hycs_samples_no_std]
    ]
).droplevel('time_minutes').sort_index()
hcys_mids_for_estimation

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,metabolite,hcys,hcys,cyst,cyst,cys,met,met,sam,sam,sah,sah
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mi,0,4,0,4,0,0,4,0,4,0,4
matrix,cell_type,with_standards,replicate_nr,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
Cells,BJ-RAS,0,1,0.0,1.0,0.1638,0.8362,1.0,0.3906,0.6094,0.3203,0.6797,0.2736,0.7264
Cells,BJ-RAS,0,2,0.0,1.0,0.1689,0.8311,1.0,0.4032,0.5968,0.3488,0.6512,0.2691,0.7309
Cells,BJ-RAS,1,1,0.3485,0.6515,0.8838,0.1162,1.0,0.8926,0.1074,0.5349,0.4651,0.72,0.28
Cells,BJ-RAS,1,2,0.3623,0.6377,0.8764,0.1236,1.0,0.901,0.09898,0.5144,0.4856,0.7247,0.2753
Cells,BJ-TERT,0,1,0.0,1.0,0.09788,0.9021,1.0,0.6293,0.3707,0.4846,0.5154,0.3475,0.6525
Cells,BJ-TERT,0,2,0.0,1.0,0.09899,0.901,1.0,0.6304,0.3696,0.4696,0.5304,0.3557,0.6443
Cells,BJ-TERT,1,1,0.4868,0.5132,0.7806,0.2194,1.0,0.9258,0.07421,0.6755,0.3245,0.7937,0.2063
Cells,BJ-TERT,1,2,0.4718,0.5282,0.8128,0.1872,1.0,0.9354,0.06462,0.734,0.266,0.7993,0.2007
Medium,BJ-RAS,0,1,0.009007,0.991,0.7697,0.2303,1.0,0.3003,0.6997,1.0,0.0,,
Medium,BJ-RAS,0,2,0.008502,0.9915,0.739,0.261,1.0,0.2856,0.7144,1.0,0.0,,


In [36]:
# reshape and take highest MI only,
# add standard concentration column
hcys_conc_estimation = hcys_mids_for_estimation\
    .T.groupby(level='metabolite').tail(1).T\
    .melt(ignore_index=False)\
    .reset_index().set_index(['metabolite', 'matrix', 'cell_type', 'replicate_nr'])\
    .pivot(columns='with_standards', values='value')\
    .rename({0: 'sample', 1: 'mix'}, axis=1)\
    .join(
        hcys_std_concentrations.rename({'concentration_m': 'std_conc_m'}, axis=1),
        how='inner'
    )
# estimate concentrations
hcys_conc_estimation['est_conc_m'] = (
    hcys_conc_estimation['std_conc_m'] * hcys_conc_estimation['mix'] / 
    (hcys_conc_estimation['sample'] - hcys_conc_estimation['mix'])
)

Medium

In [37]:
# Show the concentration estimates for the medium samples only.
hcys_conc_estimation.xs('Medium', level='matrix')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,sample,mix,std_conc_m,est_conc_m
metabolite,cell_type,replicate_nr,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
hcys,BJ-RAS,1,0.991,0.7927,5e-05,0.0001999
hcys,BJ-RAS,2,0.9915,0.7927,5e-05,0.0001993
hcys,BJ-TERT,1,0.9896,0.8028,5e-05,0.0002149
hcys,BJ-TERT,2,0.9907,0.781,5e-05,0.0001862
hcys,none,1,0.9985,0.8173,5e-05,0.0002255
met,BJ-RAS,1,0.6997,0.01316,1.25e-05,2.395e-07
met,BJ-RAS,2,0.7144,0.01276,1.25e-05,2.273e-07
met,BJ-TERT,1,0.584,0.01854,1.25e-05,4.097e-07
met,BJ-TERT,2,0.566,0.01299,1.25e-05,2.936e-07
met,none,1,0.1211,0.001896,1.25e-05,1.989e-07


Cells

In [38]:
# Scale the extract concentrations by the per-cell-type dilution factor.
hcys_cell_concentration = (
    hcys_conc_estimation
    .xs('Cells', level='matrix')
    .join(cell_dilution['dilution_factor'])
    .assign(cell_conc=lambda d: d['est_conc_m'] * d['dilution_factor'])
)
hcys_cell_concentration['cell_conc']

metabolite  cell_type  replicate_nr
cyst        BJ-RAS     1              1.820E-05
                       2              1.972E-05
            BJ-TERT    1              3.749E-05
                       2              3.059E-05
hcys        BJ-RAS     1              2.109E-04
                       2              1.986E-04
            BJ-TERT    1              1.230E-04
                       2              1.306E-04
met         BJ-RAS     1              2.413E-06
                       2              2.244E-06
            BJ-TERT    1              2.920E-06
                       2              2.472E-06
sah         BJ-RAS     1              7.078E-06
                       2              6.818E-06
            BJ-TERT    1              5.396E-06
                       2              5.278E-06
sam         BJ-RAS     1              2.446E-05
                       2              3.309E-05
            BJ-TERT    1              1.983E-05
                       2              1.174E-05
Name

## Uptake / release

**NOTE: move this to a separate script**

In [39]:
def normalize_to_incubated(concentrations: pd.DataFrame) -> pd.DataFrame:
    """Add a ``diff_m`` column: estimated concentration minus the cell-free baseline.

    The baseline is the mean ``est_conc_m`` of the rows whose ``cell_type``
    index level equals 'none'.

    Args:
        concentrations: frame with an ``est_conc_m`` column and an index level
            named 'cell_type' that contains the label 'none'.

    Returns:
        A copy of ``concentrations`` with the extra ``diff_m`` column.
    """
    baseline = concentrations['est_conc_m'].xs('none', level='cell_type').mean()
    return concentrations.assign(diff_m=concentrations['est_conc_m'] - baseline)

In [40]:
# Medium volume used to turn concentration differences into amounts.
# NOTE(review): presumably litres (i.e. 2 mL) — confirm.
medium_volume = 2e-3

### Methionine

In [41]:
# Per metabolite, subtract the cell-free ('none') medium concentration, then convert
# the difference into an amount per hour.
# NOTE(review): the division by 24 presumably reflects a 24 h incubation — confirm.
met_release = (
    met_conc_estimation
    .xs('Medium', level='matrix')
    .groupby(level='metabolite', group_keys=False)
    .apply(normalize_to_incubated)
    .assign(release_mol_h=lambda d: d['diff_m'] * medium_volume / 24)
)
met_release

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,sample,mix,std_conc_m,est_conc_m,diff_m,release_mol_h
metabolite,cell_type,replicate_nr,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
hcys,BJ-RAS,1,0.8988,0.1228,1e-05,1.582e-06,1.236e-06,1.03e-10
hcys,BJ-RAS,2,0.8688,0.1227,1e-05,1.644e-06,1.298e-06,1.081e-10
hcys,BJ-TERT,2,0.7965,0.1174,1e-05,1.729e-06,1.383e-06,1.153e-10
hcys,none,1,1.0,0.03347,1e-05,3.463e-07,0.0,0.0
met,BJ-RAS,1,0.9361,0.4876,0.0001,0.0001087,-9.557e-06,-7.965e-10
met,BJ-RAS,2,0.9357,0.4907,0.0001,0.0001103,-7.997e-06,-6.664e-10
met,BJ-TERT,2,0.937,0.5149,0.0001,0.000122,3.708e-06,3.09e-10
met,none,1,0.9602,0.5203,0.0001,0.0001183,0.0,0.0


In [42]:
# mean and std.dev, in pmol
# mean and std.dev of the release rate, scaled by 1e12 (mol/h -> pmol/h)
(
    met_release
    .groupby(['metabolite', 'cell_type'])
    .agg({'release_mol_h': ['mean', 'std']})
    .mul(1e12)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,release_mol_h,release_mol_h
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std
metabolite,cell_type,Unnamed: 2_level_2,Unnamed: 3_level_2
hcys,BJ-RAS,105.6,3.64
hcys,BJ-TERT,115.3,
hcys,none,0.0,
met,BJ-RAS,-731.4,91.96
met,BJ-TERT,309.0,
met,none,0.0,


### Homocysteine

In [43]:
# Per metabolite, subtract the cell-free ('none') medium concentration, then convert
# the difference into an amount per hour.
# NOTE(review): the division by 24 presumably reflects a 24 h incubation — confirm.
hcys_release = (
    hcys_conc_estimation
    .xs('Medium', level='matrix')
    .groupby(level='metabolite', group_keys=False)
    .apply(normalize_to_incubated)
    .assign(release_mol_h=lambda d: d['diff_m'] * medium_volume / 24)
)
hcys_release

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,sample,mix,std_conc_m,est_conc_m,diff_m,release_mol_h
metabolite,cell_type,replicate_nr,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
hcys,BJ-RAS,1,0.991,0.7927,5e-05,0.0001999,-2.561e-05,-2.134e-09
hcys,BJ-RAS,2,0.9915,0.7927,5e-05,0.0001993,-2.614e-05,-2.178e-09
hcys,BJ-TERT,1,0.9896,0.8028,5e-05,0.0002149,-1.054e-05,-8.78e-10
hcys,BJ-TERT,2,0.9907,0.781,5e-05,0.0001862,-3.922e-05,-3.269e-09
hcys,none,1,0.9985,0.8173,5e-05,0.0002255,0.0,0.0
met,BJ-RAS,1,0.6997,0.01316,1.25e-05,2.395e-07,4.062e-08,3.385e-12
met,BJ-RAS,2,0.7144,0.01276,1.25e-05,2.273e-07,2.837e-08,2.364e-12
met,BJ-TERT,1,0.584,0.01854,1.25e-05,4.097e-07,2.108e-07,1.757e-11
met,BJ-TERT,2,0.566,0.01299,1.25e-05,2.936e-07,9.471e-08,7.892e-12
met,none,1,0.1211,0.001896,1.25e-05,1.989e-07,0.0,0.0


In [44]:
# mean and std.dev, in pmol
# mean and std.dev of the release rate, scaled by 1e12 (mol/h -> pmol/h)
(
    hcys_release
    .groupby(['metabolite', 'cell_type'])
    .agg({'release_mol_h': ['mean', 'std']})
    .mul(1e12)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,release_mol_h,release_mol_h
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std
metabolite,cell_type,Unnamed: 2_level_2,Unnamed: 3_level_2
hcys,BJ-RAS,-2156.0,31.26
hcys,BJ-TERT,-2073.0,1690.0
hcys,none,0.0,
met,BJ-RAS,2.875,0.7219
met,BJ-TERT,12.73,6.84
met,none,0.0,
