In [1]:
import pandas as pd
import tellurium as te
import numpy as np

In [5]:
pd.read_csv("metabolites_particle.tsv", sep="\t", index_col=0)

Unnamed: 0_level_0,uRBC,Std.,Parasite,Std..1
metabolite,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Choline_vo,7710000.0,1200000.0,31400000,1950000.0
Choline_phosphate,2750000.0,722000.0,186000000,9830000.0
CDP_choline,126000.0,126.0,12100000,886000.0
CDP_ethanolamine,72300.0,18100.0,19300000,1680000.0
Ethanolamine,12600000.0,2480000.0,5060000,632000.0
Ethanolamine_phosphate,452000.0,108000.0,162000000,15200000.0
Serine_vo,24400000.0,5070000.0,20800000,2140000.0
Myo_inositol_vo,14800000.0,4940000.0,18200000,3360000.0
Myo_inositol_1_phosphate_vo,137000000.0,13600000.0,49500000,11000000.0
Serine_t14,8910000.0,2350000.0,9210000,2410000.0


In [9]:
df = pd.read_excel("metabolomics_test_data.ods",sheet_name=2,header=1)
df.to_csv("metabolites_particle.tsv", sep="\t", index=False)
df = df.set_index('metabolite')
df

Unnamed: 0_level_0,uRBC,Std.,Parasite,Std..1
metabolite,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Choline_vo,7710000.0,1200000.0,31400000,1950000.0
Choline_phosphate,2750000.0,722000.0,186000000,9830000.0
CDP_choline,126000.0,126.0,12100000,886000.0
CDP_ethanolamine,72300.0,18100.0,19300000,1680000.0
Ethanolamine,12600000.0,2480000.0,5060000,632000.0
Ethanolamine_phosphate,452000.0,108000.0,162000000,15200000.0
Serine_vo,24400000.0,5070000.0,20800000,2140000.0
Myo_inositol_vo,14800000.0,4940000.0,18200000,3360000.0
Myo_inositol_1_phosphate_vo,137000000.0,13600000.0,49500000,11000000.0
Serine_t14,8910000.0,2350000.0,9210000,2410000.0


In [10]:
#
avo = 6.02214076e23
fac = [avo*80e-15, avo*26.7e-15]
stage_dict = {'u_time':['uRBC','Std.'],
              'troph_time': ['Parasite','Std..1']
             }
df_M = df.copy()
for row in df.index:
    if row.startswith('%'):
        continue
    df_M.loc[row, stage_dict['u_time']] = df.loc[row, stage_dict['u_time']].apply(lambda x: x/fac[0]*1e6)
    df_M.loc[row, stage_dict['troph_time']] = df.loc[row, stage_dict['troph_time']].apply(lambda x: x/fac[1]*1e6)
df_M.to_csv("metabolites_muMolar.tsv", sep="\t")

In [3]:
df.loc[df["metabolite"].isin([x for x in df.metabolite if "Serine" in x])].mean()

uRBC        1.665500e+07
Std.        3.710000e+06
Parasite    1.813667e+07
Std..1      5.850000e+06
dtype: float64

In [4]:
df.loc[df["metabolite"].isin([x for x in df.metabolite if "Choline_" in x])]

Unnamed: 0,metabolite,uRBC,Std.,Parasite,Std..1
0,Choline_vo,7710000.0,1200000.0,31400000,1950000.0
1,Choline_phosphate,2750000.0,722000.0,186000000,9830000.0
11,Choline_t14,1810000.0,602000.0,20000000,6930000.0


In [5]:
df.loc[df["metabolite"].isin([x for x in df.metabolite if "PC_" in x])]

Unnamed: 0,metabolite,uRBC,Std.,Parasite,Std..1
13,PC_t14,,,132000000,30700000.0
17,PC_t09,,,22500000,5410000.0


In [6]:
df.loc[df["metabolite"].isin([x for x in df.metabolite if "PE_" in x])]

Unnamed: 0,metabolite,uRBC,Std.,Parasite,Std..1
14,PE_t14,,,23300000,6810000.0
18,PE_t09,,,62600000,18300000.0


In [12]:
model = te.loada("/home/nexia/PhD/malaria_lipid_model/Lipid-Model/Model/model_files/PLModel.atm")

In [41]:
1e-18*6.02214076e23

602214.076

In [22]:
25700000/8.061017e+06

3.188183327240223

In [27]:
alex_factor = 602214.076

df = pd.read_excel("metabolomics_test_data.ods",sheet_name=2,header=1)

u_time = 2 * 3600
ring_time = 8 * 3600
troph_time = 36 * 3600
schiz_time = 48 * 3600

long_name_to_maier = {
    'Choline': 'avg Cho',
    'L_Serine': 'avg Ser',
    'Phosphatidylethanolamine': 'avg PE',
    'Phosphatidylcholine': 'avg PC'}

stage_dict = {u_time:['uRBC'],
              troph_time:['Parasite']}

intersection = list(set(df.metabolite) & set(model.getFloatingSpeciesIds()))

work_df = df.loc[df["metabolite"].isin(intersection + [long_name_to_maier[a] 
                                                       for a in model.getFloatingSpeciesIds() 
                                                       if a in long_name_to_maier])].copy()
weight_df = work_df.copy()
work_df[u_time] = work_df.loc[:, stage_dict[u_time]] / alex_factor
weight_df['weight_uRBC'] = work_df.loc[:, stage_dict[troph_time]].mean(axis=1) / \
                      work_df.loc[:, 'Std.']

work_df[ring_time] = np.nan

work_df[troph_time] = work_df.loc[:, stage_dict[troph_time]] / alex_factor
weight_df['weight_troph'] = work_df.loc[:, stage_dict[troph_time]].mean(axis=1) / \
                      work_df.loc[:, 'Std..1']

work_df[schiz_time] = np.nan

print(weight_df)

datadf = work_df[[u_time, ring_time, troph_time, schiz_time]] #* [4.00**-1 * 1e-3,
                                                      #  26.7**-1 * 1e-3,
                                                      #   26.9**-1 * 1e-3]
datadf = datadf.T


datadf.columns = intersection +[a for a in model.getFloatingSpeciesIds() if a in long_name_to_maier]
print(datadf)
test_dict = datadf.to_dict('list')

test_dict

                metabolite        uRBC          Std.   Parasite        Std..1  \
1        Choline_phosphate   2750000.0  7.220000e+05  186000000  9.830000e+06   
2              CDP_choline    126000.0  1.260000e+02   12100000  8.860000e+05   
3         CDP_ethanolamine     72300.0  1.810000e+04   19300000  1.680000e+06   
4             Ethanolamine  12600000.0  2.480000e+06    5060000  6.320000e+05   
5   Ethanolamine_phosphate    452000.0  1.080000e+05  162000000  1.520000e+07   
19                 avg Cho   4760000.0  4.171930e+06   25700000  8.061017e+06   
20                 avg Ser  16655000.0  1.095308e+07   15005000  8.195368e+06   
21                  avg PC         NaN           NaN   77250000  7.742819e+07   
22                  avg PE         NaN           NaN   97300000  2.778930e+07   

     weight_uRBC  weight_troph  
1     257.617729     18.921668  
2   96031.746032     13.656885  
3    1066.298343     11.488095  
4       2.040323      8.006329  
5    1500.000000     10.

{'Choline_phosphate': [4.566482434728078, nan, 308.86026649433546, nan],
 'Ethanolamine_phosphate': [0.20922792246390468, nan, 20.092522712803543, nan],
 'CDP_choline': [0.12005697455666911, nan, 32.04840399645524, nan],
 'CDP_ethanolamine': [20.92279224639047, nan, 8.402327679899663, nan],
 'Ethanolamine': [0.7505636583625787, nan, 269.00732888216316, nan],
 'Choline': [7.90416595974751, nan, 42.67585402636786, nan],
 'Phosphatidylcholine': [27.656278163780417, nan, 24.916388702943568, nan],
 'Phosphatidylethanolamine': [nan, nan, 128.27664293917965, nan],
 'L_Serine': [nan, nan, 161.5704512360153, nan]}

In [40]:
for values in test_dict.values():
    for value in values:
        if np.isnan(value):
            continue
        else:
            print(value)

2750000.0
186000000.0
126000.0
12100000.0
72300.0
19300000.0
12600000.0
5060000.0
452000.0
162000000.0
4760000.0
25700000.0
16655000.0
15005000.0
77250000.0
97300000.0


In [79]:
maierframe = pd.read_excel('~/PhD/malaria_lipid_model/Lipid-Model/Datasets/RBC and asexual Pf lipidome.xlsx',
                               header=1, index_col=0)  # maier dataset


In [80]:
maierframe = maierframe.dropna()
maierframe_particle = maierframe.copy()

In [11]:
particle_fac = 6.02E+05
for row in maierframe.index:
    if row.startswith('%'):
        continue
    maierframe_particle.loc[row] = maierframe.loc[row].apply(lambda x: x*particle_fac)

#maierframe_particle.to_csv('maierframe_particle.tsv', sep="\t")
maierframe_particle["Trophozoite 1"]

NameError: name 'maierframe_particle' is not defined

In [48]:
u_time = 2 * 3600
ring_time = 8 * 3600
troph_time = 36 * 3600
schiz_time = 48 * 3600
maierframe = pd.read_excel('~/PhD/malaria_lipid_model/Lipid-Model/Datasets/RBC and asexual Pf lipidome.xlsx',
                               header=1, index_col=0)  # maier dataset

long_name_to_maier = {
    '1,2-Diacyl-sn-glycerol': 'DG',
    'DAG': 'DG',
    'Phosphatidylserine_mem': 'PS',
    'Phosphatidylethanolamine_mem': 'PE',
    'Phosphatidylcholine_mem': 'PC'}

stage_dict = {u_time:['RBC1','RBC2','RBC3'],
              ring_time: ['Ring 1', 'Ring 2', 'Ring 3'],
              troph_time: ['Trophozoite 1', 'Trophozoite 2', 'Trophozoite 3'],
              schiz_time: ['Schizont 1', 'Schizont 2', 'Schizont 3']}

# now we make datadf
work_df = maierframe
work_df = work_df.dropna()
new_dict={}
for row in work_df.index:
    try:
        model_name = maier_to_long_name[row]
    except KeyError:
        model_name = row
    new_dict[model_name] = {'mean': [], 'std': []}
    for key in stage_dict.keys():
        new_dict[model_name]['mean'].append(work_df.loc[row, stage_dict[key]].mean())
        new_dict[model_name]['std'].append(work_df.loc[row, stage_dict[key]].std())
        
df_new = pd.DataFrame.from_dict(new_dict)
df_new.T

Unnamed: 0,mean,std
CE 16:0,"[7.696666666666666, 4.6499999999999995, 7.9066...","[0.7221034090304059, 1.8595967304767986, 5.455..."
CE 16:1,"[3.1033333333333335, 1.0633333333333332, 1.51,...","[1.5588564184469762, 1.8417473587149062, 2.615..."
CE 18:1,"[11.016666666666666, 4.0633333333333335, 5.696...","[2.9938993525723827, 2.418043286075197, 9.8669..."
CE 18:2,"[36.31333333333333, 11.713333333333333, 17.52,...","[14.92117399313249, 8.774197019290902, 30.3455..."
CE 18:3,"[5.7, 1.4166666666666667, 2.856666666666667, 0.0]","[2.8416896382258217, 1.6378135832057727, 4.947..."
...,...,...
% TAG,"[8.526898945401134, 20.35771495398677, 17.4852...","[14.76902220444024, 17.992921377441334, 27.080..."
Sphingolipids (total),"[149.28666666666666, 108.91000000000001, 170.2...","[42.94662540099436, 7.675004885991932, 37.2902..."
% Cer,"[2.647519540535303, 3.5900732292884, 3.7982970...","[0.538204406552684, 0.3667333764090664, 0.5274..."
% DHSM,"[0.9397815257601959, 0.7490037885297003, 1.505...","[0.1452913039323298, 0.24286512585895992, 0.47..."


In [15]:
u_time = 2 * 3600
ring_time = 8 * 3600
troph_time = 36 * 3600
schiz_time = 48 * 3600
maierframe = pd.read_excel('~/PhD/malaria_lipid_model/Lipid-Model/Datasets/RBC and asexual Pf lipidome.xlsx',
                               header=1, index_col=0)  # maier dataset

long_name_to_maier = {
    '1,2-Diacyl-sn-glycerol': 'DG',
    'DAG': 'DG',
    'Phosphatidylserine_mem': 'PS',
    'Phosphatidylethanolamine_mem': 'PE',
    'Phosphatidylcholine_mem': 'PC'}

stage_dict = {u_time:['RBC1','RBC2','RBC3'],
              ring_time: ['Ring 1', 'Ring 2', 'Ring 3'],
              troph_time: ['Trophozoite 1', 'Trophozoite 2', 'Trophozoite 3'],
              schiz_time: ['Schizont 1', 'Schizont 2', 'Schizont 3']}
# make avstroph_time = 36 * 3600
schiz_time = 48 * 3600
# todo: clever weighting and such
work_df = maierframe
work_df = work_df.dropna()
for row in work_df.index:
    # value * nmol/10^9 cells -> value * 10^-18mol / V -> Volume has fL= 10^-15 -> value mol/ 10^-3*V
    if row.startswith('%'):
        continue
    #RBC
    work_df.loc[row,stage_dict[u_time]] = work_df.loc[row, stage_dict[u_time]].apply(lambda x: x*80.00**-1 * 1e-3 *1e6)
    #small parasite
    work_df.loc[row,stage_dict[ring_time]] = work_df.loc[row, stage_dict[ring_time]].apply(lambda x: x*4.00**-1 * 1e-3*1e6)
    work_df.loc[row,stage_dict[troph_time]] = work_df.loc[row, stage_dict[troph_time]].apply(lambda x: x*26.7**-1 * 1e-3*1e6)
    work_df.loc[row,stage_dict[schiz_time]] = work_df.loc[row, stage_dict[schiz_time]].apply(lambda x: x*26.9**-1 * 1e-3*1e6)

# extract the avs and give proper names
#datadf = work_df[[ring_time, troph_time, schiz_time]] * [4.00**-1 * 1e-3, 26.7**-1 * 1e-3, 26.9**-1 * 1e-3]
#datadf = datadf.T

maier_to_long_name = {v: k for k, v in long_name_to_maier.items()}
#datadf.rename(columns=maier_to_long_name, inplace=True)
#test_dict = datadf.to_dict('list')

#work_df.to_csv('maierframe_muMolar.tsv', sep='\t')
work_df.loc['PC', 'Trophozoite 1']



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



25268.41448189764