# Compute mass isotopomer distributions (MIDs) from peak areas

In [1]:
import pandas as pd
from pathlib import Path
from scipy.stats import binom

In [2]:
# Relative directories used by this notebook: the peak-area CSV files are
# read from `input_data_path`, the computed MID tables are written to
# `mid_data_path`.
input_data_path = Path('00_input_data')
mid_data_path = Path('01_mid_data')

In [3]:
def import_peak_areas(file_name: str) -> pd.DataFrame:
    """Load a peak-area table from a semicolon-separated CSV file.

    The file is read with its first two rows as a two-level column header
    and its first five columns as a five-level row index. Missing values
    are replaced by 0.

    Parameters
    ----------
    file_name
        Location of the CSV file; passed straight to ``pd.read_csv``.

    Returns
    -------
    pd.DataFrame
        Peak areas with row index levels ``matrix``, ``cell_type``,
        ``time_minutes``, ``with_standards``, ``replicate_nr`` and column
        levels ``metabolite``, ``mi``, sorted by the row index.
    """
    index_level_names = [
        'matrix', 'cell_type', 'time_minutes', 'with_standards', 'replicate_nr'
    ]
    column_level_names = ['metabolite', 'mi']

    raw_table = pd.read_csv(
        file_name,
        sep=';',
        header=[0, 1],
        index_col=[0, 1, 2, 3, 4],
    )
    table = raw_table.fillna(0)

    # The level names do not survive pd.read_csv for this file layout
    # (as observed by the original author), so assign them explicitly.
    table.index = table.index.set_names(index_level_names)
    table.columns = table.columns.set_names(column_level_names)

    return table.sort_index()

In [4]:
def normalize_to_mids(peak_areas: pd.DataFrame) -> pd.DataFrame:
    """Scale peak areas so that, per row, each metabolite's values sum to 1.

    Columns are grouped by the first level of the column index; every
    value is divided by the row-wise total of its group.

    Parameters
    ----------
    peak_areas
        Table with a two-level column index whose first level identifies
        the group (metabolite) that each column belongs to.

    Returns
    -------
    pd.DataFrame
        Table with the same layout as ``peak_areas`` holding the
        fractions. A group whose total is zero in a row yields NaN there
        (0 / 0).
    """
    # groupby operates on rows, so transpose, sum per first column level,
    # and transpose back to get one total column per group.
    transposed = peak_areas.T
    totals_per_group = transposed.groupby(level=0).sum().T

    # level=0 broadcasts each group total over all columns of that group.
    return peak_areas.div(totals_per_group, axis=0, level=0)

## Homocysteine

In [5]:
# Load the peak areas stored in 'D4-hcys_peak_areas.csv' and preview the
# first five rows.
hcys_peak_areas = import_peak_areas(input_data_path / 'D4-hcys_peak_areas.csv')
hcys_peak_areas.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,metabolite,hcys,hcys,cyst,cyst,cys,met,met,sam,sam,sah,sah
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mi,0,4,0,4,0,0,4,0,4,0,4
matrix,cell_type,time_minutes,with_standards,replicate_nr,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
Cells,BJ-RAS,5,0,1,179.0,6807,2980,0.0,3750,1639,325,7500.0,490.0,2800.0,289.0
Cells,BJ-RAS,5,0,2,130.0,5657,2998,0.0,3439,1919,458,6310.0,405.0,2802.0,316.0
Cells,BJ-RAS,15,0,1,132.0,5822,3002,0.0,3857,1992,393,8619.0,1030.0,2689.0,473.0
Cells,BJ-RAS,15,0,2,114.0,7127,3889,0.0,4083,1881,435,9505.0,1311.0,3294.0,682.0
Cells,BJ-RAS,30,0,1,181.0,6741,3185,110.0,5077,4022,640,10590.0,2632.0,1581.0,473.0


In [6]:
# Convert the peak areas to fractions per metabolite (each metabolite's
# columns sum to 1 within a row) and preview the first five rows.
hcys_mids = normalize_to_mids(hcys_peak_areas)
hcys_mids.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,metabolite,hcys,hcys,cyst,cyst,cys,met,met,sam,sam,sah,sah
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mi,0,4,0,4,0,0,4,0,4,0,4
matrix,cell_type,time_minutes,with_standards,replicate_nr,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
Cells,BJ-RAS,5,0,1,0.025623,0.974377,1.0,0.0,1.0,0.834521,0.165479,0.938673,0.061327,0.906442,0.093558
Cells,BJ-RAS,5,0,2,0.022464,0.977536,1.0,0.0,1.0,0.80732,0.19268,0.939687,0.060313,0.898653,0.101347
Cells,BJ-RAS,15,0,1,0.02217,0.97783,1.0,0.0,1.0,0.83522,0.16478,0.893253,0.106747,0.850411,0.149589
Cells,BJ-RAS,15,0,2,0.015744,0.984256,1.0,0.0,1.0,0.812176,0.187824,0.878791,0.121209,0.828471,0.171529
Cells,BJ-RAS,30,0,1,0.026149,0.973851,0.966616,0.033384,1.0,0.86272,0.13728,0.800938,0.199062,0.769718,0.230282


In [7]:
# Write the normalized table to 'D4-hcys_mids.csv' in the MID output directory.
hcys_mids.to_csv(mid_data_path / 'D4-hcys_mids.csv')

## Methionine

In [8]:
# Load the peak areas stored in 'U13C-met_peak_areas.csv' and preview the
# first five rows.
met_peak_areas = import_peak_areas(input_data_path / 'U13C-met_peak_areas.csv')
met_peak_areas.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,metabolite,hcys,hcys,cyst,cyst,cys,met,met,met,sam,sam,sam,sah,sah
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mi,0,4,0,4,0,0,4,5,0,4,5,0,4
matrix,cell_type,time_minutes,with_standards,replicate_nr,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
Cells,BJ-RAS,5,0,1,0.0,0.0,367,0.0,2640.0,1022,781,18644,1395.0,0.0,210.0,69.0,0.0
Cells,BJ-RAS,5,0,2,0.0,0.0,474,0.0,1732.0,644,604,14954,2542.0,0.0,473.0,72.0,0.0
Cells,BJ-RAS,15,0,1,0.0,0.0,551,0.0,3956.0,714,1109,25986,1401.0,53.0,1292.0,0.0,27.0
Cells,BJ-RAS,15,0,2,0.0,0.0,561,0.0,2189.0,496,748,16267,2039.0,86.0,1915.0,0.0,31.0
Cells,BJ-RAS,30,0,1,0.0,0.0,528,0.0,3526.0,494,776,16515,1319.0,176.0,3734.0,0.0,32.0


In [9]:
# Convert the peak areas to fractions per metabolite and preview the first
# five rows. Where all peak areas of a metabolite are zero in a row (hcys in
# the preview below), the result is NaN because the division is 0 / 0.
met_mids = normalize_to_mids(met_peak_areas)
met_mids.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,metabolite,hcys,hcys,cyst,cyst,cys,met,met,met,sam,sam,sam,sah,sah
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mi,0,4,0,4,0,0,4,5,0,4,5,0,4
matrix,cell_type,time_minutes,with_standards,replicate_nr,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
Cells,BJ-RAS,5,0,1,,,1.0,0.0,1.0,0.049983,0.038196,0.911821,0.869159,0.0,0.130841,1.0,0.0
Cells,BJ-RAS,5,0,2,,,1.0,0.0,1.0,0.039748,0.037279,0.922972,0.843118,0.0,0.156882,1.0,0.0
Cells,BJ-RAS,15,0,1,,,1.0,0.0,1.0,0.025675,0.039879,0.934446,0.510197,0.019301,0.470503,0.0,1.0
Cells,BJ-RAS,15,0,2,,,1.0,0.0,1.0,0.028325,0.042716,0.928959,0.504703,0.021287,0.47401,0.0,1.0
Cells,BJ-RAS,30,0,1,,,1.0,0.0,1.0,0.027776,0.043632,0.928592,0.252247,0.033658,0.714094,0.0,1.0


In [10]:
# Write the normalized table to 'U13C-met_mids.csv' in the MID output directory.
met_mids.to_csv(mid_data_path / 'U13C-met_mids.csv')