This notebook can be used to calculate NA Corrected intensities as well as fractional enrichment for GCMS data incluing derivatized compounds. This example has C13 label:

 - GCMS_raw.csv - demo raw MS intensity file containing intensities for glucose derivatized as pentaacetate (C16H22O11) taken from Cline, Gary W. and Gerald I. Shulman, 1995
 
 The compound formula contains both the actual compound and derivatizing agent

In [9]:
import pandas as pd
import numpy as np
import re

from corna.inputs import maven_parser as parser
import corna.constants as const
from corna.helpers import get_isotope_na, replace_negatives_in_column, merge_multiple_dfs
from corna.autodetect_isotopes import get_element_correction_dict
from corna.algorithms import matrix_calc as algo
from corna.algorithms.nacorr_lcms import na_correction
from corna.postprocess import fractional_enrichment

Reading raw file and merging with sample metadata if present, in this example running without sample metadata

In [10]:
raw_df = pd.read_csv('GCMS_raw.csv')
sample_metadata = pd.DataFrame()

merged_df, iso_tracer_data, element_list = parser.read_maven_file(raw_df, sample_metadata)
merged_df

Unnamed: 0,Name,Label,Formula,Sample,Intensity,Unlabeled Fragment
0,Glucose,C12 PARENT,C16H22O11,sample1,0.571376,Glucose
1,Glucose,C13-label-1,C16H22O11,sample1,0.103652,Glucose
2,Glucose,C13-label-2,C16H22O11,sample1,0.272024,Glucose
3,Glucose,C13-label-3,C16H22O11,sample1,0.042745,Glucose
4,Glucose,C13-label-4,C16H22O11,sample1,0.008984,Glucose
5,Glucose,C13-label-5,C16H22O11,sample1,0.001073,Glucose
6,Glucose,C13-label-6,C16H22O11,sample1,0.000133,Glucose


Performing na_correction and inputs not relevant for this workflow are set as empty. Isotopes indistinguishable because of low resolution mass spec can be defined in the format {'Tracer': [List of Indistinguishable Isotopes]} 

In [11]:
na_corr_df, ele_corr_dict = na_correction(merged_df, iso_tracers=['C13'], ppm_input_user={}, eleme_corr={'C':['H','O']})

na_corr_df = replace_negatives_in_column(na_corr_df, const.NA_CORRECTED_WITH_ZERO, const.NA_CORRECTED_COL)
na_corr_df

Unnamed: 0,Name,Formula,Indistinguishable_isotope,Sample,NA Corrected,Intensity,Label,NA Corrected with zero
0,Glucose,C16H22O11,"{'C': ['H', 'O']}",sample1,0.7013959,0.571376,C12 PARENT,0.7013959
1,Glucose,C16H22O11,"{'C': ['H', 'O']}",sample1,-0.001801295,0.103652,C13-label-1,0.0
2,Glucose,C16H22O11,"{'C': ['H', 'O']}",sample1,0.3008141,0.272024,C13-label-2,0.3008141
3,Glucose,C16H22O11,"{'C': ['H', 'O']}",sample1,-0.0005815639,0.042745,C13-label-3,0.0
4,Glucose,C16H22O11,"{'C': ['H', 'O']}",sample1,0.0001696398,0.008984,C13-label-4,0.0001696398
5,Glucose,C16H22O11,"{'C': ['H', 'O']}",sample1,3.390483e-06,0.001073,C13-label-5,3.390483e-06
6,Glucose,C16H22O11,"{'C': ['H', 'O']}",sample1,-2.179423e-07,0.000133,C13-label-6,0.0
7,Glucose,C16H22O11,"{'C': ['H', 'O']}",sample1,-1.441508e-05,0.0,C13-label-7,0.0
8,Glucose,C16H22O11,"{'C': ['H', 'O']}",sample1,1.958036e-07,0.0,C13-label-8,1.958036e-07
9,Glucose,C16H22O11,"{'C': ['H', 'O']}",sample1,2.606535e-07,0.0,C13-label-9,2.606535e-07


Calculating fractional enrichments, merging all data a into file and saving as 'GCMS_corrected.csv'

In [12]:
frac_enr_df = fractional_enrichment(na_corr_df)
frac_enr_df

Unnamed: 0,Sample,Name,Label,Formula,Pool_total,Fractional enrichment
0,sample1,Glucose,C12 PARENT,C16H22O11,1.002384,0.6997281
1,sample1,Glucose,C13-label-1,C16H22O11,1.002384,0.0
2,sample1,Glucose,C13-label-2,C16H22O11,1.002384,0.3000988
3,sample1,Glucose,C13-label-3,C16H22O11,1.002384,0.0
4,sample1,Glucose,C13-label-4,C16H22O11,1.002384,0.0001692364
5,sample1,Glucose,C13-label-5,C16H22O11,1.002384,3.382421e-06
6,sample1,Glucose,C13-label-6,C16H22O11,1.002384,0.0
7,sample1,Glucose,C13-label-7,C16H22O11,1.002384,0.0
8,sample1,Glucose,C13-label-8,C16H22O11,1.002384,1.95338e-07
9,sample1,Glucose,C13-label-9,C16H22O11,1.002384,2.600337e-07


In [13]:
output_df = merge_multiple_dfs([merged_df, na_corr_df, frac_enr_df])
output_df

Unnamed: 0,Name,Label,Formula,Sample,Intensity_x,Unlabeled Fragment,Indistinguishable_isotope,NA Corrected,Intensity_y,NA Corrected with zero,Pool_total,Fractional enrichment
0,Glucose,C12 PARENT,C16H22O11,sample1,0.571376,Glucose,"{'C': ['H', 'O']}",0.7013959,0.571376,0.701396,1.002384,0.699728
1,Glucose,C13-label-1,C16H22O11,sample1,0.103652,Glucose,"{'C': ['H', 'O']}",-0.001801295,0.103652,0.0,1.002384,0.0
2,Glucose,C13-label-2,C16H22O11,sample1,0.272024,Glucose,"{'C': ['H', 'O']}",0.3008141,0.272024,0.300814,1.002384,0.300099
3,Glucose,C13-label-3,C16H22O11,sample1,0.042745,Glucose,"{'C': ['H', 'O']}",-0.0005815639,0.042745,0.0,1.002384,0.0
4,Glucose,C13-label-4,C16H22O11,sample1,0.008984,Glucose,"{'C': ['H', 'O']}",0.0001696398,0.008984,0.00017,1.002384,0.000169
5,Glucose,C13-label-5,C16H22O11,sample1,0.001073,Glucose,"{'C': ['H', 'O']}",3.390483e-06,0.001073,3e-06,1.002384,3e-06
6,Glucose,C13-label-6,C16H22O11,sample1,0.000133,Glucose,"{'C': ['H', 'O']}",-2.179423e-07,0.000133,0.0,1.002384,0.0


In [14]:
output_df.to_csv('GCMS_corrected.csv')