### Map DFG2 foods to WWEIA ingredient codes

In [1]:
import pandas as pd

In [2]:
# Read the four DFG2 glycan result tables and the DFG2-to-WWEIA lookup table.
# 'simple_name' and 'food_group' are removed from every glycan table except the
# polysaccharide one, which is kept exactly as read.
dropped_cols = ['simple_name', 'food_group']

poly = pd.read_csv('../../data/glycan/polysaccharide_v2.csv')
free = pd.read_csv('../../data/glycan/free_saccharide_v2.csv').drop(columns=dropped_cols)
mono = pd.read_csv('../../data/glycan/monosaccharide_v2.csv').drop(columns=dropped_cols)
linkage = pd.read_csv('../../data/glycan/glycosidic_linkage_v2.csv').drop(columns=dropped_cols)
wweia_map = pd.read_csv('../../data/wweia/dfg2_wweia_map.csv')

In [3]:
# Tag every free-saccharide measurement column with 'Free_'. 'sample_id' is
# moved into the index while renaming so it keeps its original name, then
# restored as a regular column.
free = free.set_index('sample_id')
free.columns = [f'Free_{col}' for col in free.columns]
free = free.reset_index()

In [4]:
# Tag every monosaccharide measurement column with 'Mono_'. 'sample_id' is
# moved into the index while renaming so it keeps its original name, then
# restored as a regular column.
mono = mono.set_index('sample_id')
mono.columns = [f'Mono_{col}' for col in mono.columns]
mono = mono.reset_index()

In [5]:
# Assemble one wide table keyed on 'sample_id'. The polysaccharide table is the
# base; free-saccharide, monosaccharide and linkage columns are attached with
# left joins, so every polysaccharide row is retained.
left_on_sample = {'on': 'sample_id', 'how': 'left'}

poly_free = poly.merge(free, **left_on_sample)
poly_free_mono = poly_free.merge(mono, **left_on_sample)
dfg_all = poly_free_mono.merge(linkage, **left_on_sample)

In [6]:
# Keep only the two columns needed for the mapping and discard rows where either
# one is missing. Chaining .dropna() onto the column selection (instead of
# calling dropna(inplace=True) on the selection afterwards) produces the same
# table without triggering pandas' SettingWithCopyWarning.
wweia_map = wweia_map[['sample_id', 'ingred_code']].dropna()

In [7]:
# Attach the combined glycan measurements to the WWEIA map on 'sample_id';
# the left join keeps every row of the map.
dfg_wweia = wweia_map.merge(right=dfg_all, how='left', on='sample_id')

In [8]:
# Save the WWEIA-mapped glycan table. index=False is the documented way to omit
# the row index from the file (index=None only worked because it is falsy).
dfg_wweia.to_csv('../../data/wweia/dfg2_wweia_data.csv', index=False)

### Calculate the absolute abundance of each linkage feature by multiplying its relative abundance by the corresponding monosaccharide value

In [9]:
# Convert linkage relative abundances into absolute abundances: each block of
# linkage columns is multiplied, row by row, by the amount of its parent
# monosaccharide measured in the same sample.
#
# Rows are paired through 'sample_id' rather than through row position, so the
# result stays correct even if the linkage and monosaccharide tables are not in
# the same row order. A linkage sample with no monosaccharide row ends up NaN.
#
# NOTE: `linkage` is modified in place, so running this cell twice scales the
# values twice; reload the data before re-running.

# (parent monosaccharide column in `mono`, first linkage column position,
#  end column position [exclusive])
LINKAGE_BLOCKS = [
    ('Mono_Glucose', 1, 10),
    ('Mono_Galactose', 10, 17),
    ('Mono_Xylose', 17, 23),
    ('Mono_Arabinose', 23, 29),
    ('Mono_Fucose', 29, 30),
    ('Mono_Rhamnose', 30, 34),
    # Column positions 34-35 are not scaled by any entry in this table.
    # The end position 43 is kept as originally written; any columns at
    # positions 41-42 are discarded by the later `linkage.iloc[:, 0:41]` step.
    ('Mono_Mannose', 36, 43),
]

# A duplicated sample_id would make the per-sample multiplier ambiguous.
if not mono['sample_id'].is_unique:
    raise ValueError(
        "mono has duplicate 'sample_id' values; cannot pair linkage rows "
        "with monosaccharide amounts"
    )

mono_by_sample = mono.set_index('sample_id')

for mono_col, start, stop in LINKAGE_BLOCKS:
    # Per-row multiplier carrying linkage's own index, so .mul(axis=0) lines it
    # up with the linkage rows one for one.
    factor = linkage['sample_id'].map(mono_by_sample[mono_col])
    linkage.iloc[:, start:stop] = linkage.iloc[:, start:stop].mul(factor, axis=0)

In [16]:
# Disabled step: the two Glucosamine/GlcNac columns are not dropped (they still
# appear in the table displayed further down). Uncomment to remove them.
#linkage = linkage.drop(columns=['4-Glucosamine/GlcNac', '3-Glucosamine/GlcNac']) 

In [17]:
# Keep only column positions 0-40: 'sample_id' followed by the first 40
# linkage columns; anything to the right is discarded.
linkage = linkage.iloc[:, :41]

In [18]:
# Display the trimmed linkage table as a visual check.
linkage

Unnamed: 0,sample_id,t-Glucose,4-Glucose,6-Glucose,3-Glucose/3-Galactose,2-Glucose,"4,6-Glucose","3,4-Glucose","2,4-Glucose","3,4,6-Glucose",...,4-Rhamnose,2-Rhamnose,"2,4-Rhamnose",4-Glucosamine/GlcNac,3-Glucosamine/GlcNac,t-Mannose,4-Mannose,3-Mannose,2-Mannose,"4,6-Mannose"
0,19,0.000045,0.003310,0.000003,2.375596e-05,0.000003,0.000171,0.000017,0.000011,1.400641e-06,...,0.000000e+00,1.914719e-05,1.443669e-05,0.000004,0.0,0.000020,0.000358,2.476561e-06,2.224981e-07,0.000014
1,21,0.000049,0.001185,0.000002,3.497170e-06,0.000000,0.000055,0.000016,0.000003,6.703678e-07,...,4.493798e-07,1.364510e-05,1.464837e-05,0.000001,0.0,0.000019,0.000268,0.000000e+00,1.613594e-07,0.000013
2,34,0.000079,0.002567,0.000004,2.247522e-07,0.000000,0.000108,0.000006,0.000011,9.551982e-07,...,0.000000e+00,1.164307e-05,8.189895e-06,0.000031,0.0,0.000051,0.000630,1.463146e-06,7.682741e-07,0.000015
3,36,0.000053,0.003113,0.000002,7.296980e-06,0.000000,0.000138,0.000007,0.000012,1.246211e-06,...,3.724874e-07,8.690812e-06,6.833322e-06,0.000024,0.0,0.000053,0.000750,2.324241e-06,9.810748e-07,0.000016
4,43,0.000023,0.001613,0.000001,0.000000e+00,0.000000,0.000056,0.000002,0.000006,3.679802e-07,...,0.000000e+00,8.769913e-06,5.568012e-06,0.000026,0.0,0.000007,0.000070,7.891790e-07,1.553614e-07,0.000005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,1094,0.921593,7.917593,0.047133,0.000000e+00,0.034619,0.078092,0.270812,0.043432,4.308517e-01,...,0.000000e+00,8.763476e-07,1.088370e-06,0.000013,0.0,0.001965,0.000417,3.663526e-03,2.532435e-05,0.000132
246,1095,2.431694,16.714382,0.171552,0.000000e+00,0.048331,0.092290,0.538481,0.075889,2.974606e-02,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000002,0.0,0.000974,0.000400,2.121528e-03,1.193512e-05,0.000011
247,1096,0.568716,7.526459,0.015832,0.000000e+00,0.019198,0.020770,0.371750,0.023432,4.848299e-03,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000051,0.0,0.003180,0.000361,8.490822e-03,1.179001e-04,0.001383
248,1097,0.115039,1.027135,0.012543,0.000000e+00,0.005326,0.003054,0.025932,0.004304,5.446661e-04,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000013,0.0,0.003280,0.000950,4.756223e-03,7.002019e-05,0.000244
