# Merge the diets

Here we will take the individual diets and convert them into a merged medium definition.

In [1]:
import pandas as pd
from micom.qiime_formats import load_qiime_medium

# Ethnic group -> path of the `.qza` file holding that group's diet.
media = {
    "Hadza": "../data/baobab_honey_antelope.qza",
    "Me’Phaa": "../data/guerrero_mountains.qza",
    "Chepang": "../data/himalaya.qza"
}

# Load each medium, tag its rows with the group it belongs to, and stack
# the per-group tables into one long frame.
diets = pd.concat(
    [
        load_qiime_medium(path).assign(ethnic_group=group)
        for group, path in media.items()
    ]
)
diets.head()

Unnamed: 0_level_0,metabolite,flux,name,hmdb,kegg.compound,pubchem.compound,inchi,chebi,reaction,global_id,ethnic_group
reaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
EX_ala_L_m,ala_L,0.053484,L-alanine,HMDB00161,C00041,5950.0,"InChI=1S/C3H7NO2/c1-2(4)3(5)6/h2H,4H2,1H3,(H,5...",,EX_ala_L_m,EX_ala_L(e),Hadza
EX_arg_L_m,arg_L,0.046745,L-argininium(1+),HMDB00517,C00062,6322.0,InChI=1S/C6H14N4O2/c7-4(5(11)12)2-1-3-10-6(8)9...,,EX_arg_L_m,EX_arg_L(e),Hadza
EX_asn_L_m,asn_L,0.077218,L-asparagine,HMDB00168,C00152,6267.0,"InChI=1S/C4H8N2O3/c5-2(4(8)9)1-3(6)7/h2H,1,5H2...",,EX_asn_L_m,EX_asn_L(e),Hadza
EX_asp_L_m,asp_L,0.060173,L-aspartate(1-),HMDB00191,C00049,5960.0,"InChI=1S/C4H7NO4/c5-2(4(8)9)1-3(6)7/h2H,1,5H2,...",,EX_asp_L_m,EX_asp_L(e),Hadza
EX_ca2_m,ca2,0.338817,calcium(2+),HMDB00464,C00076,,InChI=1S/Ca/q+2,,EX_ca2_m,EX_ca2(e),Hadza


Now we will read the sample metadata and merge it with the diets.

In [2]:
# The sample metadata is a tab-separated table.
meta = pd.read_csv("../data/metadata.tsv", sep="\t")

# Join on the shared `ethnic_group` column so that each sample row is
# paired with every row of its group's diet (inner join, the pandas default).
merged = pd.merge(meta, diets, on="ethnic_group", how="inner")
merged.head()

Unnamed: 0,id,ethnic_group,lifestyle_food,country,reference,metabolite,flux,name,hmdb,kegg.compound,pubchem.compound,inchi,chebi,reaction,global_id
0,hadza1,Hadza,Hunter-gatherer,Tanzania,https://doi.org/10.1126/science.aan4834,ala_L,0.053484,L-alanine,HMDB00161,C00041,5950.0,"InChI=1S/C3H7NO2/c1-2(4)3(5)6/h2H,4H2,1H3,(H,5...",,EX_ala_L_m,EX_ala_L(e)
1,hadza1,Hadza,Hunter-gatherer,Tanzania,https://doi.org/10.1126/science.aan4834,arg_L,0.046745,L-argininium(1+),HMDB00517,C00062,6322.0,InChI=1S/C6H14N4O2/c7-4(5(11)12)2-1-3-10-6(8)9...,,EX_arg_L_m,EX_arg_L(e)
2,hadza1,Hadza,Hunter-gatherer,Tanzania,https://doi.org/10.1126/science.aan4834,asn_L,0.077218,L-asparagine,HMDB00168,C00152,6267.0,"InChI=1S/C4H8N2O3/c5-2(4(8)9)1-3(6)7/h2H,1,5H2...",,EX_asn_L_m,EX_asn_L(e)
3,hadza1,Hadza,Hunter-gatherer,Tanzania,https://doi.org/10.1126/science.aan4834,asp_L,0.060173,L-aspartate(1-),HMDB00191,C00049,5960.0,"InChI=1S/C4H7NO4/c5-2(4(8)9)1-3(6)7/h2H,1,5H2,...",,EX_asp_L_m,EX_asp_L(e)
4,hadza1,Hadza,Hunter-gatherer,Tanzania,https://doi.org/10.1126/science.aan4834,ca2,0.338817,calcium(2+),HMDB00464,C00076,,InChI=1S/Ca/q+2,,EX_ca2_m,EX_ca2(e)


Let's check that we merged correctly.

In [3]:
# Sanity check on the size of the merged table: it must be exactly three
# times as long as the stacked diet table.
# NOTE(review): the factor 3 is presumably the number of samples per ethnic
# group in the metadata -- confirm, and update it if samples are added.
expected_rows = 3 * diets.shape[0]
assert merged.shape[0] == expected_rows, (
    f"merged has {merged.shape[0]} rows but {expected_rows} were expected; "
    "check that the `ethnic_group` labels match between the metadata and the diets"
)

Finally we rename and save the merged diet table.

In [4]:
# Expose the sample identifier as `sample_id` in the saved table; rebinding
# the name (rather than mutating in place) keeps the cell safe to re-run.
merged = merged.rename(columns={"id": "sample_id"})
merged.to_csv("../data/per_sample_media.csv", index=False)