In [1]:
from modelseedpy.biochem import from_local

msdb = from_local("../../ModelSEEDDatabase")



modelseedpy 0.3.3


Matplotlib is building the font cache; this may take a moment.


## vitamin mix

In [2]:
# "10 mL Vitamin Supplement and Trace Mineral Supplement (ATCC)"

"""Folic acid 2.0 mg/liter
Pyridoxine hydrochloride 10.0 mg/liter
Riboflavin 5.0 mg/liter
Biotin 2.0 mg/liter
Thiamine 5.0 mg/liter
Nicotinic acid 5.0 mg/liter
Calcium Pantothenate 5.0 mg/liter
Vitamin B12 0.1 mg/liter
p-Aminobenzoic acid 5.0 mg/liter
Thioctic acid 5.0 mg/liter
Monopotassium phosphate 900.0 mg/liter"""

# all moles/L
vitamins = {
    "cpd00393": 2/1000 / 439,   # folate
    "cpd00263": 10/1000 / 169,  # Pyridoxine
    "cpd00220": 5/1000 / 376,   # Riboflavin
    "cpd00104": 2/1000 / 243,   # Biotin
    "cpd00305": 5/1000 / 265,   # Thiamine
    "cpd00218": 5/1000 / 122,   # Niacin
    "cpd00644": 10/1000 / 218,  # Pantothenate
    "cpd00166": 0.1/1000 / 1579, # B12
    "cpd00443": 5/1000 / 136,   # Aminobenzoate
    "cpd00541": 5/1000 / 205,   # Lipoate
    "cpd00205": 0.9/(39+96) * 1/1,    # Monopotassium phosphate
    "cpd00009": 0.9/(39+96) * 1/1,    # Monopotassium phosphate
}

## mineral mix

In [3]:
# "10 mL Vitamin Supplement and Trace Mineral Supplement (ATCC)"
"""EDTA,  0.5 g/liter
MgSO4 . 7H2O,  3.0 g/liter
MnSO4 . H2O, 0.5 g/liter
NaCl, 1.0 g/liter
FeSO4 . 7H2O, 0.1 g/liter
Co(NO3)2 . 6H2O, 0.1 g/liter
CaCl2 (anhydrous), 0.1 g/liter
ZnSO4 . 7H2O, 0.1 g/liter
CuSO4 . 5H2O, 0.010 g/liter
AlK(SO4)2 (anhydrous), 0.010 g/liter"""

## NOTE:  skipping these
"""H3BO3, 0.010 g/liter
Na2MoO4 . 2H2O, 0.010 g/liter
Na2SeO3 (anhydrous), 0.001 g/liter
Na2WO4 . 2H2O, 0.010 g/liter
NiCl2 . 6H2O, 0.020 g/liter"""

minerals = {
    "cpd00240": 0.5/292.244,   # EDTA
    "cpd00254": 3/(24+96) * 1/1,    # Mg
    "cpd00030": 0.5/(55+96) * 1/1,  # Mn
    "cpd00048": ((3/(24+96) * 1/1) # MgSO4
                 + (0.5/(55+96) * 1/1) # MnSO4
                 + (.1/(56+96) * 1/1) # FeSO4
                 + (.1/(65+96) * 1/1) # ZnSO4
                 + (.01/(64+96) * 1/1) # CuSO4
                 + (.01/(27+39+96*2) * 2/1)  # AlK(SO4)2
                 ),
    "cpd00971": 1/(23+35) * 1/1,   # Na
    "cpd00099": (1/(23+35) * 1/1
                 + 1/(40+35*2) * 2/1),   # Cl
    "cpd10515": (.1/(56+96)) * 1/1, # Fe +2
    "cpd00149": (.1/(59+62*2)) * 1/1,  # Co +2
    "cpd00209": (.1/(59+62*2)) * 2/1,  # (NO3)2
    "cpd00063": .1/(40+35*2) * 1/1,  # Ca +2
    "cpd00034": .1/(65+96) * 1/1,  # Zn +2
    "cpd00058": .01/(64+96) * 1/1,  # Cu +2
    "cpd24344": .01/(27+39+96*2) * 1/1,  # Al +3
    "cpd00205": .01/(27+39+96*2) * 1/1,   # K +
}

In [18]:
from pandas import read_csv, read_excel
from os import path
from glob import glob

descriptions = {
    "20241113_Alice_58_20241111.readFraction.xls": "unannotated reads",
    "24h DNA OD and read fraction.xlsx": "DNA concentration (proxy for biomass), OD of community at 3rd passage, and read fractions at 24 hours",
    "72h DNA OD and read fraction.xlsx": "DNA concentration (proxy for biomass), OD of community at 3rd passage, and read fractions at 72 hours",
    "Inoculum.xlsx": "inocula read fractions",
    "List of Carbohydrates.xlsx": "list of fibers and the percent dissolved\suspended of each fiber",
    "Mega hCom2 Sequencing read fraction.xls": "repeat of reads",
    # ""
}

media_mapping = {
    "cpd00205": "potassium phosphate buffer",   # potassium
    "cpd00009": "potassium phosphate buffer",   # Phosphate
	"cpd00971": "NaCl", 
	"cpd00013": "1.125 g (NH4)2SO4",
	"cpd00084": "0.5 g L-cysteine",
	"cpd00119": "1.0 mL of histidine-hematin",
	"cpd23054": "1.0 mL of histidine-hematin",   # hematin
	"cpd00254": "9.5 mg MgCl2",   # Mg+2
	"cpd00099": "9.5 mg MgCl2",   # Cl
	"cpd10515": "1 mL of 0.4 mg/mL FeSO4•7H2O",   # Fe+2
	"cpd00048": "1 mL of 0.4 mg/mL FeSO4•7H2O",   # SO4
	"cpd01401": "1 mL of 1 mg/mL Vitamin K3 (40 mg Vitamin K3 in 40 mL ethanol)",   # vitamin K1
	"cpd00363": "1 mL of 1 mg/mL Vitamin K3 (40 mg Vitamin K3 in 40 mL ethanol)",
	"cpd05098": "1 mL of 0.8% w/v CaCl2",
}


# all in moles
media_masses = {
    "cpd00205": ( # potassium
                0.1*1 * 1/1),   # 100 mL of 1 M, pH = 7.2, potassium phosphate buffer
    "cpd00009": ( # phosphate
                0.1*1 * 1/1),    # 100 mL of 1 M, pH = 7.2, potassium phosphate buffer
	"cpd00971": ( # sodium
                0.875/(23+35) * 1/1),      # 0.875 g NaCl
	"cpd00013": ( # ammonia
                1.125/(18+96) * 1/1),      # 1.125 g (NH4)2SO4
	"cpd00084": ( # cysteine
                0.500/121),    # 0.5 g L-cysteine
                # "cpd00119": "1.0 mL of histidine-hematin",  # ml, histidine
                # "cpd23054": "1.0 mL of histidine-hematin",   # ml, hematin
	"cpd00254": ( # Mg
                .0095/(24+2*35) * 1/1),    # 9.5 mg MgCl2
	"cpd00099": ( # Cl
                (0.875/(23+35) * 1/1)         # 0.875 g NaCl
                + (.0095/(24+2*35) * 2/1)     # 9.5 mg MgCl2
                + 1*0.8/1000/(40+2*35) * 2/1  # 1 mL of 0.8% w/v CaCl2
        ),
	"cpd10515": ( # Fe +2
                (1*0.4/1000/(55.8+96)) * 1/1),  # 1 mL of 0.4 mg/mL FeSO4•7H2O
	"cpd00048": ( # SO4
                (1*0.4/1000/(55.8+96) * 1/1         # 1 mL of 0.4 mg/mL FeSO4•7H2O
                + 1.125/(18+96)) * 1/1),       # 1.125 g (NH4)2SO4
	"cpd01401": ( # vitamin K3
                1*1 / 172.183),  # 1 mL of 1 mg/mL Vitamin K3 (40 mg Vitamin K3 in 40 mL ethanol)
	"cpd00363": ( # ethanol
                1*0.78945/1000 / 46.1),  # 1 mL of 1 mg/mL Vitamin K3 (40 mg Vitamin K3 in 40 mL ethanol)
	"cpd00063": ( # Ca
                1*0.8/1000/(40+2*35) * 1/1)  # 1 mL of 0.8% w/v CaCl2
	        # "10 mL Vitamin Supplement and Trace Mineral Supplement (ATCC)": "",
}

# display(media_masses)

mediaCPDs = {v:k for k,v in media_mapping.items()}
# display(mediaCPDs)


# combine all of the concentrations
for cpd, conc in vitamins.items():
    if cpd in media_masses:
        media_masses[cpd] += conc
    else:
        media_masses[cpd] = conc

for cpd, conc in minerals.items():
    if cpd in media_masses:
        media_masses[cpd] += conc
    else:
        media_masses[cpd] = conc

# convert to mmol/L and then sort the dictionary by substrate concentration
media_masses = {k:1000*v for k,v in media_masses.items()}
media = dict(sorted(media_masses.items(), key=lambda item: item[1], reverse=True))

display(media)

from json import dump
with open("media_conc.json", 'w') as jsonOut:
    dump(media, jsonOut, indent=3)

  "List of Carbohydrates.xlsx": "list of fibers and the percent dissolved\suspended of each fiber",


{'cpd00205': 106.70542635658916,
 'cpd00009': 106.66666666666667,
 'cpd00099': 50.726077502834656,
 'cpd00048': 39.601346506000006,
 'cpd00971': 32.327586206896555,
 'cpd00254': 25.101063829787236,
 'cpd00013': 9.868421052631579,
 'cpd01401': 5.807774286660123,
 'cpd00084': 4.132231404958678,
 'cpd00030': 3.3112582781456954,
 'cpd00240': 1.710899111701181,
 'cpd00209': 1.092896174863388,
 'cpd00063': 0.9163636363636364,
 'cpd10515': 0.6605297829554122,
 'cpd00034': 0.6211180124223603,
 'cpd00149': 0.546448087431694,
 'cpd00058': 0.0625,
 'cpd00263': 0.05917159763313609,
 'cpd00644': 0.045871559633027525,
 'cpd00218': 0.040983606557377046,
 'cpd24344': 0.03875968992248062,
 'cpd00443': 0.03676470588235294,
 'cpd00541': 0.024390243902439025,
 'cpd00305': 0.01886792452830189,
 'cpd00363': 0.01712472885032538,
 'cpd00220': 0.013297872340425532,
 'cpd00104': 0.00823045267489712,
 'cpd00393': 0.004555808656036447,
 'cpd00166': 6.333122229259024e-05}

In [55]:
csvs = {}
# for xls in glob("/Users/andrewfreiburger/Documents/MicrobiomeNotebooks/H100/Files for Network Modeling/*.xls"):
#     print(xls)
#     # print(path.basename(xls))
#     csvs[xls] = read_excel(xls)
for xlsx in glob("Files for Network Modeling/*.xlsx"):
    # print(path.basename(xlsx))
    excel = read_excel(xlsx)
    print(xlsx)
    # display(excel)
    # create the mapping
    if "Carbohydrates" in xlsx:
        carbohydrates = dict(zip(map(int, excel["Designated Number"].to_numpy()), excel["Name of Carbohydrate"].to_numpy()))
        excel = excel.set_index("Designated Number")
    # remove the members that Alice specified are erroneously added to the community
    elif "inoculum" in xlsx:
        excel.set_index("Strain_Name", inplace=True)
        excel.drop(["Clostridium-sporogenes-ATCC-15579-MAF-2",
                    "Ruminococcus-albus-strain-8-MAF-2", "Ruminococcus-flavefaciens-FD-1"], axis=0)
    elif "MEGA" in xlsx:
        excel.set_index("Strain_Name", inplace=True)    
    csvs[xlsx] = excel

# for csv, xlsx in csvs.items():
#     print(csv)
#     display(xlsx)

from json import dump

with open("carbohydrate_numbering.json", 'w') as jsonOut:
    dump(carbohydrates, jsonOut, indent=3)

display(carbohydrates)

Files for Network Modeling/List of Carbohydrates.xlsx
Files for Network Modeling/MEGA hCom2 Sequencing Analysis.readFraction.xlsx
Files for Network Modeling/inoculum.xlsx
Files for Network Modeling/72h DNA OD and read fraction.xlsx
Files for Network Modeling/24h DNA OD and read fraction.xlsx


{1: 'Fucoidan from U. pinnatifida\xa0',
 2: 'Fucoidan from F. versiculosis\xa0',
 3: 'Fucoidan from F. serrestus\xa0',
 4: 'Maltodextrin\xa0',
 5: 'Xylan from Corn Cob\xa0',
 6: 'Arabinogalactan from Larch Wood\xa0',
 7: 'Inulin\xa0',
 8: 'Danisco Pectin\xa0',
 9: 'Amylopectin from Maize\xa0',
 10: 'Acacia Fiber\xa0',
 11: 'α-D-Glucose\xa0',
 12: 'L-Fucose\xa0',
 13: 'Minimal Media Only (Blank)\xa0',
 14: 'Guar Gum\xa0',
 15: 'Xanthan Gum from Xanthonomas campestris',
 16: 'Mucin from porcine stomach\xa0',
 17: 'Arabinan from Sugar Beet\xa0',
 18: 'Wheat Arabinoxylan\xa0',
 19: 'Rye Arabinoxylan\xa0',
 20: 'β-Glucan from Barley\xa0',
 21: '1,4-β-D-Mannan\xa0',
 22: 'Rhamnogalacturonan from soy bean pectin fiber',
 23: 'Xyloglucan from tamarind seed',
 24: 'Yeast β-Glucan',
 25: 'Gum Arabic',
 26: 'kappa-Carrageenan',
 27: 'Galactomannan from Carob',
 28: 'Galactan from potato'}

# examining each CSV

In [56]:
for name, csv in csvs.items():
    print(name)
    display(csv)

Files for Network Modeling/List of Carbohydrates.xlsx


Unnamed: 0_level_0,Name of Carbohydrate,Concentration (w/v),Sterile-Filtered or Autoclaved?,Special Preparation Instructions?,Source
Designated Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,Fucoidan from U. pinnatifida,0.001,Sterile-Filtered,No,
2,Fucoidan from F. versiculosis,0.001,Sterile-Filtered,No,
3,Fucoidan from F. serrestus,0.0025,Sterile-Filtered,No,
4,Maltodextrin,0.005,Sterile-Filtered,No,
5,Xylan from Corn Cob,0.005,Sterile-Filtered,No,
6,Arabinogalactan from Larch Wood,0.05,Sterile-Filtered,No,
7,Inulin,0.005,Sterile-Filtered,No,
8,Danisco Pectin,0.0025,Sterile-Filtered,No,
9,Amylopectin from Maize,0.005,Autoclaved,No,
10,Acacia Fiber,0.005,Sterile-Filtered,No,


Files for Network Modeling/MEGA hCom2 Sequencing Analysis.readFraction.xlsx


Unnamed: 0_level_0,24_hr_Carb_1,72_hr_Carb_1,24_hr_Carb_2,72_hr_Carb_2,24_hr_Carb_3,72_hr_Carb_3,24_hr_Carb_4,72_hr_Carb_4,24_hr_Carb_5,72_hr_Carb_5,...,24_hr_Carb_26,72_hr_Carb_26,24_hr_Carb_27,72_hr_Carb_27,24_hr_Carb_28,72_hr_Carb_28,Bacteroides_Dropout_1,Bacteroides_Dropout_2,hc2_1,hc2_2
Strain_Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Acidaminococcus-fermentans-DSM-20731-MAF-2,0.000057,0.000231,0.000113,0.016471,0.000471,0.068571,0.045419,0.365372,1.067818,0.944336,...,0.000322,0.000257,0.018154,0.017756,0.355293,0.162235,0.128926,0.144458,0.087517,0.100476
Acidaminococcus-sp-D21-MAF-2,0.001532,0.007518,0.000211,0.002278,0.000022,0.000642,0.000090,0.255510,0.002872,0.000054,...,0.001356,0.310997,0.000006,0.001218,0.002570,0.000315,0.211693,0.234437,0.160008,0.162989
Adlercreutzia-equolifaciens-DSM-19450,0.000029,0.000000,0.000027,0.000007,0.000000,0.000000,0.000000,0.000000,0.000455,0.000000,...,0.000124,0.000000,0.000000,0.000000,0.000000,0.000000,0.010371,0.010393,0.008719,0.008774
Akkermansia-muciniphila-ATCC-BAA-835-MAF-2,0.001233,0.000653,0.000983,0.000280,0.000038,0.000000,0.000011,0.000000,0.000028,0.000000,...,0.004579,0.000854,0.000028,0.000025,0.000006,0.000026,0.613454,0.652181,0.425446,0.463071
Alistipes-finegoldii-DSM-17242,0.018550,0.006043,0.006407,0.007324,0.067923,0.133219,0.005968,0.355033,0.004493,0.052810,...,0.034676,0.002127,0.004869,0.084994,0.001809,0.001494,2.869070,2.878544,2.113976,2.288952
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Solobacterium-moorei-DSM-22971-MAF-2,0.000565,0.000150,0.000594,0.000385,0.000023,0.000053,0.000011,0.000012,0.000035,0.000000,...,0.002285,0.000293,0.000067,0.000019,0.000026,0.028315,0.117689,0.087630,0.088561,0.078647
Streptococcus-thermophilus-ATCC-19258-MAF-2,0.000007,0.000000,0.000007,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000041,0.000000,0.000000,0.000000,0.000000,0.000000,0.011288,0.009260,0.008430,0.006871
Subdoligranulum-sp-4-3-54A2FAA-MAF-2,0.007145,0.074133,0.010480,0.075323,0.007029,0.008741,0.003242,0.001804,0.003751,0.001221,...,0.014137,0.065810,0.009672,0.014513,0.017815,0.004262,1.074722,1.044013,0.888649,0.969954
Subdoligranulum-variabile-DSM-15176-MAF-2,0.000907,0.000112,0.000420,0.001632,0.000000,0.000058,0.000067,0.000000,0.030797,0.094215,...,0.002135,0.000162,0.000026,0.000000,0.003002,0.002603,0.387500,0.436437,0.253009,0.252283


Files for Network Modeling/inoculum.xlsx


Unnamed: 0_level_0,hc2_1,hc2_2,Unnamed: 3
Strain_Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Acidaminococcus-fermentans-DSM-20731-MAF-2,0.087517,0.100476,
Acidaminococcus-sp-D21-MAF-2,0.160008,0.162989,
Adlercreutzia-equolifaciens-DSM-19450,0.008719,0.008774,
Akkermansia-muciniphila-ATCC-BAA-835-MAF-2,0.425446,0.463071,
Alistipes-finegoldii-DSM-17242,2.113976,2.288952,
...,...,...,...
Solobacterium-moorei-DSM-22971-MAF-2,0.088561,0.078647,
Streptococcus-thermophilus-ATCC-19258-MAF-2,0.008430,0.006871,
Subdoligranulum-sp-4-3-54A2FAA-MAF-2,0.888649,0.969954,
Subdoligranulum-variabile-DSM-15176-MAF-2,0.253009,0.252283,


Files for Network Modeling/72h DNA OD and read fraction.xlsx


Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Name,Fucoidan from U. pinnatifida,Fucoidan from F. versiculosis,Fucoidan from F. serrestus,Maltodextrin,Xylan from Corn Cob,Arabinogalactan from Larch Wood,Inulin,...,Rye Arabinoxylan,B-Glucan from Barley,"1,4-B-D-Mannan",Rhamnogalacturonan from Soy Bean Pectin Fiber,Xyloglucan from Tamarind Seed,Yeast B-Glucan,Gum Arabic,kappa-Carrageenan,Galactomannan from Carob,Galactan from Potato
0,,,OD 3rd passage 72h,0.012,0.017,0.164,0.342667,0.209667,0.670333,0.299333,...,0,0,0.031,0,0,0.042333,0.01,0.016667,0.128667,0.785333
1,,,DNA conc,2.824,2.647,45.75,46.909,74.214,162.549,102.868,...,29.231,10.952,6.911,22.942,13.05,11.22,3.223,2.557,38.889,107.681
2,phyla,number,Strain_Name,72_hr_Carb_1,72_hr_Carb_2,72_hr_Carb_3,72_hr_Carb_4,72_hr_Carb_5,72_hr_Carb_6,72_hr_Carb_7,...,72_hr_Carb_19,72_hr_Carb_20,72_hr_Carb_21,72_hr_Carb_22,72_hr_Carb_23,72_hr_Carb_24,72_hr_Carb_25,72_hr_Carb_26,72_hr_Carb_27,72_hr_Carb_28
3,Firmicutes,3,Acidaminococcus-fermentans-DSM-20731-MAF-2,0.000231,0.016471,0.068571,0.365372,0.944336,0.255191,0.385902,...,0.011253,0.000045,0.0003,0.008934,0,0.02372,0.000054,0.000257,0.017756,0.162235
4,Firmicutes,3,Acidaminococcus-sp-D21-MAF-2,0.007518,0.002278,0.000642,0.25551,0.000054,0.000341,0.104065,...,0.000326,0.000042,0.000514,0.00002,0,0.00009,0.000055,0.310997,0.001218,0.000315
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118,Firmicutes,3,Solobacterium-moorei-DSM-22971-MAF-2,0.00015,0.000385,0.000053,0.000012,0,0,0.000119,...,0.000107,0.000258,0.000622,0.000047,0.000337,0.001007,0.000608,0.000293,0.000019,0.028315
119,Firmicutes,3,Streptococcus-thermophilus-ATCC-19258-MAF-2,0,0,0,0,0,0,0,...,0,0.000006,0,0,0,0,0,0,0,0
120,Firmicutes,3,Subdoligranulum-sp-4-3-54A2FAA-MAF-2,0.074133,0.075323,0.008741,0.001804,0.001221,0.020037,0.000696,...,0.004038,0.020213,0.115925,0.017415,0.03367,0.010152,0.014959,0.06581,0.014513,0.004262
121,Firmicutes,3,Subdoligranulum-variabile-DSM-15176-MAF-2,0.000112,0.001632,0.000058,0,0.094215,0.000194,0,...,0,0.000145,0.000102,0,0.000013,0.000006,0.000142,0.000162,0,0.002603


Files for Network Modeling/24h DNA OD and read fraction.xlsx


Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Fucoidan from U. pinnatifida,Fucoidan from F. versiculosis,Fucoidan from F. serrestus,Maltodextrin,Xylan from Corn Cob,Arabinogalactan from Larch Wood,Inulin,...,Rye Arabinoxylan,B-Glucan from Barley,"1,4-B-D-Mannan",Rhamnogalacturonan from Soy Bean Pectin Fiber,Xyloglucan from Tamarind Seed,Yeast B-Glucan,Gum Arabic,kappa-Carrageenan,Galactomannan from Carob,Galactan from Potato
0,,,OD,0.091,0.359,0.653,0.479,0.684,0.230667,0.332333,...,0.104333,0.118,0.158667,0.125333,0.114667,0.084333,0.082667,0.238333,1.041333,0.259333
1,,,DNA ng/ml,2.43,1.449,34.439,41.764,50.669,115.073,15.711,...,23.198,6.052,8.279,22.102,19.931,7.71,1.434,2.382,40.987,120.773
2,phyla,number,Strain_Name,24_hr_Carb_1,24_hr_Carb_2,24_hr_Carb_3,24_hr_Carb_4,24_hr_Carb_5,24_hr_Carb_6,24_hr_Carb_7,...,24_hr_Carb_19,24_hr_Carb_20,24_hr_Carb_21,24_hr_Carb_22,24_hr_Carb_23,24_hr_Carb_24,24_hr_Carb_25,24_hr_Carb_26,24_hr_Carb_27,24_hr_Carb_28
3,Firmicutes,3,Clostridiales-bacterium-VE202-27-MAF-3,0.417784,0.261981,10.802713,19.663384,22.481514,17.611134,18.884991,...,13.393424,2.131289,0.288067,1.106078,40.373589,20.821651,9.721216,0.149145,0.052299,49.977919
4,Bacteroidetes,4,Bacteroides-sp-D2-MAF-2,14.763016,35.821391,39.610415,14.937385,8.406575,14.39899,0.023825,...,2.233039,33.356726,0.957226,38.108825,3.602364,14.164154,0.032411,1.637694,0.8407,24.244336
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118,Firmicutes,3,Blautia-hydrogenotrophica-DSM-10507-MAF-2,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
119,Firmicutes,3,Clostridium-sp-L2-50-MAF-3,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
120,Firmicutes,3,Clostridium-sporogenes-ATCC-15579-MAF-2,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
121,Firmicutes,3,Ruminococcus-albus-strain-8-MAF-2,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [72]:
def isNumber(string):
    try:
        float(string)
        return True
    except:
        return False

# creating a mapping from fibers to cpdIDs
fiber_IDs = {
    "Fucoidan": "cpd13332",
    "Maltodextrin": "cpd11735",  # amylose
    "Xylan": "cpd11732",
    "Arabinogalactan": "cpd11699",
    "Inulin": "cpd11602",
    "Pectin": "cpd11601",
    "Amylopectin": "cpd00265",
    # Acacia is not captured per se in the ModelSEED database.   It is primarily composed of arabinogalactans, so these results will be assumed from arabinogalactans
    "Glucose": "cpd00027",
    "Fucose": "cpd00750",
    # Guar Gum is not captured, and there are multiple representations for the primary chemical constitute of this fiber
    "Xanthan": "cpd01040",   
    "Mucin": "cpd00984",
    "Arabinan": "cpd12115",
    "Arabinoxylan": "cpd11970",
    # no way to distinguish sources of Arabinoxylan
    "Glucan": "cpd11696",
    "Mannan": "cpd11685",
    "Xyloglucan": "cpd11752",
    # can't distinguish sources of Glucan
    "Gum Arabic": "cpd13337",
    "Carrageenan": "cpd12146",
    "Galactomannan": "cpd00656",
    "Galactan": "cpd12777"
}

carbohydrates_IDs = {k: fiber_IDs[v] for k,v in {
    1: 'Fucoidan',
    2: 'Fucoidan',
    3: 'Fucoidan',
    4: 'Maltodextrin',
    5: 'Xylan',
    6: 'Arabinogalactan',
    7: 'Inulin',
    8: 'Pectin',
    9: 'Amylopectin',
    10: 'Arabinogalactan',
    11: 'Glucose',
    12: 'Fucose',
    13: '',
    # 14: 'Guar Gum',
    15: 'Xanthan',
    16: 'Mucin',
    17: 'Arabinan',
    18: 'Arabinoxylan',
    19: 'Arabinoxylan',
    20: 'Glucan',
    21: 'Mannan',
    22: 'Rhamnogalacturonan',
    23: 'Xyloglucan',
    24: 'Glucan',
    25: 'Gum Arabic',
    26: 'Carrageenan',
    27: 'Galactomannan',
    28: 'Galactan'
    }.items() if v not in ['', "Rhamnogalacturonan"]}
carb_info = {num: {"id": ID, "conc": csvs["Files for Network Modeling/List of Carbohydrates.xlsx"].loc[num]["Concentration (w/v)"]} for num, ID in carbohydrates_IDs.items()}


with open("carb_info.json", 'w') as jsonOut:
    dump(carb_info, jsonOut)

In [25]:
inco_df = csvs["Files for Network Modeling/inoculum.xlsx"].drop("Unnamed: 3", axis=1)
inco_df["ave"] = (inco_df["hc2_1"]+inco_df["hc2_2"])/2
inco_dict = dict(zip(inco_df["ave"].index.to_list(), inco_df["ave"].to_list()))

from json import dump
with open("inoculum_abundances.json", 'w') as jsonOut:
    dump(inco_dict, jsonOut, indent=3)

In [71]:
abundances_dic = abundances_dic = {col.split("_")[-1]: {} for col in csvs["Files for Network Modeling/MEGA hCom2 Sequencing Analysis.readFraction.xlsx"].columns if "Carb" in col}
# firstCol = 0
for mem, abundances in csvs["Files for Network Modeling/MEGA hCom2 Sequencing Analysis.readFraction.xlsx"].iterrows():
    # print(mem)
    for col, val in abundances.items():
        dayNum = col.split("_")[0]
        colNum = col.split("_")[-1]
        # if "24" in col:    firstCol += 1
        if dayNum not in abundances_dic[colNum]:   abundances_dic[colNum][dayNum] = {}
        abundances_dic[colNum][dayNum][mem] = val

from json import dump
with open("fiber_day_abundances.json", 'w') as jsonOut:
    dump(abundances_dic, jsonOut, indent=3)