In [1]:
from modelseedpy.biochem import from_local

msdb = from_local("../../ModelSEEDDatabase")



modelseedpy 0.3.3


Matplotlib is building the font cache; this may take a moment.


In [3]:
from pandas import read_csv, read_excel
from os import path
from glob import glob

descriptions = {
    "20241113_Alice_58_20241111.readFraction.xls": "unannotated reads",
    "24h DNA OD and read fraction.xlsx": "DNA concentration (proxy for biomass), OD of community at 3rd passage, and read fractions at 24 hours",
    "72h DNA OD and read fraction.xlsx": "DNA concentration (proxy for biomass), OD of community at 3rd passage, and read fractions at 72 hours",
    "Inoculum.xlsx": "inocula read fractions",
    "List of Carbohydrates.xlsx": "list of fibers and the percent dissolved\suspended of each fiber",
    "Mega hCom2 Sequencing read fraction.xls": "repeat of reads",
    # ""
}

media_mapping = {
    "cpd00205": "potassium phosphate buffer",   # potassium
    "cpd00009": "potassium phosphate buffer",   # Phosphate
	"cpd00971": "NaCl", 
	"cpd00013": "1.125 g (NH4)2SO4",
	"cpd00084": "0.5 g L-cysteine",
	"cpd00119": "1.0 mL of histidine-hematin",
	"cpd23054": "1.0 mL of histidine-hematin",   # hematin
	"cpd00254": "9.5 mg MgCl2",   # Mg+2
	"cpd00099": "9.5 mg MgCl2",   # Cl
	"cpd10515": "1 mL of 0.4 mg/mL FeSO4•7H2O",   # Fe+2
	"cpd00048": "1 mL of 0.4 mg/mL FeSO4•7H2O",   # SO4
	"cpd01401": "1 mL of 1 mg/mL Vitamin K3 (40 mg Vitamin K3 in 40 mL ethanol)",   # vitamin K1
	"cpd00363": "1 mL of 1 mg/mL Vitamin K3 (40 mg Vitamin K3 in 40 mL ethanol)",
	"cpd05098": "1 mL of 0.8% w/v CaCl2",
	# "10 mL Vitamin Supplement and Trace Mineral Supplement (ATCC)": "",
}

media_masses = {
    "cpd00205": ( # potassium
        0.1*1 * 39.1/(39.1+96) / 39.1),   # 100 mL of 1 M, pH = 7.2, potassium phosphate buffer
    "cpd00009": ( # phosphate
        0.1*1 * 96/(39.1+96) / 96),    # 100 mL of 1 M, pH = 7.2, potassium phosphate buffer
	"cpd00971": ( # sodium
        0.875 * 23/(23+35) / 23),      # 0.875 g NaCl
	"cpd00013": ( # ammonia
        1.125 * 18/(18+96) / 18),      # 1.125 g (NH4)2SO4
	"cpd00084": ( # cysteine
        0.500/121),    # 0.5 g L-cysteine
	# "cpd00119": "1.0 mL of histidine-hematin",  # ml, histidine
	# "cpd23054": "1.0 mL of histidine-hematin",   # ml, hematin
	"cpd00254": ( # Mg
        .0095 * 24/(24+2*35) / 24),    # 9.5 mg MgCl2
	"cpd00099": ( # Cl
        (0.875 * 35/(23+35))           # 0.875 g NaCl
        + (.0095 * 2*35/(24+2*35))     # 9.5 mg MgCl2
        + 1*0.8/1000 * 2*35/(40+2*35)  # 1 mL of 0.8% w/v CaCl2
        ) / 35,
	"cpd10515": ( # Fe +2
        (1*0.4/1000 * 55.8/(55.8+96)) / 55.8),  # 1 mL of 0.4 mg/mL FeSO4•7H2O
	"cpd00048": ( # SO4
        (1*0.4/1000 * 96/(55.8+96)          # 1 mL of 0.4 mg/mL FeSO4•7H2O
         + 1.125 * 96/(18+96)) / 96),       # 1.125 g (NH4)2SO4
	"cpd01401": ( # vitamin K3
        1*1 / 172.183),  # 1 mL of 1 mg/mL Vitamin K3 (40 mg Vitamin K3 in 40 mL ethanol)
	"cpd00363": ( # ethanol
        1*0.78945/1000 / 46.1),  # 1 mL of 1 mg/mL Vitamin K3 (40 mg Vitamin K3 in 40 mL ethanol)
	"cpd00063": ( # Ca
        1*0.8/1000 * 40/(40+2*35) / 40)  # 1 mL of 0.8% w/v CaCl2
	# "10 mL Vitamin Supplement and Trace Mineral Supplement (ATCC)": "",
}

display(media_masses)

display(media_mapping)
mediaCPDs = {v:k for k,v in media_mapping.items()}
display(mediaCPDs)

csvs = {}
for xls in glob("/Users/andrewfreiburger/Documents/MicrobiomeNotebooks/H100/Files for Network Modeling/*.xls"):
    # print(path.basename(xls))
    csvs[xls] = read_excel(xls)
for xlsx in glob("/Users/andrewfreiburger/Documents/MicrobiomeNotebooks/H100/Files for Network Modeling/*.xlsx"):
    # print(path.basename(xlsx))
    excel = read_excel(xlsx)
    # create the mapping
    if "Carbohydrates" in xlsx:  carbohydrates = dict(zip(
        map(int, excel["Designated Number"].to_numpy()), excel["Name of Carbohydrate"].to_numpy()))
    # remove the members that Alice specified are erroneously added to the community
    elif "inoculum" in xlsx:
        excel.set_index("Strain_Name", inplace=True)
        excel.drop(["Clostridium-sporogenes-ATCC-15579-MAF-2",
                    "Ruminococcus-albus-strain-8-MAF-2", "Ruminococcus-flavefaciens-FD-1"], axis=0)
    csvs[xlsx] = excel

for csv, xlsx in csvs.items():
    print(csv)
    display(xlsx)

{'cpd00205': 0.0007401924500370097,
 'cpd00009': 0.0007401924500370097,
 'cpd00971': 0.015086206896551725,
 'cpd00013': 0.009868421052631578,
 'cpd00084': 0.004132231404958678,
 'cpd00254': 0.00010106382978723403,
 'cpd00099': 0.015302880010671647,
 'cpd10515': 2.635046113306983e-06,
 'cpd00048': 0.009871056098744885,
 'cpd01401': 0.0058077742866601235,
 'cpd00363': 1.712472885032538e-05,
 'cpd00063': 7.272727272727273e-06}

{'cpd00205': 'potassium phosphate buffer',
 'cpd00009': 'potassium phosphate buffer',
 'cpd00971': 'NaCl',
 'cpd00013': '1.125 g (NH4)2SO4',
 'cpd00084': '0.5 g L-cysteine',
 'cpd00119': '1.0 mL of histidine-hematin',
 'cpd23054': '1.0 mL of histidine-hematin',
 'cpd00254': '9.5 mg MgCl2',
 'cpd00099': '9.5 mg MgCl2',
 'cpd10515': '1 mL of 0.4 mg/mL FeSO4•7H2O',
 'cpd00048': '1 mL of 0.4 mg/mL FeSO4•7H2O',
 'cpd01401': '1 mL of 1 mg/mL Vitamin K3 (40 mg Vitamin K3 in 40 mL ethanol)',
 'cpd00363': '1 mL of 1 mg/mL Vitamin K3 (40 mg Vitamin K3 in 40 mL ethanol)',
 'cpd05098': '1 mL of 0.8% w/v CaCl2'}

{'potassium phosphate buffer': 'cpd00009',
 'NaCl': 'cpd00971',
 '1.125 g (NH4)2SO4': 'cpd00013',
 '0.5 g L-cysteine': 'cpd00084',
 '1.0 mL of histidine-hematin': 'cpd23054',
 '9.5 mg MgCl2': 'cpd00099',
 '1 mL of 0.4 mg/mL FeSO4•7H2O': 'cpd00048',
 '1 mL of 1 mg/mL Vitamin K3 (40 mg Vitamin K3 in 40 mL ethanol)': 'cpd00363',
 '1 mL of 0.8% w/v CaCl2': 'cpd05098'}

ImportError: Missing optional dependency 'xlrd'. Install xlrd >= 2.0.1 for xls Excel support Use pip or conda to install xlrd.

In [22]:
from json import dump

with open("carbohydrate_numbering.json", 'w') as jsonOut:
    dump(carbohydrates, jsonOut, indent=3)

display(carbohydrates)

{1: 'Fucoidan from U. pinnatifida\xa0',
 2: 'Fucoidan from F. versiculosis\xa0',
 3: 'Fucoidan from F. serrestus\xa0',
 4: 'Maltodextrin\xa0',
 5: 'Xylan from Corn Cob\xa0',
 6: 'Arabinogalactan from Larch Wood\xa0',
 7: 'Inulin\xa0',
 8: 'Danisco Pectin\xa0',
 9: 'Amylopectin from Maize\xa0',
 10: 'Acacia Fiber\xa0',
 11: 'α-D-Glucose\xa0',
 12: 'L-Fucose\xa0',
 13: 'Minimal Media Only (Blank)\xa0',
 14: 'Guar Gum\xa0',
 15: 'Xanthan Gum from Xanthonomas campestris',
 16: 'Mucin from porcine stomach\xa0',
 17: 'Arabinan from Sugar Beet\xa0',
 18: 'Wheat Arabinoxylan\xa0',
 19: 'Rye Arabinoxylan\xa0',
 20: 'β-Glucan from Barley\xa0',
 21: '1,4-β-D-Mannan\xa0',
 22: 'Rhamnogalacturonan from soy bean pectin fiber',
 23: 'Xyloglucan from tamarind seed',
 24: 'Yeast β-Glucan',
 25: 'Gum Arabic',
 26: 'kappa-Carrageenan',
 27: 'Galactomannan from Carob',
 28: 'Galactan from potato'}