# Script Create medium
Version GitHub 01

The goal is to create a diet (medium) file tailored to the mouse gut,
using the ssniff chow fed to laboratory mice as the nutrient basis.
 
Required:
- Model catalog (McMurGut 1.1 / MCMG754)
- Skeleton diet file in QIIME 2 `.qza` format (created by you from the chow recipe)
- A TSV table listing all metabolites, with an "id" column (e.g. cpd...) and a "name" column (e.g. H2O)

 
########################################################  
By Torben Kuehnast, torben.kuehnast@gmail.com, 2024


In [None]:
import sys
import os
import pandas as pd
from micom import show_versions
from micom.qiime_formats import load_qiime_medium
from micom.workflows.db_media import check_db_medium
from micom.workflows.db_media import complete_db_medium
from qiime2 import Artifact


In [None]:
## Define variables

# Project keyword
project = 'projectname'

# Project version number (project and version are merged into one prefix below)
version_nr = 'v01'

# Working directory where the data is stored; relative output paths resolve here
working_dir = '/home/'

# Number of CPU threads passed to the MICOM workflows
cpus_used = 10

# Model catalog, like McMurGut, AGORA...
mcmurgut_db = '/home/MCMG754_genus.qza'

# Diet file made from the recipe of the chow, converted to QIIME 2 qza format
ssniff_medium = '/home/mouse_gut_diet.qza'

# Switch to the working directory and print it as confirmation
os.chdir(working_dir)
print(os.getcwd())

# Common prefix "<project>_<version>" used for all output file names
pro_ver = f"{project}_{version_nr}"
print(pro_ver)

In [None]:
# QC: display the configured path to the skeleton medium to verify it is correct
ssniff_medium

In [None]:
# QC: version check via MICOM's show_versions()

show_versions()

In [None]:
# Load the skeleton medium from the QIIME 2 artifact.
# QC: store a copy as a semicolon-separated CSV and display the table.

skeleton = load_qiime_medium(ssniff_medium)
skeleton.to_csv(f'{pro_ver}_skeleton.csv', sep=';', index=True)
skeleton

In [None]:
# Check how many McMurGut models can grow on the given skeleton diet file.
# Zero growing taxa is absolutely possible at this stage.
# The result is stored as a semicolon-separated CSV and displayed.

check = check_db_medium(mcmurgut_db, skeleton, threads=cpus_used)
check.to_csv(f'{pro_ver}_check.csv', sep=';', index=True)
check

In [None]:
# Count how many taxa can and cannot grow on the skeleton medium
# (frequency of each value in the can_grow column)

check.can_grow.value_counts()

In [None]:
# Exchange reactions that are excluded from the medium completion step,
# because the compound may not fit into the environment or is unlikely due to toxicity.
# Each reaction is listed exactly once.
# Other ID conventions (currently disabled): "EX_cpd00007_m" (suffix `_m`)
# or the bare compound ID "cpd00007".
forbidden = [
    "EX_cpd00007_e0",  # oxygen
    "EX_cpd00055_e0",  # formaldehyde
    "EX_cpd00071_e0",  # acetaldehyde
    "EX_cpd00025_e0",  # hydrogen peroxide
    "EX_cpd00239_e0",  # H2S
    "EX_cpd00075_e0",  # nitrite
    "EX_cpd00116_e0",  # methanol
    "EX_cpd00150_e0",  # hydrogen cyanide
]
forbidden

In [None]:
# The models cannot grow on the skeleton medium built only from ssniff ingredients,
# so the medium is completed with MICOM's complete_db_medium
# (McMurGut models + ssniff skeleton as the target).
#
# Compounds behind the excluded exchange reactions:
#   cpd00007  Oxygen (EX_cpd00007_e0)
#   cpd00055  Formaldehyde
#   cpd00071  Acetaldehyde
#   cpd00025  Hydrogen peroxide
#   cpd00239  H2S
#   cpd00116  Methanol
#   cpd00075  Nitrite
#   cpd00150  Hydrogen cyanide
#
# From MICOM (growth): the minimum growth rate the model has to achieve with the
#   (fixed) medium. If a Series, it holds a minimum growth rate for each id/taxon
#   in the model db.
# From MICOM (max_added_import): maximum import flux for each added additional
#   import not included in the growth medium. If positive, the medium is expanded
#   with additional imports in order to fulfill the growth objective.
# See the MICOM GitHub documentation for more details.

manifest, imports = complete_db_medium(
    mcmurgut_db,
    skeleton,
    growth=0.1,
    max_added_import=20,
    weights="mass",
    strict=forbidden,
    threads=cpus_used,
)

# Keep semicolon-separated copies of both result tables
imports.to_csv(f'{pro_ver}_imports.csv', sep=';', index=True) if False else manifest.to_csv(f'{pro_ver}_manifest.csv', sep=';', index=True)
imports.to_csv(f'{pro_ver}_imports.csv', sep=';', index=True)

In [None]:
# QC of the manifest returned by the medium completion
# Was flux added for each taxon?

manifest

In [None]:
# QC: how many taxa can grow now, after medium completion?
# (frequency of each value in the manifest's can_grow column)

manifest.can_grow.value_counts()

In [None]:
# QC: look at the import fluxes returned by the medium completion

imports

In [None]:
# Oxygen check, hopefully zero (if you are simulating an oxygen-free large intestine).
# The column is looked up defensively: indexing `imports` directly raises a KeyError
# when the reaction is not a column of the table.
oxygen_reaction = "EX_cpd00007_m"
if oxygen_reaction in imports.columns:
    oxygen_check = imports[oxygen_reaction].describe()
else:
    oxygen_check = f"{oxygen_reaction} is not a column of imports"
oxygen_check


In [None]:
# Formaldehyde check.
# The column is looked up defensively: indexing `imports` directly raises a KeyError
# when the reaction is not a column of the table.
formaldehyde_reaction = "EX_cpd00055_m"
if formaldehyde_reaction in imports.columns:
    formaldehyde_check = imports[formaldehyde_reaction].describe()
else:
    formaldehyde_check = f"{formaldehyde_reaction} is not a column of imports"
formaldehyde_check


In [None]:
# QC: summary statistics of the manifest's `added` column
manifest.added.describe()

In [None]:
# QC: which taxa did not grow?
# Selects the manifest rows where can_grow is False.

manifest[~manifest.can_grow]

In [None]:
# Transform the flux into a diet file

# Column-wise maximum of the import table: one flux value per exchange reaction
fluxes = imports.max()

# Load the TSV file with the metabolite descriptions
metabolite_info = pd.read_csv("/home/all_gapseq_metabolites_756.tsv", sep='\t', usecols=['id', 'name'])

# Replace special characters in the 'name' column with underscores
metabolite_info['name'] = metabolite_info['name'].str.replace("[-,;'']", "_", regex=True)

# Build the id -> name lookup once instead of filtering the table per reaction.
# For duplicated ids the first entry is used.
name_by_id = metabolite_info.drop_duplicates('id').set_index('id')['name'].to_dict()

# Index the skeleton by reaction so existing entries are easy to update
completed = skeleton.set_index('reaction')
known_reactions = set(completed.index)

# Update existing rows in place; collect new rows and append them in one go
# (concatenating inside the loop copies the whole table on every iteration).
added_rows = {}
for reaction, flux in fluxes.items():
    if reaction in known_reactions:
        completed.loc[reaction, 'flux'] = flux  # Update existing flux
        continue
    if reaction in added_rows:
        added_rows[reaction]['flux'] = flux  # Reaction seen before: keep the latest flux
        continue

    # Strip only the leading "EX_" and the trailing "_m", never text in the middle
    metabolite_id = reaction[len("EX_"):] if reaction.startswith("EX_") else reaction
    if metabolite_id.endswith("_m"):
        metabolite_id = metabolite_id[:-len("_m")]

    # The global id carries the "_e0" suffix instead of "_m"
    if reaction.endswith("_m"):
        global_id = reaction[:-len("_m")] + "_e0"
    else:
        global_id = reaction

    added_rows[reaction] = {
        "reaction": reaction,
        "metabolite": metabolite_id,
        "description": name_by_id.get(metabolite_id, ""),  # from the TSV file, else empty
        "flux": flux,
        "global_id": global_id,
    }

if added_rows:
    new_rows = pd.DataFrame(list(added_rows.values())).set_index('reaction')
    completed = pd.concat([completed, new_rows])

# Reset the index to turn 'reaction' back into a column
completed = completed.reset_index()

# Save the results
completed.to_csv(f'{pro_ver}_completed.csv', sep=';', index=False)
print(completed)


In [None]:
# QC: display the per-reaction maximum fluxes (computed as imports.max())
fluxes

In [None]:
# QC: number of rows and columns of the completed medium table
completed.shape

In [None]:

# Import the completed medium as a QIIME 2 artifact of type MicomMedium[Global],
# save it as <project>_<version>_diet.qza and also write a comma-separated CSV copy
arti = Artifact.import_data("MicomMedium[Global]", completed)
arti.save(f'{pro_ver}_diet.qza')
completed.to_csv(f'{pro_ver}_medium.csv', sep=',', index=False)

In [None]:

# Alternative to the Artifact import: if it does not work, run the import manually.
# Prints the shell commands to use. The input path points at the comma-separated
# medium CSV written to the working directory, and the artifact type is quoted so
# the shell does not treat the square brackets as a glob pattern.
medium_csv_path = os.path.join(working_dir, f'{pro_ver}_medium.csv')
diet_qza_path = os.path.join(working_dir, f'{pro_ver}_diet.qza')
print("------")
print(f'cd {working_dir}')
print(f"qiime tools import --type 'MicomMedium[Global]' --input-path {medium_csv_path} --output-path {diet_qza_path}")

In [None]:
# Path of the completed diet artifact inside the working directory.
# os.path.join avoids a doubled slash when working_dir ends with "/".
completed_ssniff = os.path.join(working_dir, f'{pro_ver}_diet.qza')
completed_ssniff

In [None]:
# Reload the completed diet from the saved QIIME 2 artifact,
# store a semicolon-separated CSV copy and display the table
comp_snif_load = load_qiime_medium(completed_ssniff)
comp_snif_load.to_csv(f'{pro_ver}_comp_snif_load.csv', sep=';', index=True)
comp_snif_load

In [None]:


# Re-run the growth check, this time with the reloaded completed medium,
# store the result as a semicolon-separated CSV and display it
re_check = check_db_medium(mcmurgut_db, comp_snif_load, threads=cpus_used)
re_check.to_csv(f'{pro_ver}_re_check.csv', sep=';', index=True)
re_check

In [None]:
# QC: summary statistics of the growth rates from the re-check
re_check.growth_rate.describe()

In [None]:
# QC: how many taxa can and cannot grow in the re-check
re_check.can_grow.value_counts()

In [None]:
# QC: growth-rate summary statistics, split by the can_grow flag
re_check.groupby("can_grow").growth_rate.describe()

# Script Create medium
Finished!