# Bayesian MCA (Metabolic Control Analysis)

## Load packages

In [1]:
%load_ext nb_black

import cobra, gzip, re
from edd_utils import login, export_study

from collections import defaultdict
from copy import deepcopy

import numpy as np
import pandas as pd
import sys

sys.path.append("../")
from src.eflux2 import EFlux2

<IPython.core.display.Javascript object>

## 1. Provide Required Input Content

### 1.1. Required content filenames

In [2]:
# Settings dictionary; the cells below fill in file names, solver options and EDD access details
config = {}

<IPython.core.display.Javascript object>

In [3]:
# Model, solver and strain settings, registered in one call (key order is kept).
config.update(
    {
        # Model file (needs to be verified as flipped and non-zero with respect
        # to the reference strain)
        "model_fname": "../models/iJB1325_HP.nonnative_genes.pubchem.flipped.nonzero.reduced.json",
        # Solver backend; 'gurobi' is an alternative, but it is proprietary
        "model_solver": "glpk",
        # Reaction used as the model objective
        "model_objective": "BOUNDARY_He",
        # Table listing the lines/strains
        "line_strain_list_fname": "../data/experiment_w_reps.tsv",
        # Line against which the other lines are normalized
        "ref_line": "SF ABF93_7-R3",
    }
)


<IPython.core.display.Javascript object>

### 1.2. Raw Data

In [4]:
### EDD info
# True: query the EDD server directly; False: read a local export instead
config["edd_direct"] = True

if not config["edd_direct"]:
    # Alternative to EDD login (local file in EDD format)
    config["EDD_data_fname"] = "../data/EDD.csv.gz"
else:
    # Study slug, server and account used to acquire the study from EDD directly
    config.update(
        study_slug="aniger_3hp_omics_shakeflask-93_pnnl",
        edd_server="edd.agilebiofoundry.org",
        user="SMMahserejian",
    )


# Protocol to omics-data-type mapping.
# Each value is matched against the EDD "Protocol" column to select the rows
# belonging to that kind of data.
config["protocol_to_omics_map"] = dict(
    Flux_Constraint="PNNL Global Prot Intensity",
    Enzyme_Abundance="Targeted Proteomics",
    Internal_Metabolomics="PNNL Global Metabolomics (intracellular)",
    External_Metabolomics="HPLC",
    Internal_Fluxes="",
    External_Fluxes="",
)


### Line rates
config["line_rates_fname"] = "../data/a.niger_strain_specific_rates.csv"


### Protein Concentrations (proteomics)


# Internal Metabolite Concentrations (metabolomics)
# met_conc_fname = '../data/metabolite_concentrations.csv'
# (x_file)
# !!! THIS can come from EDD + mapping IDs

# External Metabolite Time Series
# ext_met_ts_fname = '../data/normalized_external_metabolites.csv'
# !!! this isn't a time series...
# (y_file)
# # !!! THIS can come from EDD + mapping IDs


# ===> v_star_file = '../data/Eflux2_flux_rates.flipped.csv'
# ===> v_file = '../data/Eflux2_flux_rates.flipped.csv'
# ===> e_file = '../data/normalized_targeted_enzyme_activities.csv'

<IPython.core.display.Javascript object>

### 1.3. Mapping files
NOTE: Will likely be data specific; may be developed by hand

In [5]:
# # Data ID to Model gene ID mapping
# config["dataID_to_modelGeneID_map_fname"] = "../data/uniprot_to_transcript_map.csv"

# Gzipped GFF3 deflines file used to map protein IDs to model gene IDs
config["protein_to_gene_map_fname"] = "../data/Aspni7_FilteredModels1_deflines.gff3.gz"

# Lookup tables, all created empty here:
#   protein ID               -> model gene ID
#   metabolite ID            -> model metabolite ID
#   data-file strain ID      -> canonical strain ID (wrt EDD)
proteinID_to_modelGene_map = dict()
metaboliteID_to_modelMetabolite_map = dict()
dataFileStrainID_to_canonicalStrainID_map = dict()

<IPython.core.display.Javascript object>

In [6]:
config

{'model_fname': '../models/iJB1325_HP.nonnative_genes.pubchem.flipped.nonzero.reduced.json',
 'model_solver': 'glpk',
 'model_objective': 'BOUNDARY_He',
 'line_strain_list_fname': '../data/experiment_w_reps.tsv',
 'ref_line': 'SF ABF93_7-R3',
 'edd_direct': True,
 'study_slug': 'aniger_3hp_omics_shakeflask-93_pnnl',
 'edd_server': 'edd.agilebiofoundry.org',
 'user': 'SMMahserejian',
 'protocol_to_omics_map': {'Flux_Constraint': 'PNNL Global Prot Intensity',
  'Enzyme_Abundance': 'Targeted Proteomics',
  'Internal_Metabolomics': 'PNNL Global Metabolomics (intracellular)',
  'External_Metabolomics': 'HPLC',
  'Internal_Fluxes': '',
  'External_Fluxes': ''},
 'line_rates_fname': '../data/a.niger_strain_specific_rates.csv',
 'protein_to_gene_map_fname': '../data/Aspni7_FilteredModels1_deflines.gff3.gz'}

<IPython.core.display.Javascript object>

## 2. Pre-processing

### 2.1. Extract and organize content from provided filenames

#### 2.1.1. Model

In [7]:
### Model
# Get model from file (a COBRA model stored as JSON at config["model_fname"])
model = cobra.io.load_json_model(config["model_fname"])

# Set model attributes: solver backend and objective, both named in config
model.solver = config["model_solver"]
model.objective = config["model_objective"]

<IPython.core.display.Javascript object>

#### 2.1.2. Line/Strain info

In [8]:
### Lines list

# Get lines list from file (tab-separated; the "Strain" and "Line Name" columns
# are used below)
line_descr_list_df = pd.read_csv(config["line_strain_list_fname"], sep="\t")

# Map line descriptions with ICE IDs by swapping the registry URL prefix for
# "ABF_00". regex=False makes the prefix a literal string on every pandas
# version; the default for `regex` differs between pandas releases, and as a
# regular expression each "." would match any character.
line_descr_list_df["ICE"] = line_descr_list_df["Strain"].str.replace(
    "https://registry.agilebiofoundry.org/entry/", "ABF_00", regex=False
)

# Sort by ICE ID then line name, drop incomplete rows, and index by line name
line_descr_list_df = (
    line_descr_list_df.sort_values(["ICE", "Line Name"]).dropna().set_index("Line Name")
)

# Get line names
line_list = line_descr_list_df.index

<IPython.core.display.Javascript object>

In [9]:
### Line rates

# Set variable for the reference line
ref_line = config["ref_line"]

# Get line rates from file
line_rates = pd.read_csv(config["line_rates_fname"])

# Map line rates with ICE IDs.
# Both tables are sorted by ICE ID, and the line names are transferred by row
# position, so the two ICE columns must be identical.
line_rates = line_rates.sort_values(["Strain (ICE)", "Omics Sample ID"]).dropna()
# np.array_equal also copes with tables of different length, where an
# elementwise `==` does not return something `all()` can iterate over.
if not np.array_equal(
    line_rates["Strain (ICE)"].values, line_descr_list_df["ICE"].values
):
    # Stop here: without line names in the index, the reference-line lookup
    # below would fail with a much less informative error.
    raise ValueError(
        "Error: ICE IDs don't match between line_strain_list and line_rates files"
    )
line_rates.index = line_descr_list_df.index
# line_rates.to_csv('line_rates.csv')

# Normalize line uptake and secretion rates with respect to reference line glucose uptake rate
ref_line_glucose_rate = line_rates.loc[
    ref_line, "glucose_uptake_rates (mmol/gDCW * hr)"
]
# Keep only the rate columns before dividing
normalized_line_rates = line_rates.drop(
    ["Omics Sample ID", "Strain (ICE)", "Genotype"], axis=1
)
normalized_line_rates = normalized_line_rates.divide(ref_line_glucose_rate, axis=1)


# Get strain rates (is this needed?)
# strain_rates = line_rates.groupby(['Strain (ICE)', 'Genotype']).mean()
# strain_rates_std = line_rates.groupby(['Strain (ICE)', 'Genotype']).std()
# strain_rates.to_csv('Strain_rates_mean.csv')
# strain_rates_std.to_csv('Strain_rates_std.csv')

# Get reference line's strain ID (is this needed?)
# ref_strain = strain_list_df['ICE'][strain_list_df.index == ref_line][0]

<IPython.core.display.Javascript object>

#### 2.1.3. Prepare mapping functionality

##### 2.1.3.1 Map proteins to genes

In [1]:
# Build two lookups from the GFF3 deflines file:
#   prot_gene_map: proteinId -> transcriptId
#   gene_prot_map: transcriptId -> proteinId
prot_gene_map = {}
gene_prot_map = {}
prot_gene_re = re.compile(r"proteinId=(\d+);.*transcriptId=(\d+)")
with gzip.open(config["protein_to_gene_map_fname"], "rt") as gff3:
    for record in gff3:
        hit = prot_gene_re.search(record)
        if hit is None:
            # Line carries no proteinId/transcriptId pair
            continue
        protein_id, transcript_id = hit.groups()
        gene_prot_map[transcript_id] = protein_id
        prot_gene_map[protein_id] = transcript_id

NameError: name 're' is not defined

##### 2.1.3.2 Function to map proteins to enzyme activity (reactions)

In [11]:
# Define function to get enzyme activity (reactions) as a function of protein expression
def get_enzyme_activity_expression(proteins, model):
    """Get enzyme activity expression as a function of protein expression

    Each reaction's gene rule is split on " or " into isoenzymes and each
    isoenzyme on " and " into subunits. Take the min over all measured
    subunits for each isoenzyme and sum over all isoenzymes. Reactions with
    no gene rule, or none of whose genes appear in ``proteins``, are left out.

    :param proteins:  protein x experiment dataframe, indexed by the gene IDs
        used in the model's gene rules
    :param model:  cobra model
    :returns:  reaction x experiment dataframe, index named "rxn"
    """
    enzyme_expression = {}
    # Build the lookup set once: it is consulted for every gene of every reaction
    transcripts = set(proteins.index)
    for r in model.reactions:
        if not r.gene_reaction_rule:
            continue
        if transcripts.isdisjoint(g.id for g in r.genes):
            # None of this reaction's genes were measured
            continue
        subunits_expression = {}
        for isoenzyme in (x.strip("() ") for x in r.gene_reaction_rule.split(" or ")):
            # Take the min over all subunits for each study line
            subunits = [
                name
                for name in (y.strip("() ") for y in isoenzyme.split(" and "))
                if name in transcripts
            ]
            if subunits:
                subunits_expression[isoenzyme] = proteins.loc[subunits].min(axis=0)
        # Sum the isoenzyme contributions per experiment
        enzyme_expression[r.id] = pd.DataFrame(subunits_expression).sum(axis=1)
    enzyme_expression = pd.DataFrame(enzyme_expression).T
    enzyme_expression.index.name = "rxn"
    return enzyme_expression

<IPython.core.display.Javascript object>

##### 2.1.3.3. Map kegg compounds to model metabolites

In [12]:
# For mapping pubchem to model IDs



# Metabolomics workbook and sheet: taken from config when the keys
# "metabolomics_fname" / "metabolomics_sheet_name" are set, otherwise the
# previously hard-coded values are used so existing runs are unchanged.
metab = pd.read_excel(
    config.get(
        "metabolomics_fname",
        "Multiomics/210525_Aspergillus_niger_3HP_ABFSF93_multiomics_data_fixed.xlsx",
    ),
    sheet_name=config.get("metabolomics_sheet_name", "Intra_metabolites_NOTnormalized"),
    engine="openpyxl",
)

# Rows that carry a PubChem ID, indexed by that ID
pubchem_metabs = metab[metab["PubChem"].notnull()].set_index("PubChem")

# Rows that carry a KEGG ID, indexed by that ID
kegg_metabs = metab[metab["Kegg"].notnull()].set_index("Kegg")

from collections import defaultdict

def make_metabolite_map(db, model, prefix=None):
    """Group a model's metabolites by their annotation ID in one database.

    :param db:  key to look up in each metabolite's ``annotation`` dict
        (e.g. "kegg.compound")
    :param model:  object whose ``metabolites`` iterable yields items with an
        ``annotation`` mapping (e.g. a cobra model)
    :param prefix:  optional string; when given, keys become "<prefix>:<id>"
    :returns:  defaultdict(list) mapping each database ID to the list of
        metabolites annotated with it; metabolites without ``db`` are skipped
    """
    db_model_mets = defaultdict(list)
    for m in model.metabolites:
        if db not in m.annotation:
            continue
        db_ids = m.annotation[db]
        # An annotation may hold a single ID or a collection of IDs. A list
        # cannot be used as a dictionary key, so register the metabolite
        # under every ID it carries.
        if not isinstance(db_ids, (list, tuple, set)):
            db_ids = [db_ids]
        for db_id in db_ids:
            key = f"{prefix}:{db_id}" if prefix else db_id
            db_model_mets[key].append(m)
    return db_model_mets


# Index the model's metabolites by their "kegg.compound" annotation
kegg_model_mets = make_metabolite_map("kegg.compound", model)
# Number of KEGG IDs present both in the data and in the model
len(set(kegg_metabs.index).intersection(kegg_model_mets))



FileNotFoundError: [Errno 2] No such file or directory: 'Multiomics/210525_Aspergillus_niger_3HP_ABFSF93_multiomics_data_fixed.xlsx'

<IPython.core.display.Javascript object>

##### 2.1.3.4. Map pubchem annotation to model metabolites

In [13]:

def add_db_annotations_to_model(
    src_model,
    annot_db_name,
    data_db_name,
    model_metabolite_map,
    data_metabolite_map,
    compartment_restrictions=("c",),
):
    """Return a copy of ``src_model`` with an extra annotation on its metabolites.

    For every database ID in ``model_metabolite_map`` that is also in the index
    of ``data_metabolite_map``, the mapped metabolites located in an allowed
    compartment get ``annotation[annot_db_name]`` set to the value found in
    column ``data_db_name`` of the data table. ``src_model`` is left untouched.

    :param src_model:  model to copy; its ``metabolites`` need ``id``,
        ``compartment`` and ``annotation``
    :param annot_db_name:  annotation key to write (e.g. "pubchem")
    :param data_db_name:  column of ``data_metabolite_map`` holding the value
    :param model_metabolite_map:  mapping of database ID -> list of metabolites
    :param data_metabolite_map:  dataframe indexed by database ID
    :param compartment_restrictions:  compartments whose metabolites may be
        annotated (default: only "c")
    :returns:  the annotated deep copy of ``src_model``
    """
    model = deepcopy(src_model)
    # The metabolites held by `model_metabolite_map` are not the objects inside
    # the copy, so annotating them would change the caller's model and leave
    # the returned copy unannotated. Resolve each one to its counterpart in the
    # copy through its id.
    copied_metabolites = {m.id: m for m in model.metabolites}
    for db_id, metabolites in model_metabolite_map.items():
        if db_id not in data_metabolite_map.index:
            continue
        print(db_id, ",".join(m.id for m in metabolites))
        for m in metabolites:
            target = copied_metabolites.get(m.id)
            if target is None:
                # Metabolite is not part of the copied model
                continue
            if target.compartment in compartment_restrictions:
                target.annotation[annot_db_name] = data_metabolite_map.loc[
                    db_id, data_db_name
                ]
    return model

# Copy of the model whose cytosolic ("c") metabolites carry a "pubchem"
# annotation taken from the "PubChem" column of the KEGG-indexed data table
model_w_pubchem = add_db_annotations_to_model(
    model,
    "pubchem",
    "PubChem",
    kegg_model_mets,
    kegg_metabs,
    ["c"],
)

# "cid:<pubchem annotation>" -> model metabolite id, for compartment "c" only
pubchem_model_map = {
    f"""cid:{m.annotation['pubchem']}""": m.id
    for m in model_w_pubchem.metabolites
    if "pubchem" in m.annotation and m.compartment == "c"
}

def map_pubchem_to_model_id(pubchem_model_map, dataset):
    """Keep the rows of ``dataset`` whose label has a model ID and relabel them.

    :param pubchem_model_map:  mapping of dataset row label -> model metabolite id
    :param dataset:  pandas object whose index holds the labels to translate
    :returns:  the matching rows, in their original order, with the index
        replaced by the mapped model ids
    """
    known_labels = []
    for label in dataset.index:
        if label in pubchem_model_map:
            known_labels.append(label)
    matched = dataset.loc[known_labels]
    return matched.rename(index=pubchem_model_map)


NameError: name 'kegg_model_mets' is not defined

<IPython.core.display.Javascript object>

### 2.2. Extract and Parse EDD Data

#### 2.2.1. Download and extract data from EDD

In [None]:
if not config["edd_direct"]:
    ### Alternative: read data already downloaded from EDD
    EDD_df = pd.read_csv(config["EDD_data_fname"])
else:
    ### Download the data from EDD: log in first, then export the study by slug
    EDD_session = login(edd_server=config["edd_server"], user=config["user"])
    EDD_df = export_study(
        EDD_session,
        config["study_slug"],
        edd_server=config["edd_server"],
    )
    # EDD_df.groupby(["Line ID", "Line Name", "Line Description"]).count()

# Preview the first rows
EDD_df.head()

#### 2.2.2. Prepare Flux Constraint Data

In [44]:
# Extract flux constraints from EDD data (rows whose Protocol matches the
# configured "Flux_Constraint" protocol)
flux_constraints = EDD_df[
    EDD_df["Protocol"] == config["protocol_to_omics_map"]["Flux_Constraint"]
]

# Reorganize into a "Formal Type" x "Line Name" table and normalize with respect
# to the reference line. Infinite and missing ratios are set to 1.
normalized_flux_constraints = flux_constraints[
    ["Formal Type", "Line Name", "Value"]
].pivot_table(index="Formal Type", columns="Line Name", values="Value")
normalized_flux_constraints = (
    normalized_flux_constraints.divide(normalized_flux_constraints[ref_line], axis=0)
    .replace([np.inf, -np.inf], 1)
    .fillna(1)
)

# Apply JGI names (model genes) to normalized flux constraints.
# Rows without an entry in prot_gene_map are dropped, as is done for the enzyme
# abundances; otherwise they would end up under a NaN index label.
normalized_flux_constraints_jgi = (
    normalized_flux_constraints.join(pd.Series(prot_gene_map).to_frame("Transcript"))
    .dropna(subset=["Transcript"])
    .set_index("Transcript")
)


### NOTE: It's not obvious where this gets used...
# Map flux constraints to enzyme activity (reactions)
normalized_rxns_flux_constraints = get_enzyme_activity_expression(
    normalized_flux_constraints_jgi, model
)

normalized_rxns_flux_constraints

<IPython.core.display.Javascript object>

#### 2.2.3. Prepare Enzyme Abundance Data

In [None]:
# Extract enzyme abundances from EDD data (rows of the configured
# "Enzyme_Abundance" protocol)
enzyme_abundance = EDD_df.loc[
    EDD_df["Protocol"] == config["protocol_to_omics_map"]["Enzyme_Abundance"]
]

# Reorganize into a "Formal Type" x "Line Name" table (missing cells become 1),
# then normalize each row with respect to the reference line
normalized_enzyme_abundance = enzyme_abundance.set_index(["Formal Type", "Line Name"])
normalized_enzyme_abundance = normalized_enzyme_abundance["Value"].unstack(fill_value=1)
normalized_enzyme_abundance = normalized_enzyme_abundance.div(
    normalized_enzyme_abundance[ref_line], axis=0
)

# Apply JGI names (model genes) to normalized enzyme abundances; rows with any
# missing value (including a missing transcript) are dropped
normalized_enzyme_abundance_jgi = normalized_enzyme_abundance.join(
    pd.Series(prot_gene_map).to_frame("Transcript")
)
normalized_enzyme_abundance_jgi = normalized_enzyme_abundance_jgi.dropna()
normalized_enzyme_abundance_jgi = normalized_enzyme_abundance_jgi.set_index("Transcript")

# Map enzyme abundances to enzyme activity (reactions)
normalized_rxns_enzyme_abundance = get_enzyme_activity_expression(
    normalized_enzyme_abundance_jgi, model
)

normalized_rxns_enzyme_abundance

#### 2.2.4. Prepare Internal Metabolomics Data

In [48]:
# Extract internal metabolomics from EDD data (rows of the configured
# "Internal_Metabolomics" protocol)
internal_metab = EDD_df.loc[
    EDD_df["Protocol"] == config["protocol_to_omics_map"]["Internal_Metabolomics"]
]

# Reorganize into a (Measurement Type, Formal Type) x Line Name table of log2 values
normalized_internal_metab = internal_metab.set_index(
    ["Measurement Type", "Formal Type", "Line Name"]
)["Value"]
normalized_internal_metab = normalized_internal_metab.unstack().apply(np.log2)

# Normalize with respect to the reference line (difference of log2 values);
# infinite and missing results are set to 0
normalized_internal_metab = normalized_internal_metab.sub(
    normalized_internal_metab[ref_line], axis=0
)
normalized_internal_metab = (
    normalized_internal_metab.replace(-np.inf, 0).fillna(0).replace(np.inf, 0)
)
normalized_internal_metab







Unnamed: 0_level_0,Line Name,SF ABF93_1-R1,SF ABF93_1-R2,SF ABF93_1-R3,SF ABF93_10-R1,SF ABF93_10-R2,SF ABF93_10-R3,SF ABF93_11-R1,SF ABF93_11-R2,SF ABF93_11-R3,SF ABF93_12-R1,...,SF ABF93_6-R3,SF ABF93_7-R1,SF ABF93_7-R2,SF ABF93_7-R3,SF ABF93_8-R1,SF ABF93_8-R2,SF ABF93_8-R3,SF ABF93_9-R1,SF ABF93_9-R2,SF ABF93_9-R3
Measurement Type,Formal Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
"(1S,2R,4S,5S)-6-[(2R,3R,4S,5R,6R)-3,4,5-trihydroxy-6-methylol-tetrahydropyran-2-yl]oxycyclohexane-1,2,3,4,5-pentol",cid:11727586,-1.959075,0.0,-0.078922,-0.853756,-0.094756,-0.904397,0.106334,-0.460177,0.005258,-1.167276,...,0.153362,0.005365,-0.033289,0.0,0.159875,0.005721,-0.068941,-0.587076,-0.612524,-1.27879
"(2R,3R,4R,5R)-hexane-1,2,3,4,5,6-hexol",cid:6251,-0.544022,-0.569916,0.063486,-0.103061,-0.297648,-0.161703,-0.701674,-0.248329,-0.051807,-0.185847,...,0.015707,-0.01218,0.102425,0.0,-0.110606,-0.13844,-0.132544,-0.0445,-0.215153,-0.159968
"(2R,3R,4S,5R)-2-adenin-9-yl-5-methylol-tetrahydrofuran-3,4-diol",cid:60961,-2.405182,-1.391174,-0.995859,-1.217824,-0.060109,-0.968332,-0.282518,-0.674158,0.055353,-1.143886,...,0.266623,0.389166,-0.269014,0.0,0.429297,-0.530966,0.031438,-0.342743,-0.768083,-1.277334
"(2R,3S,4S,5R,6R)-2-methylol-6-[(2R,3R,4S,5S,6R)-3,4,5-trihydroxy-6-methylol-tetrahydropyran-2-yl]oxy-tetrahydropyran-3,4,5-triol",cid:7427,-0.767383,-0.64823,-0.333403,0.033285,-0.165826,-0.116006,-0.052038,-0.487081,-0.179344,-0.553755,...,0.20426,0.302972,0.006208,0.0,0.286248,-0.095086,-0.004988,-0.002468,-0.159171,-0.421687
"(2R,4R)-pentane-1,2,3,4,5-pentol",cid:94154,-2.963872,-0.293565,-0.536156,-1.800644,-0.272179,-1.228698,-0.633371,-1.216555,-0.376251,-1.193621,...,0.095655,0.293351,0.060226,0.0,-0.304055,-0.985559,-0.82965,-0.962517,-1.081043,-2.189466
"(2S)-2,5-diammoniopentanoate",cid:6992088,-0.752378,0.355753,0.433667,-0.45036,0.102332,-0.164533,-0.009866,-0.546326,0.229864,-0.634233,...,0.413589,-0.054702,-0.199691,0.0,0.450409,0.257115,0.276419,0.175737,-0.18515,-0.088678
(2S)-2-ammonio-3-hydroxypropanoate,cid:6857581,-2.4899,-4.402278,0.523571,-1.37967,-0.64443,-0.596543,-1.70087,-1.083394,-0.140653,-1.392045,...,0.714563,0.203389,-0.253875,0.0,0.796569,0.275378,0.123798,-0.630236,-1.036385,-0.557287
(2S)-2-azaniumyl-3-(4-hydroxyphenyl)propanoate,cid:6942100,2.686721,2.889632,3.309223,0.159618,0.00817,0.372515,2.925808,2.640397,2.241722,2.567164,...,1.983872,2.635824,0.497125,0.0,2.384198,1.695278,1.71299,0.742526,2.50934,0.729847
(2S)-2-azaniumyl-3-methylbutanoate,cid:6971018,-2.267587,-3.579287,0.009215,-1.549778,-1.216943,-0.971653,-2.030052,-0.730957,-1.133487,-1.990215,...,-0.076515,-0.692629,-0.457877,0.0,-0.513619,-0.109456,-0.000488,-1.03983,-1.150285,-1.003209
(2S)-2-azaniumyl-3-phenylpropanoate,cid:6925665,-1.141341,-0.774836,0.83636,-1.306743,-0.165118,-0.313237,-0.30277,-0.42289,-0.016696,-0.894074,...,1.086897,0.017543,-0.536722,0.0,0.799382,0.097307,0.191102,-0.315401,-1.303381,-0.137483


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### 2.3. Pre-Process Metabolite Concentrations

#### 2.3.2. Internal Metabolites

In [25]:
# Extract internal metabolites from EDD data
internal_metab = EDD_df[
    EDD_df["Protocol"] == config["protocol_to_omics_map"]["Internal_Metabolomics"]
]

# Normalize internal metabolites: log2 values, then difference to the reference line
normalized_internal_metab = (
    internal_metab.set_index(["Measurement Type", "Formal Type", "Line Name"])["Value"]
    .unstack()
    .apply(np.log2)
)
# The reference line is held in `ref_line` throughout this notebook
# (`reference_strain` is never defined).
normalized_internal_metab = (
    normalized_internal_metab.sub(normalized_internal_metab[ref_line], axis=0)
    .replace(-np.inf, 0)
    .fillna(0)
    .replace(np.inf, 0)
)

# Add model IDs to data
# (Formal Type, Measurement Type) pairs present in the data
pubchem_ids = list(
    internal_metab.groupby(["Formal Type", "Measurement Type"]).count().index
)
# Drop the "Measurement Type" index level so rows are keyed by "Formal Type",
# then translate those keys to model metabolite ids
normalized_internal_metab_w_model_ids = map_pubchem_to_model_id(
    pubchem_model_map, normalized_internal_metab.droplevel(0)
)

Unnamed: 0_level_0,ID,Transcript
Entry name,Unnamed: 1_level_1,Unnamed: 2_level_1
A7U8C7,PAND_Tribolium_castaneum,PAND_Tribolium_castaneum
C2ZAL1,BAPAT_Bacillus_cereus,BAPAT_Bacillus_cereus
P3983,HPDH_escherichia_coli,HPDH_escherichia_coli
ABF_005934,PAND_Tribolium_castaneum,PAND_Tribolium_castaneum
ABF_005935,BAPAT_Bacillus_cereus,BAPAT_Bacillus_cereus
...,...,...
G3XM75,56950,56950
G3Y8S4,57046,57046
G3Y7D7,57150,57150
G3Y4S3,57297,57297


In [None]:
### Fluxes

## !!! Hard coded for 3hp

# NOTE(review): `global_prot` is not defined in the visible part of this
# notebook — confirm where it comes from before running this cell.

# Run Eflux2 to calculate fluxes
fluxes = {}
for rep in global_prot.columns:
    # Bound changes made inside `with model:` apply to this replicate only
    with model:
        glucose_uptake = normalized_line_rates.loc[
            rep, "glucose_uptake_rates (mmol/gDCW * hr)"
        ]
        secrete_3hp = normalized_line_rates.loc[
            rep, "3hp_secretion_rates (mmol/gDCW * hr)"
        ]
        # A rate of 0 leaves the corresponding bound unchanged
        if glucose_uptake:
            model.reactions.BOUNDARY_GLCe.upper_bound = glucose_uptake
        if secrete_3hp:
            model.reactions.EX_3hpp_e.lower_bound = secrete_3hp
        print(rep)
        try:
            fluxes[rep] = EFlux2(model, global_prot[rep])
        except TypeError:
            print(f"Replicate {rep} with glucose {glucose_uptake} and 3hp {secrete_3hp} is infeasible")

# Only replicates that solved are in `fluxes`. The former bare `fluxes[rep]`
# expression was removed: it showed nothing and raised KeyError whenever the
# last replicate was infeasible.
solution_frames = {rep: solution.to_frame() for rep, solution in fluxes.items()}
efluxes = pd.DataFrame({rep: frame["fluxes"] for rep, frame in solution_frames.items()})
reduced_costs = pd.DataFrame(
    {rep: frame["reduced_costs"] for rep, frame in solution_frames.items()}
)
# Scale the normalized fluxes back by the reference line's glucose uptake rate
eflux_rates = efluxes * ref_line_glucose_rate

#efluxes.to_csv('Eflux2_flux_yield.csv')
#reduced_costs.to_csv('Eflux2_yield_reduced_costs.csv')
#eflux_rates.to_csv('Learn/Eflux2_flux_rates.csv')

In [20]:
### Internal(?) metabolites

# Get metabolite concentrations from file.
# `met_conc_fname` is only assigned in a commented-out line of the "Raw Data"
# cell, so the path is read from config["met_conc_fname"] when set and
# otherwise falls back to the location given in that comment.
met_conc_fname = config.get("met_conc_fname", "../data/metabolite_concentrations.csv")
met_conc = pd.read_csv(met_conc_fname, index_col=0)
# Keep only rows whose label is a model metabolite id, in model order
met_conc = met_conc.loc[[m.id for m in model.metabolites if m.id in met_conc.index]]

In [21]:
### External metabolites

# Get external metabolites from file.
# `ext_met_ts_fname` is only assigned in a commented-out line of the "Raw Data"
# cell, so the path is read from config["ext_met_ts_fname"] when set and
# otherwise falls back to the location given in that comment.
ext_met_ts_fname = config.get(
    "ext_met_ts_fname", "../data/normalized_external_metabolites.csv"
)
ext_met_ts = pd.read_csv(ext_met_ts_fname, index_col=0)
# Keep only rows whose label is a model metabolite id, in model order
ext_met_ts = ext_met_ts.loc[[m.id for m in model.metabolites if m.id in ext_met_ts.index]]

Unnamed: 0,SF ABF93_1-R1,SF ABF93_1-R2,SF ABF93_1-R3,SF ABF93_10-R1,SF ABF93_10-R2,SF ABF93_10-R3,SF ABF93_11-R1,SF ABF93_11-R2,SF ABF93_11-R3,SF ABF93_12-R1,...,SF ABF93_6-R3,SF ABF93_7-R1,SF ABF93_7-R2,SF ABF93_7-R3,SF ABF93_8-R1,SF ABF93_8-R2,SF ABF93_8-R3,SF ABF93_9-R1,SF ABF93_9-R2,SF ABF93_9-R3
bDGLCe,0.96856,0.595006,1.019095,0.306643,0.370181,0.332601,0.286244,0.425582,0.483575,0.327102,...,0.236882,0.061311,0.077775,0.0,0.636998,0.488817,0.733679,0.646963,0.42526,0.528886
3hpp_e,0.0,0.0,0.0,-0.032113,-0.071955,-0.125552,-0.478471,-0.538206,-0.46332,-0.012481,...,-0.453518,0.109321,0.070619,0.0,-1.247424,-1.177829,-1.291246,-0.445347,-0.466815,-0.611556


### 2.2. Check input file/data formats

In [None]:
# Ensure all fluxes are positive with respect to reference strain


# Ensure all data have same line IDs


# Ensure columns line up across data sets; error message if not


# Ensure all rows correspond to model variables across data sets


### 2.3. Calculate rates

In [2]:
# Growth Rates

# Uptake Rates

# Secretion Rates


### 2.4. Map data identifiers

In [None]:
# Map metabolomics data

# Map proteomics data

# Automated mapping: use InChi mapping where available

# Semi-Manual:
#     Metabolites: PubChem, Metacyc (metabolite translation service), MetaNetX, PNNL stuff
#     Proteins: BBH (bidirectional best hits)


### 2.5. Normalize metabolomics & proteomics data wrt reference strain ==> reference strain = 1

### 2.6. Generate Enzyme Data

Input: Proteomics data, model, protein to enzyme-reaction map

Output: enzyme to reaction map

Example: (Gene A and Gene B) or Gene C --> Enzyme Activity X

Helper Function: use model to obtain protein to enzyme-reaction map (see "get_enzyme_activity_expression()" A.niger_MultiOmics Section 4.1)

### 2.7. Generate eFlux Data

Input: Proteomics data, model, growth rates, uptake rates, secretion rates

Output: strain specific set of fluxes (different than enzyme activities; same shape but for reaction rate)

Helper function: see [https://github.com/AgileBioFoundry/AspergillusQ4Milestone/blob/main/notebooks/Eflux4A.niger.ipynb] section "Normalize uptake and secretion rates by glucose uptake rate of reference study line." where "fluxes" are defined
    

## 3. Main Process

In [None]:
# Plot heatmaps (see A.niger_MultiOmics)

In [None]:
# File holding the normalized enzyme activities
normalized_enzyme_activities_fname = "../data/normalized_targeted_enzyme_activities.csv"

### Define wild-type strain IDs (three replicates of line ABF93_1)
wild_type = [
    "SF ABF93_1-R1",
    "SF ABF93_1-R2",
    "SF ABF93_1-R3",
]

In [1]:
# Load Model. The path lives in config["model_fname"]; a bare `model_fname`
# variable is never defined in this notebook.
model = cobra.io.load_json_model(config["model_fname"])
# NOTE(review): unlike the first model load, solver and objective are not
# re-applied after this reload — confirm whether that is intended.

# Load normalized enzyme activities as a dataframe (first column is the index)
normalized_enzyme_activities_df = pd.read_csv(
    normalized_enzyme_activities_fname, index_col=0
)

In [None]:
# Remove wild-type from normalized enzyme activities dataframe
