In [1]:
import pandas as pd
from cobra.io.json import load_json_model
import re

In [2]:
model = load_json_model('./data/Recon3D.json')
model.solver = 'gurobi' 

Set parameter Username
Set parameter LicenseID to value 2732830
Academic license - for non-commercial use only - expires 2026-11-04


In [3]:
CCLE_expression = pd.read_csv('./data/CCLE_expression.csv', index_col=0)

In [4]:
CCLE_expression.head()

Unnamed: 0,TSPAN6 (7105),TNMD (64102),DPM1 (8813),SCYL3 (57147),C1orf112 (55732),FGR (2268),CFH (3075),FUCA2 (2519),GCLC (2729),NFYA (4800),...,H3C2 (8358),H3C3 (8352),AC098582.1 (8916),DUS4L-BCAP29 (115253422),C8orf44-SGK3 (100533105),ELOA3B (728929),NPBWR1 (2831),ELOA3D (100506888),ELOA3 (162699),CDR1 (1038)
ACH-001113,4.331992,0.0,7.364397,2.792855,4.470537,0.028569,1.226509,3.042644,6.499686,4.739848,...,2.689299,0.189034,0.201634,2.130931,0.555816,0.0,0.275007,0.0,0.0,0.0
ACH-001289,4.566815,0.584963,7.106537,2.543496,3.50462,0.0,0.189034,3.813525,4.221104,3.481557,...,1.286881,1.049631,0.321928,1.464668,0.632268,0.0,0.014355,0.0,0.0,0.0
ACH-001339,3.15056,0.0,7.379032,2.333424,4.227279,0.056584,1.31034,6.687061,3.682573,3.273516,...,0.594549,1.097611,0.831877,2.946731,0.475085,0.0,0.084064,0.0,0.0,0.042644
ACH-001538,5.08534,0.0,7.154109,2.545968,3.084064,0.0,5.868143,6.165309,4.489928,3.956986,...,0.214125,0.632268,0.298658,1.641546,0.443607,0.0,0.028569,0.0,0.0,0.0
ACH-000242,6.729145,0.0,6.537607,2.456806,3.867896,0.799087,7.208381,5.569856,7.127014,4.568032,...,1.117695,2.358959,0.084064,1.910733,0.0,0.0,0.464668,0.0,0.0,0.0


In [5]:
#checking different outputs and what they look like
r = model.reactions[1000]  
print("Reaction ID:", r.id)
print("Reaction name:", r.name)
print("Gene-reaction rule:", r.gene_reaction_rule)
print("Genes:", [g.id for g in r.genes])
print(model.genes[3].id)
print(model.genes[3].name)

Reaction ID: LYStm
Reaction name: Lysine mitochondrial transport via ornithine carrier
Gene-reaction rule: 83884_AT1 or 10166_AT1
Genes: ['10166_AT1', '83884_AT1']
8639_AT1
AOC3


In [6]:
def clean_gene_name(name: str) -> str:
    """
    Clean up a gene name so it matches the model's format.
    Removes extra info in parentheses, spaces, etc.
    Example: 'AOC3 (8639)' → 'AOC3'
    """
    cleaned = re.sub(r"\s*\(.*\)$", "", str(name))  # remove text in parentheses
    return cleaned.strip().upper()  # remove spaces, make uppercase for consistency

In [7]:
def has_transcriptomics(reaction, CCLE_genes):
    """
    Checks whether transcriptomics data are available for the gene(s)
    associated with a given reaction.
    
    Parameters
    ----------
    reaction : cobra.Reaction
        Reaction object from the COBRA model.
    CCLE_expression : pandas.DataFrame
        Transcriptomics dataset where columns are gene IDs (e.g., ENSG IDs).
    
    Returns
    -------
    bool
        True if at least one gene in the reaction is found in CCLE_expression, else False.
    """
    
    # Get all gene IDs linked to this reaction
    reaction_genes = [gene.name for gene in reaction.genes]
    
    
    # Check if any gene in the reaction exists in the expression dataframe
    for g in reaction_genes:
        if g in CCLE_genes:
            return True  # we have transcriptomics for at least one gene
    
    # If none of the genes matched
    return False


In [8]:
# Create a quick-access set of cleaned CCLE column names
CCLE_gene_names = { clean_gene_name(c) for c in CCLE_expression.columns }

In [9]:
list = []
for rxn in model.reactions:
    list.append(has_transcriptomics(rxn, CCLE_gene_names))
if any(list):
    print("At least one element is True")

At least one element is True


In [10]:
def open_bounds(rxn):
    """
    Set reaction bounds fully open depending on reversibility.
    """
    if rxn.reversibility:
        rxn.lower_bound = -1000
        rxn.upper_bound = 1000
    else:
        rxn.lower_bound = 0
        rxn.upper_bound = 1000

In [11]:
def classify_rule(rxn):
    rule = rxn.gene_reaction_rule.lower()
    if "and" in rule and "or" not in rule:
        return "and_rule"
    elif "or" in rule and "and" not in rule:
        return "or_rule"
    elif "and" not in rule and "or" not in rule and rule != "":
        return "one_gene"
    else:
        return None

In [12]:
# create mapping: cleaned_name → original_column_name
CCLE_name_map = { clean_gene_name(col): col for col in CCLE_expression.columns }

def calculate_new_bounds(rxn, rule_type, cell_line):
    """
    Set new flux bounds for a reaction whose genes have transcriptomics data.
    Assumes model gene names (gene.name) match cleaned CCLE names.
    """
    # Map model gene names directly to CCLE columns
    matched_cols = [CCLE_name_map[g.name] for g in rxn.genes if g.name in CCLE_name_map]

    expr_values = CCLE_expression.loc[cell_line, matched_cols].tolist()

    if rule_type == "one_gene":
        E = expr_values[0]
    elif rule_type == "or_rule":
        E = sum(expr_values)
    elif rule_type == "and_rule":
        E = min(expr_values)
    else:
        return

    if rxn.reversibility:
        rxn.lower_bound = -E
        rxn.upper_bound = E
    else:
        rxn.lower_bound = 0
        rxn.upper_bound = E



In [None]:
#Before values 
rxn = model.reactions[15]
print("Lower bound:", rxn.lower_bound)
print("Upper bound:", rxn.upper_bound)

Lower bound: 0.0
Upper bound: 1000.0


In [14]:
for rxn in model.reactions:
    genes_found = has_transcriptomics(rxn, CCLE_gene_names)
    print(genes_found)
    
    if (genes_found):
        rule_type = classify_rule(rxn)
        print(rule_type)
        calculate_new_bounds(rxn, rule_type, "ACH-001113")
    else:
        open_bounds(rxn)

False
False
False
False
False
False
False
False
False
False
True
one_gene
False
False
False
False
True
or_rule
False
False
False
True
one_gene
True
one_gene
True
one_gene
True
one_gene
False
True
None
True
one_gene
True
one_gene
True
or_rule
False
True
or_rule
True
one_gene
False
False
True
or_rule
False
True
or_rule
False
False
False
True
or_rule
False
False
True
or_rule
False
False
True
or_rule
False
True
or_rule
True
or_rule
False
False
False
False
True
one_gene
True
one_gene
True
one_gene
False
True
one_gene
True
one_gene
True
one_gene
True
one_gene
True
one_gene
True
one_gene
True
or_rule
False
True
one_gene
True
or_rule
True
one_gene
True
one_gene
True
one_gene
False
False
True
or_rule
True
one_gene
True
or_rule
False
False
False
True
one_gene
True
one_gene
False
False
False
True
one_gene
False
False
False
True
one_gene
False
False
False
True
or_rule
True
one_gene
True
or_rule
True
or_rule
True
one_gene
True
or_rule
True
or_rule
True
or_rule
True
one_gene
False
True
one_gene
True

In [None]:
#Testing effectivness, seeing after values
rxn = model.reactions[15]
print("Reaction ID:", rxn.id)
print("Reaction name:", rxn.name)
print("Gene rule:", rxn.gene_reaction_rule)
print("Genes:", [g.name for g in rxn.genes])
print("Lower bound:", rxn.lower_bound)
print("Upper bound:", rxn.upper_bound)

Reaction ID: 13DAMPPOX
Reaction name: 1,3-Diaminopropane:oxygen oxidoreductase (deaminating)
Gene rule: 8639_AT1 or 26_AT1 or 314_AT2 or 314_AT1
Genes: ['AOC2', 'AOC1', 'AOC3', 'AOC2']
Lower bound: 0
Upper bound: 11.638367455273752
