In [1]:
import numpy as np
import pandas as pd

import cobra
import cobra.test
from cobra.test import create_test_model

import cobrapy_func as cf

In [2]:
# Load zbgem2 model
fn_zgem2 = "/Users/don/Documents/mlardelli/data/zebragem_20200228_mod.xml"
model = cobra.io.read_sbml_model(fn_zgem2)

# Print stuff out
print(f"num reactions = {len(model.reactions)}")
print(f"num metabs = {len(model.metabolites)}")
print(f"num genes = {len(model.genes)}")
print(f"num exchanges = {len(model.exchanges)}")
print("")
model.objective ="BIO_L_2"

# Load DE genes
d0 = pd.read_csv("/Users/don/Documents/mlardelli/data/de_genes.csv")

# For the genes in the DE list, get only those which are in ZBGEM
zbgem_gene_ls = [g.id for g in model.genes]

de_genes_ls = []
for ncbi_id in list(d0["ncbi_id"]):
    if ncbi_id in zbgem_gene_ls:
        de_genes_ls.append(ncbi_id)
print(f"Num. DE genes present in ZBGEM = {len(de_genes_ls)}")

de_df = d0.loc[d0["ncbi_id"].isin(de_genes_ls)]

# Get dict of reactions affected by DE genes, and a flat list
rxn_dict = {}
affected_rxn_ls = []
for ncbi_id in de_genes_ls:
    val_ls = [rxn.id for rxn in list(model.genes.get_by_id(ncbi_id).reactions)]
    rxn_dict[ncbi_id] = val_ls
    affected_rxn_ls.append(val_ls)

affected_rxn_ls = list(set([item for sublist in affected_rxn_ls for item in sublist]))

# Get affected reactions AFTER checking GPR, regardless of fold change direction
affected_rxn_ls2 = []
for rxn_id in affected_rxn_ls:
    rxn = model.reactions.get_by_id(rxn_id)
    gpr_str = rxn.gene_reaction_rule
    
    eval_bool = cf.eval_gpr(gpr_str, de_genes_ls)
    if eval_bool:
        affected_rxn_ls2.append(rxn_id)

print(f"{len(affected_rxn_ls2)} reactions impacted by DE genes")

num reactions = 3023
num metabs = 2810
num genes = 1636
num exchanges = 44

Num. DE genes present in ZBGEM = 28
28 reactions impacted by DE genes


# Mark1 Eyeball

See which genes affect which reactions.

In [3]:
gene_rxn_dict = {}
for ncbi_id in de_genes_ls:
    rxn_ls = [x.id for x in list(model.genes.get_by_id(ncbi_id).reactions)]
    
    # filter only for definitely affected reactions
    temp_ls = []
    for rxn_id in rxn_ls:
        if rxn_id in affected_rxn_ls2:
            temp_ls.append(rxn_id)
    gene_rxn_dict[ncbi_id] = temp_ls

In [4]:
for k in gene_rxn_dict.keys():
    g = model.genes.get_by_id(k)
    de_direction = de_df.loc[de_df["ncbi_id"]==k]["DE_Direction"].values[0]
    print(f"### {g.name} ({g.id}, {de_direction})")
    print("")
    temp_rxn_ls = [model.reactions.get_by_id(rxn_id) for rxn_id in gene_rxn_dict[k]]
    for rxn in temp_rxn_ls:
        print(f"* {rxn.name}(**{rxn.id}**): {rxn.reaction}")
    
    print("")

### si:ch211-114l13.7 (567859, 1)

* UDPgalactose:procollagen-5-hydroxy-L-lysine D-galactosyltransferase [endoplasmic reticulum](**R1351_r**): pcollg5hlys_r + udpgal_r <=> C04487_r + udp_r

### dlst (368262, -1)

* 2-oxoadipate dehydrogenase complex [mitochondria](**2OXOADOXm**): 2oxoadp_m + coa_m + nad_m --> co2_m + glutcoa_m + h_m + nadh_m
* 2-Oxoglutarate dehydrogenase complex [mitochondria](**AKGDm**): akg_m + coa_m + nad_m --> co2_m + h_m + nadh_m + succoa_m
* succinyl-CoA:enzyme N6-(dihydrolipoyl)lysine S-succinyltransferase [mitochondria](**R1174_m**): dhlam_m + succoa_m <=> C16254_m + coa_m
* Glutaryl-CoA:dihydrolipoamide S-succinyltransferase [mitochondria](**R1175_m**): S_gtrdhdlp_m + coa_m --> dhlam_m + glutcoa_m

### atp5g1 (406301, -1)


### chst12a (407076, -1)

* 3'-Phosphoadenylylsulfate:chondroitin 4'-sulfotransferase [Golgi membrane](**R1047_g**): C00401_g + paps_g <=> cs_a_g + pap_g

### sdhdb (445500, -1)


### abcc5 (336147, -1)

* cGMP transport (ATP-dependent)(**

# Scrape for KEGG Data

In [None]:
for rxn_id in affected_rxn_ls2:
    rxn = model.reactions.get_by_id(rxn_id)
    kegg_r_id = rxn.annotation.get("kegg.reaction")
    if kegg_r_id is not None:
        print(rxn_id, kegg_r_id)
    else:
        print(rxn_id)

In [13]:
for rxn_id in affected_rxn_ls2:
    rxn_annot = model.reactions.get_by_id(rxn_id).annotation
    rxn_bigg_id = model.reactions.get_by_id(rxn_id).id
    print(rxn_bigg_id, rxn_annot)

CGMPt {}
NAt3_1g {}
INSTt2r {}
AKGDm {'kegg.reaction': 'R08549', 'ec-code': ['2.3.1.61', '1.8.1.4', '1.2.4.2']}
R1351_r {'kegg.reaction': 'R03380', 'ec-code': '2.4.1.50'}
ACYP_2 {'kegg.reaction': 'R00317', 'ec-code': '3.6.1.7'}
NDP10l {'kegg.reaction': 'R00961', 'ec-code': '3.6.1.6'}
CTPS1 {'kegg.reaction': 'R00571', 'ec-code': '6.3.4.2'}
ASNTRSm {'kegg.reaction': 'R03648', 'ec-code': '6.1.1.22'}
ASPGLUm {}
NDP3l {'kegg.reaction': 'R00328', 'ec-code': '3.6.1.6'}
G3PATrm {'kegg.reaction': 'R00851', 'ec-code': '2.3.1.15'}
ATPtm {}
R1174_m {'kegg.reaction': 'R02570', 'ec-code': '2.3.1.61'}
3SALAASPm {}
MAN6PI {'kegg.reaction': 'R01819', 'ec-code': '5.3.1.8'}
2OXOADOXm {'kegg.reaction': 'R01933', 'ec-code': ['2.3.1.61', '1.8.1.4', '1.2.4.2']}
R2127_r {'kegg.reaction': 'R07620', 'ec-code': '2.4.1.109'}
CITt4_4 {}
CTPS2 {'kegg.reaction': 'R00573', 'ec-code': '6.3.4.2'}
CAMPt {}
PI34P4Plm {'kegg.reaction': 'R07299', 'ec-code': '3.1.3.66'}
R249_mc {'kegg.reaction': 'R00112', 'ec-code': '1.6.1.

In [12]:
model.reactions.get_by_id(affected_rxn_ls2[3]).id

'AKGDm'