In [1]:
import pandas as pd
import cobra

from cobra.io import load_model

import numpy as np
import pickle

## Read the model

In [2]:
model = cobra.io.read_sbml_model('models\\Recon3D.xml')

## Read the transcriptomics data

Projection of gene names to ids

In [3]:
gene_id_name = {}
gene_name_id = {}

for g in model.genes[1:]:
    gene_id_name[g.id] = g.name
    gene_name_id[g.name] = g.id 

Read the expression data

In [4]:
df_control = pd.read_csv("data\\data_control.txt")
df_kd = pd.read_csv("data\\data_kd.txt")

Add ids to expression data

In [5]:
df_control['gene_id'] = df_control['gene'].map(lambda x: gene_name_id[x] if x in gene_name_id else "")
df_kd['gene_id'] = df_kd['gene'].map(lambda x: gene_name_id[x] if x in gene_name_id else "")

Keep only genes that are present in the model

In [6]:
df_control = df_control[df_control['gene_id'] != ""]
df_kd = df_kd[df_kd['gene_id'] != ""]

In [7]:
len(df_control), len(df_kd)

(2019, 2019)

In [8]:
df_control.describe()

Unnamed: 0,value
count,2019.0
mean,8.28894
std,3.151708
min,3.31
25%,5.595
50%,7.61
75%,10.795
max,17.54


In [9]:
df_kd.describe()

Unnamed: 0,value
count,2019.0
mean,8.324889
std,3.169539
min,3.23
25%,5.605
50%,7.68
75%,10.89
max,17.48


## Integrate transcriptomics data

CORDA algorithm

* https://resendislab.github.io/corda/
* https://github.com/resendislab/corda

Lahko nastavim, da mora model biti zmožen producirati biomaso
```
met_prod (Optional[list]): Additional metabolic targets that have to be
            achieved by the model. Can be a single object or list of objects.
            List elements can be given in various forms:
            (1) A string naming a metabolite in the model will ensure that the
            metabolite can be produced.
            For instance: met_prod = ["pi_c", "atp_c"]
            (2) A dictionary of metabolite -> int will define an irreversible
            reaction that must be able to carry flux.
            For instance: met_prod = {"adp_c": -1, "pi_c": -1, "atp_c": 1}
            (3) A string representation of a reversible or irreversible
            reaction that must be able to carry flux.
            For instance: met_prod = "adp_c + pi_c -> atp_c"

```
Primer, ki bi moral delovati:
```Python
met_prod = model.reactions.BIOMASS_maintenance.build_reaction_string()
met_prod = met_prod.replace("-->", "->")
```

https://github.com/resendislab/corda/blob/main/corda/corda.py


#### Extract biomass function from the model

In [10]:
biomass = model.reactions.BIOMASS_maintenance.build_reaction_string().replace("-->", "->")
biomass

'0.50563 ala__L_c + 0.35926 arg__L_c + 0.27942 asn__L_c + 0.35261 asp__L_c + 20.7045 atp_c + 0.020401 chsterol_c + 0.011658 clpn_hs_c + 0.039036 ctp_c + 0.046571 cys__L_c + 0.27519 g6p_c + 0.326 gln__L_c + 0.38587 glu__L_c + 0.53889 gly_c + 0.036117 gtp_c + 20.6508 h2o_c + 0.12641 his__L_c + 0.28608 ile__L_c + 0.54554 leu__L_c + 0.59211 lys__L_c + 0.15302 met__L_c + 0.023315 pail_hs_c + 0.15446 pchol_hs_c + 0.055374 pe_hs_c + 0.002914 pglyc_hs_c + 0.25947 phe__L_c + 0.41248 pro__L_c + 0.005829 ps_hs_c + 0.39253 ser__L_c + 0.017486 sphmyln_hs_c + 0.31269 thr__L_c + 0.013306 trp__L_c + 0.15967 tyr__L_c + 0.053446 utp_c + 0.35261 val__L_c -> 20.6508 adp_c + 20.6508 h_c + 20.6508 pi_c'

### Model integration

In [11]:
from corda import reaction_confidence
from corda import CORDA

In [12]:
def val_to_score(val, thr0, thr1, thr2, thr3):
    if val < thr0:
        return -1
    elif val < thr1:
        return 0
    elif val < thr2:
        return 1
    elif val < thr3:
        return 2
    else:
        return 3

#### Parameters

In [13]:
require_biomass = True
perc0 = 25 # below this ... inactive
perc1 = 50 # else, below this ... not sure
perc2 = 75 # else, below this ...  low confidence
perc3 = 95 # else, below this ... medium confidence
# else high confidence (above perc3)

In [14]:
folder = "models\\biomass" if require_biomass else "models\\no_biomass"
folder

'models\\biomass'

### Control

In [15]:
thr0 = np.percentile(df_control.value, perc0)
thr1 = np.percentile(df_control.value, perc1)
thr2 = np.percentile(df_control.value, perc2)
thr3 = np.percentile(df_control.value, perc3)

In [16]:
#gene_conf_control = {g_id:-1 if val < thr0 else 1 if val < thr1 else 2 if val < thr2 else 3 for (g_id, val) in zip(df_control['gene_id'], df_control['value'])}
gene_conf_control = {g_id : val_to_score(val, thr0, thr1, thr2, thr3) for (g_id, val) in zip(df_control['gene_id'], df_control['value'])}

In [17]:
conf_control = {}
for r in model.reactions:
    conf_control[r.id] = reaction_confidence(r, gene_conf_control)

In [18]:
conf_control

{'24_25DHVITD3tm': 0,
 '25HVITD3t': 0,
 'COAtl': 0,
 'EX_5adtststerone_e': 0,
 'EX_5adtststerones_e': 0,
 'EX_5fthf_e': 0,
 'EX_5htrp_e': 0,
 'EX_5mthf_e': 0,
 'EX_5thf_e': 0,
 'EX_6dhf_e': 0,
 '24_25VITD3Hm': 3,
 '24NPHte': 0,
 '10FTHF7GLUtl': 0,
 '10FTHFtm': 0,
 '11DOCRTSLtr': 0,
 '13DAMPPOX': 1,
 '24_25DHVITD2t': 0,
 '24_25DHVITD2tm': 0,
 '24_25DHVITD3t': 0,
 '25VITD2Hm': 0,
 '2AMACHYD': -1,
 '2AMACSULT': 0,
 '2AMADPTm': 0,
 '2MCITt': 0,
 '2OXOADOXm': 2,
 '2OXOADPTm': 0,
 '34DHPHEt': 1,
 '34DHXMANDACOX': 1,
 '34HPLFM': 0,
 '3AIBtm': 0,
 '3HAO': -1,
 '3MOBt2im': 0,
 '3MOPt2im': 0,
 '3MOXTYROX': 0,
 '3SALAOX': 0,
 '42A12BOOX': 1,
 '4HBZCOAFm': 0,
 '4HBZFm': 0,
 '4HDEBRISOQUINEte': 0,
 '4HOXPACDOX_NADP': 1,
 '4MOPt2im': 0,
 '4MPTNLte': 0,
 '4NPHSULT': 0,
 '4PYRDX': 0,
 '5ADTSTSTERONEGLCtr': 0,
 '5ADTSTSTERONESULT': 0,
 '5ADTSTSTERONEte': 0,
 '5FTHFt2_1': 0,
 '5HOXINOXDA': 0,
 '5THFtl': 0,
 '5THFtm': 0,
 '7DHCHSTEROLtr': 0,
 '7DHFtm': 0,
 'A4GALTc': -1,
 'A4GALTg': -1,
 'A4GNT2g': -1,
 

In [19]:
if require_biomass:
    opt_control = CORDA(model, conf_control, met_prod = biomass)
else:
    opt_control = CORDA(model, conf_control)
opt_control.build()
print(opt_control)

build status: reconstruction complete
Inc. reactions: 1777/10600
 - unclear: 1031/6028
 - exclude: 5/1022
 - low and medium: 268/3077
 - high: 473/473



In [20]:
with open(f'{folder}\\control.pickle', 'wb') as handle:
    pickle.dump(opt_control, handle, protocol=pickle.HIGHEST_PROTOCOL)


In [22]:
model_control = opt_control.cobra_model()

In [27]:
if require_biomass:
    model_control.add_reactions([model.reactions.BIOMASS_maintenance])
    model_control.objective=model.objective

In [28]:
model_control.summary()

Metabolite,Reaction,Flux,C-Number,C-Flux

Metabolite,Reaction,Flux,C-Number,C-Flux


In [25]:
cobra.io.write_sbml_model(model_control, f'{folder}\\model_control.xml')

In [None]:
test = cobra.io.read_sbml_model(f'{folder}\\model_control.xml')

In [None]:
#test.summary()

### Knockdown

In [29]:
thr0 = np.percentile(df_kd.value, perc0)
thr1 = np.percentile(df_kd.value, perc1)
thr2 = np.percentile(df_kd.value, perc2)
thr3 = np.percentile(df_kd.value, perc3)

In [30]:
gene_conf_kd = {g_id : val_to_score(val, thr0, thr1, thr2, thr3) for (g_id, val) in zip(df_kd['gene_id'], df_kd['value'])}

In [31]:
conf_kd = {}
for r in model.reactions:
    conf_kd[r.id] = reaction_confidence(r, gene_conf_kd)

In [32]:
if require_biomass:
    opt_kd = CORDA(model, conf_kd, met_prod = biomass)
else:
    opt_kd = CORDA(model, conf_kd)
opt_kd.build()
print(opt_kd)

build status: reconstruction complete
Inc. reactions: 2027/10600
 - unclear: 1164/6158
 - exclude: 8/1068
 - low and medium: 324/2843
 - high: 531/531



In [33]:
with open(f'{folder}\\kd.pickle', 'wb') as handle:
    pickle.dump(opt_kd, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [34]:
#with open('models\\kd.pickle', 'rb') as handle:
#    opt_kd = pickle.load(handle)

In [35]:
model_kd = opt_kd.cobra_model()

In [36]:
if require_biomass:
    model_kd.add_reactions([model.reactions.BIOMASS_maintenance])
    model_kd.objective=model.objective   

In [37]:
model_kd.summary()

Non-linear or non-reaction model objective. Falling back to minimal display.


Metabolite,Reaction,Flux,C-Number,C-Flux

Metabolite,Reaction,Flux,C-Number,C-Flux


In [38]:
cobra.io.write_sbml_model(model_kd, f'{folder}\\model_kd.xml')