# E-flux* functions used to reconstruct context-specific genome-scale metabolic models (GSMMs)

#### To use the Eflux function you need:

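1. The model as an Excel spreadsheet containing a sheet named 'RXNS'. The code below reads the 'ID', 'EQUATION', 'GENE ASSOCIATION' and 'SUBSYSTEM' columns of that sheet. Such a spreadsheet can easily be created with the RAVEN toolbox in MATLAB.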



2. A tab-separated TPM expression matrix with very specific column headers. The first column must be named 'Gene', and its gene IDs must match the genes in the model. The remaining columns represent the experimental condition samples; a condition may have more than one replicate, as long as the condition name appears in the header of each replicate. For example, if the column headers are condition1_1 and condition1_2 and 'condition1' is listed in conditions, both of these columns are used to calculate the mean expression.

3. A list of conditions naming every experimental condition for which you wish to calculate flux constraints. Each condition must appear in the column headers of the TPM expression matrix.

Test - github Desktop

### PYTHON MODULES AND LOCAL FUNCTIONS

In [18]:
import os
import sys
import csv
import pandas as pd
import numpy as np
from collections import defaultdict, Counter
from statistics import stdev,mean
from math import sqrt,log

In [19]:
def getModelEquations(model):
    """Map every reaction ID in the model's 'RXNS' sheet to its equation string.

    model : path to the model in Excel format (its 'RXNS' sheet is read), or
            an already-loaded 'RXNS' DataFrame. Passing a DataFrame lets a
            caller parse the workbook once and reuse it across calls.

    Returns a dict {str(ID): str(EQUATION)}. Both values are passed through
    str(), so an empty EQUATION cell is stored as the text 'nan'.
    """
    if isinstance(model, pd.DataFrame):
        dfRxns = model
    else:
        dfRxns = pd.read_excel(model,sheet_name='RXNS')

    # Walk the two columns in lockstep instead of building a Series per row
    # with iterrows(); a repeated ID keeps the equation of its last row.
    rxnIds = dfRxns['ID'].tolist()
    rxnEquations = dfRxns['EQUATION'].tolist()
    return {str(rxnId): str(eq) for rxnId, eq in zip(rxnIds, rxnEquations)}

In [20]:
def AorBMean(genes,meansDict):
    """Add up the mean expression of the genes in an 'A or B' association.

    genes     : iterable of gene identifiers
    meansDict : mapping gene -> mean expression value

    Genes that have no entry in meansDict contribute nothing. Despite the
    name, the result is the total of the available means, not their average;
    it is 0 when none of the genes is present.
    """
    return sum(meansDict[g] for g in genes if g in meansDict)

In [21]:
def setFluxConstraints(model,TPMMatrix,condition):
    """Turn the expression data of one condition into a flux bound per reaction.

    model     : tissue-specific model in Excel format (its 'RXNS' sheet is
                read), or an already-loaded 'RXNS' DataFrame
    TPMMatrix : tab-separated TPM expression matrix with a 'Gene' column
                (path or file-like object), or an already-loaded DataFrame
    condition : condition name used to pick the replicate columns of
                TPMMatrix

    Returns a defaultdict(list) mapping reaction ID -> bound:
      * reactions whose 'GENE ASSOCIATION' cell is not text (the orphan
        reactions) get the fixed bound 1000;
      * for the others, the mean expression of every gene of the ' or '
        separated association that is present in the matrix is summed; a
        sum in (0, 1] is used as is, a sum above 1 is replaced by its
        natural logarithm;
      * reactions whose summed expression is not greater than 0 are left
        out of the result.
    The defaultdict type is kept for compatibility with existing callers; use
    ``in`` to test for a reaction, because indexing a missing ID inserts [].

    Raises ValueError when no column header contains `condition`.
    """
    import re  # local import: `re` is not among the notebook's imports

    if isinstance(TPMMatrix, pd.DataFrame):
        tpmFrame = TPMMatrix
    else:
        tpmFrame = pd.read_csv(TPMMatrix,sep='\t')

    # Select the replicate columns of this condition. The name has to appear
    # as a whole token (not glued to a letter or digit on either side): a
    # plain substring test would also hand the 'nonDEN_Liver_CD_*' columns to
    # the condition 'DEN_Liver_CD'.
    wholeToken = re.compile(r'(?<![A-Za-z0-9])' + re.escape(condition) + r'(?![A-Za-z0-9])')
    cols = [header for header in tpmFrame.columns
            if header != 'Gene' and wholeToken.search(str(header))]
    if not cols:
        raise ValueError('no column of the TPM matrix matches condition {!r}; headers are {!r}'
                         .format(condition, list(tpmFrame.columns)))

    # Mean expression per gene over all replicates. Values of a gene that
    # occurs on several rows are pooled before averaging.
    replicateValues = defaultdict(list)
    meansDict = {}
    for gene, values in zip(tpmFrame['Gene'].tolist(), tpmFrame[cols].values.tolist()):
        replicateValues[gene].extend(values)
        meansDict[gene] = mean(replicateValues[gene])

    if isinstance(model, pd.DataFrame):
        dfRxns = model
    else:
        dfRxns = pd.read_excel(model,sheet_name='RXNS')

    boundsDict = {}
    orphanRxns = set()
    for rxnId, geneAssoc in zip(dfRxns['ID'].tolist(), dfRxns['GENE ASSOCIATION'].tolist()):
        if not isinstance(geneAssoc, str):
            # these are orphan reactions (empty cells are read as NaN)
            boundsDict[rxnId] = 1000
            orphanRxns.add(rxnId)
            continue
        # Only ' or ' is understood; any other text is looked up verbatim as
        # a single gene name and contributes 0 when it is not in the matrix.
        # NOTE(review): confirm that rules containing 'and' are meant to end
        # up without a bound.
        boundsDict[rxnId] = sum(meansDict[g] for g in geneAssoc.split(' or ') if g in meansDict)

    finalBounds = defaultdict(list)
    for rxnId, bound in boundsDict.items():
        if rxnId in orphanRxns:
            finalBounds[rxnId] = bound
        elif 0 < bound <= 1:
            finalBounds[rxnId] = bound
        elif bound > 1:
            finalBounds[rxnId] = log(bound)
        # anything else (0, negative, NaN) gets no entry at all
    return finalBounds

In [22]:
def setReactionBoundaries(bounds,equations):
    """Expand one bound per reaction into a lower/upper bound pair.

    bounds    : mapping reaction ID -> bound magnitude
    equations : mapping reaction ID -> equation string

    A reaction whose equation contains '<=>' gets the symmetric interval
    [-bound, bound]; every other reaction gets [0, bound].

    Returns a defaultdict(dict) of the form {ID: {'LB': ..., 'UB': ...}}.
    """
    limits = defaultdict(dict)
    for rxnId in bounds:
        reversible = '<=>' in equations[rxnId]
        upper = bounds[rxnId]
        limits[rxnId] = {'LB': -upper if reversible else 0, 'UB': upper}
    return limits

### DEFINE THE PARAMETERS

In [23]:
# Experimental conditions to extract TPM values for; one set of flux
# boundaries is produced per entry. Edit this list as needed.
conditions = [
    'nonDEN_Liver_WD',
    'nonDEN_Liver_CD',
    'DEN_Liver_CD',
    'DEN_AdjLiver_WD',
    'DEN_Tumour_WD',
]

# Generic model on which the flux boundaries are imposed. It must be in the
# Excel format written by the exportToExcelFormat function of the Raven2.0
# toolbox.
model = 'data/models/xlsx/genericLiver.xlsx'

# TPM expression matrix. setFluxConstraints reads this file as tab-separated
# text, whatever the extension says.
tpm = 'data/tpm_mat.csv'

### GET THE FLUX BOUNDARIES AND WRITE TO FILE

In [24]:
# RXNS sheet indexed by reaction ID, used below to look up each reaction's
# subsystem.
dfRxns = pd.read_excel(model,sheet_name='RXNS',index_col='ID')

# The equations do not depend on the condition, so read them once instead of
# re-parsing the workbook on every pass through the loop.
equations = getModelEquations(model)

# Make sure the output directory exists before any result is computed.
os.makedirs('data/Eflux', exist_ok=True)

for condition in conditions:
    print(condition)

    # get the flux constraints
    fluxConstraints = setFluxConstraints(model,tpm,condition)
    boundaries = setReactionBoundaries(fluxConstraints,equations)

    # write this to a file that can be used to impose boundaries on the model
    # (tab-separated despite the .csv extension); the with-block closes the
    # file even if a write fails part-way through
    with open('data/Eflux/' + condition + '.csv','w') as fout:
        fout.write('rxn\tLB\tUB\n')
        for rxn in boundaries:
            # exchange reactions are not written to the file
            if dfRxns.at[rxn,'SUBSYSTEM'] == 'Exchange reactions':
                continue
            fout.write(rxn + '\t')
            fout.write(str(boundaries[rxn]['LB']) + '\t')
            fout.write(str(boundaries[rxn]['UB']) + '\n')

nonDEN_Liver_WD
nonDEN_Liver_CD
DEN_Liver_CD
DEN_AdjLiver_WD
DEN_Tumour_WD
