In [1]:
import itertools
import gemcat as gc
import pandas as pd
import numpy as np
import recon
import utilities
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including numerical warnings (e.g. division by zero) raised by the ratio and
# t-test cells below — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Project-local wrapper; its `.model` and `.genes` attributes are used below.
# Presumably it loads the Recon metabolic reconstruction — confirm in recon.py.
model = recon.Recon()
# Project-local helper object; only its `map_gene` method is used below.
util = utilities.Utilities()

In [2]:
def add_gene_to_rxn(cobra_model, rxn_id, gene_number):
    '''
    Set the gene-reaction rule (GPR) of one reaction.

    The reaction is looked up by id and its ``gene_reaction_rule`` is
    overwritten (not extended) with ``gene_number``. The model is modified
    in place; the same object is returned for convenience.

    Args:
        cobra_model (cobra.Model): model containing the reaction
        rxn_id (str): id of the reaction to update
        gene_number (str): value to store as the reaction's GPR

    Returns:
        cobra.Model: the model that was passed in
    '''
    target_reaction = cobra_model.reactions.get_by_id(rxn_id)
    target_reaction.gene_reaction_rule = gene_number
    return cobra_model

In [3]:
# Give the NAD transport reaction a GPR made of the single gene `gene_number`.
rxn_id = 'NADtm'
gene_number = '6398.1'
cmodel = add_gene_to_rxn(
    cobra_model=model.model,
    rxn_id=rxn_id,
    gene_number=gene_number,
)

# Sanity check: the reaction's rule must now be exactly the gene id set above.
assert (
    cmodel.reactions.get_by_id(rxn_id).gene_reaction_rule == gene_number
), f"No GPR with {gene_number} exists in reaction {rxn_id}"

In [4]:
def diff_exp(rna, wt, ko):
    '''
    Build a table of knockout / wild-type expression ratios.

    For every (wild-type column, knockout column) pair the knockout column of
    ``rna`` is divided element-wise by the wild-type column. The result is
    stored under the name ``'<ko>_<wt>'``. A constant ``'base'`` column of 1.0
    is appended last.

    Args:
        rna (pd.DataFrame): expression table holding all ``wt`` and ``ko`` columns
        wt (iterable of str): wild-type column names
        ko (iterable of str): knockout column names

    Returns:
        pd.DataFrame: one ratio column per pair (wild-type varies slowest),
        followed by the ``'base'`` column
    '''
    ratios = pd.DataFrame()
    for wt_col, ko_col in itertools.product(wt, ko):
        numerator = rna[ko_col]
        denominator = rna[wt_col]
        ratios[ko_col + '_' + wt_col] = numerator.div(denominator, axis=0)
    ratios['base'] = 1.0
    return ratios

## Predictions (based on RNA-Seq)

In [25]:
# Read the RNA-seq table (tab-separated despite the .csv extension), keep a copy
# of the original gene ids in 'ensemble_id', index by gene id and hand the frame
# to the project helper that maps it onto the model's genes.
rna = util.map_gene(
    df=(
        pd.read_csv('../data/rnaseq_HEK293_complete.csv', sep='\t')
        .assign(ensemble_id=lambda frame: frame['gene_id'])
        .set_index('gene_id')
    ),
    g_mapping=model.genes,
    mapping_column='ensembl_gene',
)

In [26]:
# Split the replicate columns by condition via a substring match on the column name.
rna_wt = rna.filter(like='wtHEK293')
rna_ko = rna.filter(like='SLC25A51ko')

In [27]:
# KO/WT expression ratio for every (wild-type replicate, knockout replicate) pair.
df_ko_wt = diff_exp(rna=rna, wt=rna_wt.columns, ko=rna_ko.columns)
# A zero wild-type count yields an infinite ratio, which dropna() alone would
# keep. Convert those to NaN first so such genes are removed, exactly as the
# significant-genes section further down already does. Ratios of exactly 0.0
# that remain are then reset to 1.0.
df_ko_wt = (
    df_ko_wt
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
    .replace(0.0, 1.0)
)
df_ko_wt

Unnamed: 0_level_0,SLC25A51ko_1_wtHEK293_1,SLC25A51ko_2_wtHEK293_1,SLC25A51ko_3_wtHEK293_1,SLC25A51ko_1_wtHEK293_2,SLC25A51ko_2_wtHEK293_2,SLC25A51ko_3_wtHEK293_2,SLC25A51ko_1_wtHEK293_3,SLC25A51ko_2_wtHEK293_3,SLC25A51ko_3_wtHEK293_3,base
gene_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
4513.1,0.922471,0.821123,0.866507,0.869681,0.774133,0.816920,0.964375,0.858423,0.905869,1.0
4512.1,1.087919,0.987118,1.001998,0.998512,0.905996,0.919652,1.184174,1.074455,1.090650,1.0
4514.1,0.925079,0.817946,0.790910,0.848822,0.750521,0.725713,0.969759,0.857452,0.829110,1.0
4537.1,1.012700,0.939351,1.013544,0.959880,0.890357,0.960679,1.082298,1.003909,1.083200,1.0
4539.1,1.170492,0.911421,0.986303,1.204746,0.938094,1.015168,1.285541,1.001006,1.083249,1.0
...,...,...,...,...,...,...,...,...,...,...
9153.1,0.347363,1.000000,0.656775,0.447659,1.000000,0.846410,0.426814,1.000000,0.806998,1.0
2687.1,0.347363,0.284588,1.000000,0.895318,0.733517,1.000000,0.426814,0.349681,1.000000,1.0
6557.1,1.000000,0.426882,0.492581,1.000000,0.366759,0.423205,1.000000,0.349681,0.403499,1.0
6529.1,5.210439,1.707527,1.000000,2.238294,0.733517,1.000000,2.134071,0.699362,1.000000,1.0


In [28]:
# Simulate the SLC25A51 knockout: force gene '6398.1' to a ratio of 0.0 in every
# KO/WT comparison while keeping the 'base' reference at 1.0.
_df_ko_wt = df_ko_wt.transpose()  # comparisons as rows, genes as columns
base_rows = _df_ko_wt.index == 'base'
_df_ko_wt.loc[~base_rows, '6398.1'] = 0.0
_df_ko_wt.loc[base_rows, '6398.1'] = 1.0
df_ko_wt = _df_ko_wt.transpose()

In [29]:
# Display the ratio table after the knockout override of gene '6398.1'.
df_ko_wt

Unnamed: 0_level_0,SLC25A51ko_1_wtHEK293_1,SLC25A51ko_2_wtHEK293_1,SLC25A51ko_3_wtHEK293_1,SLC25A51ko_1_wtHEK293_2,SLC25A51ko_2_wtHEK293_2,SLC25A51ko_3_wtHEK293_2,SLC25A51ko_1_wtHEK293_3,SLC25A51ko_2_wtHEK293_3,SLC25A51ko_3_wtHEK293_3,base
gene_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
4513.1,0.922471,0.821123,0.866507,0.869681,0.774133,0.816920,0.964375,0.858423,0.905869,1.0
4512.1,1.087919,0.987118,1.001998,0.998512,0.905996,0.919652,1.184174,1.074455,1.090650,1.0
4514.1,0.925079,0.817946,0.790910,0.848822,0.750521,0.725713,0.969759,0.857452,0.829110,1.0
4537.1,1.012700,0.939351,1.013544,0.959880,0.890357,0.960679,1.082298,1.003909,1.083200,1.0
4539.1,1.170492,0.911421,0.986303,1.204746,0.938094,1.015168,1.285541,1.001006,1.083249,1.0
...,...,...,...,...,...,...,...,...,...,...
2687.1,0.347363,0.284588,1.000000,0.895318,0.733517,1.000000,0.426814,0.349681,1.000000,1.0
6557.1,1.000000,0.426882,0.492581,1.000000,0.366759,0.423205,1.000000,0.349681,0.403499,1.0
6529.1,5.210439,1.707527,1.000000,2.238294,0.733517,1.000000,2.134071,0.699362,1.000000,1.0
3294.1,0.347363,1.000000,1.000000,0.895318,1.000000,1.000000,1.280443,1.000000,1.000000,1.0


In [None]:
# Run the gemcat standard workflow once per KO/WT comparison column, always
# against the constant 'base' column, and collect each result as a column of `res`.
res = pd.DataFrame()
comparison_cols = df_ko_wt.columns.difference(['base'])  # every column except 'base'
for col in comparison_cols:
    _res = gc.workflows.workflow_standard(
        cobra_model=model.model,
        mapped_genes_baseline=df_ko_wt['base'],
        mapped_genes_comparison=df_ko_wt[col],
        gene_fill=1.0,
    )
    res[col] = _res

## Only integrate significantly changed genes

In [30]:
import scipy.stats as stats

# Two-sample t-test per gene (KO vs. WT replicates), computed for all genes in
# one vectorised call along axis=1 instead of one scipy call per gene.
# rna_ko and rna_wt are column subsets of the same `rna` frame, so their rows
# share the same gene order and can be compared positionally.
pvals = stats.ttest_ind(rna_ko.to_numpy(dtype=float), rna_wt.to_numpy(dtype=float),
                        axis=1, nan_policy='omit')[1]
# Kept as a gene -> p-value dict so the name `pvalue` has the same type as before.
pvalue = dict(zip(rna_ko.index, pvals))

_rna = pd.concat((rna_wt, rna_ko,
                  pd.DataFrame.from_dict(pvalue, orient='index', columns=['pvalue'])), axis=1)
# Keep genes with an unadjusted p-value below 0.05; NaN p-values compare False
# and are therefore dropped as well.
# NOTE(review): no multiple-testing correction is applied — confirm this is intended.
_rna = _rna[_rna['pvalue'] < 0.05]

In [31]:
# Display the replicate values and p-values of the genes that passed the p < 0.05 filter.
_rna

Unnamed: 0,wtHEK293_1,wtHEK293_2,wtHEK293_3,SLC25A51ko_1,SLC25A51ko_2,SLC25A51ko_3,pvalue
4513.1,7953.784822,8436.582189,7608.177837,7337.134137,6531.033136,6892.011044,0.032138
4514.1,5278.790824,5753.025674,5035.577767,4883.296127,4317.764671,4175.047491,0.041084
4538.1,2144.555715,2350.538306,1985.635568,1714.806672,1718.399339,1732.004746,0.014304
4519.1,1720.850316,1816.917352,1600.862933,1041.815383,1089.226491,1111.542239,0.000656
4536.1,1436.100641,1547.813056,1342.029832,1028.876773,1063.606004,1075.078883,0.003203
...,...,...,...,...,...,...,...
390928.1,0.000000,0.000000,0.000000,0.018563,0.030417,0.017549,0.005812
50614.1,0.007942,0.009244,0.019391,0.000000,0.000000,0.000000,0.028067
491.1,0.005357,0.006236,0.008720,0.000000,0.004574,0.000000,0.045408
8170.1,0.000000,0.000000,0.000000,0.010663,0.034944,0.030242,0.027263


In [32]:
# KO/WT ratios restricted to the significantly changed genes in `_rna`.
# Infinite ratios (zero wild-type count) become NaN and those genes are dropped;
# ratios of exactly 0.0 are then reset to 1.0.
# NOTE(review): resetting 0.0 to 1.0 makes a gene that is fully silenced in the
# knockout look unchanged — confirm this is the intended handling.
df_ko_wt = (
    diff_exp(rna=_rna, wt=rna_wt.columns, ko=rna_ko.columns)
    .replace([np.inf, -np.inf], np.nan)
    .dropna(axis=0)
    .replace(0.0, 1.0)
)

In [None]:
# df_ko_wt.to_csv('../data/rnaseq_slc25a51ko_vs_parental_pvalue.csv')

In [33]:
# Simulate the SLC25A51 knockout on the significant-genes table: gene '6398.1'
# gets a ratio of 0.0 in every KO/WT comparison and 1.0 in the 'base' column.
_df_ko_wt = df_ko_wt.transpose()  # comparisons as rows, genes as columns
base_rows = _df_ko_wt.index == 'base'
_df_ko_wt.loc[~base_rows, '6398.1'] = 0.0
_df_ko_wt.loc[base_rows, '6398.1'] = 1.0
df_ko_wt = _df_ko_wt.transpose()

In [34]:
# Display the significant-genes ratio table after the knockout override of gene '6398.1'.
df_ko_wt

Unnamed: 0,SLC25A51ko_1_wtHEK293_1,SLC25A51ko_2_wtHEK293_1,SLC25A51ko_3_wtHEK293_1,SLC25A51ko_1_wtHEK293_2,SLC25A51ko_2_wtHEK293_2,SLC25A51ko_3_wtHEK293_2,SLC25A51ko_1_wtHEK293_3,SLC25A51ko_2_wtHEK293_3,SLC25A51ko_3_wtHEK293_3,base
4513.1,0.922471,0.821123,0.866507,0.869681,0.774133,0.816920,0.964375,0.858423,0.905869,1.0
4514.1,0.925079,0.817946,0.790910,0.848822,0.750521,0.725713,0.969759,0.857452,0.829110,1.0
4538.1,0.799609,0.801285,0.807629,0.729538,0.731066,0.736855,0.863606,0.865415,0.872267,1.0
4519.1,0.605407,0.632958,0.645926,0.573397,0.599491,0.611774,0.650784,0.680400,0.694339,1.0
4536.1,0.716438,0.740621,0.748610,0.664729,0.687167,0.694579,0.766657,0.792535,0.801084,1.0
...,...,...,...,...,...,...,...,...,...,...
9348.1,1.000000,0.569176,1.000000,1.000000,0.489011,1.000000,1.000000,0.699362,1.000000,1.0
50614.1,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.0
491.1,1.000000,0.853763,1.000000,1.000000,0.733517,1.000000,1.000000,0.524522,1.000000,1.0
6557.1,1.000000,0.426882,0.492581,1.000000,0.366759,0.423205,1.000000,0.349681,0.403499,1.0


In [None]:
# Run the gemcat standard workflow once per KO/WT comparison column of the
# significant-genes table, always against the constant 'base' column.
# NOTE(review): this rebinds `res`, replacing the results of the all-genes run
# earlier in the notebook — use a distinct name if both are needed.
res = pd.DataFrame()
comparison_cols = df_ko_wt.columns.difference(['base'])  # every column except 'base'
for col in comparison_cols:
    _res = gc.workflows.workflow_standard(
        cobra_model=model.model,
        mapped_genes_baseline=df_ko_wt['base'],
        mapped_genes_comparison=df_ko_wt[col],
        gene_fill=1.0,
    )
    res[col] = _res