In [None]:
import itertools
import warnings

import gemcat as gc
import numpy as np
import pandas as pd
import scipy.stats as stats

import recon
import utilities
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the imported
# libraries — confirm a blanket filter is intended.
warnings.filterwarnings('ignore')

# Project-local helper objects. Later cells read `model.model` (passed as the
# cobra model) and `model.genes` (passed as the gene mapping to util.map_gene).
model = recon.Recon()
util = utilities.Utilities()

In [None]:
def add_gene_to_rxn(cobra_model, rxn_id, gene_number):
    '''
    Set the gene-reaction rule (GPR) of one reaction to the given gene.

    The reaction's current rule is overwritten (not extended) and the model
    is modified in place.

    Args:
        cobra_model (cobra.Model): model that holds the reaction
        rxn_id (str): id of the reaction to update
        gene_number (str): value assigned to the reaction's gene_reaction_rule

    Returns:
        cobra.Model: the same model object that was passed in
    '''
    target_reaction = cobra_model.reactions.get_by_id(rxn_id)
    target_reaction.gene_reaction_rule = gene_number
    return cobra_model

In [None]:
# Attach the gene to the NAD transport reaction by setting that reaction's GPR
rxn_id, gene_number = 'NADtm', '6398.1'
cmodel = add_gene_to_rxn(model.model, rxn_id, gene_number)

# Sanity check: the reaction's rule must now be exactly the gene id
assigned_rule = cmodel.reactions.get_by_id(rxn_id).gene_reaction_rule
assert assigned_rule == gene_number, f"No GPR with {gene_number} exists in reaction {rxn_id}"

In [None]:
def diff_exp(rna, wt, ko):
    '''
    Compute expression ratios for every (knockout, wild-type) column pair.

    Args:
        rna (pd.DataFrame): expression table containing the columns named in wt and ko
        wt (iterable of str): column names used as denominators
        ko (iterable of str): column names used as numerators

    Returns:
        pd.DataFrame: one column '<ko>_<wt>' per pair holding rna[ko] / rna[wt],
        followed by a constant column 'base' equal to 1.0
    '''
    # itertools.product yields (wt, ko) pairs; the column label puts ko first.
    ratios = {
        ko_col + '_' + wt_col: rna[ko_col].div(rna[wt_col], axis=0)
        for wt_col, ko_col in itertools.product(wt, ko)
    }
    df = pd.DataFrame(ratios)
    df['base'] = 1.0
    return df

## Predictions (based on RNA-Seq)

In [None]:
# Load the FPKM table and keep the original gene ids as a regular column,
# because 'gene_id' is turned into the index before mapping.
rna_raw = pd.read_csv('../../biomarker_predictions/data/NAD_cell_lines/gene_fpkm.xls.csv')
rna_indexed = rna_raw.assign(ensemble_id=rna_raw['gene_id']).set_index('gene_id')

# Map the table onto the model's genes with the project helper.
# NOTE(review): presumably re-keys rows by model gene id — confirm in utilities.
rna = util.map_gene(df=rna_indexed, g_mapping=model.genes, mapping_column='ensembl_gene')

# Keep only knockout / wild-type sample columns plus the gene annotation columns
wanted = rna.columns.str.contains('HEK25a51ko|wtHEK|gene_name|gene_id|ensemble_id')
rna = rna[rna.columns[wanted]]

In [None]:
# Split the expression table by sample group (case-sensitive column-name regex).
# NOTE(review): the preceding column filter keeps only names matching
# 'HEK25a51ko|wtHEK|gene_name|gene_id|ensemble_id', so rna_oe is probably
# empty — confirm whether the overexpression samples are still needed.
rna_ko, rna_wt, rna_oe = (
    rna.filter(regex=sample_pattern)
    for sample_pattern in ('HEK25a51ko', 'wtHEK', 'HEK25A51oe')
)

In [None]:
# Fold changes (knockout / wild type) for every replicate pair, all genes.
df_ko_wt = diff_exp(rna=rna, wt=rna_wt.columns, ko=rna_ko.columns)
# A zero wild-type value produces +/-inf and 0/0 produces NaN. Drop rows with
# either (the same clean-up the significant-genes analysis further down
# applies), then replace ratios of exactly 0.0 by 1.0, the baseline value.
df_ko_wt = (
    df_ko_wt
    .replace([np.inf, -np.inf], np.nan)
    .dropna(axis=0)
    .replace(0.0, 1.0)
)
df_ko_wt

In [None]:
# Simulate SLC25A51 knockout: gene '6398.1' is forced to 0.0 in every
# comparison column and to 1.0 in the 'base' column.
_df_ko_wt = df_ko_wt.T
_df_ko_wt['6398.1'] = np.where(_df_ko_wt.index == 'base', 1.0, 0.0)
df_ko_wt = _df_ko_wt.T

In [None]:
# Display the fold-change table after the '6398.1' row was overwritten.
df_ko_wt

In [None]:
# Run the standard GEMCAT workflow once per comparison column, each time
# against the constant 'base' column, and collect the outputs column-wise.
res = pd.DataFrame()
comparison_columns = df_ko_wt.columns.difference(['base'])
for col in comparison_columns:
    _res = gc.workflows.workflow_standard(
        cobra_model=model.model,
        mapped_genes_baseline=df_ko_wt['base'],
        mapped_genes_comparison=df_ko_wt[col],
        gene_fill=1.0,
    )
    res[col] = _res

## Only integrate significantly changed genes

In [None]:
# Per-gene two-sample t-test (scipy defaults) between the knockout and the
# wild-type replicates; NaN replicates are left out of the test.
pvalue = {
    gene: stats.ttest_ind(rna_ko.loc[gene], rna_wt.loc[gene], nan_policy='omit').pvalue
    for gene in rna_ko.index
}

# Keep only genes whose raw p-value is below 0.05. Genes with an undefined
# (NaN) p-value fail the comparison and are dropped as well.
# NOTE(review): no multiple-testing correction is applied — confirm intended.
_rna = pd.concat(
    (rna_wt, rna_ko,
     pd.DataFrame.from_dict(pvalue, orient='index', columns=['pvalue'])),
    axis=1,
)
_rna = _rna[_rna['pvalue'] < 0.05]

In [None]:
# Display the expression table restricted to genes with p-value < 0.05.
_rna

In [None]:
# Fold changes restricted to the significantly changed genes. Rows holding
# +/-inf or NaN are removed, then ratios of exactly 0.0 are set to 1.0.
df_ko_wt = (
    diff_exp(rna=_rna, wt=rna_wt.columns, ko=rna_ko.columns)
    .replace([np.inf, -np.inf], np.nan)
    .dropna(axis=0)
    .replace(0.0, 1.0)
)

In [None]:
# df_ko_wt.to_csv('../data/rnaseq_slc25a51ko_vs_parental_pvalue.csv')

In [None]:
# Simulate the knockout on the filtered table: gene '6398.1' is forced to 0.0
# in every comparison column and to 1.0 in the 'base' column.
_df_ko_wt = df_ko_wt.T
_df_ko_wt['6398.1'] = np.where(_df_ko_wt.index == 'base', 1.0, 0.0)
df_ko_wt = _df_ko_wt.T

In [None]:
# Display the filtered fold-change table after the '6398.1' row was overwritten.
df_ko_wt

In [None]:
# Run the standard GEMCAT workflow once per comparison column of the filtered
# table, each time against the constant 'base' column.
# NOTE: `res` is rebound here, so the results of the earlier all-genes run
# are no longer reachable under this name.
res = pd.DataFrame()
comparison_columns = df_ko_wt.columns.difference(['base'])
for col in comparison_columns:
    _res = gc.workflows.workflow_standard(
        cobra_model=model.model,
        mapped_genes_baseline=df_ko_wt['base'],
        mapped_genes_comparison=df_ko_wt[col],
        gene_fill=1.0,
    )
    res[col] = _res