In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import plotting as plot
import itertools

import pyreporter as pr

import recon
# Instantiate the project's Recon model wrapper and its utilities helper;
# both objects are used throughout the cells below.
model = recon.recon.Recon()
util = recon.utilities.Utilities()

# Figure styling: seaborn's 'ticks' style is active, 'white' is the
# commented-out alternative.
# sns.set_style('white')
sns.set_style('ticks')

In [5]:
# Display the model's mapping from reaction id to gene-rule string.
# NOTE(review): this prints the whole mapping; a small slice may be enough.
model.gpr

{'13DAMPPOX': '8639.1 or 26.1 or 314.2 or 314.1',
 '24_25VITD2Hm': '1591.1',
 '24_25VITD3Hm': '1591.1',
 '25VITD2Hm': '1594.1',
 '25VITD3Hm': '1594.1',
 '2AMACHYD': '10993.1',
 '2AMACSULT': '6818.1',
 '2AMADPTm': '89874.1',
 '2HBO': '92483.1 or 3948.2 or 55293.1 or (3945.1 and 3939.1) or 3939.1 or 160287.1 or 3945.1 or 3948.1',
 '2HBt2': '9123.1 or 9194.1 or 6566.1',
 '2HCO3_NAt': '57835.3 or 8671.1 or 57835.2 or 57835.1 or 57835.4',
 '2OXOADOXm': '(4967.2 and 1738.1 and 8050.1 and 1743.1) or (4967.1 and 1738.1 and 8050.1 and 1743.1)',
 '2OXOADPTm': '89874.1',
 '34DHOXPEGOX': '130.1 or 127.1 or (125.1 and 124.1) or 131.1 or (126.1 and 124.1) or 128.1 or 137872.1 or (125.1 and 126.1)',
 '34DHPHAMT': '1312.1 or 1312.2',
 '34DHPHEt': '117247.1',
 '34DHPLACOX': '218.1 or 222.1 or 220.1 or 221.1',
 '34DHPLACOX_NADP_': '222.1 or 221.1 or 218.1',
 '34DHXMANDACOX': '221.1 or 218.1 or 220.1',
 '34DHXMANDACOX_NADP_': '221.1 or 222.1',
 '34HPPOR': '3242.1',
 '3AIBTm': '18.1 or 18.2',
 '3AIBtmi': 

In [3]:
# Metabolite measurements; the `fullName` column drives the filter below.
# NOTE(review): this path starts with '../../data' while the other reads in
# this notebook use '../data' -- confirm which one is correct.
mets = pd.read_csv('../../data/NAD_cell_lines/merged_metabolites_measurement.csv')

# Reshape model.gs without the rows whose index mentions 'HC02111', keep only
# the measured metabolites, then drop the two annotation columns.
sc = (
    util.reshape(model.gs[~model.gs.index.str.contains('HC02111')], include=None)
    .query('metabolites in @mets.fullName')
    .drop(columns=['metabolites', 'compartment'])
)

In [None]:
# FPKM table, passed through the project's map_gene helper using the model's
# gene table and its 'ensembl_gene' column.
rna = pd.read_csv('../data/NAD_cell_lines/gene_fpkm.xls.csv', index_col=0)
rna1 = util.map_gene(df=rna, g_mapping=model.genes, mapping_column='ensembl_gene')

# Keep the HEK sample columns only, then drop every row that has a zero
# (or a missing value) in any of them.
_hek_columns = rna1.columns[rna1.columns.str.contains('wtHEK293|HEK25a51|HEK25A51')]
rna_hek = rna1[_hek_columns].replace(0.0, np.nan).dropna(axis=0)

# Column labels of the three sample groups: wild type, knock-out, over-expression.
wt, ko, oe = (
    rna_hek.columns[rna_hek.columns.str.contains(tag)]
    for tag in ('wtHEK293', 'HEK25a51ko', 'HEK25A51oe')
)

In [None]:
# Largest finite oe/wt ratio of the per-row group means; infinite ratios are
# converted to NaN and dropped before taking the maximum.
# NOTE(review): this indexes the raw `rna` table rather than the mapped
# `rna1` -- confirm that is intended.
(
    (rna[oe].mean(axis=1) / rna[wt].mean(axis=1))
    .replace(np.inf, np.nan)
    .dropna()
    .max()
)

In [None]:
# Call the model's get_subsystem_from_gene helper with every gene number of
# the model, passed as strings.
df = model.get_subsystem_from_gene(model.genes.gene_number.astype('str'))

In [None]:
# Display the lookup result.
df

In [None]:
# Compounds of interest, identified by their full name in model.metabolites.
list_of_mapped_compounds = ['L-Methionine', 'L-Tryptophan']

# pathways: metabolite abbreviation -> subsystems of every reaction whose
# formula mentions that abbreviation.
pathways = {}
for met in model.metabolites[model.metabolites.fullName.isin(list_of_mapped_compounds)]['abbreviation']:
    # regex=False: the abbreviation is a literal identifier, not a pattern;
    # na=False: a missing formula simply does not match.
    # NOTE(review): this is still a substring test, so an abbreviation that is
    # contained in a longer one will match that one too -- confirm acceptable.
    _mentions_met = model.reactions.formula.str.contains(met, regex=False, na=False)
    pathways[met] = list(set(model.reactions[_mentions_met]['subsystem']))

# Flat list of all subsystems collected above (duplicates kept). The variable
# keeps its original spelling because it is a notebook-level name.
subsytems = list(itertools.chain.from_iterable(pathways.values()))

# m_paths: subsystem -> abbreviations of the compounds that occur in it.
m_paths = {}
for p in list(set(subsytems)):
    m_paths[p] = [k for k, v in pathways.items() if p in v]

In [None]:
# Reactions returned for 'Oxoglutaric acid', indexed by reaction abbreviation.
_m = model.get_reactions_from_metabolite('Oxoglutaric acid').set_index('abbreviation')
# Gene columns of `df`, indexed the same way. The following cells merge `_g`
# with `_m`, so it has to be defined here; it was previously commented out,
# which made those cells fail with a NameError on a fresh kernel.
# NOTE(review): assumes `df` carries these three columns -- confirm.
_g = df[['gene_number', 'abbreviation', 'uniprot_gname']].set_index('abbreviation')

In [None]:
# Distinct subsystems among the reactions in `_m`.
_m.subsystem.unique()

In [None]:
# Number of index labels shared by the `_g`/`_m` merge and `_m` itself.
# `_g` must have been defined by an earlier cell.
len(_g.merge(_m, on='abbreviation').index.intersection(_m.index))

In [None]:
# Show the merged table.
_g.merge(_m, on='abbreviation')

In [None]:
# Number of distinct index labels in `_m`.
len(set(_m.index))

In [None]:
# Reaction row(s) with abbreviation "ALASm".
model.reactions.query('abbreviation == "ALASm"')

In [None]:
# All reactions assigned to the "Citric acid cycle" subsystem.
model.reactions.query('subsystem == "Citric acid cycle"')

In [None]:
# Reactions returned for 'NAD', restricted to the "NAD metabolism" subsystem.
rxn_nad = model.get_reactions_from_metabolite('NAD')
rxn_nad.query('subsystem == "NAD metabolism"')

In [None]:
# Reactions whose formula contains "nad" and whose description contains "Transport".
model.reactions.query('formula.str.contains("nad") and description.str.contains("Transport")')

In [None]:
# Reactions whose abbreviation contains "NADt".
model.reactions.query('abbreviation.str.contains("NADt")')

In [None]:
# Work on a copy of the model's gene rules and set the rule of reaction
# 'NADtm' to gene '6398.1'; model.gpr itself is not reassigned here.
gpr = model.gpr.copy()
gpr['NADtm'] = '6398.1'
# genes = model.genes.copy()
# genes.gene_number = genes.gene_number.astype('str')
# genes[genes.gene_number.str.contains('6398')]

In [None]:
# Show the 'NADtm' reaction object of the underlying model.
model.model.reactions.get_by_id('NADtm')#.gene_reaction_rule = '6398.1'

In [None]:
# Attach gene '6398.1' to reaction 'NADtm' through the project's helper and
# keep the returned model as `cmodel`.
cmodel = model.add_gene_to_rxn(cobra_model=model.model, rxn_id='NADtm', gene_number='6398.1')

In [None]:
# Show the 'NADtm' reaction object of `cmodel`.
cmodel.reactions.get_by_id('NADtm')

In [None]:
import pyreporter as pr

In [None]:
# pd.Series({i:1.0 for i in model.genes.gene_number})
# Fetch a reaction object by id and display it.
# NOTE(review): 'nd' looks like a placeholder reaction id -- confirm it exists
# in the model.
gr_rule = model.model.reactions.get_by_id('nd')#.gene_reaction_rule# = '6398.1'
# assert len(gr_rule.genes) == 0, 'Reaction is not enzyme catalysed.'
gr_rule

In [None]:
# NOTE(review): no earlier cell in this notebook assigns `rxn` (a later cell
# uses the name as a loop variable), so this cell depends on leftover kernel
# state and fails after Restart & Run All.
rxn.gene_reaction_rule
# len(model.model.reactions.get_by_id('13DAMPPOX').gene_reaction_rule)

In [None]:
pr.workflows.workflow_Fang2012(cobra_model=cmodel, mapped_genes_baseline=pd.Series({i:1.0 for i in model.genes.gene_number}),
                            #    mapped_genes_comparison=pd.Series({'8639.1': 1.0}), 
                               mapped_genes_comparison=pd.Series({'6398.1': 1e-6}), 
                               gene_fill=1.0)

In [None]:
df = pd.DataFrame(_)

In [None]:
# Rows of `df` whose index label contains "nad".
df.query('index.str.contains("nad")')

In [None]:
def get_top_rxns(sc, metabolite, n=10):
    """Return the labels of the `n` columns of `sc` with the largest magnitude.

    Parameters
    ----------
    sc : pandas.DataFrame
        Table whose index holds metabolite labels; every column is ranked.
    metabolite : hashable
        Index label of the row to rank by.
    n : int, default 10
        Number of column labels to return.

    Returns
    -------
    list
        Column labels ordered by ascending absolute value in the selected
        row, i.e. the strongest entry comes last.
    """
    selected = sc.loc[sc.index == metabolite]
    # Transpose first so the selected row becomes a column that can be sorted on.
    magnitudes = selected.T.abs()
    ranked = magnitudes.sort_values(by=metabolite)
    return ranked.tail(n).index.tolist()

In [None]:
# Load the proteome table; the file is tab-separated despite its .csv
# extension, and its first column becomes the index.
prot = pd.read_csv('../data/NAD_cell_lines/mapped_proteome_DDA_five_of_five.csv', sep='\t', index_col=0)
# Cast the index labels to strings.
prot.index = prot.index.astype('str')

In [None]:
pko = prot.filter(like="ko").mean(axis=1).div(prot.filter(like="wt").mean(axis=1))
rko = rna1[ko].mean(axis=1).div(rna1[wt].mean(axis=1))

In [None]:
prko = pd.concat([pko, rko], axis=1).dropna()
prko.columns = ['proteomics', 'transcriptomics']

In [None]:
_prko = prko.query('(proteomics > 1.0 and transcriptomics > 1.0) or (proteomics < 1.0 and transcriptomics < 1.0)')
# sns.scatterplot(data=np.log2(_prko), x='proteomics', y='transcriptomics')

In [None]:
# Call the model's get_subsystem_from_gene helper with the index labels of `_prko`.
gene2rxn = model.get_subsystem_from_gene(_prko.index)

In [None]:
# Display the lookup result.
gene2rxn

In [None]:
# For every measured metabolite, fetch its reactions and tag them with the
# metabolite name. The per-metabolite frames are collected first and
# concatenated once, instead of re-copying a growing frame on each iteration.
_met_frames = []
for met in mets.fullName.unique():
    # assign() returns a new frame, so the object handed back by the model
    # helper is not modified in place.
    _rxns = model.get_reactions_from_metabolite(met).assign(metabolite=met)
    _met_frames.append(_rxns)

# pd.concat rejects an empty list; fall back to an empty frame as before.
met2rxn = pd.concat(_met_frames, axis=0) if _met_frames else pd.DataFrame()

In [None]:
# Metabolites with at least one reaction whose abbreviation also appears in `gene2rxn`.
met2rxn.query('abbreviation in @gene2rxn.abbreviation').metabolite.unique()

In [None]:
# Proteomics vs transcriptomics ratio for every row of `prko`.
sns.scatterplot(data=prko, x='proteomics', y='transcriptomics', alpha=0.5)

In [None]:
# One ranked list (20 labels) per row of `sc`, laid out with one column per
# row label of `sc`.
rxns = []
for metabolite in sc.index:
    rxns.append(get_top_rxns(sc, metabolite, n=20))

top_rxns = pd.DataFrame(rxns, index=sc.index).transpose()
top_rxns

In [None]:
# Column labels of the raw expression table.
rna.columns

In [None]:
# Rows of rna1 whose label is among the 50 top-ranked labels for
# 'L-Methionine[c]', averaged per sample group.
# NOTE(review): rna1's index is compared with column labels of `sc` --
# confirm both use the same identifiers.
_met_top = rna1[rna1.index.isin(get_top_rxns(sc, 'L-Methionine[c]', n=50))]
met_wt = _met_top[wt].mean(axis=1)
met_ko = _met_top[ko].mean(axis=1)
met_oe = _met_top[oe].mean(axis=1)


In [None]:
# The 'L-Methionine[c]' row of `sc` as a column, reduced to the non-zero
# entries whose label also occurs in rna1's index.
_met = (
    sc[sc.index == 'L-Methionine[c]'].T
    .loc[lambda col: col['L-Methionine[c]'] != 0.0]
    .loc[lambda col: col.index.isin(rna1.index)]
)


In [None]:
# Per-group mean expression for the rna1 rows whose label occurs in `_met`.
_met_rna = rna1[rna1.index.isin(_met.index)]
_df = pd.concat(
    {
        'wt': _met_rna[wt].mean(axis=1),
        'ko': _met_rna[ko].mean(axis=1),
        'oe': _met_rna[oe].mean(axis=1),
    },
    axis=1,
)

In [None]:
# Density of the log2 group means.
# NOTE(review): `_df` is built from `rna1`, not the zero-filtered `rna_hek`,
# so log2 may produce -inf for zero means -- confirm.
sns.kdeplot(data=np.log2(_df), fill=True, alpha=0.2)

In [None]:
# wt vs ko group means.
sns.scatterplot(data=_df, x='wt', y='ko', alpha=0.5)

In [None]:
# ko / wt ratio per row. Division by zero yields +/-inf (or NaN for 0/0);
# those are turned into NaN and dropped. The replacement value must be given
# explicitly: replace() with only a list of targets does not produce NaN.
_df1 = _df['ko'].div(_df['wt']).replace([np.inf, -np.inf], np.nan).dropna()
# Smallest positive ratio that is not exactly 1.0.
_df1[(_df1 != 1.0) & (_df1 > 0.0)].min()

In [None]:
# Show the '10FTHF5GLUtm' reaction object of the underlying model.
model.model.reactions.get_by_id('10FTHF5GLUtm')

In [None]:
# Pass the first index label of `_df1` to the model's get_reactions helper.
model.get_reactions(_df1[:1].index)

In [None]:
# Index labels of the first ten rows of `rna_hek`, used below as gene numbers.
list_of_gene_number = rna_hek[:10].index.to_list()

In [None]:
def _rule_names_gene(rule, gene):
    """Tell whether `gene` occurs in the gene rule `rule` as a whole identifier.

    The rule is split into tokens (parentheses removed) and each token is
    compared with `gene`; a token also counts when it is `gene` followed by a
    '.N' suffix. A plain substring test would wrongly match e.g. '26.1'
    inside '126.1'.
    """
    gene = str(gene)
    tokens = rule.replace('(', ' ').replace(')', ' ').split()
    return any(tok == gene or tok.startswith(gene + '.') for tok in tokens)


# (gene, reaction id) pairs for every reaction whose gene rule names the gene.
g_paths = [
    (p, k)
    for p in list_of_gene_number
    for k, v in model.gpr.items()
    if _rule_names_gene(v, p)
]
g_paths = pd.DataFrame(g_paths, columns=['gene_number', 'reaction_id'])

# Subsystem of each reaction: the first model.reactions row per abbreviation.
# A reaction id that is absent from model.reactions gets NaN.
_subsystem_by_rxn = (
    model.reactions
    .drop_duplicates('abbreviation')
    .set_index('abbreviation')['subsystem']
)
g_paths['subsystem'] = g_paths['reaction_id'].map(_subsystem_by_rxn)

# Gene numbers are cast to float before merging on model.genes.gene_number.
g_paths['gene_number'] = g_paths['gene_number'].astype('float')
g_paths.merge(model.genes[['gene_number', 'symbol', 'uniprot_gname', 'description', 'ensembl_gene', 'ensembl_trans']], on='gene_number')

In [None]:
# Number of model.reactions rows whose abbreviation appears in `g_paths`.
len(model.reactions[model.reactions.abbreviation.isin(g_paths.reaction_id)].subsystem)

In [None]:
# p = list_of_gene_number[0]
# [(p, k) for k, v in model.gpr.items() if p in v]
# Wrap `g_paths` in a DataFrame under the scratch name `_df`; this rebinds the
# `_df` that earlier cells used for the group means.
_df = pd.DataFrame(g_paths)

In [None]:
# Fill in the subsystem of every reaction listed in `_df`, using the first
# model.reactions row with a matching abbreviation. The loop header was left
# unfinished (a syntax error) and the body addressed a positional column `1`
# with one hard-coded reaction id; `_df` comes from `g_paths`, whose reaction
# column is named 'reaction_id'.
for rxn in _df['reaction_id'].unique():
    _df.loc[_df['reaction_id'] == rxn, 'subsystem'] = (
        model.reactions[model.reactions.abbreviation == rxn].subsystem.iloc[0]
    )

In [None]:
# Display `_df`.
_df

In [None]:
# model.reactions.query('abbreviation in @m_paths')
# Subsystem of the first model.reactions row whose abbreviation is "HMR_0597".
model.reactions[model.reactions.abbreviation == "HMR_0597"].subsystem.iloc[0]