# Task performance evaluation

*Troppo* includes a module for evaluating the performance of metabolic tasks.
The class responsible for this is called `TaskEvaluator`.
This class is a wrapper around a model that allows the evaluation of tasks on the model. It can be used to evaluate a single task, or to evaluate a batch of tasks on a batch of models.

To initialize a `TaskEvaluator` object, you need to pass a model object and the tasks to evaluate.
The tasks should be instances of the `Task` class, which is a simple data structure that contains the following fields:


- `reaction_dict`: a dictionary with the reaction identifiers as keys and, as values, a tuple holding a dictionary of the metabolites with their respective stoichiometry, followed by the reaction bounds. (e.g. rxd = {'r1':({'m1':-1, 'm2':2}, (lb, ub)), ... })
- `inflow_dict`: a dictionary with the metabolite identifiers as keys and the inflow rate bounds as values. (e.g. inflow = {'m1':(1, 1), ... })
- `outflow_dict`: a dictionary with the metabolite identifiers as keys and the outflow rate bounds as values. (e.g. outflow = {'m5':(5, 5), ... })

### Imports

In [2]:
from troppo.tasks.core import TaskEvaluator
from troppo.tasks.task_io import JSONTaskIO
import pandas as pd
from json import JSONEncoder, JSONDecoder
from cobamp.utilities.parallel import batch_run
from cobra.io import read_sbml_model

from numpy import log
import re

### Initial setup

In [4]:
# Load the metabolic model from its SBML file using the reader imported from cobra.io.
task_model = read_sbml_model('data/Recon3D_301_consistent.xml')
# Bare expression: as the last statement of the notebook cell it displays the model summary.
task_model

'M_10fthf5glu[c]' is not a valid SBML 'SId'.
'M_10fthf5glu[l]' is not a valid SBML 'SId'.
'M_10fthf5glu[m]' is not a valid SBML 'SId'.
'M_10fthf6glu[c]' is not a valid SBML 'SId'.
'M_10fthf6glu[l]' is not a valid SBML 'SId'.
'M_10fthf6glu[m]' is not a valid SBML 'SId'.
'M_10fthf7glu[c]' is not a valid SBML 'SId'.
'M_10fthf7glu[l]' is not a valid SBML 'SId'.
'M_10fthf7glu[m]' is not a valid SBML 'SId'.
'M_10fthf[c]' is not a valid SBML 'SId'.
'M_10fthf[l]' is not a valid SBML 'SId'.
'M_10fthf[m]' is not a valid SBML 'SId'.
'M_11docrtsl[c]' is not a valid SBML 'SId'.
'M_11docrtsl[m]' is not a valid SBML 'SId'.
'M_11docrtsl[r]' is not a valid SBML 'SId'.
'M_11docrtstrn[c]' is not a valid SBML 'SId'.
'M_11docrtstrn[m]' is not a valid SBML 'SId'.
'M_11docrtstrn[r]' is not a valid SBML 'SId'.
'M_13dampp[c]' is not a valid SBML 'SId'.
'M_h2o[c]' is not a valid SBML 'SId'.
'M_o2[c]' is not a valid SBML 'SId'.
'M_bamppald[c]' is not a valid SBML 'SId'.
'M_h2o2[c]' is not a valid SBML 'SId'.
'M_

0,1
Name,Recon3D
Memory address,1f5d4df9a48
Number of metabolites,6594
Number of reactions,11556
Number of genes,3085
Number of groups,0
Objective expression,1.0*biomass_reaction - 1.0*biomass_reaction_reverse_32a6c
Compartments,"c, l, m, r, e, x, n, g, i"


In [5]:
# Read the reconstruction results (the first two CSV columns form the index), transpose the
# table and convert it to a dictionary, so that every index pair maps to its own
# {reaction: value} dictionary.
fastcore_res_dict = (
    pd.read_csv('data/r3d_compact_ccle_bc_fastcore.csv', index_col=[0, 1])
    .T
    .to_dict()
)

# Restrict the dictionary to its first sample only.
sample = list(fastcore_res_dict)[0]
fastcore_res_dict = {sample: fastcore_res_dict[sample]}
fastcore_res_dict

{('fastcore', 'ACH-001396'): {'10FTHF5GLUtl': False,
  '10FTHF5GLUtm': False,
  '10FTHF6GLUtl': False,
  '10FTHF6GLUtm': False,
  '10FTHF7GLUtl': False,
  '10FTHF7GLUtm': False,
  '10FTHFtl': False,
  '10FTHFtm': False,
  '11DOCRTSLtm': False,
  '11DOCRTSLtr': False,
  '11DOCRTSTRNtm': False,
  '11DOCRTSTRNtr': False,
  '13DAMPPOX': False,
  '1MNCAMti': False,
  '1PPDCRp': False,
  '24_25DHVITD2t': False,
  '24_25DHVITD2tm': False,
  '24_25DHVITD3t': False,
  '24_25DHVITD3tm': False,
  '24_25VITD2Hm': False,
  '24_25VITD3Hm': False,
  '24NPHte': False,
  '25HVITD2t': False,
  '25HVITD2tin': False,
  '25HVITD2tin_m': False,
  '25HVITD2tm': False,
  '25HVITD3t': False,
  '25HVITD3tin_m': False,
  '25HVITD3tm': False,
  '25VITD2Hm': False,
  '25VITD3Hm': False,
  '2AMACHYD': False,
  '2AMACSULT': False,
  '2AMADPTm': False,
  '2DR1PP': False,
  '2HBO': False,
  '2HBt2': False,
  '2HCO3_NAt': False,
  '2MCITt': False,
  '2OXOADOXm': False,
  '2OXOADPTm': False,
  '34DHOXPEGOX': False,
  '3

In [6]:
# Parse tasks from a previously existing JSON file.
# The supplied file contains tasks adapted from the publication of Richelle et al., 2019.
TASKS_PATH = 'data/nl2019_tasks_r3d_compact.json'

# Collect the model's metabolite identifiers once; they do not change while the tasks are
# being filtered, so there is no need to rebuild this set for every task.
model_metabolite_ids = {m.id for m in task_model.metabolites}

# Keep only the tasks whose inflow and outflow metabolites are all present in the model.
task_list = [t for t in JSONTaskIO().read_task(TASKS_PATH)
             if (set(t.inflow_dict) | set(t.outflow_dict)) <= model_metabolite_ids]
task_list



[Task '1' expecting success:inflow_dict: 3 ; outflow_dict: 3 ; annotations: 4 ; mandatory_activity: 1 -- Oxidative phosphorylation via NADH-coenzyme Q oxidoreductase (COMPLEX I),
 Task '2' expecting success:inflow_dict: 2 ; outflow_dict: 2 ; annotations: 4 ; mandatory_activity: 1 -- Oxidative phosphorylation via succinate-coenzyme Q oxidoreductase (COMPLEX II),
 Task '3' expecting success:inflow_dict: 3 ; outflow_dict: 3 ; annotations: 4 ; mandatory_activity: 1 -- Krebs cycle - oxidative decarboxylation of pyruvate,
 Task '4' expecting success:inflow_dict: 6 ; outflow_dict: 6 ; annotations: 4 ; mandatory_activity: 8 -- Krebs cycle - NADH generation,
 Task '5' expecting success:inflow_dict: 5 ; outflow_dict: 3 ; annotations: 4 -- ATP regeneration from glucose (normoxic conditions) - glycolysis + krebs cycle,
 Task '6' expecting success:inflow_dict: 4 ; outflow_dict: 5 ; annotations: 4 ; mandatory_activity: 10 -- ATP generation from glucose (hypoxic conditions) - glycolysis,
 Task '7' ex

In [7]:
for task in task_list:
    # Metabolites declared both as inflow and as outflow of the same task.
    shared = set(task.inflow_dict) & set(task.outflow_dict)

    # Shared metabolites get [-1000, 1000] as their inflow entry; the other inflow entries
    # are kept unchanged.
    task.inflow_dict = {met: ([-1000, 1000] if met in shared else bounds)
                        for met, bounds in task.inflow_dict.items()}

    # Drop the shared metabolites from the outflow dictionary. The membership test must be
    # done on the keys: testing a key against `.items()` compares it with (key, value)
    # tuples, never matches, and therefore filters nothing.
    task.outflow_dict = {met: bounds for met, bounds in task.outflow_dict.items()
                         if met not in shared}

    # Clear the mandatory activity list of every task.
    task.mandatory_activity = []

task_list

[Task '1' expecting success:inflow_dict: 3 ; outflow_dict: 3 ; annotations: 4 -- Oxidative phosphorylation via NADH-coenzyme Q oxidoreductase (COMPLEX I),
 Task '2' expecting success:inflow_dict: 2 ; outflow_dict: 2 ; annotations: 4 -- Oxidative phosphorylation via succinate-coenzyme Q oxidoreductase (COMPLEX II),
 Task '3' expecting success:inflow_dict: 3 ; outflow_dict: 3 ; annotations: 4 -- Krebs cycle - oxidative decarboxylation of pyruvate,
 Task '4' expecting success:inflow_dict: 6 ; outflow_dict: 6 ; annotations: 4 -- Krebs cycle - NADH generation,
 Task '5' expecting success:inflow_dict: 5 ; outflow_dict: 3 ; annotations: 4 -- ATP regeneration from glucose (normoxic conditions) - glycolysis + krebs cycle,
 Task '6' expecting success:inflow_dict: 4 ; outflow_dict: 5 ; annotations: 4 -- ATP generation from glucose (hypoxic conditions) - glycolysis,
 Task '7' expecting failure:inflow_dict: 11 ; outflow_dict: 1 ; annotations: 4 -- ATP generation from ions,
 Task '8' expecting succe

In [8]:
# tasks should be evaluated without open boundary reactions. We can easily close them on
# the COBRA model
# knock out every boundary reaction of the model
for k in task_model.boundary:
    k.knock_out()

# identifiers of all reactions in the model, gathered as a set - this will be useful further on
all_reactions = {rxn.id for rxn in task_model.reactions}

all_reactions

{'r1908',
 'GMPtn',
 'DOPAVESSEC',
 'r1624',
 'OMHDOCOSACTD',
 'LDH_L',
 'EX_3bcrn[e]',
 'ACLEUtm',
 'DESAT20_1',
 'HMR_2644',
 'r1571',
 'DGTPtn',
 '25HVITD2t',
 'SRTNtu',
 'r1593',
 'DOPA4GLCURt',
 'HC00900t1',
 'RE2427M',
 'AG13T17g',
 'HMR_2193',
 'HMR_3341',
 'r2195',
 'EX_ind3ac[e]',
 'RE1827C',
 'r0587',
 'HMR_9642',
 'HMR_2348',
 'GLCAASE1ly',
 '3TETD7ECOACRNtr',
 'HMR_0271',
 'HMR_9669',
 'HMR_1739',
 'TETDECA511ACc',
 'CSBPASEly',
 'HMR_6619',
 'RE3334X',
 'DTMPK',
 'RE3004M',
 'HMR_0894',
 'HMR_0256',
 'HMR_2793',
 'RE2997X',
 '3AIB_Dtm',
 'EX_gumgchol[e]',
 'MMALtm',
 'HMR_1761',
 'r1454',
 'DASPO1p',
 'TETDEC2CRNe',
 'r2237',
 'FUT95g',
 'HMR_2650',
 'ACMPGLUthc',
 'HMR_4783',
 'GLCAE1g',
 'CE6031t',
 'HMR_1968',
 'HMR_2917',
 'HMR_0428',
 'r1608',
 'DHEAtr',
 'C142ACBP',
 'EX_ethamp[e]',
 'ARACHCPT2',
 'RE3243C',
 'r2312',
 'HMR_0496',
 '7HPVStev',
 'r1612',
 '4HMDZGLUChr',
 'ADAe',
 'TIGCRNe',
 'HMR_1764',
 'ESTRSABCtc',
 'PELINETH',
 'GDPFUCtg',
 'HMR_3668',
 'HMR_3059'

### Evaluate a set of tasks

In [10]:
# master dictionary holding the task evaluation results of every sample
task_eval_results = {}

# k is a tuple with algorithm and sample information; result is a dict mapping each reaction
# to its presence flag
for k, result in fastcore_res_dict.items():
    # the with statement is used so that the COBRA model is only changed temporarily while
    # reactions absent from the FASTCORE result are knocked out
    with task_model as context_specific_model:
        # reactions included in the sample-specific model
        protected = {rxn_id for rxn_id, present in result.items() if present}
        # every other reaction of the model has to be disabled
        to_remove = all_reactions - protected
        for rid in to_remove:
            context_specific_model.reactions.get_by_id(rid).knock_out()

        # task evaluator built from the context-specific model, the supplied task list
        # and the solver
        task_eval = TaskEvaluator(model=context_specific_model, tasks=task_list, solver='CPLEX')

        # task names (for future reference)
        task_names = task_eval.tasks

        # run the batch_function of the TaskEvaluator class over the task names; it receives
        # the name of a loaded task and a params dictionary holding the task evaluator under
        # the 'tev' key. threads sets the amount of threads to be used
        batch_res_tasks = batch_run(TaskEvaluator.batch_function, task_names,
                                    {'tev': task_eval}, threads=1)

    # each element of batch_res_tasks is a tuple of length 3:
    # 0 - boolean flag representing the task evaluation
    # 1 - Solution instance used to evaluate the task
    # 2 - a dictionary with reactions supposed to be active mapped to True/False
    # according to that criterion

    # pair every task name with items 0 and 2 of its result - the flux distribution
    # (item 1) is not needed
    task_csm_res = {name: (outcome[0], outcome[2])
                    for name, outcome in zip(task_names, batch_res_tasks)}
    print(k, len(protected),
          sum(1 for outcome in task_csm_res.values() if outcome[0]), 'tasks completed.')
    # store this dictionary under its sample in the master results dictionary
    task_eval_results[k] = task_csm_res

  'Will not normalize rules with more than ' + str(token_to_gene_ratio) + ' average tokens per gene')


('fastcore', 'ACH-001396') 0 15 tasks completed.


{('fastcore', 'ACH-001396'): {'1': (False, {}),
  '2': (False, {}),
  '3': (False, {}),
  '4': (False, {}),
  '5': (False, {}),
  '6': (False, {}),
  '7': (True, {}),
  '8': (False, {}),
  '9': (False, {}),
  '10': (False, {}),
  '11': (False, {}),
  '12': (False, {}),
  '13': (False, {}),
  '14': (False, {}),
  '15': (False, {}),
  '16': (False, {}),
  '17': (False, {}),
  '18': (False, {}),
  '19': (False, {}),
  '20': (False, {}),
  '21': (False, {}),
  '22': (False, {}),
  '23': (False, {}),
  '24': (False, {}),
  '25': (False, {}),
  '26': (False, {}),
  '27': (False, {}),
  '28': (False, {}),
  '29': (False, {}),
  '30': (False, {}),
  '31': (False, {}),
  '32': (False, {}),
  '33': (False, {}),
  '34': (False, {}),
  '35': (False, {}),
  '36': (False, {}),
  '37': (False, {}),
  '38': (False, {}),
  '39': (False, {}),
  '40': (False, {}),
  '41': (False, {}),
  '42': (False, {}),
  '43': (False, {}),
  '44': (False, {}),
  '45': (False, {}),
  '46': (False, {}),
  '47': (False, 

In [12]:
# write the results to a JSON file for later analysis; the dictionary is serialised as a
# list of (key, value) pairs
with open('data/r3d_compact_task_results_ccle_bc_new_nodrains_only_feas.json', 'w') as f:
    f.write(JSONEncoder().encode(list(task_eval_results.items())))