In [15]:
import itertools
from optlang import Constraint, Variable
import pandas as pd
import cobra.flux_analysis
from cobra import Metabolite, Reaction, Model
import time
import numpy as np
from functools import partial
from src.mp_functions import combinations_subset, parallelize_dataframe, knockout_FBA, knockout_FBA_w_tasks

from functools import partial
from src.met_task_functions import get_met_ids, constrain_model, create_reactions


"""A mess of a document with different code cells.
Good to use for any testing that involves the Recon3D model as it takes some time to load in."""

# Time the whole load-and-prepare step so the cost of re-running this cell is visible.
start_time = time.time()

# Parse the SBML model from disk.
model_file_path = 'C:/Users/Sigve/Genome_Data/Human1/Human1_GEM/GTEx/brain.xml'
model = cobra.io.read_sbml_model(model_file_path)

# The result is indexed like a list of models by the later cells (model_list[0], model_list[1], ...).
model_list = constrain_model(model, ALLMETSIN=True)

end_time = time.time()
load_seconds = end_time - start_time
print('Model load and preparation time: %.6f seconds' % load_seconds)


Read LP format model from file C:\Users\Sigve\AppData\Local\Temp\tmpdcj61irn.lp
Reading time = 0.07 seconds
: 5482 rows, 15138 columns, 66608 nonzeros
Read LP format model from file C:\Users\Sigve\AppData\Local\Temp\tmplulq1_db.lp
Reading time = 0.07 seconds
: 5482 rows, 15138 columns, 66608 nonzeros
Read LP format model from file C:\Users\Sigve\AppData\Local\Temp\tmpx8qd_s0h.lp
Reading time = 0.06 seconds
: 5482 rows, 15138 columns, 66608 nonzeros
Model load and preparation time: 77.078590 seconds


In [2]:
def tasks_test(task_list: list, model_list: list, gene_ids: list) -> list:
    """Knock out genes, optimise the base model, then check every task.

    Parameters
    ----------
    task_list : list
        One entry per task, laid out as ``[inputs, outputs, equation, model_index]``.
        Every item of ``inputs`` and ``outputs`` is passed to ``add_reaction`` on the
        task model, except the literal string ``'ALLMETSIN'`` which triggers the
        boundary-metabolite handling below. ``equation`` is passed to
        ``add_reaction`` too unless it is the string ``'nan'``. ``model_index``
        selects the task model from ``model_list``.
    model_list : list
        Models to work on. Index 0 is used for the plain knockout optimisation.
    gene_ids : list
        Identifiers of the genes to knock out.

    Returns
    -------
    list
        ``[objective, flag_1, ..., flag_n]``: the ``slim_optimize()`` value of
        ``model_list[0]`` with the genes knocked out, followed by one flag per
        task (1 if the task model solved, 0 if it did not).

        If a gene id is not found in ``model_list[0]`` the string
        ``'<gene_id> not in model.'`` is returned instead of a list. This is kept
        as-is so existing callers keep working.

    Raises
    ------
    KeyError
        If a gene id exists in ``model_list[0]`` but not in a task model.
    """
    base_model = model_list[0]
    with base_model:
        for gene_id in gene_ids:
            try:
                base_model.genes.get_by_id(gene_id).knock_out()
            except KeyError:
                return gene_id + ' not in model.'
        res = [base_model.slim_optimize()]

    for task in task_list:
        t_model = model_list[task[3]]

        with t_model:
            for subset in [task[0], task[1]]:
                for rx in subset:
                    if rx == 'ALLMETSIN':
                        # Adds boundary metabolites for other reactions when ALLMETSIN is used.
                        # The remaining items of the subset must expose `.metabolites` and `.id`.
                        for r in subset[1:]:
                            for m2 in r.metabolites:
                                for r2 in m2.reactions:
                                    if r2.boundary and r2.id != r.id:
                                        r2.add_metabolites({Metabolite(
                                                            m2.id[:-4] + 'x[x]',
                                                            formula=m2.formula,
                                                            name=' '.join(m2.name.split(' ')[:-1]) + ' [Boundary]',
                                                            compartment='x'): 1})
                        continue
                    t_model.add_reaction(rx)

            if task[2] != 'nan':
                t_model.add_reaction(task[2])

            for gene_id in gene_ids:
                t_model.genes.get_by_id(gene_id).knock_out()

            # slim_optimize() reports a failed solve through its error value, which
            # is NaN by default, so an `is None` check alone never detects failure.
            # None is still accepted in case a model returns it.
            objective = t_model.slim_optimize()
            failed = objective is None or np.isnan(objective)
            res.append(0 if failed else 1)

    return res

In [9]:
def create_constraints(tasks: pd.DataFrame) -> pd.DataFrame:
    """Build per-task input/output variables and equation reactions.

    Parameters
    ----------
    tasks : pd.DataFrame
        One row per task. The columns read here are ``inputs``, ``outputs``,
        ``LBin``, ``UBin``, ``LBout``, ``UBout`` (parallel sequences per row),
        ``equations`` (a string, ``'nan'`` when there is no equation),
        ``LBequ``, ``UBequ`` and ``description`` (only read when an equation is
        present), ``met_ids`` (a mapping from metabolite name to an object with
        an ``id`` attribute) and ``model_num``.

    Returns
    -------
    pd.DataFrame
        Columns ``in_rx``, ``out_rx``, ``equ`` and ``model_num``, one row per
        task. ``in_rx`` / ``out_rx`` hold lists whose items are either the
        string ``'ALLMETSIN'`` or a ``Variable``. ``equ`` holds a ``Reaction``
        or the string ``'nan'``.
    """
    rx_list = []
    in_list = []
    out_list = []

    for ind, data in tasks.iterrows():

        directions = (
            ('in', data.inputs, data.LBin, data.UBin),
            ('out', data.outputs, data.LBout, data.UBout),
        )
        for name, mets, lbs, ubs in directions:
            bounded = []
            for m, lb, ub in zip(mets, lbs, ubs):
                if m[:9] == 'ALLMETSIN':
                    # Marker entry, carried through as a plain string.
                    bounded.append('ALLMETSIN')
                elif name == 'out':
                    bounded.append(Variable(data['met_ids'][m].id, lb=float(lb), ub=float(ub)))
                else:
                    # Inputs use the negated, swapped bounds: [-ub, -lb].
                    bounded.append(Variable(data['met_ids'][m].id, lb=-float(ub), ub=-float(lb)))

            if name == 'in':
                in_list.append(bounded)
            else:
                out_list.append(bounded)

        if data.equations != 'nan':
            # Split the equation on '=' into its two sides, then on spaces.
            # Tokens of length <= 1 (such as '+' or empty strings) are skipped.
            sides = [[data['met_ids'][token] for token in side.split(' ') if len(token) > 1]
                     for side in data.equations.split('=')]

            # Left-hand side metabolites get -1, right-hand side metabolites +1.
            stoichiometry = {}
            for coefficient, side_mets in zip([-1, 1], sides):
                for met in side_mets:
                    stoichiometry[met] = coefficient

            rx = Reaction('ess_{0}'.format(ind + 1))
            rx.add_metabolites(stoichiometry)
            rx.lower_bound = float(data.LBequ)
            rx.upper_bound = float(data.UBequ)
            rx.name = data.description

            rx_list.append(rx)

        else:
            rx_list.append('nan')

    return pd.DataFrame(list(zip(in_list, out_list, rx_list, tasks['model_num'].tolist())),
                        columns=['in_rx', 'out_rx', 'equ', 'model_num'])

In [10]:
# Read and format task data
tasks_df = pd.read_table('C:/Users/Sigve/Genome_Data/Human1/Human1_GEM/tasks/essential_tasks.tsv')

# Bound columns hold comma-separated text; turn each cell into a list of strings.
for bound_col in ('LBin', 'LBout', 'UBin', 'UBout'):
    tasks_df[bound_col] = tasks_df[bound_col].apply(lambda cell: cell.split(','))

# Drop the first and last character of each cell, split on ']' and give every
# piece back the closing bracket it lost in the split.
for side_col in ('inputs', 'outputs'):
    tasks_df[side_col] = tasks_df[side_col].apply(
        lambda cell: [piece + ']' for piece in cell[1:-1].split(']')[:-1]]
    )

# Missing equations become the string 'nan', which create_constraints checks for.
tasks_df['equations'] = tasks_df['equations'].apply(str)

tasks_df[['met_ids', 'model_num']] = tasks_df.apply(
    lambda row: get_met_ids(model_list, row), axis=1, result_type='expand'
)

tasks_df = create_constraints(tasks_df)

task_list = tasks_df.values.tolist()
print(task_list)

[[[-1000.0 <= m02630s[s] <= -0.0, -1000.0 <= m01965s[s] <= -0.0], [0.0 <= m02040s[s] <= 1000.0, 0.0 <= m01596s[s] <= 1000.0], <Reaction ess_1 at 0x12b11c01760>, 1], [[-1000.0 <= m02630s[s] <= -0.0, -1000.0 <= m01965s[s] <= -0.0], [0.0 <= m02040s[s] <= 1000.0, 0.0 <= m01596s[s] <= 1000.0], <Reaction ess_2 at 0x12b11a220a0>, 1], [[-1000.0 <= m02630s[s] <= -0.0, -1000.0 <= m01965s[s] <= -0.0], [0.0 <= m02040s[s] <= 1000.0, 0.0 <= m01596s[s] <= 1000.0], <Reaction ess_3 at 0x12b118d3e20>, 1], [[-1000.0 <= m02630s[s] <= -0.0, -1000.0 <= m01965s[s] <= -0.0], [0.0 <= m02040s[s] <= 1000.0, 0.0 <= m01596s[s] <= 1000.0], <Reaction ess_4 at 0x12b0ff27790>, 1], [[-1000.0 <= m02630s[s] <= -0.0, -1000.0 <= m01965s[s] <= -0.0, -1000.0 <= m02578s[s] <= -0.0, -1000.0 <= m02751s[s] <= -0.0], [0.0 <= m02040s[s] <= 1000.0, 0.0 <= m01596s[s] <= 1000.0, 1.0 <= m01371c[c] <= 1000.0], <Reaction ess_5 at 0x12b11099fd0>, 1], [[-1000.0 <= m02630s[s] <= -0.0, -1000.0 <= m01965s[s] <= -0.0, -1000.0 <= m02578s[s] <=

In [None]:
# Read test data
test_data = pd.read_csv('C:/Users/Sigve/Genome_Data/results/model_tests/test_data.csv')

# Everything from the fifth column onwards is gathered into one list per row.
test_data['pass/fail'] = test_data.values[:, 4:].tolist()

kept_columns = ['phewas_code', 'gene_ids', 'solution', 'pass/fail']
test_data = test_data[kept_columns]

# Gene ids are stored as one comma-separated string per row.
test_data['gene_ids'] = test_data['gene_ids'].apply(lambda ids: ids.split(','))
print(test_data.head())

# Reduce number of entries
test_data = test_data.iloc[:10]

In [None]:
# Fixed pair of gene ids used as the knockout set for the profiling run below.
g = ['ENSG00000100030', 'ENSG00000197375']
# Load the line_profiler IPython extension, which provides the %lprun magic.
%load_ext line_profiler
# Profile tasks_test line by line for a single call on the gene pair above.
%lprun -f tasks_test tasks_test(task_list, model_list, g)

In [None]:
# Time one tasks_test call per row of test_data.
start_time = time.time()

test_data['results'] = test_data['gene_ids'].apply(
    lambda gene_ids: tasks_test(task_list, model_list, gene_ids)
)

end_time = time.time()
fba_seconds = end_time - start_time
print('FBA runtime: %.6f seconds' % fba_seconds)

In [None]:
# Data comparison

result_df = test_data.copy()

print(result_df.shape[0])

# The first entry of each result is the objective value of the base model;
# the remaining entries are the per-task flags.
result_df['main_obj'] = result_df['results'].apply(lambda x: x[0])
result_df['results'] = result_df['results'].apply(lambda x: x[1:])


def _has_mismatch(row) -> bool:
    """Return True if any expected flag differs from the matching computed flag."""
    # With axis=1 each row is a Series indexed by column label. Using the labels
    # avoids the deprecated integer-key positional fallback (row[0], row[1]).
    # zip() stops at the shorter list, so a length mismatch is not reported here.
    return any(expected != observed
               for expected, observed in zip(row['pass/fail'], row['results']))


result_df['comparison'] = result_df[['pass/fail', 'results']].apply(_has_mismatch, axis=1)
error_df = result_df[result_df['comparison']]
print('Number of detected errors: ' + str(error_df.shape[0]))


In [20]:
# Exploratory cell: apply the first task to model_list[1] and list the solver variables.
t_model = model_list[1]

# Changes are made inside the model's context manager.
with t_model:
    # Concatenation of the first task's input and output entries.
    # NOTE(review): `tasks` is never used below — confirm whether it was meant
    # to be the argument of add_cons_vars.
    tasks = task_list[0][0] + task_list[0][1]
    # NOTE(review): the slice [0:1] is a one-element list wrapping the task's
    # input entries only; the output entries are not passed — confirm intent.
    t_model.add_cons_vars(task_list[0][0:1])
    # Third field of the task (the equation entry built by create_constraints).
    t_model.add_reaction(task_list[0][2])
    print(t_model.slim_optimize())

    # Print every variable currently on the model.
    variables = t_model.variables
    for v in variables:
        print(v)


nan
0.0 <= biomass_human <= 1000.0
0.0 <= biomass_human_reverse_fb2f2 <= 0.0
0.0 <= HMR_0241 <= 1000.0
0.0 <= HMR_0241_reverse_40008 <= 1000.0
0.0 <= HMR_0242 <= 1000.0
0.0 <= HMR_0242_reverse_0a022 <= 0.0
0.0 <= HMR_0240 <= 1000.0
0.0 <= HMR_0240_reverse_6e2eb <= 0.0
0.0 <= HMR_0243 <= 1000.0
0.0 <= HMR_0243_reverse_abdec <= 0.0
0.0 <= HMR_10033 <= 1000.0
0.0 <= HMR_10033_reverse_aa17a <= 1000.0
0.0 <= HMR_2955 <= 1000.0
0.0 <= HMR_2955_reverse_6e373 <= 1000.0
0.0 <= HMR_3695 <= 1000.0
0.0 <= HMR_3695_reverse_d3d9b <= 0.0
0.0 <= HMR_0015 <= 1000.0
0.0 <= HMR_0015_reverse_d37e4 <= 0.0
0.0 <= EX_M00003[e] <= 1000.0
0.0 <= EX_M00003[e]_reverse_3fde7 <= 1000.0
0.0 <= HMR_2288 <= 1000.0
0.0 <= HMR_2288_reverse_8ee17 <= 0.0
0.0 <= HMR_2648 <= 1000.0
0.0 <= HMR_2648_reverse_8a135 <= 1000.0
0.0 <= HMR_0497 <= 1000.0
0.0 <= HMR_0497_reverse_0f0fc <= 0.0
0.0 <= HMR_0553 <= 1000.0
0.0 <= HMR_0553_reverse_00e01 <= 0.0
0.0 <= HMR_2650 <= 1000.0
0.0 <= HMR_2650_reverse_06283 <= 1000.0
0.0 <= HMR_33