In [15]:
import itertools
import pandas as pd
import cobra.flux_analysis
from cobra.flux_analysis import find_essential_genes

import time
import numpy as np
from src.mp_functions import  parallelize_dataframe, knockout_FBA_w_tasks, knockout_FBA_simple

from functools import partial
from src.task_functions import constrain_model, read_tasks


In [40]:
# Configuration: which tissue-specific GEM to load.
# The tissue name is substituted into the SBML file name and is reused by
# later cells to build result-file paths.
tissue = 'liver'
_model_path_template = 'C:/Users/Sigve/Genome_Data/Human1/Human1_GEM/GTEx/{0}.xml'
model_file_path = _model_path_template.format(tissue)

In [12]:
# Simple load: read the SBML file at model_file_path with cobra's SBML reader.
# NOTE(review): `model` is not referenced by any later cell visible in this
# notebook (those use model_list instead) — confirm this load is still needed.
model = cobra.io.read_sbml_model(model_file_path)

In [41]:

# Build model_list from the same model file using the project helper
# constrain_model (src.task_functions), called with ALLMETSIN_OUT=False.
# Later cells index model_list[0] to get genes and pass the whole list to
# read_tasks and knockout_FBA_w_tasks.
# NOTE(review): what ALLMETSIN_OUT controls is not visible from this notebook —
# see constrain_model for its meaning.
model_list = constrain_model(model_file_path, ALLMETSIN_OUT=False)


Read LP format model from file C:\Users\Sigve\AppData\Local\Temp\tmp6d3imn8m.lp
Reading time = 0.07 seconds
: 5788 rows, 15892 columns, 69376 nonzeros


In [61]:
# Which genes to filter by. `essential` selects how the `genes` list is built:
#   'biomass_ess' -> genes reported by cobra's find_essential_genes on model_list[0]
#   'task_ess'    -> gene ids read from a precomputed per-tissue TSV
#   anything else -> every gene id present in model_list[0]
essential = None

if essential == 'biomass_ess':
    # Computed with cobra on the fly; this takes some time.
    essential_gene_objs = find_essential_genes(model_list[0])
    genes = [g.id for g in essential_gene_objs]

elif essential == 'task_ess':
    # Use the task-based gene list stored on disk.
    # NOTE(review): the file read here is named '*_non_essential.tsv' although
    # this branch is labelled task-essential — confirm this is the intended file.
    gene_table_path = 'C:/Users/Sigve/Genome_Data/results/model_tests/essential_genes/{0}_non_essential.tsv'.format(tissue)
    gene_table = pd.read_table(gene_table_path, index_col=0)
    genes = gene_table['gene_ids'].tolist()

else:
    # No essentiality filter: take all genes in the model.
    genes = [g.id for g in model_list[0].genes]

In [93]:
# Create the knockout combinations to test for an individual sample.
#
# Starting from a ';'-separated list of gene ids, keep only those present in
# `genes`, then enumerate every combination of 1..max_comb_size genes plus one
# final entry knocking out all retained genes together. The result is
# `test_df` with columns, in this order (the next cell relies on the order):
#   sample_ids    - 1-based running number of the combination
#   gene_ids      - list of gene ids knocked out together
#   tasks_results - empty list per row, to be filled by the FBA/task run
t = 'ENSG00000198130;ENSG00000084754;ENSG00000105552;ENSG00000248098;ENSG00000137992;ENSG00000146085;ENSG00000114054;ENSG00000175198'
max_comb_size = 3  # largest combination size enumerated exhaustively

requested_genes = t.split(';')
print('Num genes: ' + str(len(requested_genes)))

# Filter against `genes` while keeping the order given in `t` (and dropping
# duplicates). A set intersection would return the genes in arbitrary order,
# making the sample_ids -> combination mapping differ between kernel sessions.
# The name `input` is deliberately avoided so the builtin is not shadowed.
allowed_genes = set(genes)
candidate_genes = [g for g in dict.fromkeys(requested_genes) if g in allowed_genes]
print(candidate_genes)

combs = [
    list(combo)
    for size in range(1, max_comb_size + 1)
    for combo in itertools.combinations(candidate_genes, size)
]

# Add the "knock out everything" case only when it was not already produced
# above (i.e. more genes survived the filter than max_comb_size); append a
# copy so the row does not alias candidate_genes.
if len(candidate_genes) > max_comb_size:
    combs.append(list(candidate_genes))

print('Num combs: ' + str(len(combs)))

# Alternative task files that can be swapped in below:
#essential_tasks_min_ess_aa.tsv
#tissue_full_tasks/full_tasks_minus_ess_{0}.tsv

# .format(tissue) is a no-op for the current file name; it is kept so the
# tissue-specific alternative above can be dropped in unchanged.
task_list = read_tasks('C:/Users/Sigve/Genome_Data/Human1/Human1_GEM/tasks/test_tasks/valine_degradation_task.tsv'.format(tissue), model_list)

# Built in one step; every row gets its own empty tasks_results list.
test_df = pd.DataFrame({
    'sample_ids': range(1, len(combs) + 1),
    'gene_ids': combs,
    'tasks_results': [[] for _ in combs],
})

print(test_df.shape[0])

Num genes: 8
['ENSG00000198130', 'ENSG00000175198', 'ENSG00000248098', 'ENSG00000137992', 'ENSG00000105552', 'ENSG00000114054', 'ENSG00000146085', 'ENSG00000084754']
Num combs: 93
93


In [94]:
# Run the knockout FBA with task checks for every combination and write the
# results to a tab-separated file.

# Output file
output_file = 'C:/Users/Sigve/Genome_Data/results/model_tests/task_tests/degradation_test_{0}.tsv'.format(tissue)

# Reference sample (no genes knocked out), placed on top of the combinations.
# A new frame is built instead of inserting into test_df in place, so that
# re-running this cell does not stack additional REF rows onto test_df.
ref_row = pd.DataFrame({'sample_ids': ['REF'], 'gene_ids': [[]], 'tasks_results': [[]]})
knockout_df = pd.concat([ref_row, test_df], ignore_index=True)

start_time = time.time()
# FBA: task_list and model_list are bound first; parallelize_dataframe
# (src.mp_functions) is given the frame, the bound function and n_cores=2.
results = parallelize_dataframe(knockout_df, partial(knockout_FBA_w_tasks, task_list, model_list), n_cores=2)

end_time = time.time()
print('Total time: %.6f seconds' % (end_time - start_time))

# Flatten the gene lists to ';'-separated strings for the TSV.
results['gene_ids'] = results['gene_ids'].apply(';'.join)
# Collapse rows whose task results are all truthy into a single marker.
# NOTE(review): all([]) is True, so a row whose tasks_results list is still
# empty is also written as 'ALL PASS' — confirm that is intended.
results['tasks_results'] = results['tasks_results'].apply(lambda x: x if not all(x) else ['ALL PASS'])

results = results.reset_index(drop=True)
# NOTE(review): the 'solution' column is not created in this notebook;
# presumably it is added by knockout_FBA_w_tasks — verify.
results[['sample_ids', 'gene_ids', 'solution', 'tasks_results']].to_csv(path_or_buf=output_file, sep='\t')


Total time: 41.062735 seconds
