In [1]:
import pandas as pd
import numpy as np
import json
import pandas as pd
from cobra import Model, Reaction, Metabolite
from cobra.flux_analysis import flux_variability_analysis
import pickle
from tqdm import tqdm 
from ast import literal_eval
import cobra
import matplotlib.pyplot as plt
import numpy as np
from multiprocessing import Pool
from tqdm import tqdm
from functools import partial 
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.gridspec import GridSpec
from matplotlib.ticker import MaxNLocator, FormatStrFormatter
import multiprocessing as mp
import gurobipy
gurobipy.setParam('OutputFlag', 0)
import sys
# sys.path.append('../Script/')
from common import *


### input and output

In [2]:
### input
# YAML model loaded via cobra as the baseline `yeast8` model in the Biolog simulation.
yeast870_path =  '../../Data/model/yeast-GEM.yml'
# YAML model loaded as `yeast8U`; also reloaded by each worker in `process_row`.
yeast8U_del_path = '../../Data/model/yeast8U_del_re.yml'
# Tab-separated Biolog substrate table (read with sep='\t').
biolog_data_path = '../../Data/Biolog_Substrate.tsv'

### output
# CSV that receives the per-substrate list of growth-enabling reactions.
biolog_del_reaction_path = '../../Results/analysis/Biolog_del_reaction_re.csv'

In [3]:
def get_yeast8_id_from_name(name,model):
    """Look up an extracellular metabolite id by its exact name.

    Parameters
    ----------
    name : metabolite name to search for (compared with ``==``).
    model : object exposing an iterable ``metabolites`` whose items have
        ``id``, ``name`` and ``compartment`` attributes.

    Returns
    -------
    str
        The id of the first metabolite whose name equals ``name`` and whose
        compartment is ``'e'``; an empty string when nothing matches.
    """
    matching_ids = (
        met.id
        for met in model.metabolites
        if met.name == name and met.compartment == 'e'
    )
    # Only the first hit is consumed; '' signals "not found".
    return next(matching_ids, '')

In [4]:
def get_exchange_reaction(Target_met,model):
    """Return the id of a single-metabolite reaction for ``Target_met``, creating one if needed.

    The model is scanned for reactions that involve exactly one metabolite
    whose id equals ``Target_met``. The id comparison is exact: a plain
    substring test on the reaction string would also accept ids that merely
    contain ``Target_met`` (e.g. ``s_0067`` inside ``s_00671``).

    When no such reaction exists, a reaction named ``'DM_' + Target_met`` with
    bounds (0, 1000) and stoichiometry ``{Target_met: -1}`` is added to
    ``model`` and its id is returned.

    Parameters
    ----------
    Target_met : metabolite id; an empty string means "no metabolite".
    model : cobra model (needs ``reactions``, ``metabolites.get_by_id`` and
        ``add_reactions``).

    Returns
    -------
    str or None
        The reaction id, or ``None`` when ``Target_met`` is ``''``.

    Raises
    ------
    KeyError
        Propagated from ``model.metabolites.get_by_id`` when a new reaction
        must be built but ``Target_met`` is not in the model.
    """
    if Target_met == '':
        return None

    exchange_reaction = ''
    for rxn in model.reactions:
        participants = rxn.metabolites
        if len(participants) != 1:
            continue
        (only_met,) = participants
        if only_met.id == Target_met:
            # No break: when several reactions qualify the last one wins,
            # which is what callers have always received.
            exchange_reaction = rxn.id
    if exchange_reaction != '':
        return exchange_reaction

    # Nothing suitable exists yet: add a reaction that consumes the metabolite.
    reaction_name = 'DM_' + Target_met
    reaction = Reaction(reaction_name)
    reaction.name = reaction_name
    reaction.subsystem = ''
    reaction.lower_bound = 0.
    reaction.upper_bound = 1000.
    reaction.add_metabolites({model.metabolites.get_by_id(Target_met): -1})
    model.add_reactions([reaction])
    return reaction_name

In [5]:
def biology_simulation(Target_met,Substrate_type,model):
    """Simulate growth with ``Target_met`` offered in place of one medium component.

    For a known ``Substrate_type`` the reaction listed in the table below is
    closed (bounds (0, 0)), the reaction returned by ``get_exchange_reaction``
    is opened to (-5, 1000), the model is optimized with the gurobi solver and
    the flux through ``'r_2111'`` is compared with 0.00001.

    All edits happen inside ``with model:``.
    NOTE(review): this relies on cobra's model context reverting the changes on
    exit - confirm. ``'r_2111'`` is presumably the growth reaction and the four
    closed reactions the default C/N/P/S uptake reactions - verify against the
    model.

    Parameters
    ----------
    Target_met : metabolite id, or ``''`` when the substrate is not in the model.
    Substrate_type : one of ``'C'``, ``'N'``, ``'P'``, ``'S'``.
    model : cobra model to simulate with.

    Returns
    -------
    ``'G'`` when the ``'r_2111'`` flux exceeds 0.00001, otherwise ``'NG'``.
    ``'NG'`` as well when ``Target_met`` is empty. For an unrecognised
    ``Substrate_type`` the function prints ``'error'`` and returns ``None``.
    """
    # Reaction that gets closed for each substrate class.
    blocked_reaction_by_type = {
        'C': 'r_1714',
        'N': 'r_1654',
        'P': 'r_2005',
        'S': 'r_2060',
    }

    if Target_met == '':
        return 'NG'

    blocked_reaction = blocked_reaction_by_type.get(Substrate_type)
    if blocked_reaction is None:
        print('error')
        return None

    with model:
        model.reactions.get_by_id(blocked_reaction).bounds = (0, 0)
        uptake_reaction = get_exchange_reaction(Target_met, model)
        model.reactions.get_by_id(uptake_reaction).bounds = (-5, 1000)
        model.solver = 'gurobi'
        model.optimize()
        grows = model.reactions.get_by_id('r_2111').flux > 0.00001
        return 'G' if grows else 'NG'

In [6]:
def yeast8_yeast8U_biolog_simulation(biolog_data_path,yeast870_path,yeast8U_path):
    """Annotate the Biolog substrate table with growth predictions from two models.

    Parameters
    ----------
    biolog_data_path : path to a tab-separated table with at least the columns
        ``Name_in_Model`` and ``Substrate_type``.
    yeast870_path : YAML file of the first model; it is also the model used to
        resolve metabolite ids.
    yeast8U_path : YAML file of the second model.

    Returns
    -------
    pandas.DataFrame
        The table plus three columns: ``yeast8_id`` (id found by
        ``get_yeast8_id_from_name`` in the first model), ``biology_simulation``
        (result for the first model) and ``biology_simulation_plus`` (result
        for the second model).
    """
    base_model = cobra.io.load_yaml_model(yeast870_path)
    extended_model = cobra.io.load_yaml_model(yeast8U_path)

    substrates = pd.read_csv(biolog_data_path, sep='\t')

    def resolve_id(metabolite_name):
        return get_yeast8_id_from_name(metabolite_name, base_model)

    substrates['yeast8_id'] = substrates['Name_in_Model'].apply(resolve_id)

    def simulate_all(model):
        # Row-wise run; the ids always come from the first model's lookup.
        def simulate_row(row):
            return biology_simulation(row['yeast8_id'], row['Substrate_type'], model)
        return substrates.apply(simulate_row, axis=1)

    substrates['biology_simulation'] = simulate_all(base_model)
    substrates['biology_simulation_plus'] = simulate_all(extended_model)
    return substrates

In [7]:
# Run both models over every Biolog substrate and preview the annotated table.
biolog_data_with_simulation = yeast8_yeast8U_biolog_simulation(
    biolog_data_path=biolog_data_path,
    yeast870_path=yeast870_path,
    yeast8U_path=yeast8U_del_path,
)
biolog_data_with_simulation.head(3)



Unnamed: 0,Substrate,Name_in_Model,Substrate_type,Growth_Biolog,Growth_Model,yeast8_id,biology_simulation,biology_simulation_plus
0,L-Malic Acid,(S)-malate,C,NG,G,s_0067,G,G
1,L-Glutamic Acid,L-glutamate,C,NG,G,s_0992,G,G
2,Thymidine,thymidine,C,NG,NG,s_1494,NG,G


# Find false positives (FP) introduced by yeast8U: substrates with no growth in Biolog that yeast8U predicts to grow but yeast8 does not

In [8]:
# Rows where Biolog reports no growth, the second model predicts growth
# and the first model does not.
is_new_false_positive = (
    (biolog_data_with_simulation['Growth_Biolog'] == 'NG')
    & (biolog_data_with_simulation['biology_simulation_plus'] == 'G')
    & (biolog_data_with_simulation['biology_simulation'] == 'NG')
)
error_FP = biolog_data_with_simulation[is_new_false_positive].reset_index(drop=True)
error_FP

Unnamed: 0,Substrate,Name_in_Model,Substrate_type,Growth_Biolog,Growth_Model,yeast8_id,biology_simulation,biology_simulation_plus
0,Thymidine,thymidine,C,NG,NG,s_1494,NG,G
1,Butyric Acid,butyrate,C,NG,NG,s_2822,NG,G
2,L-L eucine,L-leucine,C,NG,NG,s_1022,NG,G
3,L-Valine,L-valine,C,NG,NG,s_1057,NG,G
4,2-Aminoethanol (Ethanol amine),ethanolamine,C,NG,NG,s_0684,NG,G
5,D-Galacturonic Acid,D-galacturonate,C,NG,NG,s_0560,NG,G
6,L-Isoleucine,L-isoleucine,C,NG,NG,s_1017,NG,G
7,Putrescine,putrescine,C,NG,NG,s_1390,NG,G
8,Pectin,pectin,C,NG,NG,s_1309,NG,G
9,L-Phenylalanine,L-phenylalanine,C,NG,NG,s_1033,NG,G


In [9]:
# biolog_del_reaction = {'yeast8_id':[],
#                        'Substrate_type':[],
#                         'reaction':[]}

# for index,row in tqdm(error_FP.iterrows(),total=len(error_FP)):
#     yeast8U = cobra.io.load_yaml_model(yeast8U_del_path)    

#     yeast8_id = row['yeast8_id']
#     Substrate_type = row['Substrate_type']    
#     for i in yeast8U.reactions:
#         if 'rxn' in i.id:
#             i.bounds = (0,0)

#     error_reaction = []
#     for i in tqdm(yeast8U.reactions):
#         if 'rxn' in i.id:
#             i.bounds = (0,1000)
#             yeast8U.solver = 'gurobi'
#             if biology_simulation(yeast8_id, Substrate_type, yeast8U)=='G':
#                 i.bounds = (0,0)
#                 error_reaction.append(i.id)

#     biolog_del_reaction['yeast8_id'].append(yeast8_id)
#     biolog_del_reaction['Substrate_type'].append(Substrate_type)
#     biolog_del_reaction['reaction'].append(error_reaction)

In [10]:

# Define a function to process a single row
def process_row(index_row_tuple):
    """Find the 'rxn' reactions that enable growth on one substrate.

    A fresh copy of the model at ``yeast8U_del_path`` is loaded. Every reaction
    whose id contains ``'rxn'`` is closed (bounds (0, 0)), then the reactions
    are reopened one at a time with bounds (0, 1000). If ``biology_simulation``
    reports ``'G'`` after reopening, the reaction is closed again and recorded;
    otherwise it stays open for the remaining iterations.

    Parameters
    ----------
    index_row_tuple : ``(index, row)`` pair as produced by
        ``DataFrame.iterrows()``; the row must provide ``'yeast8_id'`` and
        ``'Substrate_type'``. The index is not used.

    Returns
    -------
    tuple
        ``(yeast8_id, Substrate_type, list_of_recorded_reaction_ids)``.
    """
    _, row = index_row_tuple
    model = cobra.io.load_yaml_model(yeast8U_del_path)

    met_id = row['yeast8_id']
    substrate_type = row['Substrate_type']

    # Same reactions, same order, for both passes.
    candidates = [rxn for rxn in model.reactions if 'rxn' in rxn.id]
    for rxn in candidates:
        rxn.bounds = (0, 0)

    growth_enabling = []
    for rxn in candidates:
        rxn.bounds = (0, 1000)
        model.solver = 'gurobi'
        if biology_simulation(met_id, substrate_type, model) == 'G':
            # Close it again so later candidates are tested without it.
            rxn.bounds = (0, 0)
            growth_enabling.append(rxn.id)

    return met_id, substrate_type, growth_enabling

# Initialize the dictionary
# One independent, initially empty list per output column.
biolog_del_reaction = {
    column: [] for column in ('yeast8_id', 'Substrate_type', 'reaction')
}

# Use multiprocessing.Pool to parallelize the processing
def parallel_process(data, processes=14):
    """Apply ``process_row`` to every row of ``data`` in worker processes.

    Parameters
    ----------
    data : pandas.DataFrame whose rows are handed to ``process_row`` as
        ``(index, row)`` tuples from ``data.iterrows()``.
    processes : maximum number of worker processes (default 14, the value
        that used to be hard-coded). ``None`` means ``mp.cpu_count()``. The
        pool never holds more workers than there are rows.

    Returns
    -------
    list
        One ``process_row`` result per row, in row order (``imap`` preserves
        ordering). An empty list when ``data`` has no rows; no pool is
        started in that case.
    """
    n_rows = len(data)
    if n_rows == 0:
        return []

    requested = mp.cpu_count() if processes is None else processes
    # At least one worker, at most one per row.
    n_workers = max(1, min(requested, n_rows))

    with mp.Pool(n_workers) as pool:
        results = list(tqdm(pool.imap(process_row, data.iterrows()), total=n_rows))
    return results

# Get the results from parallel processing
results = parallel_process(error_FP)

# Spread each (metabolite id, substrate type, reaction ids) result over the column lists
for yeast8_id, Substrate_type, error_reaction in results:
    row_values = {
        'yeast8_id': yeast8_id,
        'Substrate_type': Substrate_type,
        'reaction': error_reaction,
    }
    for column, value in row_values.items():
        biolog_del_reaction[column].append(value)

  0%|          | 0/15 [00:00<?, ?it/s]

100%|██████████| 15/15 [39:09<00:00, 156.62s/it]  


In [11]:
# Tabulate the results and count the recorded reactions per substrate.
biolog_del_reaction_df = pd.DataFrame(biolog_del_reaction).assign(
    num=lambda frame: frame['reaction'].apply(len)
)
biolog_del_reaction_df

Unnamed: 0,yeast8_id,Substrate_type,reaction,num
0,s_1494,C,[rxn1014],1
1,s_2822,C,"[rxn23144, rxn9474]",2
2,s_1022,C,"[rxn33834, rxn33835, rxn33836, rxn33837, rxn33...",16
3,s_1057,C,"[rxn33834, rxn33835, rxn33836, rxn33837, rxn33...",16
4,s_0684,C,"[rxn37639, rxn390]",2
5,s_0560,C,"[rxn39714, rxn39734, rxn39735, rxn39736, rxn39...",47
6,s_1017,C,"[rxn33834, rxn33835, rxn33836, rxn33837, rxn33...",16
7,s_1390,C,"[rxn16677, rxn16678, rxn16679, rxn16680, rxn16...",29
8,s_1309,C,"[rxn39714, rxn39734, rxn39735, rxn39736, rxn39...",46
9,s_1033,C,"[rxn27095, rxn33834, rxn33835, rxn33836, rxn33...",39


In [12]:
import os

# Create the results folder on demand so a missing directory cannot discard
# the output of the long-running search above.
output_dir = os.path.dirname(biolog_del_reaction_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# NOTE: the 'reaction' column holds Python lists; presumably they are written
# as their string representation and need parsing when read back - confirm.
biolog_del_reaction_df.to_csv(biolog_del_reaction_path,index=False)