In [1]:
import cobra
import pandas as pd

In [2]:
# Load the COBRA model from its JSON file; its reactions (ids, subsystems,
# gene-reaction rules and flux bounds) are used by the cells below.
model = cobra.io.load_json_model('./input/model/y834_hvd_v3_rba.json')

#### Automated assignments

In [5]:
# Per-enzyme information table. The row labels are replaced by the values of
# the 'id' column (the column itself is kept) so that entries can be looked
# up by id, e.g. df_uni.subloc_assigned[<id>].
df_uni = pd.read_excel('./output/enz_info_uniprot_step2.xlsx')
df_uni.index = pd.Index(df_uni['id'].tolist())

In [6]:
# Build df_rxns: one row per reaction/enzyme pairing, derived from each
# reaction's gene-reaction rule (GPR).
#   * subsystem 'Exchange'                 -> 'RXN-<rxn>-SPONT'
#   * no genes, or gene id 'UNKNOWN'       -> 'RXN-<rxn>-DUMMYENZ'
#   * gene id 'TRUE' or 'SPONT'            -> 'RXN-<rxn>-SPONT'
#   * GPR without ' and '                  -> one 'RXN-<rxn>-<gene>' row per gene
#   * GPR containing ' and ' (subunits)    -> a single 'RXN-<rxn>' row with
#                                             notes = 'manualBuildSubunit'
# Rows that end up with a non-null 'notes' value are excluded from the
# automatic assignments further down in the notebook.
cols = ['id', 'gpr', 'subloc_assigned', 'notes']

# Rows are collected in a dict keyed by entry id and converted to a DataFrame
# once at the end; growing a DataFrame one cell at a time is quadratic.
# Writing the same key twice overwrites the earlier row in place, which is
# what label-based .loc assignment did.
entries = {}

for rxn in model.reactions:
    if rxn.subsystem == 'Exchange':
        i = 'RXN-' + rxn.id + '-SPONT'
        entries[i] = {'id': i, 'gpr': 'SPONT'}
        continue

    genes = [g.id for g in rxn.genes]
    if len(genes) == 0:
        i = 'RXN-' + rxn.id + '-DUMMYENZ'
        entries[i] = {'id': i, 'gpr': 'DUMMYENZ'}
        continue

    gpr = rxn.gene_reaction_rule

    # Composed of different subunits: a single entry, flagged for manual build.
    if ' and ' in gpr:
        i = 'RXN-' + rxn.id
        sublocs = set()
        for g in genes:
            # A gene without a row in df_uni contributes no localization
            # instead of aborting the whole cell with a KeyError; the entry
            # is flagged for manual handling either way.
            if g in df_uni.index:
                sublocs.update(df_uni.subloc_assigned[g].split(','))
        entries[i] = {
            'id': i,
            'gpr': gpr,
            # Sorted so the joined string is reproducible between runs
            # (set iteration order is not).
            'subloc_assigned': ','.join(sorted(sublocs)),
            'notes': 'manualBuildSubunit',
        }
        continue

    # Not composed of different subunits: one entry per gene.
    for g in genes:
        if g == 'UNKNOWN':
            i = 'RXN-' + rxn.id + '-DUMMYENZ'
            entries[i] = {'id': i, 'gpr': 'DUMMYENZ'}
        elif g in ['TRUE', 'SPONT']:
            i = 'RXN-' + rxn.id + '-SPONT'
            entries[i] = {'id': i, 'gpr': 'SPONT'}
        elif g in df_uni.index:
            i = 'RXN-' + rxn.id + '-' + g
            subloc = df_uni.subloc_assigned[g]
            entry = {'id': i, 'gpr': g, 'subloc_assigned': subloc}
            # More than one comma-separated compartment needs a manual decision.
            if len(subloc.split(',')) > 1:
                entry['notes'] = 'manualMultiCompartment'
            entries[i] = entry
        else:
            # The membership test has to come before any df_uni lookup;
            # otherwise the lookup raises KeyError and this branch can never
            # be reached.
            i = 'RXN-' + rxn.id + '-' + g
            entries[i] = {'id': i, 'gpr': g, 'notes': 'missingEnzEntry'}

# Keys missing from a row become NaN. astype(object) keeps every column as
# object dtype, so later string assignments into all-NaN columns do not
# require an upcast.
df_rxns = pd.DataFrame(
    list(entries.values()), index=list(entries), columns=cols
).astype(object)

#### Export entries requiring manual curation

#### Incorporate manual curation

In [7]:
# Curation spreadsheet; the next cell reads its 'id', 'enz', 'gpr',
# 'subloc_assigned' and 'notes' columns.
df_cure = pd.read_excel('./curation/rxn2enz_curation.xlsx')

In [8]:
# Start from the rows of df_rxns that carry no 'notes' value. Filter first and
# copy afterwards: df_rxns2 is then an independent frame, so the insert/.loc
# writes below neither touch df_rxns nor act on a filtered intermediate
# (which is what triggers pandas' SettingWithCopyWarning).
df_rxns2 = df_rxns[df_rxns.notes.isnull()].copy()

# The 'enz' column is initialised from 'gpr' and placed right after it.
if 'enz' not in df_rxns2.columns:
    df_rxns2.insert(2, 'enz', df_rxns2.gpr)

# Add (or overwrite) one row per curated entry, labelled '<id>-<enz>'.
cols = ['gpr', 'enz', 'subloc_assigned', 'notes']
for i in df_cure.index:
    entry_id = df_cure.id[i] + '-' + df_cure.enz[i]
    # Writing 'id' first creates the row when the label is new.
    df_rxns2.loc[entry_id, 'id'] = entry_id
    df_rxns2.loc[entry_id, cols] = df_cure.loc[i, cols]

#### Splitting reversible reactions into forward and reverse reactions

In [9]:
# Build df_rxns3: for every entry of df_rxns2 whose reaction exists in the
# model, emit a '_FWD' row when the reaction's upper bound is positive and a
# '_REV' row when its lower bound is negative (both rows when both hold).
cols = ['id', 'rxn_src', 'enz', 'gpr']

rxns_model = [rxn.id for rxn in model.reactions]
# Set copy for O(1) membership tests inside the loop.
rxn_ids = set(rxns_model)

# Rows are collected in a dict keyed by the new entry id and converted to a
# DataFrame once at the end; a repeated key overwrites the earlier row, as
# label-based .loc assignment did.
split_rows = {}

for i in df_rxns2.index:
    entry = df_rxns2.id[i]
    # Entry ids look like '<header>-<rxnid>-<gid>'. Splitting at most twice
    # keeps any further '-' inside gid; gid is '' when there is no third part.
    parts = entry.split('-', 2)
    rxnid = parts[1]
    gid = parts[2] if len(parts) > 2 else ''

    # Entries whose reaction id is not in the model are skipped.
    if rxnid not in rxn_ids:
        continue

    rxn = model.reactions.get_by_id(rxnid)
    directions = (
        ('_FWD', rxn.upper_bound > 0),
        ('_REV', rxn.lower_bound < 0),
    )
    for suffix, is_active in directions:
        if not is_active:
            continue
        enew = 'RXN-' + rxnid + suffix + '-' + gid
        split_rows[enew] = {
            'id': enew,
            'rxn_src': rxn.id,
            'enz': df_rxns2.enz[i],
            'gpr': df_rxns2.gpr[i],
        }

df_rxns3 = pd.DataFrame(
    list(split_rows.values()), index=list(split_rows), columns=cols
).astype(object)

In [11]:
# Write the direction-split reaction/enzyme table to Excel; index=False
# omits the row labels, which duplicate the 'id' column.
df_rxns3.to_excel('./output/rxns_enz.xlsx', index=False)