In [1]:
import torch
from build_vocab import WordVocab
from pretrain_trfm import TrfmSeq2seq
from utils import split
# build_vocab, pretrain_trfm, utils packages are from SMILES Transformer
from transformers import T5EncoderModel, T5Tokenizer
# transformers package is from ProtTrans
import re
import gc
import numpy as np
import pandas as pd
import pickle
import math
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def smiles_to_vec(Smiles):
    pad_index = 0
    unk_index = 1
    eos_index = 2
    sos_index = 3
    mask_index = 4
    vocab = WordVocab.load_vocab('vocab.pkl')
    def get_inputs(sm):
        seq_len = 220
        sm = sm.split()
        if len(sm)>218:
            print('SMILES is too long ({:d})'.format(len(sm)))
            sm = sm[:109]+sm[-109:]
        ids = [vocab.stoi.get(token, unk_index) for token in sm]
        ids = [sos_index] + ids + [eos_index]
        seg = [1]*len(ids)
        padding = [pad_index]*(seq_len - len(ids))
        ids.extend(padding), seg.extend(padding)
        return ids, seg
    def get_array(smiles):
        x_id, x_seg = [], []
        for sm in smiles:
            a,b = get_inputs(sm)
            x_id.append(a)
            x_seg.append(b)
        return torch.tensor(x_id), torch.tensor(x_seg)
    trfm = TrfmSeq2seq(len(vocab), 256, len(vocab), 4)
    trfm.load_state_dict(torch.load('trfm_12_23000.pkl'))
    trfm.eval()
    x_split = [split(sm) for sm in Smiles]
    xid, xseg = get_array(x_split)
    X = trfm.encode(torch.t(xid))
    return X


def Seq_to_vec(Sequence):
    for i in range(len(Sequence)):
        if len(Sequence[i]) > 1000:
            Sequence[i] = Sequence[i][:500] + Sequence[i][-500:]
    sequences_Example = []
    for i in range(len(Sequence)):
        zj = ''
        for j in range(len(Sequence[i]) - 1):
            zj += Sequence[i][j] + ' '
        zj += Sequence[i][-1]
        sequences_Example.append(zj)
    ###### you should place downloaded model into this directory.
    tokenizer = T5Tokenizer.from_pretrained("./prot_t5_xl_uniref50", do_lower_case=False)
    model = T5EncoderModel.from_pretrained("./prot_t5_xl_uniref50")
    gc.collect()
    # print(torch.cuda.is_available())
    # 'cuda:0' if torch.cuda.is_available() else
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model = model.eval()
    features = []
    for i in tqdm(range(len(sequences_Example))):
        # print('For sequence ', str(i+1))
        sequences_Example_i = sequences_Example[i]
        sequences_Example_i = [re.sub(r"[UZOB]", "X", sequences_Example_i)]
        ids = tokenizer.batch_encode_plus(sequences_Example_i, add_special_tokens=True, padding=True)
        input_ids = torch.tensor(ids['input_ids']).to(device)
        attention_mask = torch.tensor(ids['attention_mask']).to(device)
        with torch.no_grad():
            embedding = model(input_ids=input_ids, attention_mask=attention_mask)
        embedding = embedding.last_hidden_state.cpu().numpy()
        for seq_num in range(len(embedding)):
            seq_len = (attention_mask[seq_num] == 1).sum()
            seq_emd = embedding[seq_num][:seq_len - 1]
            features.append(seq_emd)
    features_normalize = np.zeros([len(features), len(features[0][0])], dtype=float)
    for i in range(len(features)):
        for k in range(len(features[0][0])):
            for j in range(len(features[i])):
                features_normalize[i][k] += features[i][j][k]
            features_normalize[i][k] /= len(features[i])
    return features_normalize


In [3]:
# sequences = ['MEDIPDTSRPPLKYVKGIPLIKYFAEALESLQDFQAQPDDLLISTYPKSGTTWVSEILDMIYQDGDVEKCRRAPVFIRVPFLEFKA',
#                 'PGIPTGLEVLKDTPAPRLIKTHLPLALLPQTLLDQKVKVVYVARNAKDVAVSYYHFYRMAKVHPDPDTWDSFLEKFMAGEVSYGSW',
#                 'YQHVQEWWELSHTHPVLYLFYEDMKENPKREIQKILKFVGRSLPEETVDLIVQHTSFKEMKNNSMANYTTLSPDIMDHSISAFMRK',
#                 'GISGDWKTTFTVAQNERFDADYAKKMEGCGLSFRTQL']
# Smiles = ['OC1=CC=C(C[C@@H](C(O)=O)N)C=C1','OC1=CC=C(C[C@@H](C(O)=O)N)C=C1','OC1=CC=C(C[C@@H](C(O)=O)N)C=C1','OC1=CC=C(C[C@@H](C(O)=O)N)C=C1']
# seq_vec = Seq_to_vec(sequences)
# smiles_vec = smiles_to_vec(Smiles)
# fused_vector = np.concatenate((smiles_vec, seq_vec), axis=1)

# ###### you should place downloaded model into this directory.
# # For kcat
# # with open('./unikp_model/UniKP for kcat.pkl', "rb") as f:
# #     model = pickle.load(f)
# # For Km
# with open('./unikp_model/UniKP for Km.pkl', "rb") as f:
#     model = pickle.load(f)
# # For kcat/Km
# # with open('./unikp_model/UniKP for kcat_Km.pkl', "rb") as f:
# #     model = pickle.load(f)

# Pre_label = model.predict(fused_vector)
# Pre_label_pow = [math.pow(10, Pre_label[i]) for i in range(len(Pre_label))]
# print(len(Pre_label_pow))
# res = pd.DataFrame({'sequences': sequences, 'Smiles': Smiles, 'Pre_label': Pre_label_pow})
# # res.to_excel('Kinetic_parameters_predicted_label.xlsx')
# res

In [4]:
# yeast8U_sequence_smiles_pre_path = '../Final-AIGEM-new/Data/kcat_km_predict/yeast8U_sequence_smiles_prepare.csv'
yeast8U_sequence_smiles_pre_path = '../../../Results/kcat_km_predict/yeast8U_sequence_smiles_prepare.csv'

In [5]:
KM_pre_df = pd.read_csv(yeast8U_sequence_smiles_pre_path)
# KM_pre_df = KM_pre_df[['id','gene','sub','Sequence']]
print(KM_pre_df.shape)
KM_pre_df.head()

(214244, 9)


Unnamed: 0,rea_id,gene,met_id,reactant_met_id,product_met_id,SMILES,reactant_SMILES,product_SMILES,Sequence
0,r_0001_1,YEL071W,s_0025,"['s_0025', 's_0709']","['s_0710', 's_1399']",CC(C(=O)O)O,"['CC(C(=O)O)O', 'CC1=C(C2=CC3=NC(=CC4=C(C(=C([...",['CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C...,MTAAHPVAQLTAEAYPKVKRNPNFKVLDSEDLAYFRSILSNDEILN...
1,r_0001_2,YEL071W,s_0709,"['s_0025', 's_0709']","['s_0710', 's_1399']",CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C(=...,"['CC(C(=O)O)O', 'CC1=C(C2=CC3=NC(=CC4=C(C(=C([...",['CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C...,MTAAHPVAQLTAEAYPKVKRNPNFKVLDSEDLAYFRSILSNDEILN...
2,r_0001_3,YJR048W,s_0025,"['s_0025', 's_0709']","['s_0710', 's_1399']",CC(C(=O)O)O,"['CC(C(=O)O)O', 'CC1=C(C2=CC3=NC(=CC4=C(C(=C([...",['CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C...,MTEFKAGSAKKGATLFKTRCLQCHTVEKGGPHKVGPNLHGIFGRHS...
3,r_0001_4,YJR048W,s_0709,"['s_0025', 's_0709']","['s_0710', 's_1399']",CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C(=...,"['CC(C(=O)O)O', 'CC1=C(C2=CC3=NC(=CC4=C(C(=C([...",['CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C...,MTEFKAGSAKKGATLFKTRCLQCHTVEKGGPHKVGPNLHGIFGRHS...
4,r_0001_5,YEL039C,s_0025,"['s_0025', 's_0709']","['s_0710', 's_1399']",CC(C(=O)O)O,"['CC(C(=O)O)O', 'CC1=C(C2=CC3=NC(=CC4=C(C(=C([...",['CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C...,MAKESTGFKPGSAKKGATLFKTRCQQCHTIEEGGPNKVGPNLHGIF...


In [6]:
smiles_list = list(set(KM_pre_df['SMILES'].to_list()))
enzyme_list = list(set(KM_pre_df['Sequence'].to_list()))
print(len(smiles_list))
print(len(enzyme_list))

1027
1622


In [7]:
smiles_vec = smiles_to_vec(smiles_list)
smiles_vec_dict = {}
for i in range(len(smiles_list)):
    smiles_vec_dict[smiles_list[i]] = smiles_vec[i]
smiles_vec_dict

There are 1027 molecules. It will take a little time.


{'CC(=CCCC(=CCCC(=CCCC(=CCCC(=CCCC(=CCC1=C(C(=CC=C1)OC)O)C)C)C)C)C)C': array([ 0.03863817,  0.20659895, -0.04326536, ...,  0.97974944,
         1.0670712 , -0.0949388 ], dtype=float32),
 'CCCCCCCCCC(O)CC(=O)[O-]': array([0.08345455, 0.13959883, 0.25500605, ..., 1.0371954 , 1.3222659 ,
        0.31826827], dtype=float32),
 'C(C1C(C(C(C(O1)OCC2C(C(C(O2)(CO)O)O)O)O)O)O)O': array([0.09878268, 0.5377554 , 0.08189436, ..., 0.17759223, 0.70945615,
        1.1639308 ], dtype=float32),
 'C(CCC(=O)N)CC(CCS)S': array([-0.09179875,  0.01198043, -0.04082174, ...,  1.0193456 ,
         0.5686278 ,  0.16115366], dtype=float32),
 '[NH3+]C(CC[SeH])C(=O)[O-]': array([ 0.03749036, -0.23413362,  0.21905157, ...,  1.2998874 ,
         0.6091315 ,  0.8932569 ], dtype=float32),
 'CCC(C)C(=O)C(=O)O': array([0.02233319, 0.06127566, 0.16168962, ..., 0.9680755 , 0.9973894 ,
        0.25349066], dtype=float32),
 'CCCCCCCCCC(=O)OC(CO)COP(=O)([O-])OC1C(O)C(O)C(O)C(O)C1O': array([0.13476415, 0.30553606, 0.02374757, 

In [8]:
enzyme_vec = Seq_to_vec(enzyme_list)
enzyme_vec_dict = {}
for i in range(len(enzyme_list)):
    enzyme_vec_dict[enzyme_list[i]] = enzyme_vec[i]
enzyme_vec_dict

Some weights of the model checkpoint at ./prot_t5_xl_uniref50 were not used when initializing T5EncoderModel: ['decoder.block.8.layer.2.layer_norm.weight', 'decoder.block.22.layer.0.SelfAttention.k.weight', 'decoder.block.16.layer.2.DenseReluDense.wo.weight', 'decoder.block.13.layer.1.EncDecAttention.k.weight', 'decoder.block.1.layer.2.layer_norm.weight', 'decoder.block.3.layer.1.EncDecAttention.k.weight', 'decoder.block.7.layer.2.DenseReluDense.wo.weight', 'decoder.block.6.layer.1.EncDecAttention.q.weight', 'decoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight', 'decoder.block.13.layer.0.layer_norm.weight', 'decoder.block.20.layer.2.DenseReluDense.wo.weight', 'decoder.block.11.layer.0.SelfAttention.k.weight', 'decoder.block.6.layer.1.layer_norm.weight', 'decoder.block.10.layer.0.SelfAttention.v.weight', 'decoder.block.15.layer.0.SelfAttention.o.weight', 'decoder.block.16.layer.1.EncDecAttention.v.weight', 'decoder.block.11.layer.1.EncDecAttention.k.weight', 'decoder.bl

{'MAVSKVYARSVYDSRGNPTVEVELTTEKGVFRSIVPSGASTGVHEALEMRDEDKSKWMGKGVMNAVNNVNNVIAAAFVKANLDVKDQKAVDDFLLSLDGTANKSKLGANAILGVSMAAARAAAAEKNVPLYQHLADLSKSKTSPYVLPVPFLNVLNGGSHAGGALALQEFMIAPTGAKTFAEAMRIGSEVYHNLKSLTKKRYGASAGNVGDEGGVAPNIQTAEEALDLIVDAIKAAGHDGKVKIGLDCASSEFFKDGKYDLDFKNPESDKSKWLTGVELADMYHSLMKRYPIVSIEDPFAEDDWEAWSHFFKTAGIQIVADDLTVTNPARIATAIEKKAADALLLKVNQIGTLSESIKAAQDSFAANWGVMVSHRSGETEDTFIADLVVGLRTGQIKTGAPARSERLAKLNQLLRIEEELGDKAVYAGENFHHGDKL': array([ 0.02164413, -0.00130453,  0.0053534 , ..., -0.03453084,
         0.00664847,  0.01829589]),
 'MPVCKNCHGTEFERDLSNANNDLVCKACGVVSEDNPIVSEVTFGETSAGAAVVQGSFIGAGQSHAAFGGSSALESREATLNNARRKLRAVSYALHIPEYITDAAFQWYKLALANNFVQGRRSQNVIASCLYVACRKEKTHHMLIDFSSRLQVSVYSIGATFLKMVKKLHITELPLADPSLFIQHFAEKLDLADKKIKVVKDAVKLAQRMSKDWMFEGRRPAGIAGACILLACRMNNLRRTHTEIVAVSHVAEETLQQRLNEFKNTKAAKLSVQKFRENDVEDGEARPPSFVKNRKKERKIKDSLDKEEMFQTSEEALNKNPILTQVLGEQELSSKEVLFYLKQFSERRARVVERIKATNGIDGENIYHEGSENETRKRKLSEVSIQNEHVEGEDKETEGTEEKVKKVKTKTSEEKKENESGHFQDAIDGYSLETDPYCPRNLHLLPTTDTYLSKVS

In [9]:
KM_pre_df['enzyme_vec'] = KM_pre_df['Sequence'].apply(lambda x: enzyme_vec_dict[x] if x in enzyme_vec_dict.keys() else '')
print(KM_pre_df.shape)
KM_pre_df = KM_pre_df[KM_pre_df['enzyme_vec']!='']
print(KM_pre_df.shape)
KM_pre_df.head()

(214244, 10)
(201634, 10)


  result = libops.scalar_compare(x.ravel(), y, op)


Unnamed: 0,rea_id,gene,met_id,reactant_met_id,product_met_id,SMILES,reactant_SMILES,product_SMILES,Sequence,enzyme_vec
0,r_0001_1,YEL071W,s_0025,"['s_0025', 's_0709']","['s_0710', 's_1399']",CC(C(=O)O)O,"['CC(C(=O)O)O', 'CC1=C(C2=CC3=NC(=CC4=C(C(=C([...",['CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C...,MTAAHPVAQLTAEAYPKVKRNPNFKVLDSEDLAYFRSILSNDEILN...,"[0.03586542009332099, 0.03176993108480665, 0.0..."
1,r_0001_2,YEL071W,s_0709,"['s_0025', 's_0709']","['s_0710', 's_1399']",CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C(=...,"['CC(C(=O)O)O', 'CC1=C(C2=CC3=NC(=CC4=C(C(=C([...",['CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C...,MTAAHPVAQLTAEAYPKVKRNPNFKVLDSEDLAYFRSILSNDEILN...,"[0.03586542009332099, 0.03176993108480665, 0.0..."
2,r_0001_3,YJR048W,s_0025,"['s_0025', 's_0709']","['s_0710', 's_1399']",CC(C(=O)O)O,"['CC(C(=O)O)O', 'CC1=C(C2=CC3=NC(=CC4=C(C(=C([...",['CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C...,MTEFKAGSAKKGATLFKTRCLQCHTVEKGGPHKVGPNLHGIFGRHS...,"[0.011968037550371789, 0.08801914164396628, -0..."
3,r_0001_4,YJR048W,s_0709,"['s_0025', 's_0709']","['s_0710', 's_1399']",CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C(=...,"['CC(C(=O)O)O', 'CC1=C(C2=CC3=NC(=CC4=C(C(=C([...",['CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C...,MTEFKAGSAKKGATLFKTRCLQCHTVEKGGPHKVGPNLHGIFGRHS...,"[0.011968037550371789, 0.08801914164396628, -0..."
4,r_0001_5,YEL039C,s_0025,"['s_0025', 's_0709']","['s_0710', 's_1399']",CC(C(=O)O)O,"['CC(C(=O)O)O', 'CC1=C(C2=CC3=NC(=CC4=C(C(=C([...",['CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C...,MAKESTGFKPGSAKKGATLFKTRCQQCHTIEEGGPNKVGPNLHGIF...,"[0.010595964587219389, 0.0727967328795817, -0...."


In [10]:
KM_pre_df['smiles_vec'] = KM_pre_df['SMILES'].apply(lambda x: smiles_vec_dict[x] if x in smiles_vec_dict.keys() else '')
print(KM_pre_df.shape)
KM_pre_df = KM_pre_df[KM_pre_df['smiles_vec']!='']
print(KM_pre_df.shape)
KM_pre_df.head()

  result = libops.scalar_compare(x.ravel(), y, op)


(201634, 11)
(201634, 11)


Unnamed: 0,rea_id,gene,met_id,reactant_met_id,product_met_id,SMILES,reactant_SMILES,product_SMILES,Sequence,enzyme_vec,smiles_vec
0,r_0001_1,YEL071W,s_0025,"['s_0025', 's_0709']","['s_0710', 's_1399']",CC(C(=O)O)O,"['CC(C(=O)O)O', 'CC1=C(C2=CC3=NC(=CC4=C(C(=C([...",['CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C...,MTAAHPVAQLTAEAYPKVKRNPNFKVLDSEDLAYFRSILSNDEILN...,"[0.03586542009332099, 0.03176993108480665, 0.0...","[0.047971945, 0.18148443, 0.18118219, 0.362441..."
1,r_0001_2,YEL071W,s_0709,"['s_0025', 's_0709']","['s_0710', 's_1399']",CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C(=...,"['CC(C(=O)O)O', 'CC1=C(C2=CC3=NC(=CC4=C(C(=C([...",['CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C...,MTAAHPVAQLTAEAYPKVKRNPNFKVLDSEDLAYFRSILSNDEILN...,"[0.03586542009332099, 0.03176993108480665, 0.0...","[0.07785472, -0.1687159, -0.32547167, 0.842465..."
2,r_0001_3,YJR048W,s_0025,"['s_0025', 's_0709']","['s_0710', 's_1399']",CC(C(=O)O)O,"['CC(C(=O)O)O', 'CC1=C(C2=CC3=NC(=CC4=C(C(=C([...",['CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C...,MTEFKAGSAKKGATLFKTRCLQCHTVEKGGPHKVGPNLHGIFGRHS...,"[0.011968037550371789, 0.08801914164396628, -0...","[0.047971945, 0.18148443, 0.18118219, 0.362441..."
3,r_0001_4,YJR048W,s_0709,"['s_0025', 's_0709']","['s_0710', 's_1399']",CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C(=...,"['CC(C(=O)O)O', 'CC1=C(C2=CC3=NC(=CC4=C(C(=C([...",['CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C...,MTEFKAGSAKKGATLFKTRCLQCHTVEKGGPHKVGPNLHGIFGRHS...,"[0.011968037550371789, 0.08801914164396628, -0...","[0.07785472, -0.1687159, -0.32547167, 0.842465..."
4,r_0001_5,YEL039C,s_0025,"['s_0025', 's_0709']","['s_0710', 's_1399']",CC(C(=O)O)O,"['CC(C(=O)O)O', 'CC1=C(C2=CC3=NC(=CC4=C(C(=C([...",['CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C...,MAKESTGFKPGSAKKGATLFKTRCQQCHTIEEGGPNKVGPNLHGIF...,"[0.010595964587219389, 0.0727967328795817, -0....","[0.047971945, 0.18148443, 0.18118219, 0.362441..."


In [11]:
KM_pre_df_enzyme_vec = KM_pre_df['enzyme_vec'].to_list()
KM_pre_df_smiles_vec = KM_pre_df['smiles_vec'].to_list()

In [12]:
fused_vector = np.concatenate((KM_pre_df_smiles_vec, KM_pre_df_enzyme_vec), axis=1)

In [13]:
with open('./unikp_model/UniKP for Km.pkl', "rb") as f:
    model = pickle.load(f)
Pre_label = model.predict(fused_vector)
Pre_label_pow = [math.pow(10, Pre_label[i]) for i in range(len(Pre_label))]
KM_pre_df['km'] = Pre_label_pow
print(len(Pre_label_pow))

https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


201634


In [14]:
with open('./unikp_model/UniKP for kcat.pkl', "rb") as f:
    model = pickle.load(f)
Pre_label = model.predict(fused_vector)
Pre_label_pow = [math.pow(10, Pre_label[i]) for i in range(len(Pre_label))]
KM_pre_df['kcat'] = Pre_label_pow
print(len(Pre_label_pow))

https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


201634


In [15]:
# KM_pre_df.to_csv('./result/yeast8U_unikp.csv')
KM_pre_df.to_csv('../../../Results/kcat_km_predict/yeast8U_unikp.csv')
KM_pre_df

Unnamed: 0,rea_id,gene,met_id,reactant_met_id,product_met_id,SMILES,reactant_SMILES,product_SMILES,Sequence,enzyme_vec,smiles_vec,km,kcat
0,r_0001_1,YEL071W,s_0025,"['s_0025', 's_0709']","['s_0710', 's_1399']",CC(C(=O)O)O,"['CC(C(=O)O)O', 'CC1=C(C2=CC3=NC(=CC4=C(C(=C([...",['CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C...,MTAAHPVAQLTAEAYPKVKRNPNFKVLDSEDLAYFRSILSNDEILN...,"[0.03586542009332099, 0.03176993108480665, 0.0...","[0.047971945, 0.18148443, 0.18118219, 0.362441...",0.523190,3.539613
1,r_0001_2,YEL071W,s_0709,"['s_0025', 's_0709']","['s_0710', 's_1399']",CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C(=...,"['CC(C(=O)O)O', 'CC1=C(C2=CC3=NC(=CC4=C(C(=C([...",['CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C...,MTAAHPVAQLTAEAYPKVKRNPNFKVLDSEDLAYFRSILSNDEILN...,"[0.03586542009332099, 0.03176993108480665, 0.0...","[0.07785472, -0.1687159, -0.32547167, 0.842465...",0.026891,5.190370
2,r_0001_3,YJR048W,s_0025,"['s_0025', 's_0709']","['s_0710', 's_1399']",CC(C(=O)O)O,"['CC(C(=O)O)O', 'CC1=C(C2=CC3=NC(=CC4=C(C(=C([...",['CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C...,MTEFKAGSAKKGATLFKTRCLQCHTVEKGGPHKVGPNLHGIFGRHS...,"[0.011968037550371789, 0.08801914164396628, -0...","[0.047971945, 0.18148443, 0.18118219, 0.362441...",0.468249,8.048236
3,r_0001_4,YJR048W,s_0709,"['s_0025', 's_0709']","['s_0710', 's_1399']",CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C(=...,"['CC(C(=O)O)O', 'CC1=C(C2=CC3=NC(=CC4=C(C(=C([...",['CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C...,MTEFKAGSAKKGATLFKTRCLQCHTVEKGGPHKVGPNLHGIFGRHS...,"[0.011968037550371789, 0.08801914164396628, -0...","[0.07785472, -0.1687159, -0.32547167, 0.842465...",0.019475,3.193169
4,r_0001_5,YEL039C,s_0025,"['s_0025', 's_0709']","['s_0710', 's_1399']",CC(C(=O)O)O,"['CC(C(=O)O)O', 'CC1=C(C2=CC3=NC(=CC4=C(C(=C([...",['CC1=C(C2=CC3=NC(=CC4=C(C(=C([N-]4)C=C5C(=C(C...,MAKESTGFKPGSAKKGATLFKTRCQQCHTIEEGGPNKVGPNLHGIF...,"[0.010595964587219389, 0.0727967328795817, -0....","[0.047971945, 0.18148443, 0.18118219, 0.362441...",0.401951,15.595124
...,...,...,...,...,...,...,...,...,...,...,...,...,...
214237,rxn1363_136,YOR311C,s_3958,"['s_3958', 'sn_22']","['s_0188', 's_3998']",C(C(C(=O)[O-])O)O,"['C(C(C(=O)[O-])O)O', 'O=P(O)(O)OCC1OC(O[C@]2(...","['C(C(C(=O)O)OP(=O)(O)O)O', 'C(C1C(C(C(C(O1)OC...",MGTEDAIALPNSTLEPRTEAKQRLSSKSHQVSAKVTIPAKEEISSS...,"[0.008279101800470581, -0.04217020960621022, 0...","[0.13983586, -0.104135334, 0.2159923, 0.740510...",0.292814,5.826829
214240,rxn1363_139,YDR051C,sn_22,"['s_3958', 'sn_22']","['s_0188', 's_3998']",O=P(O)(O)OCC1OC(O[C@]2(CO)OC(CO)C(O)C2O)C(O)C(...,"['C(C(C(=O)[O-])O)O', 'O=P(O)(O)OCC1OC(O[C@]2(...","['C(C(C(=O)O)OP(=O)(O)O)O', 'C(C1C(C(C(C(O1)OC...",MCEENVHVSEDVAGSHGSFTNARPRLIVLIRHGESESNKNKEVNGY...,"[0.0271220527978977, 0.012314955590112961, -0....","[0.15487777, 0.2238443, 0.023671307, 0.0422015...",0.106773,1.251718
214241,rxn1363_140,YDR051C,s_3958,"['s_3958', 'sn_22']","['s_0188', 's_3998']",C(C(C(=O)[O-])O)O,"['C(C(C(=O)[O-])O)O', 'O=P(O)(O)OCC1OC(O[C@]2(...","['C(C(C(=O)O)OP(=O)(O)O)O', 'C(C1C(C(C(C(O1)OC...",MCEENVHVSEDVAGSHGSFTNARPRLIVLIRHGESESNKNKEVNGY...,"[0.0271220527978977, 0.012314955590112961, -0....","[0.13983586, -0.104135334, 0.2159923, 0.740510...",0.587472,1.991955
214242,rxn1363_141,YAL038W,sn_22,"['s_3958', 'sn_22']","['s_0188', 's_3998']",O=P(O)(O)OCC1OC(O[C@]2(CO)OC(CO)C(O)C2O)C(O)C(...,"['C(C(C(=O)[O-])O)O', 'O=P(O)(O)OCC1OC(O[C@]2(...","['C(C(C(=O)O)OP(=O)(O)O)O', 'C(C1C(C(C(C(O1)OC...",MSRLERLTSLNVVAGSDLRRTSIIGTIGPKTNNPETLVALRKAGLN...,"[0.043762888965080494, 0.012939620855264365, -...","[0.15487777, 0.2238443, 0.023671307, 0.0422015...",0.104253,17.532364
