# Search through gnps libraries to select dataset

Starting from gnps public libraries (speclibs folders, version from 5 July 2019), we here create one large MGF file with all spectra.
In addition Smiles or Inchi's are added from metadata file.

In [1]:
# Import
import json
import os

In [2]:
# Locations
ROOT = "C:\\OneDrive - Netherlands eScience Center\\Project_Wageningen_iOMEGA"
PATH_MS_DATA = ROOT + "\\Data\\GNPS_all\\"
PATH_SAVE_MODEL = ROOT + "\\Spec2Vec\\models_trained\\"
PATH_SAVE_DATA = ROOT + "\\Spec2Vec\\data\\"
PATH_SPEC2VEC = ROOT + "\\Spec2Vec\\"

## Importing additional metadata

In [6]:
json_file = PATH_MS_DATA + 'gnpslibraryjson_filtered.json'

with open(json_file, 'rb') as f:
    data = json.load(f)

In [7]:
print("Metadata available for", len(data), "spectra.")

Metadata available for 77253 spectra.


In [8]:
gnps_data = {}
for data_entry in data:
    if data_entry['GNPSID'] in gnps_data:
        print('duplicate!')
    gnps_data[data_entry['GNPSID']] = data_entry

## Briefly inspect metadata

In [9]:
keys = []
for key in gnps_data.keys():
    keys.append(key)

In [10]:
keys[0]

'CCMSLIB00000001548'

In [11]:
gnps_data[keys[0]]

{'COMPOUND_INCHIKEY': 'KNGPFNUOXXLKCN-ZNCJFREWSA-N',
 'COMPOUND_INCHI': 'InChI=1S/C45H73N5O10S3/c1-14-17-24(6)34(52)26(8)37-25(7)30(58-13)18-31-46-29(19-61-31)39-49-45(12,21-62-39)43-50-44(11,20-63-43)42(57)48-32(22(4)15-2)35(53)27(9)40(55)59-36(23(5)16-3)38(54)47-33(28(10)51)41(56)60-37/h19,22-28,30,32-37,51-53H,14-18,20-21H2,1-13H3,(H,47,54)(H,48,57)/t22-,23-,24+,25-,26-,27+,28+,30-,32-,33-,34-,35-,36-,37-,44+,45+/m0/s1',
 'LIBRARY_QUALITY': '1',
 'GNPSID': 'CCMSLIB00000001548',
 'COMPOUND_NAME': 'Hoiamide B',
 'COMPOUND_SMILES': 'CCC[C@@H](C)[C@@H]([C@H](C)[C@@H]1[C@H]([C@H](Cc2nc(cs2)C3=N[C@](CS3)(C4=N[C@](CS4)(C(=O)N[C@H]([C@H]([C@H](C(=O)O[C@H](C(=O)N[C@H](C(=O)O1)[C@@H](C)O)[C@@H](C)CC)C)O)[C@@H](C)CC)C)C)OC)C)O'}

## Functions

In [3]:
def check_spectrum_lines(spectrum):
    """ Check if desired metadata is present for spectrum.
    """
    spec_id = None
    smiles = None
    
    for l in spectrum:
        if l.startswith('SPECTRUMID'):
            spec_id = l.strip().split('=')[1]
        if l.startswith('SMILES'):
            smiles = l.strip().split('=')[1]
    
    if spec_id in gnps_data:
        return True
    else:
        return False
    if smiles is not None \
        and smiles.strip() != 'N/A' \
        and smiles.strip() != 'NA'\
        and len(smiles) > 0 \
        and smiles.strip() != 'InChI':
        return True

    return False

def augment_spectrum(spectrum):
    """ Add metadata to spectrum (taken from json file, see above).
    """
    for l in spectrum:
        if l.startswith('SPECTRUMID'):
            spectrum_id = l.strip().split('=')[1]
                
    new_spectrum = []
    for l in spectrum:
        if l.startswith('BEGIN ION'):
            new_spectrum.append(l)
            new_spectrum.append('TITLE=%s\n' % gnps_data[spectrum_id]['COMPOUND_NAME'])
            new_spectrum.append('LIBRARYQUALITY=%s\n' % gnps_data[spectrum_id]['LIBRARY_QUALITY'])
            new_spectrum.append('SMILES=%s\n' % gnps_data[spectrum_id]['COMPOUND_SMILES'])
            new_spectrum.append('INCHIKEY=%s\n' % gnps_data[spectrum_id]['COMPOUND_INCHIKEY'])
            new_spectrum.append('INCHI=%s\n' % gnps_data[spectrum_id]['COMPOUND_INCHI'])
            continue
            
        if l.startswith('INCHIKEY'):
            continue
        elif l.startswith('SMILES'):
            continue
        elif l.startswith('INCHI'):
            continue
        elif l.startswith('LIBRARYQUALITY'):
            continue
        elif l.startswith('TITLE'):
            continue
            
        new_spectrum.append(l)

    return new_spectrum

def match_spectra_in_file(filename, matching=True):
    """ Collect spectra in file (line by line). Check if matching metadata is available.
    
    matching: bool
        If matching = True than only include spectra for which desired metadata is present.
        For matching = False, include all spectra.
    """
    matched_spectra = []
    with open(filename) as f:
        for line in f.readlines():
            if line.startswith('BEGIN ION'):
                # Fresh spectrum
                spectrum = []
            spectrum.append(line)
            
            if line.startswith('END ION'):
                
                if check_spectrum_lines(spectrum): # Check if we have structural info
                    try:
                        spectrum = augment_spectrum(spectrum)
                    except KeyError as e:
                        pass
                    matched_spectra.append(spectrum)
                else: 
                    if not matching: # then include all spectra
                        matched_spectra.append(spectrum)
    return matched_spectra

## Create MGF for all spectra (also non matched ones)

In [13]:
path = PATH_MS_DATA + 'speclibs\\'
collected_spectra = []
for d in os.listdir(path):
    for f in os.listdir(os.path.join(path, d)):
        if not f.endswith('.mgf'):
            continue
        sp = match_spectra_in_file(os.path.join(path, d, f), matching=False)
        collected_spectra.extend(sp)
        print(f, len(collected_spectra))

BILELIB19.mgf 177
CASMI.mgf 745
DEREPLICATOR_IDENTIFIED_LIBRARY.mgf 1124
GNPS-COLLECTIONS-MISC.mgf 1170
GNPS-COLLECTIONS-PESTICIDES-NEGATIVE.mgf 1246
GNPS-COLLECTIONS-PESTICIDES-POSITIVE.mgf 1899
GNPS-EMBL-MCF.mgf 2484
GNPS-FAULKNERLEGACY.mgf 2611
GNPS-LIBRARY.mgf 7394
GNPS-NIH-CLINICALCOLLECTION1.mgf 7771
GNPS-NIH-CLINICALCOLLECTION2.mgf 7966
GNPS-NIH-NATURALPRODUCTSLIBRARY.mgf 9233
GNPS-NIH-NATURALPRODUCTSLIBRARY_ROUND2_NEGATIVE.mgf 11096
GNPS-NIH-NATURALPRODUCTSLIBRARY_ROUND2_POSITIVE.mgf 16892
GNPS-NIH-SMALLMOLECULEPHARMACOLOGICALLYACTIVE.mgf 18352
GNPS-NIST14-MATCHES.mgf 24115
GNPS-PRESTWICKPHYTOCHEM.mgf 24258
GNPS-SELLECKCHEM-FDA-PART1.mgf 26646
GNPS-SELLECKCHEM-FDA-PART2.mgf 27302
HMDB.mgf 29537
LDB_NEGATIVE.mgf 29763
LDB_POSITIVE.mgf 29846
MASSBANK.mgf 41845
MASSBANKEU.mgf 43337
MIADB.mgf 43509
MMV_NEGATIVE.mgf 43556
MMV_POSITIVE.mgf 43666
MONA.mgf 92907
PNNL-LIPIDS-NEGATIVE.mgf 109049
PNNL-LIPIDS-POSITIVE.mgf 139631
RESPECT.mgf 146743
SUMNER.mgf 147647


In [14]:
len(collected_spectra)

147647

# Convert smiles into inchi where necessary

In [15]:
from rdkit import Chem

def smiles_to_inchi(smiles):
    mol = Chem.MolFromSmiles(smiles)
    try: 
        inchi = Chem.inchi.MolToInchi(mol)
    except:
        print("Error for smiles:", smiles)
        inchi = "N/A"
    return inchi

def inchi_to_inchikey(inchi):
    try:
        inchikey = Chem.inchi.InchiToInchiKey(inchi)
    except:
        print("Error for inchi to inchikey:", inchi)
        inchikey = "N/A"   
    return inchikey

In [16]:
# to "save" for next testing 
#collected_spectra_save = collected_spectra.copy()

In [17]:
#collected_spectra = collected_spectra_save.copy()

### Make inchi strings more homogeneous in style

In [18]:
for i, spectrum in enumerate(collected_spectra):
    inchi = 'N/A'
    
    spectrum_new = []
    for l in spectrum:
        if l.startswith('INCHI='):
            inchi = l.strip().split('INCHI=')[1]
            
            if inchi is None \
                or inchi in ['N/A', 'NA', 0, '0'] \
                or len(inchi) == 0:
                continue
            else:
                if (inchi.startswith('"InChI=') and inchi.endswith('"')):
                    pass
                elif (inchi.startswith('InChI=')):
                    inchi = '"' + inchi + '"'
                elif inchi.endswith('"'):
                    inchi = '"InChI=' + inchi
                else:
                    inchi = '"InChI=' + inchi + '"'
                l = 'INCHI=%s\n' % inchi
        spectrum_new.append(l)
    collected_spectra[i] = spectrum_new 

In [20]:
collected_spectra[0]

['BEGIN IONS\n',
 'PEPMASS=391.285\n',
 'CHARGE=1\n',
 'MSLEVEL=2\n',
 'SOURCE_INSTRUMENT=ESI-Orbitrap\n',
 'FILENAME=Standards_n_1ugmL_chenodeoxycholic.mzXML\n',
 'SEQ=*..*\n',
 'IONMODE=Negative\n',
 'ORGANISM=BILELIB19\n',
 'NAME=chenodeoxycholic acid M-H\n',
 'PI=Dorrestein\n',
 'DATACOLLECTOR=A. Jarmusch\n',
 'SMILES=C[C@H](CCC(=O)O)[C@H]1CC[C@@H]2[C@@]1(CC[C@H]3[C@H]2[C@@H](C[C@H]4[C@@]3(CC[C@H](C4)O)C)O)C\n',
 'INCHI="InChI=1S/C24H40O4/c1-14(4-7-21(27)28)17-5-6-18-22-19(9-11-24(17,18)3)23(2)10-8-16(25)12-15(23)13-20(22)26/h14-20,22,25-26H,4-13H2,1-3H3,(H,27,28)/t14?,15-,16+,17+,18-,19-,20+,22-,23?,24?/m0/s1"\n',
 'INCHIAUX=N/A\n',
 'PUBMED=N/A\n',
 'SUBMITUSER=mpanitchpakdi\n',
 'TAGS=\n',
 'LIBRARYQUALITY=1\n',
 'SPECTRUMID=CCMSLIB00005435444\n',
 'SCANS=1688\n',
 '51.787151\t18114.681641\n',
 '58.22208\t18008.097656\n',
 '71.187103\t21309.207031\n',
 '75.990234\t20840.488281\n',
 '80.070091\t20326.140625\n',
 '91.880928\t23875.916016\n',
 '95.083244\t23740.320312\n',
 '105.069

In [22]:
# save data here
import pickle
filename = 'collected_spectra_lst'

with open(filename, "wb") as output:
    pickle.dump(collected_spectra, output, pickle.HIGHEST_PROTOCOL);

In [27]:
import pickle
filename = 'collected_spectra_lst'
collected_spectra = pickle.load(open(filename, "rb"))

### Search for spectra with smiles but no inchi (and try to convert smiles to inchi)

In [29]:
"""old version
inchi_lst = []
inchikey_lst = []
smiles_lst = []

conv = ob.OBConversion()
conv.SetInAndOutFormats("inchi", "inchikey")
mol = ob.OBMol()

changes_to_spectra = []
for i, spectrum in enumerate(collected_spectra):
    smiles = 'N/A'
    inchi = 'N/A'
    inchikey = 'N/A'
    modified = False
    #spectrum_new = []
    
    inchi_line = 0
    inchikey_line = 0
    inchiaux_line = 0
    for m, l in enumerate(spectrum):
        if l.startswith('SMILES'):
            smiles = l.strip().split('SMILES=')[1]
        if l.startswith('INCHI='):
            inchi = l.strip().split('INCHI=')[1]
            inchi_line = m
        if l.startswith('INCHIKEY='):
            inchikey = l.strip().split('INCHIKEY=')[1]
            inchikey_line = m
        if l.startswith('INCHIAUX='):
            inchiaux_line = m
        if inchikey_line == 0:
            if inchiaux_line == 0:
                spectrum.insert(inchikey_line+1, 'INCHIKEY=N/A\n')
            else:
                spectrum[inchiaux_line] = 'INCHIKEY=N/A\n'           
        
    # Add to lists
    smiles_lst.append(smiles)
    inchi_lst.append(inchi)
    inchikey_lst.append(inchikey)
    
    # Look for cases with smiles BUT NO inchi 
    if inchi is None \
        or inchi in ['N/A', 'NA', 0, '0'] \
        or len(inchi) == 0:
            if smiles is not None \
                and smiles not in ['N/A', 'NA', 0, '0', 'InChI'] \
                and len(smiles) > 0:  
                    for m, l in enumerate(spectrum):
                        if l.startswith('INCHI='):
                            inchi = smiles_to_inchi(smiles)
                            l = 'INCHI=%s\n' % inchi
                            spectrum[m] = l
                            modified = True
                            inchi_lst[-1] = inchi 
                            continue #assuming there is only one line with inchi

    # Look for missing inchikeys
    if inchikey is None \
        or inchikey in ['N/A', 'NA', 0, '0'] \
        or len(inchikey) == 0:
            if len(inchi) > 5:
                conv.ReadString(mol, inchi_lst[741])
                found_inchikey = conv.WriteString(mol)
                #found_inchikey = inchi_to_inchikey(inchi)
                for m, l in enumerate(spectrum):
                    if l.startswith('INCHIKEY='):
                        l = 'INCHIKEY=%s\n' % found_inchikey
                        spectrum[m] = l
                        modified = True
                        inchikey_lst[-1] = found_inchikey

    # Replace spectrum with modified one                                 
    if modified:                                       
        collected_spectra[i] = spectrum                  
        #print("Added inchi or inchikey to spectrum", i, inchi_lst[-1], inchikey_lst[-1])
        changes_to_spectra.append((i, inchi_lst[-1], inchikey_lst[-1]))
        """

In [28]:
from openbabel import openbabel as ob
from openbabel import pybel

inchi_lst = []
inchikey_lst = []
smiles_lst = []

inchi_to_keys = ob.OBConversion()
inchi_to_keys.SetInAndOutFormats("inchi", "inchikey")
mol = ob.OBMol()
smiles_to_inchi = ob.OBConversion()
smiles_to_inchi.SetInAndOutFormats("smi", "inchi")
#conv.SetOptions("K", conv.OUTOPTIONS)

changes_to_spectra = []
for i, spectrum in enumerate(collected_spectra[:5000]):
    # show progress
    if i // 10000 == i/10000:
        print("running spectra no", i)
    smiles = 'N/A'
    inchi = 'N/A'
    inchikey = 'N/A'
    modified = False
    #spectrum_new = []
    
    smiles_line = 0
    inchi_line = 0
    inchikey_line = 0
    inchiaux_line = 0
    for m, l in enumerate(spectrum):
        if l.startswith('SMILES'):
            smiles = l.strip().split('SMILES=')[1]
            smiles_line = 0
        if l.startswith('INCHI='):
            inchi = l.strip().split('INCHI=')[1]
            inchi_line = m
        if l.startswith('INCHIKEY='):
            inchikey = l.strip().split('INCHIKEY=')[1]
            inchikey_line = m
        if l.startswith('INCHIAUX='):
            inchiaux_line = m
            
    if inchi_line == 0: # Add inchi line if not present
        spectrum.insert(smiles_line+1, 'INCHI=N/A\n')

    if inchikey_line == 0: # Add inchikey line if not present
        if inchiaux_line == 0:
            spectrum.insert(inchi_line+1, 'INCHIKEY=N/A\n')
        else:
            spectrum[inchiaux_line] = 'INCHIKEY=N/A\n'           
        
    # Add to lists
    smiles_lst.append(smiles)
    inchi_lst.append(inchi)
    inchikey_lst.append(inchikey)
    
    # Look for cases with smiles BUT NO inchi 
    if inchi is None \
        or inchi in ['N/A', 'NA', 0, '0'] \
        or len(inchi) == 0:
            if smiles is not None \
                and smiles not in ['N/A', 'NA', 0, '0', 'InChI'] \
                and len(smiles) > 0:  
                    for m, l in enumerate(spectrum):
                        if l.startswith('INCHI='):
                            #inchi = smiles_to_inchi(smiles)
                            
                            smiles_to_inchi = ob.OBConversion()
                            smiles_to_inchi.SetInAndOutFormats("smi", "inchi")
                            mol = ob.OBMol()
                            smiles_to_inchi.ReadString(mol, smiles)
                            inchi = smiles_to_inchi.WriteString(mol)
                            if inchi.endswith("\n"):
                                #print(inchi, i)
                                inchi = inchi[:-2]
                            l = 'INCHI=' + '"' + inchi+ '"\n'
                            spectrum[m] = l
                            modified = True
                            inchi_lst[-1] = inchi 
                            continue #assuming there is only one line with inchi

    """    # Look for missing inchikeys
    if inchikey is None \
        or inchikey in ['N/A', 'NA', 0, '0'] \
        or len(inchikey) == 0:
            if len(inchi) > 5:
                inchi_to_keys = ob.OBConversion()
                inchi_to_keys.SetInAndOutFormats("inchi", "inchikey")
                mol = ob.OBMol()
                inchi_to_keys.ReadString(mol, inchi)
                found_inchikey = inchi_to_keys.WriteString(mol)

                for m, l in enumerate(spectrum):
                    if l.startswith('INCHIKEY='):
                        l = 'INCHIKEY=%s\n' % found_inchikey
                        spectrum[m] = l
                        modified = True
                        inchikey_lst[-1] = found_inchikey"""

    # Replace spectrum with modified one                                 
    if modified:                                       
        collected_spectra[i] = spectrum                  
        #print("Added inchi or inchikey to spectrum", i, inchi_lst[-1], inchikey_lst[-1])
        changes_to_spectra.append((i, inchi_lst[-1], inchikey_lst[-1]))

running spectra no 0
InChI=1S/C60H97N11O14/c1-11-15-17-22-39-30-48(75)63-42(23-25-72)53(78)64-41(14-4)60(85)71-33-40(74)31-47(71)57(82)65-43(24-26-73)54(79)67-46(29-38-20-18-16-19-21-38)55(80)66-45(28-35(7)8)56(81)69-51(37(10)13-3)59(84)70-50(36(9)12-2)58(83)68-44(27-34(5)6)52(77)61-32-49(76)62-39/h14,16,18-21,34-37,39-40,42-47,50-51,72-74H,11-13,15,17,22-33H2,1-10H3,(H,61,77)(H,62,76)(H,63,75)(H,64,78)(H,65,82)(H,66,80)(H,67,79)(H,68,83)(H,69,81)(H,70,84)/b41-14-/t36-,37-,39+,40+,42-,43-,44-,45+,46+,47-,50+,51-/m0/s1
 745
InChI=1S/C50H80N8O11/c1-17-30(7)41-46(65)52-26-38(59)56(14)36(24-28(3)4)44(63)51-27-39(60)58(16)40(29(5)6)47(66)57(15)37(25-34-22-20-19-21-23-34)45(64)53-32(9)42(61)50(11,12)49(68)55(13)33(10)43(62)54-35(18-2)31(8)48(67)69-41/h19-23,28-33,35-37,40-41H,17-18,24-27H2,1-16H3,(H,51,63)(H,52,65)(H,53,64)(H,54,62)
 746
InChI=1S/C72H110N12O20/c1-6-8-9-10-11-12-13-14-15-16-17-20-48(87)41-58(89)76-51(32-35-59(90)91)65(96)77-50(21-18-37-73)64(95)80-55-40-46-25-29-49(30-26-46)1

InChI=1S/C81H142N20O21/c1-27-80(25,71(120)90-49(31-33-54(82)104)60(109)87-48(41-102)36-42(3)4)100-70(119)78(21,22)96-63(112)52(38-44(7)8)89-64(113)53-30-29-35-101(53)73(122)79(23,24)97-62(111)51(37-43(5)6)88-57(107)40-85-67(116)75(15,16)98-65(114)58(45(9)10)92-68(117)76(17,18)95-61(110)50(32-34-55(83)105)91-72(121)81(26,28-2)99-69(118)77(19,20)94-59(108)46(11)86-56(106)39-84-66(115)74(13,14)93-47(12)103/h42-46,48-53,58,102H,27-41H2,1-26H3,(H2,82,104)(H2,83,105)(H,84,115)(H,85,116)(H,86,106)(H,87,109)(H,88,107)(H,89,113)(H,90,120)(H,91,121)(H,92,117)(H,93,103)(H,94,108)(H,95,110)(H,96,112)(H,97,111)(H,98,114)(H,99,118)(H,100,119)
 908
InChI=1S/C27H47N5O7/c1-9-10-18-12-20(33)28-13-21(34)31-22(15(4)5)26(37)30-19(11-14(2)3)25(36)29-17(8)24(35)32-23(16(6)7)27(38)39-18/h14-19,22-23H,9-13H2,1-8H3,(H,28,33)(H,29,36)(H,30,37)(H,31,34)(H,32,35)/t17-,18+,19+,22-,23-/m0/s1
 909
InChI=1S/C44H55N3O9/c1-27(2)36-39(48)45-33(24-30-18-12-9-13-19-30)42(51)55-37(28(3)4)40(49)47(8)35(26-32-22-16-11-17-23-3

 1052
InChI=1S/C61H111N11O11/c1-24-26-27-28-29-44-53(75)64-43(25-2)57(79)66(17)34-49(73)67(18)45(30-35(3)4)55(77)65-50(39(11)12)60(82)69(20)46(31-36(5)6)54(76)62-41(15)52(74)63-42(16)56(78)70(21)47(32-37(7)8)58(80)71(22)48(33-38(9)10)59(81)72(23)51(40(13)14)61(83)68(44)19/h35-48,50-51H,24-34H2,1-23H3,(H,62,76)(H,63,74)(H,64,75)(H,65,77)/t41-,42+,43+,44-,45-,46+,47-,48-,50-,51+/m0/s1
 1053
InChI=1S/C43H64N8O13/c1-20(2)15-27-35(55)36(56)42(62)49-28(16-21(3)4)38(58)48-30(19-31(44)53)37(57)45-23(7)43(63)64-25(9)34(41(61)47-27)51-40(60)33(24(8)52)50-39(59)29(46-32(54)17-22(5)6)18-26-13-11-10-12-14-26/h10-14,20-25,27-30,33-34,52H,15-19H2,1-9H3,(H2,44,53)(H,45,57)(H,46,54)(H,47,61)(H,48,58)(H,49,62)(H,50,59)(H,51,60)/t23-,24?,25?,27?,28+,29+,30+,33+,34+/m1/s1
 1054
InChI=1S/C58H102N12O13/c1-31(2)26-37(30-71)60-50(78)42-20-18-24-69(42)54(82)57(14,15)67-48(76)40(28-33(5)6)61-47(75)39(27-32(3)4)62-51(79)43-21-19-25-70(43)55(83)58(16,17)68-49(77)41(29-34(7)8)63-52(80)45(35(9)10)65-46(74)38(22-23-

InChI=1S/C9H17N5S/c1-5-10-7-12-8(11-6(2)3)14-9(13-7)15-4/h6H,5H2,1-4H3,(H2,10,11,12,13,14)
 1783
InChI=1S/C15H16Cl3N3O2/c1-2-4-20(15(22)21-5-3-19-10-21)6-7-23-14-12(17)8-11(16)9-13(14)18/h3,5,8-10H,2,4,6-7H2,1H3
 1841
InChI=1S/C19H21ClN2O/c20-16-12-10-15(11-13-16)14-22(18-8-4-5-9-18)19(23)21-17-6-2-1-3-7-17/h1-3,6-7,10-13,18H,4-5,8-9,14H2,(H,21,23)
 1856
InChI=1S/C15H18ClN3O/c1-11(12-2-3-12)15(20,8-19-10-17-9-18-19)13-4-6-14(16)7-5-13/h4-7,9-12,20H,2-3,8H2,1H3
 1876
InChI=1S/C15H18ClN3O/c1-11(12-2-3-12)15(20,8-19-10-17-9-18-19)13-4-6-14(16)7-5-13/h4-7,9-12,20H,2-3,8H2,1H3
 1877
InChI=1S/C21H27N7O14P2/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(32)14(30)11(41-21)6-39-44(36,37)42-43(34,35)38-5-10-13(29)15(31)20(40-10)27-3-1-2-9(4-27)18(23)33/h1-4,7-8,10-11,13-16,20-21,29-32H,5-6H2,(H5-,22,23,24,25,33,34,35,36,37)
 1997
InChI=1S/C5H10N2O3/c6-3(5(9)10)1-2-4(7)8/h3H,1-2,6H2,(H2,7,8)(H,9,10)
 1998
InChI=1S/C2H7NO2S/c3-1-2-6(4)5/h1-3H2,(H,4,5)
 1999
InChI=1S/C4H9NO3/c1-2(6)3(5)4(7)8/h2-3,6H,5H2,

InChI=1S/C8H15NOS2/c9-8(10)4-2-1-3-7-5-6-11-12-7/h7H,1-6H2,(H2,9,10)
 2181
InChI=1S/C9H8O4/c10-7-3-1-6(2-4-7)5-8(11)9(12)13/h1-4,10H,5H2,(H,12,13)
 2182
InChI=1S/C10H10N2O/c11-10(13)5-7-6-12-9-4-2-1-3-8(7)9/h1-4,6,12H,5H2,(H2,11,13)
 2183
InChI=1S/C9H9NO3/c11-8(12)6-10-9(13)7-4-2-1-3-5-7/h1-5H,6H2,(H,10,13)(H,11,12)
 2184
InChI=1S/C15H13I2NO4/c16-11-5-8(7-13(18)15(20)21)6-12(17)14(11)22-10-3-1-9(19)2-4-10/h1-6,13,19H,7,18H2,(H,20,21)/t13-/m1/s1
 2185
InChI=1S/C7H6O/c8-6-7-4-2-1-3-5-7/h1-6H
 2186
InChI=1S/C9H13NO2/c1-12-9-6-7(4-5-10)2-3-8(9)11/h2-3,6,11H,4-5,10H2,1H3
 2187
InChI=1S/C10H7NO2/c12-10(13)9-6-5-7-3-1-2-4-8(7)11-9/h1-6H,(H,12,13)
 2188
InChI=1S/C10H12N2O/c11-4-3-7-6-12-10-2-1-8(13)5-9(7)10/h1-2,5-6,12-13H,3-4,11H2
 2189
InChI=1S/C6H5N5O/c7-6-10-4-3(5(12)11-6)8-1-2-9-4/h1-2H,(H3,7,9,10,11,12)
 2190
InChI=1S/C6H7NO/c7-5-3-1-2-4-6(5)8/h1-4,8H,7H2
 2191
InChI=1S/C11H9NO3/c13-10(11(14)15)5-7-6-12-9-4-2-1-3-8(7)9/h1-4,6,12H,5H2,(H,14,15)
 2192
InChI=1S/C6H6O2/c7-5-1-2-6(8)4-3-5/h1-

 2426
InChI=1S/C6H6N4S/c1-11-6-4-5(8-2-7-4)9-3-10-6/h2-3H,1H3,(H,7,8,9,10)
 2427
InChI=1S/C7H7NO3/c8-5-1-4(7(10)11)2-6(9)3-5/h1-3,9H,8H2,(H,10,11)
 2428
InChI=1S/C6H6O2/c7-5-3-1-2-4-6(5)8/h1-4,7-8H
 2429
InChI=1S/C7H6O4/c8-5-2-1-4(7(10)11)3-6(5)9/h1-3,8-9H,(H,10,11)
 2430
InChI=1S/C8H8O3/c9-7(8(10)11)6-4-2-1-3-5-6/h1-5,7,9H,(H,10,11)
 2431
InChI=1S/C10H20O3/c11-9-7-5-3-1-2-4-6-8-10(12)13/h11H,1-9H2,(H,12,13)
 2432
InChI=1S/C8H8O4/c9-6-2-1-5(3-7(6)10)4-8(11)12/h1-3,9-10H,4H2,(H,11,12)
 2433
InChI=1S/C10H13N5/c1-7(2)3-4-11-9-8-10(13-5-12-8)15-6-14-9/h3,5-6H,4H2,1-2H3,(H2,11,12,13,14,15)
 2434
InChI=1S/C8H8O4/c1-12-7-4-5(8(10)11)2-3-6(7)9/h2-4,9H,1H3,(H,10,11)
 2435
InChI=1S/C7H7NO3/c8-6-4(7(10)11)2-1-3-5(6)9/h1-3,9H,8H2,(H,10,11)
 2436
InChI=1S/C10H10N2O/c11-10(13)5-7-6-12-9-4-2-1-3-8(7)9/h1-4,6,12H,5H2,(H2,11,13)
 2437
InChI=1S/C9H9NO3/c11-8(12)6-10-9(13)7-4-2-1-3-5-7/h1-5H,6H2,(H,10,13)(H,11,12)
 2438
InChI=1S/C5H8O4/c1-2-3(4(6)7)5(8)9/h3H,2H2,1H3,(H,6,7)(H,8,9)
 2439
InChI=1S/C15H13I2

InChI=1S/C14H16N2O3S2/c1-16-10(14(18)19)7-21-13(16)9-6-20-12(15-9)8-4-2-3-5-11(8)17/h2-5,9-10,13,15H,6-7H2,1H3,(H,18,19)/b12-8+/t9-,10+,13?/m1/s1
 3582
InChI=1S/C20H32O2/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-18-19-20(21)22/h6-7,9-10,12-13,15-16H,2-5,8,11,14,17-19H2,1H3,(H,21,22)
 3695
InChI=1S/C27H46O/c1-18(2)7-6-8-19(3)23-11-12-24-22-10-9-20-17-21(28)13-15-26(20,4)25(22)14-16-27(23,24)5/h9,18-19,21-25,28H,6-8,10-17H2,1-5H3
 3696
InChI=1S/C21H28O5/c1-19-7-5-13(23)9-12(19)3-4-14-15-6-8-21(26,17(25)11-22)20(15,2)10-16(24)18(14)19/h9,14-15,18,22,26H,3-8,10-11H2,1-2H3
 3697
InChI=1S/C24H50NO7P/c1-5-6-7-8-9-10-11-12-13-14-15-16-17-18-24(27)30-21-23(26)22-32-33(28,29)31-20-19-25(2,3)4/h23,26H,5-22H2,1-4H3
 3698
InChI=1S/C26H52NO7P/c1-5-6-7-8-9-10-11-12-13-14-15-16-17-18-19-20-26(29)32-23-25(28)24-34-35(30,31)33-22-21-27(2,3)4/h12-13,25,28H,5-11,14-24H2,1-4H3
 3699
InChI=1S/C18H34O2/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-18(19)20/h9-10H,2-8,11-17H2,1H3,(H,19,20)
 3700
InChI=1S/C18H39

 4558
InChI=1S/C45H75NO12/c1-28-15-18-34(49)23-35-13-12-14-36(57-35)24-40(54-9)37(26-47)41(55-10)25-39(53-8)32(5)45(58-42(51)20-16-28)33(6)43(52)29(2)17-19-38(50)31(4)44(56-11)30(3)21-22-46(7)27-48/h12-13,15-16,20-22,27,29-37,39-41,43-45,47,49,52H,14,17-19,23-26H2,1-11H3/b20-16+,22-21+,28-15+/t29-,30+,31-,32-,33-,34-,35-,36-,37+,39+,40-,41-,43-,44+,45-/m0/s1
 4565
InChI=1S/C45H75NO12/c1-28-15-18-34(49)23-35-13-12-14-36(57-35)24-40(54-9)37(26-47)41(55-10)25-39(53-8)32(5)45(58-42(51)20-16-28)33(6)43(52)29(2)17-19-38(50)31(4)44(56-11)30(3)21-22-46(7)27-48/h12-13,15-16,20-22,27,29-37,39-41,43-45,47,49,52H,14,17-19,23-26H2,1-11H3/b20-16+,22-21+,28-15+/t29-,30+,31-,32-,33-,34-,35-,36-,37+,39+,40-,41-,43-,44+,45-/m0/s1
 4566
InChI=1S/C45H75NO12/c1-28-15-18-34(49)23-35-13-12-14-36(57-35)24-40(54-9)37(26-47)41(55-10)25-39(53-8)32(5)45(58-42(51)20-16-28)33(6)43(52)29(2)17-19-38(50)31(4)44(56-11)30(3)21-22-46(7)27-48/h12-13,15-16,20-22,27,29-37,39-41,43-45,47,49,52H,14,17-19,23-26H2,1-11H3/b20-16

InChI=1S/C27H31NO6/c1-16(2)21-13-15-26(3,34-21)14-12-17-6-11-20-22(23(17)29)27(31,24(33-5)25(30)28-20)18-7-9-19(32-4)10-8-18/h6-12,14,21,24,29,31H,1,13,15H2,2-5H3,(H,28,30)/b14-12+/t21-,24+,26-,27-/m1/s1
 4864
InChI=1S/C18H26O4/c1-11-5-6-12-3-2-4-16(20)18(12)15(11)8-7-14-9-13(19)10-17(21)22-14/h3,5-6,11,13-16,18-20H,2,4,7-10H2,1H3/t11-,13+,14+,15-,16-,18?/m0/s1
 4865
InChI=1S/C23H34O5/c1-4-14(2)23(26)28-20-7-5-6-16-9-8-15(3)19(22(16)20)11-10-18-12-17(24)13-21(25)27-18/h6,8-9,14-15,17-20,22,24H,4-5,7,10-13H2,1-3H3/t14-,15-,17+,18+,19-,20-,22?/m0/s1
 4866
InChI=1S/C16H12O5/c1-7-3-9-13(11(18)4-7)16(20)14-10(15(9)19)5-8(17)6-12(14)21-2/h3-6,17-18H,1-2H3
 4867
InChI=1S/C17H16O8/c1-24-13-6-9(19)5-10(17(23)25-2)14(13)16(22)15-11(20)3-8(7-18)4-12(15)21/h3-6,18-21H,7H2,1-2H3
 4868
InChI=1S/C36H43N5O6/c1-22(2)19-29-36(47)41(5)31(21-25-15-17-26(42)18-16-25)33(44)38-28-14-10-9-13-27(28)32(43)37-23(3)35(46)40(4)30(34(45)39-29)20-24-11-7-6-8-12-24/h6-18,22-23,29-31,42H,19-21H2,1-5H3,(H,37,43)(H,38,4

In [31]:
inchi_lst[:100]

['"InChI=1S/C24H40O4/c1-14(4-7-21(27)28)17-5-6-18-22-19(9-11-24(17,18)3)23(2)10-8-16(25)12-15(23)13-20(22)26/h14-20,22,25-26H,4-13H2,1-3H3,(H,27,28)/t14?,15-,16+,17+,18-,19-,20+,22-,23?,24?/m0/s1"',
 '"InChI=1S/C24H40O4/c1-14(4-7-21(27)28)17-5-6-18-22-19(9-11-24(17,18)3)23(2)10-8-16(25)12-15(23)13-20(22)26/h14-20,22,25-26H,4-13H2,1-3H3,(H,27,28)/t14?,15-,16+,17+,18-,19-,20+,22-,23?,24?/m0/s2"',
 '"InChI=1S/C24H40O4/c1-14(4-7-21(27)28)17-5-6-18-22-19(9-11-24(17,18)3)23(2)10-8-16(25)12-15(23)13-20(22)26/h14-20,22,25-26H,4-13H2,1-3H3,(H,27,28)/t14?,15-,16+,17+,18-,19-,20+,22-,23?,24?/m0/s2"',
 '"InChI=1S/C24H40O5/c1-13(4-7-21(28)29)16-5-6-17-22-18(12-20(27)24(16,17)3)23(2)9-8-15(25)10-14(23)11-19(22)26/h13-20,22,25-27H,4-12H2,1-3H3,(H,28,29)/t13?,14-,15+,16+,17-,18-,19+,20-,22-,23?,24?/m0/s1"',
 '"InChI=1S/C24H40O5/c1-13(4-7-21(28)29)16-5-6-17-22-18(12-20(27)24(16,17)3)23(2)9-8-15(25)10-14(23)11-19(22)26/h13-20,22,25-27H,4-12H2,1-3H3,(H,28,29)/t13?,14-,15+,16+,17-,18-,19+,20-,22-,23?,24?/

In [None]:
inchikeys_new = []
for i, inchi in enumerate(inchi_lst):
    inchi_to_keys = ob.OBConversion()
    inchi_to_keys.SetInAndOutFormats("inchi", "inchikey")
    mol = ob.OBMol()
    try:
        inchi_to_keys.ReadString(mol, inchi)
        found_inchikey = inchi_to_keys.WriteString(mol)
        inchikeys_new.append(found_inchikey)
    except:
        print(i)
        print(found_inchikey)

In [29]:
collected_spectra[2200] #16892]

['BEGIN IONS\n',
 'INCHI="InChI=1S/C8H9NO3/c1-5-8(12)7(4-11)6(3-10)2-9-5/h2,4,10,12H,3H2,1H"\n',
 'TITLE=PYRIDOXAL\n',
 'LIBRARYQUALITY=1\n',
 'SMILES=CC1=NC=C(C(=C1O)C=O)CO\n',
 'INCHIKEY=RADKZDMFGJYCBB-UHFFFAOYSA-N\n',
 'PEPMASS=168.07\n',
 'CHARGE=0\n',
 'MSLEVEL=2\n',
 'SOURCE_INSTRUMENT=LC-ESI-Q-Exactive Plus\n',
 'FILENAME=IROA_PLATE_pos_6_D.mzXML\n',
 'SEQ=*..*\n',
 'IONMODE=Positive\n',
 'ORGANISM=GNPS-EMBL-MCF\n',
 'NAME=PYRIDOXAL M+H\n',
 'PI=Alexandrov Theodore\n',
 'DATACOLLECTOR=Prasad\n',
 'PUBMED=1050\n',
 'SUBMITUSER=pphapale\n',
 'TAGS=\n',
 'SPECTRUMID=CCMSLIB00000578085\n',
 'SCANS=6302\n',
 '51.023617\t1000.994263\n',
 '53.039272\t3364.53125\n',
 '55.018539\t1093.819824\n',
 '62.605732\t797.977966\n',
 '64.853333\t802.51593\n',
 '65.039101\t1161.195923\n',
 '67.054955\t6010.372559\n',
 '71.049614\t3571.911865\n',
 '72.052971\t987.144287\n',
 '72.877701\t3121.326904\n',
 '77.459198\t694.634216\n',
 '79.681358\t811.427429\n',
 '81.033775\t791.575073\n',
 '81.057777\t8

In [12]:
testinchi = 'InChI=1S/C8H9NO3/c1-5-8(12)7(4-11)6(3-10)2-9-5/h2,4,10,12H,3H2,1H3'
collected_spectra[2200][1] = 'INCHI=' + '"' + testinchi+ '"\n'
collected_spectra[2200][:4]

['BEGIN IONS\n',
 'INCHI="InChI=1S/C8H9NO3/c1-5-8(12)7(4-11)6(3-10)2-9-5/h2,4,10,12H,3H2,1H3"\n',
 'TITLE=PYRIDOXAL\n',
 'LIBRARYQUALITY=1\n']

In [54]:
inchikey_lst[:20], smiles_lst[:20]

(['None',
  'None',
  'None',
  'None',
  'None',
  'None',
  'None',
  'None',
  'None',
  'None',
  'None',
  'None',
  'None',
  'None',
  'None',
  'None',
  'None',
  'None',
  'None',
  'None'],
 ['C[C@H](CCC(=O)O)[C@H]1CC[C@@H]2[C@@]1(CC[C@H]3[C@H]2[C@@H](C[C@H]4[C@@]3(CC[C@H](C4)O)C)O)C',
  'C[C@H](CCC(=O)O)[C@H]1CC[C@@H]2[C@@]1(CC[C@H]3[C@H]2[C@@H](C[C@H]4[C@@]3(CC[C@H](C4)O)C)O)C',
  'C[C@H](CCC(=O)O)[C@H]1CC[C@@H]2[C@@]1(CC[C@H]3[C@H]2[C@@H](C[C@H]4[C@@]3(CC[C@H](C4)O)C)O)C',
  'C[C@H](CCC(=O)O)[C@H]1CC[C@@H]2[C@@]1([C@H](C[C@H]3[C@H]2[C@@H](C[C@H]4[C@@]3(CC[C@H](C4)O)C)O)O)C',
  'C[C@H](CCC(=O)O)[C@H]1CC[C@@H]2[C@@]1([C@H](C[C@H]3[C@H]2[C@@H](C[C@H]4[C@@]3(CC[C@H](C4)O)C)O)O)C',
  'C[C@H](CCC(=O)O)[C@H]1CC[C@@H]2[C@@]1([C@H](C[C@H]3[C@H]2[C@@H](C[C@H]4[C@@]3(CC[C@H](C4)O)C)O)O)C',
  'C[C@H](CCC(=O)O)[C@H]1CC[C@@H]2[C@@]1([C@H](C[C@H]3[C@H]2CC[C@H]4[C@@]3(CC[C@H](C4)O)C)O)C',
  'C[C@H](CCC(=O)O)[C@H]1CC[C@@H]2[C@@]1([C@H](C[C@H]3[C@H]2CC[C@H]4[C@@]3(CC[C@H](C4)O)C)O)C',
  'C

In [51]:
inchi_lst[:20], smiles_lst[:20]

(['"InChI=1S/C24H40O4/c1-14(4-7-21(27)28)17-5-6-18-22-19(9-11-24(17,18)3)23(2)10-8-16(25)12-15(23)13-20(22)26/h14-20,22,25-26H,4-13H2,1-3H3,(H,27,28)/t14?,15-,16+,17+,18-,19-,20+,22-,23?,24?/m0/s1"',
  '"InChI=1S/C24H40O4/c1-14(4-7-21(27)28)17-5-6-18-22-19(9-11-24(17,18)3)23(2)10-8-16(25)12-15(23)13-20(22)26/h14-20,22,25-26H,4-13H2,1-3H3,(H,27,28)/t14?,15-,16+,17+,18-,19-,20+,22-,23?,24?/m0/s2"',
  '"InChI=1S/C24H40O4/c1-14(4-7-21(27)28)17-5-6-18-22-19(9-11-24(17,18)3)23(2)10-8-16(25)12-15(23)13-20(22)26/h14-20,22,25-26H,4-13H2,1-3H3,(H,27,28)/t14?,15-,16+,17+,18-,19-,20+,22-,23?,24?/m0/s2"',
  '"InChI=1S/C24H40O5/c1-13(4-7-21(28)29)16-5-6-17-22-18(12-20(27)24(16,17)3)23(2)9-8-15(25)10-14(23)11-19(22)26/h13-20,22,25-27H,4-12H2,1-3H3,(H,28,29)/t13?,14-,15+,16+,17-,18-,19+,20-,22-,23?,24?/m0/s1"',
  '"InChI=1S/C24H40O5/c1-13(4-7-21(28)29)16-5-6-17-22-18(12-20(27)24(16,17)3)23(2)9-8-15(25)10-14(23)11-19(22)26/h13-20,22,25-27H,4-12H2,1-3H3,(H,28,29)/t13?,14-,15+,16+,17-,18-,19+,20-,22-,23?

In [32]:
inchi_lst[740],inchi_lst[70]

('"InChI=1S/C24H27NO2/c1-3-5-12-19(4-2)18-27-24(26)22(17-25)23(20-13-8-6-9-14-20)21-15-10-7-11-16-21/h6-11,13-16,19H,3-5,12,18H2,1-2H3"',
 '"InChI=1S/C26H43NO6/c1-14(4-7-22(31)27-13-23(32)33)17-5-6-18-24-19(12-21(30)26(17,18)3)25(2)9-8-16(28)10-15(25)11-20(24)29/h14-21,24,28-30H,4-13H2,1-3H3,(H,27,31)(H,32,33)/t14?,15-,16+,17+,18-,19-,20+,21-,24-,25?,26?/m0/s1"')

In [27]:
from openbabel import openbabel as ob
from openbabel import pybel

#moltest = pybel.readstring("inchi", inchi_lst[70])

In [25]:
#testinchi = "InChI=1S/C24H27NO2/c1-3-5-12-19(4-2)18-27-24(26)22(17-25)23(20-13-8-6-9-14-20)21-15-10-7-11-16-21/h6-11,13-16,19H,3-5,12,18H2,1-2H3"
testinchi = "InChI=1S/C24H40O4/c1-14(4-7-21(27)28)17-5-6-18-22-19(9-11-24(17,18)3)23(2)10-8-16(25)12-15(23)13-20(22)26/h14-20,22,25-26H,4-13H2,1-3H3,(H,27,28)/t14?,15-,16+,17+,18-,19-,20+,22-,23?,24?/m0/s1"
print(inchi_to_inchikey(testinchi))

RUDATBOHQWOJDD-GYHJSIKNSA-N


In [43]:
%%time
for i in range(1000):
    testinchi = testinchis[i%2]
    conv = ob.OBConversion()
    conv.SetInAndOutFormats("inchi", "inchikey")

    mol = ob.OBMol()
    conv.ReadString(mol, testinchi)
    inchikey = conv.WriteString(mol)
    #print(inchikey)

Wall time: 437 ms


In [33]:
testinchis = ['"InChI=1S/C24H40O4/c1-14(4-7-21(27)28)17-5-6-18-22-19(9-11-24(17,18)3)23(2)10-8-16(25)12-15(23)13-20(22)26/h14-20,22,25-26H,4-13H2,1-3H3,(H,27,28)/t14?,15-,16+,17+,18-,19-,20+,22-,23?,24?/m0/s1"',
  '"InChI=1S/C24H40O4/c1-14(4-7-21(27)28)17-5-6-18-22-19(9-11-24(17,18)3)23(2)10-8-16(25)12-15(23)13-20(22)26/h14-20,22,25-26H,4-13H2,1-3H3,(H,27,28)/t14?,15-,16+,17+,18-,19-,20+,22-,23?,24?/m0/s2"',
  '"InChI=1S/C24H40O4/c1-14(4-7-21(27)28)17-5-6-18-22-19(9-11-24(17,18)3)23(2)10-8-16(25)12-15(23)13-20(22)26/h14-20,22,25-26H,4-13H2,1-3H3,(H,27,28)/t14?,15-,16+,17+,18-,19-,20+,22-,23?,24?/m0/s2"',
  '"InChI=1S/C24H40O5/c1-13(4-7-21(28)29)16-5-6-17-22-18(12-20(27)24(16,17)3)23(2)9-8-15(25)10-14(23)11-19(22)26/h13-20,22,25-27H,4-12H2,1-3H3,(H,28,29)/t13?,14-,15+,16+,17-,18-,19+,20-,22-,23?,24?/m0/s1"',
  '"InChI=1S/C24H40O5/c1-13(4-7-21(28)29)16-5-6-17-22-18(12-20(27)24(16,17)3)23(2)9-8-15(25)10-14(23)11-19(22)26/h13-20,22,25-27H,4-12H2,1-3H3,(H,28,29)/t13?,14-,15+,16+,17-,18-,19+,20-,22-,23?,24?/m0/s1"',
  '"InChI=1S/C24H40O5/c1-13(4-7-21(28)29)16-5-6-17-22-18(12-20(27)24(16,17)3)23(2)9-8-15(25)10-14(23)11-19(22)26/h13-20,22,25-27H,4-12H2,1-3H3,(H,28,29)/t13?,14-,15+,16+,17-,18-,19+,20-,22-,23?,24?/m0/s1"',
  '"InChI=1S/C24H40O4/c1-14(4-9-22(27)28)18-7-8-19-17-6-5-15-12-16(25)10-11-23(15,2)20(17)13-21(26)24(18,19)3/h14-21,25-26H,4-13H2,1-3H3,(H,27,28)/t14?,15-,16-,17+,18-,19+,20+,21+,23?,24?/m1/s1"',
  '"InChI=1S/C24H40O4/c1-14(4-9-22(27)28)18-7-8-19-17-6-5-15-12-16(25)10-11-23(15,2)20(17)13-21(26)24(18,19)3/h14-21,25-26H,4-13H2,1-3H3,(H,27,28)/t14?,15-,16-,17+,18-,19+,20+,21+,23?,24?/m1/s1"',
  '"InChI=1S/C24H40O4/c1-14(4-9-22(27)28)18-7-8-19-17-6-5-15-12-16(25)10-11-23(15,2)20(17)13-21(26)24(18,19)3/h14-21,25-26H,4-13H2,1-3H3,(H,27,28)/t14?,15-,16-,17+,18-,19+,20+,21+,23?,24?/m1/s1"',
  '"InChI=1S/C26H43NO5/c1-15(4-7-22(30)27-14-23(31)32)18-5-6-19-24-20(9-11-26(18,19)3)25(2)10-8-17(28)12-16(25)13-21(24)29/h15-21,24,28-29H,4-14H2,1-3H3,(H,27,30)(H,31,32)/t15?,16-,17+,18+,19-,20-,21+,24-,25?,26?/m0/s1"',
  '"InChI=1S/C26H43NO5/c1-15(4-7-22(30)27-14-23(31)32)18-5-6-19-24-20(9-11-26(18,19)3)25(2)10-8-17(28)12-16(25)13-21(24)29/h15-21,24,28-29H,4-14H2,1-3H3,(H,27,30)(H,31,32)/t15?,16-,17+,18+,19-,20-,21+,24-,25?,26?/m0/s1"',
  '"InChI=1S/C26H43NO5/c1-15(4-7-22(30)27-14-23(31)32)18-5-6-19-24-20(9-11-26(18,19)3)25(2)10-8-17(28)12-16(25)13-21(24)29/h15-21,24,28-29H,4-14H2,1-3H3,(H,27,30)(H,31,32)/t15?,16-,17+,18+,19-,20-,21+,24-,25?,26?/m0/s1"',
  '"InChI=1S/C26H43NO6/c1-14(4-7-22(31)27-13-23(32)33)17-5-6-18-24-19(12-21(30)26(17,18)3)25(2)9-8-16(28)10-15(25)11-20(24)29/h14-21,24,28-30H,4-13H2,1-3H3,(H,27,31)(H,32,33)/t14?,15-,16+,17+,18-,19-,20+,21-,24-,25?,26?/m0/s1"',
  '"InChI=1S/C26H43NO6/c1-14(4-7-22(31)27-13-23(32)33)17-5-6-18-24-19(12-21(30)26(17,18)3)25(2)9-8-16(28)10-15(25)11-20(24)29/h14-21,24,28-30H,4-13H2,1-3H3,(H,27,31)(H,32,33)/t14?,15-,16+,17+,18-,19-,20+,21-,24-,25?,26?/m0/s1"',
  '"InChI=1S/C26H43NO6/c1-14(4-7-22(31)27-13-23(32)33)17-5-6-18-24-19(12-21(30)26(17,18)3)25(2)9-8-16(28)10-15(25)11-20(24)29/h14-21,24,28-30H,4-13H2,1-3H3,(H,27,31)(H,32,33)/t14?,15-,16+,17+,18-,19-,20+,21-,24-,25?,26?/m0/s1"',
  '"InChI=1S/C26H43NO5/c1-15(4-9-23(30)27-14-24(31)32)19-7-8-20-18-6-5-16-12-17(28)10-11-25(16,2)21(18)13-22(29)26(19,20)3/h15-22,28-29H,4-14H2,1-3H3,(H,27,30)(H,31,32)/t15?,16-,17-,18+,19-,20+,21+,22+,25?,26?/m1/s1"',
  '"InChI=1S/C26H43NO5/c1-15(4-9-23(30)27-14-24(31)32)19-7-8-20-18-6-5-16-12-17(28)10-11-25(16,2)21(18)13-22(29)26(19,20)3/h15-22,28-29H,4-14H2,1-3H3,(H,27,30)(H,31,32)/t15?,16-,17-,18+,19-,20+,21+,22+,25?,26?/m1/s1"',
  '"InChI=1S/C26H43NO5/c1-15(4-9-23(30)27-14-24(31)32)19-7-8-20-18-6-5-16-12-17(28)10-11-25(16,2)21(18)13-22(29)26(19,20)3/h15-22,28-29H,4-14H2,1-3H3,(H,27,30)(H,31,32)/t15?,16-,17-,18+,19-,20+,21+,22+,25?,26?/m1/s1"',
  '"InChI=1S/C26H43NO6/c1-14(4-7-20(29)27-13-21(30)31)16-5-6-17-22-18(9-11-25(16,17)2)26(3)10-8-15(28)12-19(26)23(32)24(22)33/h14-19,22-24,28,32-33H,4-13H2,1-3H3,(H,27,29)(H,30,31)/t14-,15-,16-,17+,18+,19+,22+,23-,24+,25-,26-/m1/s1"',
  '"InChI=1S/C26H43NO6/c1-14(4-7-20(29)27-13-21(30)31)16-5-6-17-22-18(9-11-25(16,17)2)26(3)10-8-15(28)12-19(26)23(32)24(22)33/h14-19,22-24,28,32-33H,4-13H2,1-3H3,(H,27,29)(H,30,31)/t14-,15-,16-,17+,18+,19+,22+,23-,24+,25-,26-/m1/s1"']

In [38]:
201%2

1

In [40]:
%%time
for i in range(1000):
    testinchi = testinchis[i%2]
    conv.ReadString(mol, testinchi)
    inchikey = conv.WriteString(mol)

Wall time: 437 ms


In [49]:
smiles_to_inchi = ob.OBConversion()
smiles_to_inchi.SetInAndOutFormats("smi", "inchi")
#conv.SetOptions("K", conv.OUTOPTIONS)

mol = ob.OBMol()
smiles_to_inchi.ReadString(mol, testinchi)#smiles_lst[71])
inchi = smiles_to_inchi.WriteString(mol)
print(inchi)

InChI=1S/C26H43NO6/c1-14(4-7-22(31)27-13-23(32)33)17-5-6-18-24-19(12-21(30)26(17,18)3)25(2)9-8-16(28)10-15(25)11-20(24)29/h14-21,24,28-30H,4-13H2,1-3H3,(H,27,31)(H,32,33)/t14-,15+,16-,17-,18+,19+,20-,21+,24+,25+,26-/m1/s1



In [51]:
conv = ob.OBConversion()
conv.SetInAndOutFormats("smi", "inchi")
conv.SetOptions("K", conv.OUTOPTIONS)

mol = ob.OBMol()
conv.ReadString(mol, "CC(=O)Cl")
inchikey = conv.WriteString(mol)
print(inchikey)

WETWJCDKMRHUPV-UHFFFAOYSA-N



In [47]:
conv.ReadString(mol, inchi_lst[20])
inchikey = conv.WriteString(mol)
print(inchikey)

ZQYUKJFJPJDMMR-ZDWCHQGWSA-N



In [88]:

smiles = ['CCCC', 'CCCN']
mols = [pybel.readstring("smi", x) for x in smiles] # Create a list of two˓→molecules

fps = [x.calcfp() for x in mols]# Calculate their fingerprints
print(fps[0].bits, fps[1].bits)
print(fps[0] | fps[1])# Print the Tanimoto coefficient

[261, 385, 671] [83, 261, 349, 671, 907]
0.3333333333333333


In [98]:
#miles = ['CCCC', 'CCCN']
mols = [pybel.readstring("inchi", "InChI="+ x) for x in inchi_lst[:15]] #list of two˓→molecules

fps = [x.calcfp() for x in mols]# Calculate their fingerprints
print(fps[0].bits, fps[14].bits)
print(fps[1] | fps[14])# Print the Tanimoto coefficient

[50, 89, 261, 313, 330, 332, 385, 430, 439, 442, 516, 532, 600, 602, 612, 624, 635, 653, 671, 884, 985, 1009] [25, 50, 83, 89, 155, 191, 261, 268, 307, 313, 314, 330, 332, 337, 350, 353, 385, 402, 404, 430, 432, 434, 439, 442, 492, 516, 532, 544, 600, 602, 624, 641, 654, 671, 689, 705, 734, 782, 829, 884, 904, 907, 908, 916, 950, 980, 985, 994]
0.34615384615384615


In [67]:
collected_spectra_save[143150]

['BEGIN IONS\n',
 'PEPMASS=507.43\n',
 'CHARGE=1\n',
 'MSLEVEL=2\n',
 'SOURCE_INSTRUMENT=ESI-Flow-injection QqQ/MS\n',
 'FILENAME=respect_8_1_2014_GNPS_peaks.mgf\n',
 'SEQ=*..*\n',
 'IONMODE=Positive\n',
 'ORGANISM=RESPECT\n',
 "NAME=ReSpect:PS043401 Quercetin-3-O-glucose-6''-acetate|Quer-3-Glc-6pp-acetate|quercetin-3-O-beta-D-glucopyranosyl-6''-acetate|quercetin 3-(6''-acetylglucoside) [M+H]\n",
 'PI=Putative ReSpect Match\n',
 'DATACOLLECTOR=ReSpect\n',
 'SMILES=no data\n',
 'INCHI=N/A\n',
 'INCHIAUX=N/A\n',
 'PUBMED=N/A\n',
 'SUBMITUSER=mwang87\n',
 'TAGS=\n',
 'LIBRARYQUALITY=3\n',
 'SPECTRUMID=CCMSLIB00000215686\n',
 'SCANS=3522\n',
 '303.0\t87.0\n',
 '506.0\t275.0\n',
 '507.0\t999.0\n',
 '508.0\t299.0\n',
 'END IONS\n']

In [63]:
import sys

file_mgf = PATH_MS_DATA + 'all_gnps_inchi_added_191023.mgf'
with open(file_mgf, 'w') as f:
    for c in collected_spectra[:]:
        for l in c:
            try:
                f.write(l.encode('utf8', 'replace').decode('utf8', 'replace'))
            except UnicodeEncodeError:
                f.write(l.encode('ascii', 'replace').decode('utf8', 'replace'))
                print(l)
        f.write('\n')

TITLE=C16 Galactosyl(�) Ceramide (d18:1/16:0)

TITLE=(1R,2R,4S,16R,17R,20S)-13,20-dimethoxy-4,7,17,22,22-pentamethyl-11-oxo-5,10,21,23-tetraoxahexacyclo[18.2.1.0�,�?.0?,�?.0?,�?.0?,��]tricosa-6(14),7,12-trien-2-yl acetate

TITLE=(1R,2R,4S,16R,17R,20S)-13,20-dimethoxy-4,7,17,22,22-pentamethyl-11-oxo-5,10,21,23-tetraoxahexacyclo[18.2.1.0�,�?.0?,�?.0?,�?.0?,��]tricosa-6(14),7,12-trien-2-yl acetate

TITLE=(1R,2R,4S,16R,17R,20S)-13,20-dimethoxy-4,7,17,22,22-pentamethyl-11-oxo-5,10,21,23-tetraoxahexacyclo[18.2.1.0�,�?.0?,�?.0?,�?.0?,��]tricosa-6(14),7,12-trien-2-yl acetate

TITLE=(1R,2R,4S,16R,17R,20S)-13,20-dimethoxy-4,7,17,22,22-pentamethyl-11-oxo-5,10,21,23-tetraoxahexacyclo[18.2.1.0�,�?.0?,�?.0?,�?.0?,��]tricosa-6(14),7,12-trien-2-yl acetate

TITLE=11,22-dimethyl (6E,17E)-3,14-dioxo-8,19-bis({[(2S,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy})-4,9,15,20-tetraoxatricyclo[16.4.0.0?,��]docosa-6,10,17,21-tetraene-11,22-dicarboxylate

TITLE=11,22-dimethyl (6E,17E)-3,14-dioxo-8

TITLE=(1S,8S,9S,10S,13R)-6,9,10-trimethyl-2-oxo-4,14-dioxatetracyclo[7.5.0.0�,��.0�,?]tetradeca-3(7),5-dien-8-yl acetate

TITLE=(1S,8S,9S,10S,13R)-6,9,10-trimethyl-2-oxo-4,14-dioxatetracyclo[7.5.0.0�,��.0�,?]tetradeca-3(7),5-dien-8-yl acetate

TITLE=(4E)-8-hydroxy-4-(1-hydroxypropan-2-ylidene)-10-oxatricyclo[7.2.1.0�,?]dodecane-8-carboxylic acid

TITLE=(4E)-8-hydroxy-4-(1-hydroxypropan-2-ylidene)-10-oxatricyclo[7.2.1.0�,?]dodecane-8-carboxylic acid

TITLE=(2R,3S,4S,9R,12R,13R,14S,15R,16R,17S)-3,4,12,13,15,16-hexahydroxy-2,6,14,17-tetramethyl-10-oxatetracyclo[7.7.1.0�,?.0��,�?]heptadec-6-en-11-one

TITLE=(2R,3S,4S,9R,12R,13R,14S,15R,16R,17S)-3,4,12,13,15,16-hexahydroxy-2,6,14,17-tetramethyl-10-oxatetracyclo[7.7.1.0�,?.0��,�?]heptadec-6-en-11-one

TITLE=15-(carbamoylmethyl)-10,11,23-trihydroxy-18-(3-methyl-2-oxopentanamido)-9,14,17-trioxo-N-[(1Z)-prop-1-en-1-yl]-8,13,16-triazatetracyclo[18.3.1.0�,?.0?,�?]tetracosa-1(23),2(7),3,5,20(24),21-hexaene-12-carboxamide

TITLE=15-(carbamoylmethyl

TITLE=(1R,2R,4S,16R,17R,20S)-13,20-dimethoxy-4,7,17,22,22-pentamethyl-11-oxo-5,10,21,23-tetraoxahexacyclo[18.2.1.0�,�?.0?,�?.0?,�?.0?,��]tricosa-6(14),7,12-trien-2-yl acetate

TITLE=(1R,2R,4S,16R,17R,20S)-13,20-dimethoxy-4,7,17,22,22-pentamethyl-11-oxo-5,10,21,23-tetraoxahexacyclo[18.2.1.0�,�?.0?,�?.0?,�?.0?,��]tricosa-6(14),7,12-trien-2-yl acetate

TITLE=(1R,2R,4S,16R,17R,20S)-13,20-dimethoxy-4,7,17,22,22-pentamethyl-11-oxo-5,10,21,23-tetraoxahexacyclo[18.2.1.0�,�?.0?,�?.0?,�?.0?,��]tricosa-6(14),7,12-trien-2-yl acetate

TITLE=(1R,2R,4S,16R,17R,20S)-13,20-dimethoxy-4,7,17,22,22-pentamethyl-11-oxo-5,10,21,23-tetraoxahexacyclo[18.2.1.0�,�?.0?,�?.0?,�?.0?,��]tricosa-6(14),7,12-trien-2-yl acetate

TITLE=(1R,2R,4S,16R,17R,20S)-13,20-dimethoxy-4,7,17,22,22-pentamethyl-11-oxo-5,10,21,23-tetraoxahexacyclo[18.2.1.0�,�?.0?,�?.0?,�?.0?,��]tricosa-6(14),7,12-trien-2-yl acetate

TITLE=(1R,2R,4S,16R,17R,20S)-13,20-dimethoxy-4,7,17,22,22-pentamethyl-11-oxo-5,10,21,23-tetraoxahexacyclo[18.2.1.0�,�?.0

TITLE=8-benzoyl-1,5,5-trimethyl-6,15-dioxatetracyclo[9.3.1.0?,��.0?,��]pentadeca-7(12),8,10-trien-9-ol

TITLE=8-benzoyl-1,5,5-trimethyl-6,15-dioxatetracyclo[9.3.1.0?,��.0?,��]pentadeca-7(12),8,10-trien-9-ol

TITLE=8-benzoyl-1,5,5-trimethyl-6,15-dioxatetracyclo[9.3.1.0?,��.0?,��]pentadeca-7(12),8,10-trien-9-ol

TITLE=8-benzoyl-1,5,5-trimethyl-6,15-dioxatetracyclo[9.3.1.0?,��.0?,��]pentadeca-7(12),8,10-trien-9-ol

TITLE=8-benzoyl-1,5,5-trimethyl-6,15-dioxatetracyclo[9.3.1.0?,��.0?,��]pentadeca-7(12),8,10-trien-9-ol

TITLE=8-benzoyl-1,5,5-trimethyl-6,15-dioxatetracyclo[9.3.1.0?,��.0?,��]pentadeca-7(12),8,10-trien-9-ol

TITLE=2-[(5S,25R,26R,29S,30S,31S)-13,14,15,18,19,20,31,35,36-nonahydroxy-2,10,23,28,32-pentaoxo-5-(3,4,5-trihydroxybenzoyloxy)-3,6,9,24,27,33-hexaoxaheptacyclo[28.7.1.0?,�?.0?,�?.0��,�?.0�?,��.0�?,�?]octatriaconta-1(38),11(16),12,14,17,19,21,34,36-nonaen-29-yl]acetic acid

TITLE=2-[(5S,25R,26R,29S,30S,31S)-13,14,15,18,19,20,31,35,36-nonahydroxy-2,10,23,28,32-pentaoxo-5-(3,4

TITLE=1,17,19-trihydroxy-7-methyl-3-oxapentacyclo[9.8.0.0�,?.0?,�?.0��,�?]nonadeca-5(10),13,15,17-tetraene-9,12-dione

TITLE=1,17,19-trihydroxy-7-methyl-3-oxapentacyclo[9.8.0.0�,?.0?,�?.0��,�?]nonadeca-5(10),13,15,17-tetraene-9,12-dione

TITLE=1,17,19-trihydroxy-7-methyl-3-oxapentacyclo[9.8.0.0�,?.0?,�?.0��,�?]nonadeca-5(10),13,15,17-tetraene-9,12-dione

TITLE=1,17,19-trihydroxy-7-methyl-3-oxapentacyclo[9.8.0.0�,?.0?,�?.0��,�?]nonadeca-5(10),13,15,17-tetraene-9,12-dione

TITLE=1,17,19-trihydroxy-7-methyl-3-oxapentacyclo[9.8.0.0�,?.0?,�?.0��,�?]nonadeca-5(10),13,15,17-tetraene-9,12-dione

TITLE=1,17,19-trihydroxy-7-methyl-3-oxapentacyclo[9.8.0.0�,?.0?,�?.0��,�?]nonadeca-5(10),13,15,17-tetraene-9,12-dione

TITLE=1,17,19-trihydroxy-7-methyl-3-oxapentacyclo[9.8.0.0�,?.0?,�?.0��,�?]nonadeca-5(10),13,15,17-tetraene-9,12-dione

TITLE=1,17,19-trihydroxy-7-methyl-3-oxapentacyclo[9.8.0.0�,?.0?,�?.0��,�?]nonadeca-5(10),13,15,17-tetraene-9,12-dione

TITLE=2-[(5S,8R,11S,12S,13S,21R)-13,17,18-trihyd

TITLE=(1R,2R,5S,8R,14R,15R,16S)-16-hydroxy-1,2,14,17,17-pentamethyl-8-(prop-1-en-2-yl)pentacyclo[11.7.0.0�,�?.0?,?.0�?,�?]icosane-5,15-dicarboxylic acid

TITLE=(1R,2R,5S,8R,14R,15R,16S)-16-hydroxy-1,2,14,17,17-pentamethyl-8-(prop-1-en-2-yl)pentacyclo[11.7.0.0�,�?.0?,?.0�?,�?]icosane-5,15-dicarboxylic acid

TITLE=(1R,2R,5S,8R,14R,15R,16S)-16-hydroxy-1,2,14,17,17-pentamethyl-8-(prop-1-en-2-yl)pentacyclo[11.7.0.0�,�?.0?,?.0�?,�?]icosane-5,15-dicarboxylic acid

TITLE=(1R,2R,5S,8R,14R,15R,16S)-16-hydroxy-1,2,14,17,17-pentamethyl-8-(prop-1-en-2-yl)pentacyclo[11.7.0.0�,�?.0?,?.0�?,�?]icosane-5,15-dicarboxylic acid

TITLE=(1R,2R,5S,8R,14R,15R,16S)-16-hydroxy-1,2,14,17,17-pentamethyl-8-(prop-1-en-2-yl)pentacyclo[11.7.0.0�,�?.0?,?.0�?,�?]icosane-5,15-dicarboxylic acid

TITLE=(1R,2R,5S,8R,14R,15R,16S)-16-hydroxy-1,2,14,17,17-pentamethyl-8-(prop-1-en-2-yl)pentacyclo[11.7.0.0�,�?.0?,?.0�?,�?]icosane-5,15-dicarboxylic acid

TITLE=(1R,2R,5S,8R,14R,15R,16S)-16-hydroxy-1,2,14,17,17-pentamethyl-8-(prop-

TITLE=6-(furan-3-yl)-6,8,12,16,21-pentahydroxy-7,15-dimethyl-9-oxo-3,17,19-trioxaheptacyclo[9.9.3.0�,�?.0�,?.0�,?.0��,�?.0�?,��]tricosan-14-yl 2-methylbutanoate

TITLE=6-(furan-3-yl)-6,8,12,16,21-pentahydroxy-7,15-dimethyl-9-oxo-3,17,19-trioxaheptacyclo[9.9.3.0�,�?.0�,?.0�,?.0��,�?.0�?,��]tricosan-14-yl 2-methylbutanoate

TITLE=6-(furan-3-yl)-6,8,12,16,21-pentahydroxy-7,15-dimethyl-9-oxo-3,17,19-trioxaheptacyclo[9.9.3.0�,�?.0�,?.0�,?.0��,�?.0�?,��]tricosan-14-yl 2-methylbutanoate

TITLE=6-(furan-3-yl)-6,8,12,16,21-pentahydroxy-7,15-dimethyl-9-oxo-3,17,19-trioxaheptacyclo[9.9.3.0�,�?.0�,?.0�,?.0��,�?.0�?,��]tricosan-14-yl 2-methylbutanoate

TITLE=6-(furan-3-yl)-6,8,12,16,21-pentahydroxy-7,15-dimethyl-9-oxo-3,17,19-trioxaheptacyclo[9.9.3.0�,�?.0�,?.0�,?.0��,�?.0�?,��]tricosan-14-yl 2-methylbutanoate

TITLE=6-(furan-3-yl)-6,8,12,16,21-pentahydroxy-7,15-dimethyl-9-oxo-3,17,19-trioxaheptacyclo[9.9.3.0�,�?.0�,?.0�,?.0��,�?.0�?,��]tricosan-14-yl 2-methylbutanoate

TITLE=6-(furan-3-yl)-6,8,12,

TITLE=(2S,3R,4R,5R,6S)-2-{[(2R,3R,4S,5R,6R)-4,5-dihydroxy-6-methyl-2-[(2R,4'S,7'S,8'R,9'S,13'R,14'R,16'R)-7',9',13'-trimethyl-5-methylidene-16'-{[(2R,3R,4R,5R,6S)-3,4,5-trihydroxy-6-methyloxan-2-yl]oxy}-5'-oxaspiro[oxane-2,6'-pentacyclo[10.8.0.0�,?.0?,?.0��,�?]icosan]-18'-eneoxy]oxan-3-yl]oxy}-6-methyloxane-3,4,5-triol

TITLE=(2S,3R,4R,5R,6S)-2-{[(2R,3R,4S,5R,6R)-4,5-dihydroxy-6-methyl-2-[(2R,4'S,7'S,8'R,9'S,13'R,14'R,16'R)-7',9',13'-trimethyl-5-methylidene-16'-{[(2R,3R,4R,5R,6S)-3,4,5-trihydroxy-6-methyloxan-2-yl]oxy}-5'-oxaspiro[oxane-2,6'-pentacyclo[10.8.0.0�,?.0?,?.0��,�?]icosan]-18'-eneoxy]oxan-3-yl]oxy}-6-methyloxane-3,4,5-triol

TITLE=(2S,3R,4R,5R,6S)-2-{[(2R,3R,4S,5R,6R)-4,5-dihydroxy-6-methyl-2-[(2R,4'S,7'S,8'R,9'S,13'R,14'R,16'R)-7',9',13'-trimethyl-5-methylidene-16'-{[(2R,3R,4R,5R,6S)-3,4,5-trihydroxy-6-methyloxan-2-yl]oxy}-5'-oxaspiro[oxane-2,6'-pentacyclo[10.8.0.0�,?.0?,?.0��,�?]icosan]-18'-eneoxy]oxan-3-yl]oxy}-6-methyloxane-3,4,5-triol

TITLE=(2S,3R,4R,5R,6S)-2-{[(2R,3R,

TITLE=15'-[(5-{[3,4-dihydroxy-6-(hydroxymethyl)-5-{[3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}oxan-2-yl]oxy}-3-hydroxy-4-methoxy-6-methyloxan-2-yl)oxy]-7'-hydroxy-8',12'-dimethyl-6'-oxaspiro[oxolane-3,5'-pentacyclo[9.8.0.0�,?.0?,?.0��,�?]nonadecane]-5-one

TITLE=15'-[(5-{[3,4-dihydroxy-6-(hydroxymethyl)-5-{[3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}oxan-2-yl]oxy}-3-hydroxy-4-methoxy-6-methyloxan-2-yl)oxy]-7'-hydroxy-8',12'-dimethyl-6'-oxaspiro[oxolane-3,5'-pentacyclo[9.8.0.0�,?.0?,?.0��,�?]nonadecane]-5-one

TITLE=15'-[(5-{[3,4-dihydroxy-6-(hydroxymethyl)-5-{[3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}oxan-2-yl]oxy}-3-hydroxy-4-methoxy-6-methyloxan-2-yl)oxy]-7'-hydroxy-8',12'-dimethyl-6'-oxaspiro[oxolane-3,5'-pentacyclo[9.8.0.0�,?.0?,?.0��,�?]nonadecane]-5-one

TITLE=15'-[(5-{[3,4-dihydroxy-6-(hydroxymethyl)-5-{[3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}oxan-2-yl]oxy}-3-hydroxy-4-methoxy-6-methyloxan-2-yl)oxy]-7'-hydroxy-8',12'-dimethyl-6'-oxaspiro[oxolane-3,5'-p

TITLE=[(1R,5R,9S,13S)-5,9,13-trimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadec-14-en-5-yl]methanol

TITLE=[(1R,5R,9S,13S)-5,9,13-trimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadec-14-en-5-yl]methanol

TITLE=[(1R,5R,9S,13S)-5,9,13-trimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadec-14-en-5-yl]methanol

TITLE=[(1R,5R,9S,13S)-5,9,13-trimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadec-14-en-5-yl]methanol

TITLE=(1R,21S,23R)-6,7,8,11,12,13,22,23-octahydroxy-3,16-dioxo-2,17,20-trioxatetracyclo[17.3.1.0?,?.0�?,�?]tricosa-4(9),5,7,10(15),11,13-hexaen-21-yl 3,4,5-trihydroxybenzoate

TITLE=(1R,21S,23R)-6,7,8,11,12,13,22,23-octahydroxy-3,16-dioxo-2,17,20-trioxatetracyclo[17.3.1.0?,?.0�?,�?]tricosa-4(9),5,7,10(15),11,13-hexaen-21-yl 3,4,5-trihydroxybenzoate

TITLE=(1R,21S,23R)-6,7,8,11,12,13,22,23-octahydroxy-3,16-dioxo-2,17,20-trioxatetracyclo[17.3.1.0?,?.0�?,�?]tricosa-4(9),5,7,10(15),11,13-hexaen-21-yl 3,4,5-trihydroxybenzoate

TITLE=(1R,21S,23R)-6,7,8,11,12,13,22,23-octahydroxy-3,16-dioxo-2,17,20-trioxatetracyclo[

TITLE=(5S,9R)-14-(hydroxymethyl)-5,9-dimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadecane-5-carboxylic acid

TITLE=(5S,9R)-14-(hydroxymethyl)-5,9-dimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadecane-5-carboxylic acid

TITLE=(5S,9R)-14-(hydroxymethyl)-5,9-dimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadecane-5-carboxylic acid

TITLE=(5S,9R)-14-(hydroxymethyl)-5,9-dimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadecane-5-carboxylic acid

TITLE=(5S,9R)-14-(hydroxymethyl)-5,9-dimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadecane-5-carboxylic acid

TITLE=(5S,9R)-14-(hydroxymethyl)-5,9-dimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadecane-5-carboxylic acid

TITLE=(15S,16Z,17S)-16-ethylidene-4-hydroxy-15-{[(2S,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}-2,10,14,20-tetraoxatetracyclo[21.2.2.1�,?.0��,�?]octacosa-1(25),3(28),4,6,12,23,26-heptaene-11,19-dione

TITLE=(15S,16Z,17S)-16-ethylidene-4-hydroxy-15-{[(2S,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}-2,10,14,20-tetraoxatetracyclo[21.2.2.1�,?

TITLE=methyl (2R)-2-[(1S,3S,7R,8R,9R,12S,13R)-13-(furan-3-yl)-6,6,8,12-tetramethyl-17-methylidene-5,15-dioxo-2,14-dioxatetracyclo[7.7.1.0�,��.0�,?]heptadecan-7-yl]-2-hydroxyacetate

TITLE=methyl (2R)-2-[(1S,3S,7R,8R,9R,12S,13R)-13-(furan-3-yl)-6,6,8,12-tetramethyl-17-methylidene-5,15-dioxo-2,14-dioxatetracyclo[7.7.1.0�,��.0�,?]heptadecan-7-yl]-2-hydroxyacetate

TITLE=methyl (2R)-2-[(1S,3S,7R,8R,9R,12S,13R)-13-(furan-3-yl)-6,6,8,12-tetramethyl-17-methylidene-5,15-dioxo-2,14-dioxatetracyclo[7.7.1.0�,��.0�,?]heptadecan-7-yl]-2-hydroxyacetate

TITLE=methyl (2R)-2-[(1S,3S,7R,8R,9R,12S,13R)-13-(furan-3-yl)-6,6,8,12-tetramethyl-17-methylidene-5,15-dioxo-2,14-dioxatetracyclo[7.7.1.0�,��.0�,?]heptadecan-7-yl]-2-hydroxyacetate

TITLE=methyl (2R)-2-[(1S,3S,7R,8R,9R,12S,13R)-13-(furan-3-yl)-6,6,8,12-tetramethyl-17-methylidene-5,15-dioxo-2,14-dioxatetracyclo[7.7.1.0�,��.0�,?]heptadecan-7-yl]-2-hydroxyacetate

TITLE=methyl (2R)-2-[(1S,3S,7R,8R,9R,12S,13R)-13-(furan-3-yl)-6,6,8,12-tetramethyl-17-meth

TITLE=(1S,8S,9S,10S,13R)-6,9,10-trimethyl-2-oxo-4,14-dioxatetracyclo[7.5.0.0�,��.0�,?]tetradeca-3(7),5-dien-8-yl acetate

TITLE=(1S,8S,9S,10S,13R)-6,9,10-trimethyl-2-oxo-4,14-dioxatetracyclo[7.5.0.0�,��.0�,?]tetradeca-3(7),5-dien-8-yl acetate

TITLE=(1S,8S,9S,10S,13R)-6,9,10-trimethyl-2-oxo-4,14-dioxatetracyclo[7.5.0.0�,��.0�,?]tetradeca-3(7),5-dien-8-yl acetate

TITLE=(1S,8S,9S,10S,13R)-6,9,10-trimethyl-2-oxo-4,14-dioxatetracyclo[7.5.0.0�,��.0�,?]tetradeca-3(7),5-dien-8-yl acetate

TITLE=(1S,8S,9S,10S,13R)-6,9,10-trimethyl-2-oxo-4,14-dioxatetracyclo[7.5.0.0�,��.0�,?]tetradeca-3(7),5-dien-8-yl acetate

TITLE=(1S,8S,9S,10S,13R)-6,9,10-trimethyl-2-oxo-4,14-dioxatetracyclo[7.5.0.0�,��.0�,?]tetradeca-3(7),5-dien-8-yl acetate

TITLE=(1S,8S,9S,10S,13R)-6,9,10-trimethyl-2-oxo-4,14-dioxatetracyclo[7.5.0.0�,��.0�,?]tetradeca-3(7),5-dien-8-yl acetate

TITLE=(1S,8S,9S,10S,13R)-6,9,10-trimethyl-2-oxo-4,14-dioxatetracyclo[7.5.0.0�,��.0�,?]tetradeca-3(7),5-dien-8-yl acetate

TITLE=(1S,8S,9S,10S,13R)

TITLE=(1S,2S,5R,7R,10S,11S,14R,16R,19S,20S,23R,25R)-5,14-diethyl-2,11,20,23-tetramethyl-4,13,22,28,29,30-hexaoxatetracyclo[23.2.1.1?,�?.1�?,�?]triacontane-3,12,21-trione

TITLE=(1S,2S,5R,7R,10S,11S,14R,16R,19S,20S,23R,25R)-5,14-diethyl-2,11,20,23-tetramethyl-4,13,22,28,29,30-hexaoxatetracyclo[23.2.1.1?,�?.1�?,�?]triacontane-3,12,21-trione

TITLE=(1S,2S,5R,7R,10S,11S,14R,16R,19S,20S,23R,25R)-5,14-diethyl-2,11,20,23-tetramethyl-4,13,22,28,29,30-hexaoxatetracyclo[23.2.1.1?,�?.1�?,�?]triacontane-3,12,21-trione

TITLE=(1S,2S,5R,7R,10S,11S,14R,16R,19S,20S,23R,25R)-5,14-diethyl-2,11,20,23-tetramethyl-4,13,22,28,29,30-hexaoxatetracyclo[23.2.1.1?,�?.1�?,�?]triacontane-3,12,21-trione

TITLE=(1S,2S,5R,7R,10S,11S,14R,16R,19S,20S,23R,25R)-5,14-diethyl-2,11,20,23-tetramethyl-4,13,22,28,29,30-hexaoxatetracyclo[23.2.1.1?,�?.1�?,�?]triacontane-3,12,21-trione

TITLE=(1S,2S,5R,7R,10S,11S,14R,16R,19S,20S,23R,25R)-5,14-diethyl-2,11,20,23-tetramethyl-4,13,22,28,29,30-hexaoxatetracyclo[23.2.1.1?,�?.1�?,�?]tr

TITLE=(2S,3R,4R,5S,6S)-3,5-bis(acetyloxy)-2-{[(1S,2S,4S,5S,6R,10S)-2-(hydroxymethyl)-10-{[(2S,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}-3,9-dioxatricyclo[4.4.0.0�,?]dec-7-en-5-yl]oxy}-6-methyloxan-4-yl (2E)-3-phenylprop-2-enoate

TITLE=(2S,3R,4R,5S,6S)-3,5-bis(acetyloxy)-2-{[(1S,2S,4S,5S,6R,10S)-2-(hydroxymethyl)-10-{[(2S,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}-3,9-dioxatricyclo[4.4.0.0�,?]dec-7-en-5-yl]oxy}-6-methyloxan-4-yl (2E)-3-phenylprop-2-enoate

TITLE=(2S,3R,4R,5S,6S)-3,5-bis(acetyloxy)-2-{[(1S,2S,4S,5S,6R,10S)-2-(hydroxymethyl)-10-{[(2S,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}-3,9-dioxatricyclo[4.4.0.0�,?]dec-7-en-5-yl]oxy}-6-methyloxan-4-yl (2E)-3-phenylprop-2-enoate

TITLE=(2S,3R,4R,5S,6S)-3,5-bis(acetyloxy)-2-{[(1S,2S,4S,5S,6R,10S)-2-(hydroxymethyl)-10-{[(2S,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}-3,9-dioxatricyclo[4.4.0.0�,?]dec-7-en-5-yl]oxy}-6-methyloxan-4-yl (2E)-3-phenylprop-2-enoate



TITLE=(1S,3R,8R,10S,11R,12S,14S,16R)-12-(acetyloxy)-5,11,15,15-tetramethyl-6-oxo-2,7-dioxapentacyclo[8.8.0.0�,�.0?,?.0��,�?]octadec-4-en-14-yl acetate

TITLE=(1S,3R,8R,10S,11R,12S,14S,16R)-12-(acetyloxy)-5,11,15,15-tetramethyl-6-oxo-2,7-dioxapentacyclo[8.8.0.0�,�.0?,?.0��,�?]octadec-4-en-14-yl acetate

TITLE=(1S,3R,8R,10S,11R,12S,14S,16R)-12-(acetyloxy)-5,11,15,15-tetramethyl-6-oxo-2,7-dioxapentacyclo[8.8.0.0�,�.0?,?.0��,�?]octadec-4-en-14-yl acetate

TITLE=(1S,3R,8R,10S,11R,12S,14S,16R)-12-(acetyloxy)-5,11,15,15-tetramethyl-6-oxo-2,7-dioxapentacyclo[8.8.0.0�,�.0?,?.0��,�?]octadec-4-en-14-yl acetate

TITLE=(1S,3R,8R,10S,11R,12S,14S,16R)-12-(acetyloxy)-5,11,15,15-tetramethyl-6-oxo-2,7-dioxapentacyclo[8.8.0.0�,�.0?,?.0��,�?]octadec-4-en-14-yl acetate

TITLE=(1S,3R,8R,10S,11R,12S,14S,16R)-12-(acetyloxy)-5,11,15,15-tetramethyl-6-oxo-2,7-dioxapentacyclo[8.8.0.0�,�.0?,?.0��,�?]octadec-4-en-14-yl acetate

TITLE=(1S,3R,8R,10S,11R,12S,14S,16R)-12-(acetyloxy)-5,11,15,15-tetramethyl-6-oxo-2,7-dio

TITLE=(2S,3R,4R,5R,6S)-2-{[(2R,3R,4S,5S,6R)-4,5-dihydroxy-6-(hydroxymethyl)-2-{5',7,9,13-tetramethyl-5-oxaspiro[pentacyclo[10.8.0.0�,?.0?,?.0��,�?]icosane-6,2'-piperidine]-3-oloxy}oxan-3-yl]oxy}-6-methyloxane-3,4,5-triol

TITLE=(2S,3R,4R,5R,6S)-2-{[(2R,3R,4S,5S,6R)-4,5-dihydroxy-6-(hydroxymethyl)-2-{5',7,9,13-tetramethyl-5-oxaspiro[pentacyclo[10.8.0.0�,?.0?,?.0��,�?]icosane-6,2'-piperidine]-3-oloxy}oxan-3-yl]oxy}-6-methyloxane-3,4,5-triol

TITLE=(2S,3R,4R,5R,6S)-2-{[(2R,3R,4S,5S,6R)-4,5-dihydroxy-6-(hydroxymethyl)-2-{5',7,9,13-tetramethyl-5-oxaspiro[pentacyclo[10.8.0.0�,?.0?,?.0��,�?]icosane-6,2'-piperidine]-3-oloxy}oxan-3-yl]oxy}-6-methyloxane-3,4,5-triol

TITLE=(2S,3R,4R,5R,6S)-2-{[(2R,3R,4S,5S,6R)-4,5-dihydroxy-6-(hydroxymethyl)-2-{5',7,9,13-tetramethyl-5-oxaspiro[pentacyclo[10.8.0.0�,?.0?,?.0��,�?]icosane-6,2'-piperidine]-3-oloxy}oxan-3-yl]oxy}-6-methyloxane-3,4,5-triol

TITLE=(2S,3R,4R,5R,6S)-2-{[(2R,3R,4S,5S,6R)-4,5-dihydroxy-6-(hydroxymethyl)-2-{5',7,9,13-tetramethyl-5-oxaspiro[

TITLE=(1S,4S,5R,9S,10R,13R,14R)-14-hydroxy-5,9-dimethyl-14-{[(3-methylbutanoyl)oxy]methyl}tetracyclo[11.2.1.0�,�?.0?,?]hexadecane-5-carboxylic acid

TITLE=(1S,4S,5R,9S,10R,13R,14R)-14-hydroxy-5,9-dimethyl-14-{[(3-methylbutanoyl)oxy]methyl}tetracyclo[11.2.1.0�,�?.0?,?]hexadecane-5-carboxylic acid

TITLE=(1S,4S,5R,9S,10R,13R,14R)-14-hydroxy-5,9-dimethyl-14-{[(3-methylbutanoyl)oxy]methyl}tetracyclo[11.2.1.0�,�?.0?,?]hexadecane-5-carboxylic acid

TITLE=(1S,4S,5R,9S,10R,13R,14R)-14-hydroxy-5,9-dimethyl-14-{[(3-methylbutanoyl)oxy]methyl}tetracyclo[11.2.1.0�,�?.0?,?]hexadecane-5-carboxylic acid

TITLE=(1S,4S,5R,9S,10R,13R,14R)-14-hydroxy-5,9-dimethyl-14-{[(3-methylbutanoyl)oxy]methyl}tetracyclo[11.2.1.0�,�?.0?,?]hexadecane-5-carboxylic acid

TITLE=(1S,4S,5R,9S,10R,13R,14R)-14-hydroxy-5,9-dimethyl-14-{[(3-methylbutanoyl)oxy]methyl}tetracyclo[11.2.1.0�,�?.0?,?]hexadecane-5-carboxylic acid

TITLE=(1S,4S,5R,9S,10R,13R,14R)-14-hydroxy-5,9-dimethyl-14-{[(3-methylbutanoyl)oxy]methyl}tetracyclo[11.2.

TITLE=(1R,2R,4S,7S,8R,9R,10S,11R,12S,13S,14R,17R,18R,19R)-8-(acetyloxy)-10,19-dihydroxy-1,9,18-trimethyl-15-oxo-16,20-dioxahexacyclo[15.3.2.0�,��.0?,��.0?,��.0�?,�?]docos-5-ene-5-carboxylic acid

TITLE=(1R,2R,4S,7S,8R,9R,10S,11R,12S,13S,14R,17R,18R,19R)-8-(acetyloxy)-10,19-dihydroxy-1,9,18-trimethyl-15-oxo-16,20-dioxahexacyclo[15.3.2.0�,��.0?,��.0?,��.0�?,�?]docos-5-ene-5-carboxylic acid

TITLE=(1R,2R,4S,7S,8R,9R,10S,11R,12S,13S,14R,17R,18R,19R)-8-(acetyloxy)-10,19-dihydroxy-1,9,18-trimethyl-15-oxo-16,20-dioxahexacyclo[15.3.2.0�,��.0?,��.0?,��.0�?,�?]docos-5-ene-5-carboxylic acid

TITLE=(1R,2R,4S,7S,8R,9R,10S,11R,12S,13S,14R,17R,18R,19R)-8-(acetyloxy)-10,19-dihydroxy-1,9,18-trimethyl-15-oxo-16,20-dioxahexacyclo[15.3.2.0�,��.0?,��.0?,��.0�?,�?]docos-5-ene-5-carboxylic acid

TITLE=(1R,2R,4S,7S,8R,9R,10S,11R,12S,13S,14R,17R,18R,19R)-8-(acetyloxy)-10,19-dihydroxy-1,9,18-trimethyl-15-oxo-16,20-dioxahexacyclo[15.3.2.0�,��.0?,��.0?,��.0�?,�?]docos-5-ene-5-carboxylic acid

TITLE=(1R,2R,4S,7S,8

TITLE=(1S,11R,14R)-14-(hydroxymethyl)-3-(3-{[(1R,4R)-4-(hydroxymethyl)-5,7-dimethyl-6,8-dioxo-2,3-dithia-5,7-diazabicyclo[2.2.2]octan-1-yl]methyl}-1H-indol-1-yl)-18-methyl-15,16-dithia-10,12,18-triazapentacyclo[12.2.2.0�,��.0�,��.0?,?]octadeca-4(9),5,7-triene-13,17-dione

TITLE=(1S,11R,14R)-14-(hydroxymethyl)-3-(3-{[(1R,4R)-4-(hydroxymethyl)-5,7-dimethyl-6,8-dioxo-2,3-dithia-5,7-diazabicyclo[2.2.2]octan-1-yl]methyl}-1H-indol-1-yl)-18-methyl-15,16-dithia-10,12,18-triazapentacyclo[12.2.2.0�,��.0�,��.0?,?]octadeca-4(9),5,7-triene-13,17-dione

TITLE=(1S,11R,14R)-14-(hydroxymethyl)-3-(3-{[(1R,4R)-4-(hydroxymethyl)-5,7-dimethyl-6,8-dioxo-2,3-dithia-5,7-diazabicyclo[2.2.2]octan-1-yl]methyl}-1H-indol-1-yl)-18-methyl-15,16-dithia-10,12,18-triazapentacyclo[12.2.2.0�,��.0�,��.0?,?]octadeca-4(9),5,7-triene-13,17-dione

TITLE=(1S,11R,14R)-14-(hydroxymethyl)-3-(3-{[(1R,4R)-4-(hydroxymethyl)-5,7-dimethyl-6,8-dioxo-2,3-dithia-5,7-diazabicyclo[2.2.2]octan-1-yl]methyl}-1H-indol-1-yl)-18-methyl-15,16-dit

TITLE=methyl (1S,5S,9S,10R,15R)-15-(acetyloxy)-8-hydroxy-4,5,7,10,14,14-hexamethyl-6,18-dioxo-19-oxapentacyclo[10.5.2.0�,��.0�,�?.0?,?]nonadeca-3,7-diene-9-carboxylate

TITLE=methyl (1S,5S,9S,10R,15R)-15-(acetyloxy)-8-hydroxy-4,5,7,10,14,14-hexamethyl-6,18-dioxo-19-oxapentacyclo[10.5.2.0�,��.0�,�?.0?,?]nonadeca-3,7-diene-9-carboxylate

TITLE=methyl (1S,5S,9S,10R,15R)-15-(acetyloxy)-8-hydroxy-4,5,7,10,14,14-hexamethyl-6,18-dioxo-19-oxapentacyclo[10.5.2.0�,��.0�,�?.0?,?]nonadeca-3,7-diene-9-carboxylate

TITLE=methyl (1S,5S,9S,10R,15R)-15-(acetyloxy)-8-hydroxy-4,5,7,10,14,14-hexamethyl-6,18-dioxo-19-oxapentacyclo[10.5.2.0�,��.0�,�?.0?,?]nonadeca-3,7-diene-9-carboxylate

TITLE=methyl (1S,5S,9S,10R,15R)-15-(acetyloxy)-8-hydroxy-4,5,7,10,14,14-hexamethyl-6,18-dioxo-19-oxapentacyclo[10.5.2.0�,��.0�,�?.0?,?]nonadeca-3,7-diene-9-carboxylate

TITLE=methyl (1S,5S,9S,10R,15R)-15-(acetyloxy)-8-hydroxy-4,5,7,10,14,14-hexamethyl-6,18-dioxo-19-oxapentacyclo[10.5.2.0�,��.0�,�?.0?,?]nonadeca-3,7-diene-9

TITLE=11-(hydroxymethyl)-2,6,6,14,19-pentamethyl-7,16,18-trioxapentacyclo[12.6.1.0�,��.0?,�?.0�?,��]henicosa-5(10),11-diene-3,8,15,20-tetrone

TITLE=11-(hydroxymethyl)-2,6,6,14,19-pentamethyl-7,16,18-trioxapentacyclo[12.6.1.0�,��.0?,�?.0�?,��]henicosa-5(10),11-diene-3,8,15,20-tetrone

TITLE=11-(hydroxymethyl)-2,6,6,14,19-pentamethyl-7,16,18-trioxapentacyclo[12.6.1.0�,��.0?,�?.0�?,��]henicosa-5(10),11-diene-3,8,15,20-tetrone

TITLE=11-(hydroxymethyl)-2,6,6,14,19-pentamethyl-7,16,18-trioxapentacyclo[12.6.1.0�,��.0?,�?.0�?,��]henicosa-5(10),11-diene-3,8,15,20-tetrone

TITLE=11-(hydroxymethyl)-2,6,6,14,19-pentamethyl-7,16,18-trioxapentacyclo[12.6.1.0�,��.0?,�?.0�?,��]henicosa-5(10),11-diene-3,8,15,20-tetrone

TITLE=11-(hydroxymethyl)-2,6,6,14,19-pentamethyl-7,16,18-trioxapentacyclo[12.6.1.0�,��.0?,�?.0�?,��]henicosa-5(10),11-diene-3,8,15,20-tetrone

TITLE=11-(hydroxymethyl)-2,6,6,14,19-pentamethyl-7,16,18-trioxapentacyclo[12.6.1.0�,��.0?,�?.0�?,��]henicosa-5(10),11-diene-3,8,15,20-tetrone


TITLE=2',4',11-trioxaspiro[tricyclo[4.4.1.0�,?]undecane-2,3'-tricyclo[7.3.1.0?,��]tridecane]-1'(12'),3,5',7',8,9'(13'),10'-heptaene-5,7,10-triol

TITLE=2',4',11-trioxaspiro[tricyclo[4.4.1.0�,?]undecane-2,3'-tricyclo[7.3.1.0?,��]tridecane]-1'(12'),3,5',7',8,9'(13'),10'-heptaene-5,7,10-triol

TITLE=2',4',11-trioxaspiro[tricyclo[4.4.1.0�,?]undecane-2,3'-tricyclo[7.3.1.0?,��]tridecane]-1'(12'),3,5',7',8,9'(13'),10'-heptaene-5,7,10-triol

TITLE=2',4',11-trioxaspiro[tricyclo[4.4.1.0�,?]undecane-2,3'-tricyclo[7.3.1.0?,��]tridecane]-1'(12'),3,5',7',8,9'(13'),10'-heptaene-5,7,10-triol

TITLE=2',4',11-trioxaspiro[tricyclo[4.4.1.0�,?]undecane-2,3'-tricyclo[7.3.1.0?,��]tridecane]-1'(12'),3,5',7',8,9'(13'),10'-heptaene-5,7,10-triol

TITLE=2',4',11-trioxaspiro[tricyclo[4.4.1.0�,?]undecane-2,3'-tricyclo[7.3.1.0?,��]tridecane]-1'(12'),3,5',7',8,9'(13'),10'-heptaene-5,7,10-triol

TITLE=2',4',11-trioxaspiro[tricyclo[4.4.1.0�,?]undecane-2,3'-tricyclo[7.3.1.0?,��]tridecane]-1'(12'),3,5',7',8,9'(13'),10'-h

TITLE=(1'S,2'S,3S,3'S,7'R,8'R,9'S,13'R)-8'-(acetyloxy)-2,2,2',9',13'-pentamethyl-6',16'-dimethylidene-6,11',15'-trioxo-10',14',17'-trioxaspiro[oxane-3,5'-pentacyclo[7.6.1.1?,��.0�,��.0�,?]heptadecane]-3'-yl (2E)-2-methylbut-2-enoate

TITLE=(1'S,2'S,3S,3'S,7'R,8'R,9'S,13'R)-8'-(acetyloxy)-2,2,2',9',13'-pentamethyl-6',16'-dimethylidene-6,11',15'-trioxo-10',14',17'-trioxaspiro[oxane-3,5'-pentacyclo[7.6.1.1?,��.0�,��.0�,?]heptadecane]-3'-yl (2E)-2-methylbut-2-enoate

TITLE=(1'S,2'S,3S,3'S,7'R,8'R,9'S,13'R)-8'-(acetyloxy)-2,2,2',9',13'-pentamethyl-6',16'-dimethylidene-6,11',15'-trioxo-10',14',17'-trioxaspiro[oxane-3,5'-pentacyclo[7.6.1.1?,��.0�,��.0�,?]heptadecane]-3'-yl (2E)-2-methylbut-2-enoate

TITLE=(1'S,2'S,3S,3'S,7'R,8'R,9'S,13'R)-8'-(acetyloxy)-2,2,2',9',13'-pentamethyl-6',16'-dimethylidene-6,11',15'-trioxo-10',14',17'-trioxaspiro[oxane-3,5'-pentacyclo[7.6.1.1?,��.0�,��.0�,?]heptadecane]-3'-yl (2E)-2-methylbut-2-enoate

TITLE=(1'S,2'S,3S,3'S,7'R,8'R,9'S,13'R)-8'-(acetyloxy)-2,2,2',9'

TITLE=14-(hydroxymethyl)-5,9-dimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadecan-5-ol

TITLE=14-(hydroxymethyl)-5,9-dimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadecan-5-ol

TITLE=14-(hydroxymethyl)-5,9-dimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadecan-5-ol

TITLE=14-(hydroxymethyl)-5,9-dimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadecan-5-ol

TITLE=14-(hydroxymethyl)-5,9-dimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadecan-5-ol

TITLE=14-(hydroxymethyl)-5,9-dimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadecan-5-ol

TITLE=14-(hydroxymethyl)-5,9-dimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadecan-5-ol

TITLE=14-(hydroxymethyl)-5,9-dimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadecan-5-ol

TITLE=14-(hydroxymethyl)-5,9-dimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadecan-5-ol

TITLE=14-(hydroxymethyl)-5,9-dimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadecan-5-ol

TITLE=14-(hydroxymethyl)-5,9-dimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadecan-5-ol

TITLE=14-(hydroxymethyl)-5,9-dimethyltetracyclo[11.2.1.0�,�?.0?,?]hexadecan-5-ol

TITLE=14-(hydrox

TITLE=(2R,3R,6R,8R,9S,12S,13R,14R,15R,16R)-6,8,14,15-tetrahydroxy-2,6,13,16-tetramethyl-3-{[(2R,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}-10-oxatetracyclo[7.6.1.0�,?.0��,�?]hexadec-4-en-11-one

TITLE=(2R,3R,6R,8R,9S,12S,13R,14R,15R,16R)-6,8,14,15-tetrahydroxy-2,6,13,16-tetramethyl-3-{[(2R,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}-10-oxatetracyclo[7.6.1.0�,?.0��,�?]hexadec-4-en-11-one

TITLE=(2R,3R,6R,8R,9S,12S,13R,14R,15R,16R)-6,8,14,15-tetrahydroxy-2,6,13,16-tetramethyl-3-{[(2R,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}-10-oxatetracyclo[7.6.1.0�,?.0��,�?]hexadec-4-en-11-one

TITLE=(2R,3R,6R,8R,9S,12S,13R,14R,15R,16R)-6,8,14,15-tetrahydroxy-2,6,13,16-tetramethyl-3-{[(2R,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}-10-oxatetracyclo[7.6.1.0�,?.0��,�?]hexadec-4-en-11-one

TITLE=(2R,3R,6R,8R,9S,12S,13R,14R,15R,16R)-6,8,14,15-tetrahydroxy-2,6,13,16-tetramethyl-3-{[(2R,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)o

TITLE=(1R,4S,5R,9R,10R,13S)-13-{[(2S,3R,4S,5R,6R)-5-hydroxy-6-(hydroxymethyl)-3,4-bis({[(2S,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy})oxan-2-yl]oxy}-5,9-dimethyl-14-methylidenetetracyclo[11.2.1.0�,�?.0?,?]hexadecane-5-carboxylic acid

TITLE=(1R,4S,5R,9R,10R,13S)-13-{[(2S,3R,4S,5R,6R)-5-hydroxy-6-(hydroxymethyl)-3,4-bis({[(2S,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy})oxan-2-yl]oxy}-5,9-dimethyl-14-methylidenetetracyclo[11.2.1.0�,�?.0?,?]hexadecane-5-carboxylic acid

TITLE=(1R,4S,5R,9R,10R,13S)-13-{[(2S,3R,4S,5R,6R)-5-hydroxy-6-(hydroxymethyl)-3,4-bis({[(2S,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy})oxan-2-yl]oxy}-5,9-dimethyl-14-methylidenetetracyclo[11.2.1.0�,�?.0?,?]hexadecane-5-carboxylic acid

TITLE=(1R,4S,5R,9R,10R,13S)-13-{[(2S,3R,4S,5R,6R)-5-hydroxy-6-(hydroxymethyl)-3,4-bis({[(2S,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy})oxan-2-yl]oxy}-5,9-dimethyl-14-methylidenetetracyclo[11.2.1.0�,�?.0?,?]hexadecane

TITLE=(2S,3R,4R,5R,6S)-2-[(4-hydroxy-6-{[(4S,6R,7S,8R,9S,13S,16S)-6-hydroxy-7,9,13-trimethyl-6-[(3R)-3-methyl-4-{[(2R,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}butyl]-5-oxapentacyclo[10.8.0.0�,?.0?,?.0��,�?]icosan-16-yl]oxy}-2-(hydroxymethyl)-5-{[3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}oxan-3-yl)oxy]-6-methyloxane-3,4,5-triol

TITLE=(2S,3R,4R,5R,6S)-2-[(4-hydroxy-6-{[(4S,6R,7S,8R,9S,13S,16S)-6-hydroxy-7,9,13-trimethyl-6-[(3R)-3-methyl-4-{[(2R,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}butyl]-5-oxapentacyclo[10.8.0.0�,?.0?,?.0��,�?]icosan-16-yl]oxy}-2-(hydroxymethyl)-5-{[3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}oxan-3-yl)oxy]-6-methyloxane-3,4,5-triol

TITLE=(2S,3R,4R,5R,6S)-2-[(4-hydroxy-6-{[(4S,6R,7S,8R,9S,13S,16S)-6-hydroxy-7,9,13-trimethyl-6-[(3R)-3-methyl-4-{[(2R,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}butyl]-5-oxapentacyclo[10.8.0.0�,?.0?,?.0��,�?]icosan-16-yl]oxy}-2-(hydroxymethyl)-5-{[3,4,5-trihydroxy-6-(hy

TITLE=(2S,3R,4R,5R,6S)-2-{[(2R,3S,4R,5R,6S)-6-{[(2R,3R,4S,5S,6R)-4,5-dihydroxy-6-(hydroxymethyl)-2-[(1'S,2S,4'S,5S,7'R,9'S,13'R,16'S)-5,7',9',13'-tetramethyl-5-({[(2R,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}methyl)-5'-oxaspiro[oxolane-2,6'-pentacyclo[10.8.0.0�,?.0?,?.0��,�?]icosan]-18'-eneoxy]oxan-3-yl]oxy}-4,5-dihydroxy-2-(hydroxymethyl)oxan-3-yl]oxy}-6-methyloxane-3,4,5-triol

TITLE=(2S,3R,4R,5R,6S)-2-{[(2R,3S,4R,5R,6S)-6-{[(2R,3R,4S,5S,6R)-4,5-dihydroxy-6-(hydroxymethyl)-2-[(1'S,2S,4'S,5S,7'R,9'S,13'R,16'S)-5,7',9',13'-tetramethyl-5-({[(2R,3R,4S,5S,6R)-3,4,5-trihydroxy-6-(hydroxymethyl)oxan-2-yl]oxy}methyl)-5'-oxaspiro[oxolane-2,6'-pentacyclo[10.8.0.0�,?.0?,?.0��,�?]icosan]-18'-eneoxy]oxan-3-yl]oxy}-4,5-dihydroxy-2-(hydroxymethyl)oxan-3-yl]oxy}-6-methyloxane-3,4,5-triol

TITLE=(2S,3R,4R,5R,6S)-2-{[(2R,3S,4R,5R,6S)-6-{[(2R,3R,4S,5S,6R)-4,5-dihydroxy-6-(hydroxymethyl)-2-[(1'S,2S,4'S,5S,7'R,9'S,13'R,16'S)-5,7',9',13'-tetramethyl-5-({[(2R,3R,4S,5S,6R)-3,4,5-trihydro

TITLE=(1S,2R,4S,9R,10R,14S,15S,17S)-9-(furan-3-yl)-1-hydroxy-15-[(1R)-1-hydroxy-2-methoxy-2-oxoethyl]-10,14,16,16-tetramethyl-7,18-dioxo-3,8-dioxapentacyclo[12.3.1.0�,?.0?,��.0?,�?]octadecan-17-yl propanoate

TITLE=(1S,2R,4S,9R,10R,14S,15S,17S)-9-(furan-3-yl)-1-hydroxy-15-[(1R)-1-hydroxy-2-methoxy-2-oxoethyl]-10,14,16,16-tetramethyl-7,18-dioxo-3,8-dioxapentacyclo[12.3.1.0�,?.0?,��.0?,�?]octadecan-17-yl propanoate

TITLE=(1S,2R,4S,9R,10R,14S,15S,17S)-9-(furan-3-yl)-1-hydroxy-15-[(1R)-1-hydroxy-2-methoxy-2-oxoethyl]-10,14,16,16-tetramethyl-7,18-dioxo-3,8-dioxapentacyclo[12.3.1.0�,?.0?,��.0?,�?]octadecan-17-yl propanoate

TITLE=(1S,2R,4S,9R,10R,14S,15S,17S)-9-(furan-3-yl)-1-hydroxy-15-[(1R)-1-hydroxy-2-methoxy-2-oxoethyl]-10,14,16,16-tetramethyl-7,18-dioxo-3,8-dioxapentacyclo[12.3.1.0�,?.0?,��.0?,�?]octadecan-17-yl 2-methylpropanoate

TITLE=(1S,2R,4S,9R,10R,14S,15S,17S)-9-(furan-3-yl)-1-hydroxy-15-[(1R)-1-hydroxy-2-methoxy-2-oxoethyl]-10,14,16,16-tetramethyl-7,18-dioxo-3,8-dioxapentacyclo

## Replace charge values of "-1+" with "-1"

In [64]:
input_file = PATH_MS_DATA + 'all_gnps_inchi_added_191023.mgf'
output_file = open(ROOT + "\\Data\\GNPS_all\\" + "MS_data_allGNPS_191023_corrected.mgf", 'w')

with open(input_file, 'r') as file:
    for line in file:
        output_file.write(line.replace('-1+', '-1'))

In [65]:
output_file.close()