In [1]:
import os
import pandas as pd
import json
from extractor.gnps import GnpsAnnotationsFile
from extractor.gnps import GnpsCacher
from extractor.gnps import GnpsParametersFile
from extractor.gnps import GnpsInchiScore
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import Descriptors
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Avalon import pyAvalonTools
from rdkit.Chem.Draw import rdDepictor

In [2]:
os.chdir("..")


In [3]:
# --- Reference molecule: the same structure entered as InChI and as SMILES ---
HL_INCHI = "InChI=1S/C15H24N2O2/c18-12-4-5-16-8-10-6-11(14(16)7-12)9-17-13(10)2-1-3-15(17)19/h10-14,18H,1-9H2/t10-,11-,12-,13+,14-/m0/s1"
HL_SMILES = "[C@@H]12CN3CC[C@H](O)C[C@H]3[C@H](CN3C(=O)CCC[C@H]23)C1"

# Parse the InChI, then round-trip it through Avalon's canonical SMILES.
hl_from_inchi = Chem.inchi.MolFromInchi(HL_INCHI)
# Optional PNG export:
#     Draw.MolToImage(hl_from_inchi).save("hl_from_inchi.png")
canon_smiles = pyAvalonTools.GetCanonSmiles(hl_from_inchi)
hl_from_canon_smiles = Chem.MolFromSmiles(canon_smiles)
# Optional PNG export:
#     Draw.MolToImage(hl_from_canon_smiles).save("hl_from_canon_smiles.png")
hl_from_smiles = Chem.MolFromSmiles(HL_SMILES)
# Optional PNG export:
#     Draw.MolToImage(hl_from_smiles).save("hl_from_smiles.png")

# --- SVG rendering of the InChI-derived molecule ---
# First with the generic 2D drawer on a 500 x 500 canvas.
svg_canvas_size = (500, 500)
d = rdMolDraw2D.MolDraw2DSVG(*svg_canvas_size)
s = rdMolDraw2D.PrepareAndDrawMolecule(d, hl_from_inchi)  # s keeps whatever the call returns
d.FinishDrawing()
t = d.GetDrawingText()
# Optional SVG export:
#     with open("hl_from_inchi.svg", "w") as f:
#         f.write(t)

# Then with the ACS 1996 helper; from here on `t` holds this second SVG text.
t = Draw.MolToACS1996SVG(hl_from_inchi)
# Optional SVG export:
#     with open("hl_from_inchi_acs1996.svg", "w") as f:
#         f.write(t)

# Recorded failed attempts (kept as notes, not executed):
#     Draw.MolToSVG(hl_from_inchi)
#         -> ValueError: Bad Conformer Id
#     Following https://github.com/rdkit/rdkit/issues/4991:
#     rdDepictor.Compute2DCoords(hl_from_inchi)
#     Draw.MolToSVG(hl_from_inchi)
#         -> ValueError: Bad Conformer Id

# --- Compound table: computed weight vs. tabulated exact mass ---
compounds_file = "../Manufactured case/Compounds.tsv"
compounds_raw = pd.read_csv(compounds_file, sep="\t")
compounds = compounds_raw.set_index("Id")
# Earlier experiment (kept as a note, not executed):
#     compounds["Rdkit_molecular_weight"] = compounds.loc[compounds.index <= 1, ["InChI"]].apply(
#         lambda i: Descriptors.HeavyAtomMolWt(Chem.inchi.MolFromInchi(i)), axis=1
#     )


def _mol_wt_from_inchi(inchi_text):
    """Return ``Descriptors.MolWt`` of the molecule parsed from ``inchi_text``."""
    return Descriptors.MolWt(Chem.inchi.MolFromInchi(inchi_text))


# Only rows that have an "Exact mass" value get a computed weight; the
# index-aligned assignment leaves the remaining rows as NaN.
exact_mass_known = compounds["Exact mass"].notna()
inchs = compounds.loc[exact_mass_known, "InChI"]
compounds["Relative molecular weight"] = inchs.apply(_mol_wt_from_inchi)
# NOTE(review): Descriptors.MolWt is, per the RDKit docs, the average molecular
# weight, whereas the column it is compared with is named "Exact mass" — confirm
# that comparing the two (rather than using Descriptors.ExactMolWt) is intended.
compounds["Difference"] = compounds["Relative molecular weight"].sub(compounds["Exact mass"])

In [4]:
# Remove pandas' column and row display limits so frames are rendered in full.
for _display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(_display_option, None)

# Display the compounds whose "Difference" value is greater than 0.3.
compounds.loc[compounds["Difference"].gt(0.3)]

Unnamed: 0_level_0,Old id,Reported,Chemical name,Chemical class,InChI,Exact mass,Relative molecular weight,Difference
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
7,17,1,Ancistroealaine A,Naphthalene-Isoquinoline alkaloid,InChI=1S/C26H29NO4/c1-14-10-18-17(8-9-20(28-4)...,419.209,419.521,0.312
10,21,1,Baleabuxidine,Cycloartane-type triterpene,InChI=1S/C30H50N2O4/c1-17(2)25(36)31-22-11-12-...,502.3771,502.74,0.3629
23,36,1,Cycloamanide A,Cyclic peptide,InChI=1S/C33H42N6O6/c1-20(2)28-33(45)37-25(18-...,618.316,618.735,0.419
72,87,1,Popisonine,Bisbenzylisoquinoline alkaloid,InChI=1S/C37H42N2O6/c1-38-14-12-25-19-32(40)35...,610.3,610.751,0.451


In [5]:
# NOTE: `os` is already imported in the notebook's first cell; this repeated
# import is redundant but harmless.
import os

# NOTE(review): this value looks like a placeholder rather than a real
# location — confirm or replace it before anything uses `new_dir`.
new_dir = '/path/to/new/directory'
