In [2]:
from molmap import loadmap

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole
#IPythonConsole.ipython_useSVG = True
import numpy as np
import pandas as pd
from tqdm import tqdm
from collections import defaultdict

In [3]:
def sort_dict(x):
    """Return a new dict holding the items of ``x`` ordered by value, largest first.

    Items with equal values keep the relative order they had in ``x``.
    """
    ranked = sorted(x.items(), key=lambda pair: pair[1], reverse=True)
    return dict(ranked)

def count_place(inh, parameter):
    """Count how often each distinct value occurs in column ``parameter`` of ``inh``.

    Returns a dict that maps every value from ``inh[parameter].unique()`` to the
    number of rows comparing equal to it, ordered by ``sort_dict`` (highest
    count first).
    """
    column = inh[parameter]
    tally = {value: sum(column == value) for value in column.unique()}
    return sort_dict(tally)

In [21]:
# Load the raw excipient table from the source directory.
df = pd.read_csv(
    filepath_or_buffer='./source/excipients.csv',
    encoding='utf-8',
)

In [22]:
# Expose the 'Name' column under the key 'drug' and keep only the two columns
# that the following cells work with.
df = df.assign(drug=df['Name'])[['drug','smiles']]

In [23]:
from molmap import loadmap

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns

from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit import RDLogger
#IPythonConsole.ipython_useSVG = True
import numpy as np
import pandas as pd
from tqdm import tqdm
from openbabel import pybel
# Disable the RDKit log channels matching 'rdApp.*' before any SMILES are parsed.
RDLogger.DisableLog('rdApp.*')
def clean_and_standardize(smiles,ph=7.4,iso=False):
    """Reduce a SMILES string to its largest fragment and return two SMILES for it.

    Parameters
    ----------
    smiles
        Input SMILES, handed to ``Chem.MolFromSmiles``.
    ph
        pH value forwarded to Open Babel's ``OBMol.AddHydrogens``.
    iso
        Forwarded as ``isomericSmiles`` to every ``Chem.MolToSmiles`` call.

    Returns
    -------
    tuple
        ``(adjusted_smiles, uncharge_smiles)``: the SMILES written by Open Babel
        after the pH-dependent hydrogen step, and the RDKit canonical SMILES of
        the fragment after ``rdMolStandardize.Uncharger``. ``(None, None)`` is
        returned when the input cannot be parsed, when no fragment is found, or
        when any step raises (the exception is printed, not propagated).
    """
    failed = (None, None)

    def as_smiles(mol):
        # One place for the RDKit writer settings shared by both conversions.
        return Chem.MolToSmiles(mol, isomericSmiles=iso, canonical=True)

    try:
        parsed = Chem.MolFromSmiles(smiles)
        if parsed is None:
            # RDKit reports an unparsable SMILES by returning None.
            return failed

        # Drop salts and other extra fragments: keep the fragment with the most atoms.
        pieces = Chem.GetMolFrags(parsed, asMols=True)
        if len(pieces) == 0:
            return failed
        parent = max(pieces, key=lambda frag: frag.GetNumAtoms())

        # Neutral form: run the RDKit uncharger, then write canonical SMILES.
        neutral = rdMolStandardize.Uncharger().uncharge(parent)
        neutral_smiles = as_smiles(neutral)

        # pH-adjusted form: pass the (not uncharged) fragment through Open Babel.
        ob_parent = pybel.readstring("smi", as_smiles(parent))
        # Positional arguments per the Open Babel API: polaronly, correctForPH, pH.
        ob_parent.OBMol.AddHydrogens(False, True, ph)
        ph_smiles = ob_parent.write("smi").strip()

        return ph_smiles, neutral_smiles

    except Exception as e:
        print(f"Error processing SMILES {smiles}: {e}")
        return failed

In [24]:
# Replace each SMILES with the second value returned by clean_and_standardize
# (the uncharged canonical form); entries that fail become None.
df['smiles'] = list(map(lambda smi: clean_and_standardize(smi)[1], df['smiles']))

In [25]:
# Discard rows with any missing value (e.g. SMILES that failed cleaning),
# then keep the two working columns.
df = df.dropna(axis=0, how='any')[['drug','smiles']]

In [27]:
# Write the excipient table to the working directory without the index column.
df.to_csv(path_or_buf='excipients.csv', index=False)

In [26]:
# Display the current `df` as this cell's output.
df

Unnamed: 0,drug,smiles
0,Tween 20,CCCCCCCCCCCC(=O)OCC(C)OC1OC(C(O)OCCOCCOCCOCCOC...
1,Tween 40,CCCCCCCCCCCCCCCCC(=O)OCC(C)OC1OC(C(O)OCCOCCOCC...
2,Tween 60,CCCCCCCCCCCCCCCCCCC(=O)OCC(C)OC1OC(C(O)OCCOCCO...
3,Tween 80,CCCCCCCCC=CCCCCCCC(=O)OCC(C)OC1OC(C(O)OCCOCCOC...
4,Span 20,CCCCCCCCCCCC(=O)OC1OCC(O)C(O)C1O
5,Span 40,CCCCCCCCCCCCCCCCC(=O)OC1OCC(O)C(O)C1O
6,Span 60,CCCCCCCCCCCCCCCCCCC(=O)OC1OCC(O)C(O)C1O
7,Span 80,CCCCCCCCC=CCCCCCCC(=O)OC1OCC(O)C(O)C1O
8,Sucrose Monolaurate,CCCCCCCCCCCC(=O)OC1C(CO)OC(OC2OC(CO)C(O)C(O)C2...
9,Sucrose Monopalmitate,CCCCCCCCCCCCCCCCC(=O)OC1C(CO)OC(OC2OC(CO)C(O)C...


In [4]:
# Load the raw competitive-compound table from the source directory.
df = pd.read_csv(
    filepath_or_buffer='./source/competitive.csv',
    encoding='utf-8',
)

In [5]:
# Keep only the compound name and its SMILES.
df = df.loc[:, ['drug','smiles']]

In [11]:
# Show the rows whose 'drug' is one of the three listed compounds.
df[df['drug'].isin(['Nicardipine','Tariquidar','Elacridar'])]

Unnamed: 0,drug,smiles
2,Nicardipine,CC1=C(C(C(=C(N1)C)C(=O)OCCN(C)CC2=CC=CC=C2)C3=...
7,Tariquidar,COC1=C(C=C2CN(CCC2=C1)CCC3=CC=C(C=C3)NC(=O)C4=...
12,Elacridar,COC1=CC=CC2=C1NC1=C(C=CC=C1C(=O)NC1=CC=C(CCN3C...


In [8]:
# Subset of `df` without the listed compounds.
sub = df[
    ~df['drug'].isin([
        'Doxorubicin',
        'Daunorubicin',
        'Elacridar',
        'Tariquidar',
        'Nicardipine',
        'Kaempferide',
        'Andrographolide',
    ])
]

In [9]:
# Display the filtered subset `sub` as this cell's output.
sub

Unnamed: 0,drug,smiles
0,Morphine,CN1CC[C@]23[C@@H]4[C@H]1CC5=C2C(=C(C=C5)O)O[C@...
1,Vinblastine,CC[C@@]1(C[C@H]2C[C@@](C3=C(CCN(C2)C1)C4=CC=CC...
3,Hoechst_33342,CCOC1=CC=C(C=C1)C2=NC3=C(N2)C=C(C=C3)C4=NC5=C(...
4,Paclitaxel,CC1=C2[C@H](C(=O)[C@@]3([C@H](C[C@@H]4[C@]([C@...
5,Rhodamine_123,COC(=O)C1=CC=CC=C1C2=C3C=CC(=[NH2+])C=C3OC4=C2...
6,Verapamil,CC(C)C(CCCN(C)CCC1=CC(=C(C=C1)OC)OC)(C#N)C2=CC...
9,Valproic_acid,CCCC(CCC)C(=O)O
11,Colchicine,COC1=CC2=C(C(OC)=C1OC)C1=CC=C(OC)C(=O)C=C1[C@H...


In [None]:
# TODO: unfinished cell. The assignment below had no right-hand side, which is a
# SyntaxError and stops "Restart Kernel -> Run All" at this point. It is commented
# out until the intended value is supplied; nothing else visible here reads `inh`
# at module level.
# inh =

In [41]:
# Remove the two listed compounds from `df`.
df = df[~df['drug'].isin(['Kaempferide','Andrographolide'])]

In [42]:
# Replace each SMILES with the second value returned by clean_and_standardize
# (the uncharged canonical form); entries that fail become None.
df['smiles'] = list(map(lambda smi: clean_and_standardize(smi)[1], df['smiles']))

In [44]:
# Write the competitive-compound table to the working directory without the index column.
df.to_csv(path_or_buf='competitive.csv', index=False)