# Preparation of Ligands

### Converting to SDF

In [2]:
import os
import pandas as pd
import re
from rdkit import Chem
from openbabel import pybel

In [4]:
def read_csv(file_path):
    try:
        df = pd.read_csv(file_path, encoding="utf-8")
    except UnicodeDecodeError:
        df = pd.read_csv(file_path, encoding="latin1")
    df["SMILES"] = df["SMILES"].astype(str).str.strip()
    return df

In [5]:
def clean_smiles(smiles):
    smiles = re.sub(r'[^\x00-\x7F]+', '', smiles)  # Remove corrupt characters
    smiles = smiles.replace("\\", "")  # Fix incorrect escape sequences
    return smiles.strip()

In [6]:
def is_valid_smiles(smiles):
    mol = Chem.MolFromSmiles(smiles)
    return mol is not None

In [7]:
def convert_to_sdf(df, output_folder="ligands"):
    os.makedirs(output_folder, exist_ok=True)
    
    for idx, row in df[df["Valid"] == True].iterrows():
        name = row["Name"].replace(" ", "_")
        smiles = row["SMILES"]
        
        try:
            mol = pybel.readstring("smi", smiles)
            mol.make3D(forcefield="uff")
            mol.localopt(forcefield="uff", steps=200)
            mol.write("sdf", f"{output_folder}/{name}.sdf", overwrite=True)
            print(f" Saved: {name}.sdf")
        except Exception as e:
            print(f" Error processing {name}: {e}")

In [8]:
file_path = "compounds.csv"
df = read_csv(file_path)
df["SMILES"] = df["SMILES"].apply(clean_smiles)
df["Valid"] = df["SMILES"].apply(is_valid_smiles)

invalid_smiles = df[df["Valid"] == False]
if not invalid_smiles.empty:
    print("The following SMILES are invalid:")
    print(invalid_smiles[["Name", "SMILES"]])

convert_to_sdf(df)

 Saved: Protodioscin.sdf
 Saved: Prototribestin.sdf
 Saved: Dioscin.sdf
 Saved: Tribulosin.sdf
 Saved: Rutin.sdf
 Saved: kaempferol.sdf
 Saved: kaempferol-3-glucoside.sdf
 Saved: Kaempferol-3-rutinoside.sdf
 Saved: Tribuloside.sdf
 Saved: quercetin.sdf
 Saved: Isorhamnetin.sdf
 Saved: Flavonols.sdf
 Saved: Terrestribisamide.sdf
 Saved: Pseudoprotodioscin.sdf
 Saved: Quercetin_3-gentiobioside.sdf
 Saved: pyrrolidine-2,5-dione.sdf
 Saved: Terrestribisamide.sdf
 Saved: Tribulusterine.sdf
 Saved: (25R)-Spirost-4-ene-3,12-dione.sdf
 Saved: Terrestrosin_K.sdf
 Saved: Terrestrosin_G.sdf
 Saved: Tribulusamide_B.sdf
 Saved: terrestrosin_d.sdf
 Saved: ferulic_acid.sdf
 Saved: Vanillin.sdf
