In [3]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import PandasTools

def sdf_to_csv(sdf_file, output_file, delimiter=','):
    """
    Convert an SDF file to a CSV file.

    The SDF is loaded into a pandas DataFrame with RDKit's PandasTools,
    every molecule is re-sanitized, rows whose molecule fails sanitization
    are removed, and the remaining property columns are written to
    ``output_file``. The 'ID' and 'ROMol' columns are not written.

    Args:
        sdf_file: Path of the SDF file to read.
        output_file: Path of the CSV file to write.
        delimiter: Field separator used in the CSV (default ',').

    Returns:
        None. A completion message and the list of SMILES that failed
        sanitization are printed.
    """
    # Convert to a pandas DataFrame
    df = PandasTools.LoadSDF(sdf_file)

    # Clean up and sanitize molecules; store SMILES info for failed sanitization.
    # NOTE(review): records that RDKit cannot parse while loading do not seem
    # to reach this loop (a logged run showed one RDKit sanitization error and
    # one missing row, yet an empty failure list) -- confirm against the
    # PandasTools.LoadSDF documentation if those records must be reported too.
    failed_smiles = []
    rows_to_drop = []

    # Only the molecule column is needed, so iterate over it directly instead
    # of materialising a full row Series per record with iterrows().
    molecules = df["ROMol"] if "ROMol" in df.columns else pd.Series(dtype=object)
    for index, mol in molecules.items():
        try:
            Chem.SanitizeMol(mol)
        except Exception:
            # Writing SMILES for a molecule that failed sanitization can fail
            # as well; record a placeholder instead of aborting the conversion.
            try:
                smiles = Chem.MolToSmiles(mol)
            except Exception:
                smiles = None
            failed_smiles.append(smiles)
            rows_to_drop.append(index)
            print(f"Failed to sanitize mol at index {index}")

    # Drop the failed rows, the ID column (reported as empty by the original
    # author) and the ROMol column, which is not needed in the CSV.
    # errors='ignore' keeps the conversion working when a column is absent.
    df = df.drop(rows_to_drop, axis=0)
    df = df.drop(columns=['ID', 'ROMol'], errors='ignore')

    # Save the DataFrame as CSV, index is not necessary here
    df.to_csv(output_file, sep=delimiter, index=False)

    print(f"Conversion complete! Output file: {output_file}")
    print(f"The SMILES that failed to sanitize are: {failed_smiles}")

# Generate a .csv so the building-block data can be handled with pandas
sdf_file = 'Enamine_Rush-Delivery_Building_Blocks-US_258685cmpd_20250409.sdf'
csv_file = 'Enamine_BBs.csv'

sdf_to_csv(sdf_file, csv_file)
# Read back exactly the file that was just written; reusing csv_file keeps
# the two paths from drifting apart if the output name is changed.
# NOTE(review): if pandas reports mixed column types on this read, consider
# passing an explicit dtype= mapping -- confirm which columns are affected.
output = pd.read_csv(csv_file)

# Check that the document was converted appropriately and there's available data
print(len(output))
print(output.head())

[22:57:09] Explicit valence for atom # 35 N greater than permitted
[22:57:09] ERROR: Could not sanitize molecule ending on line 5797057
[22:57:09] ERROR: Explicit valence for atom # 35 N greater than permitted


Conversion complete! Output file: Enamine_BBs.csv
The SMILES that failed to sanitize are: []
258684
    Catalog_ID                                         IUPAC Name  \
0  EN300-07843  2-(2-ethoxy-4-formylphenoxy)-N,N-dimethylpropa...   
1  EN300-04783  4-[2-(3,4-dichlorophenyl)-2-oxoethoxy]-3-metho...   
2  EN300-04785  2-[(1-methyl-1H-1,3-benzodiazol-2-yl)amino]eth...   
3  EN300-56168                     4-chloro-2-methoxybenzoic acid   
4  EN300-04309  3-[4-(acetyloxy)phenyl]-2-(1,3-dioxo-2,3-dihyd...   

           CAS  purity     MDLNUMBER   LogP  \
0  733030-83-6    95.0  MFCD04635872  1.479   
1  723333-45-7    95.0  MFCD03960564  3.816   
2   57262-39-2    95.0  MFCD01136088  1.339   
3   57479-70-6    95.0  MFCD00002532  2.346   
4  345585-35-5    95.0  MFCD03950738  1.957   

                                                URL avail_US_100mg  \
0  https://www.enaminestore.com/catalog/EN300-07843              X   
1  https://www.enaminestore.com/catalog/EN300-04783           

  output = pd.read_csv("Enamine_BBs.csv")
