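# SMILES to PDBQT: embed each SMILES string in 3D with RDKit and write one structure file per compound (PDB-format text saved with a .pdbqt extension)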

In [6]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import MolToPDBBlock
import os
import traceback

# Batch-convert the SMILES strings listed in a CSV into per-compound 3D
# structure files, logging every row that fails and keeping only the rows
# that converted successfully.
csv_path = r"C:\Users\ichan\Desktop\data\train.csv"
# NOTE: despite the ".csv" suffix, this path is used as a directory below
# (it is created with os.makedirs and files are joined onto it).
output_folder = r"C:\Users\ichan\AppData\Roaming\JetBrains\DataSpell2025.1\projects\workspace\output_folder\output.csv"

df = pd.read_csv(csv_path)

os.makedirs(output_folder, exist_ok=True)

log_file = os.path.join(output_folder, "error_log.txt")

# Index labels of the rows that were converted without error.  Collecting
# labels and selecting once at the end replaces DataFrame.append, which was
# removed in pandas 2.0 and was quadratic when called per row.
successful_indices = []

with open(log_file, "w", encoding="utf-8") as log:
    for index, row in df.iterrows():
        smiles = row["Canonical_Smiles"]
        compound_name = row["ID"]

        try:
            # "index + 2" maps the 0-based row index to the spreadsheet line
            # number (1-based, plus one header line).
            if pd.isna(smiles):
                raise ValueError(f"Invalid SMILES in row {index + 2}")

            # Parse once; MolFromSmiles returns None for unparsable input.
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                raise ValueError(f"Invalid SMILES in row {index + 2}")

            mol = Chem.AddHs(mol)

            # EmbedMolecule returns the new conformer id, or -1 when no 3D
            # conformer could be generated.  Without this check a molecule
            # lacking real coordinates would be written out as a "success".
            if AllChem.EmbedMolecule(mol) == -1:
                raise ValueError(
                    f"3D embedding failed for SMILES in row {index + 2}"
                )

            # NOTE(review): the content written here is a PDB block (no
            # partial charges or AutoDock atom types), only the file
            # extension is ".pdbqt".  A real PDBQT needs a dedicated
            # converter -- confirm downstream tools accept this.
            pdbqt_filename = os.path.join(output_folder, f"{compound_name}.pdbqt")
            with open(pdbqt_filename, "w") as pdbqt_file:
                pdbqt_file.write(MolToPDBBlock(mol))

            successful_indices.append(index)

        except Exception as e:
            # Best-effort batch run: record the failure and continue with
            # the next row instead of aborting the whole conversion.
            log.write(f"Error processing row {index + 2}: {str(e)}\n")
            traceback.print_exc(file=log)


success_df = df.loc[successful_indices].reset_index(drop=True)

# NOTE(review): this overwrites the *input* CSV with only the successfully
# converted rows (original behaviour, kept as-is).  Failed rows are lost
# from train.csv -- confirm this is intended or point it at a new file.
success_df.to_csv(csv_path, index=False)

print("Conversion complete. Check the error log for details.")


ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject