In [1]:
from padelpy import from_smiles

In [2]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# padelpy/functions.py
# v.0.1.10
# Developed in 2021 by Travis Kessler <travis.j.kessler@gmail.com>
#
# Contains various functions commonly used with PaDEL-Descriptor
#

import warnings
import random
# stdlib. imports
from collections import OrderedDict
from csv import DictReader
from datetime import datetime
from os import remove
from re import IGNORECASE, compile
from time import sleep

# PaDELPy imports
from padelpy import padeldescriptor

# Names this module header advertises as its public API.
# NOTE(review): only `from_smiles` is defined in this cell; `from_mdl` and
# `from_sdf` are not visible here — confirm they are intentionally omitted.
__all__ = [
    "from_mdl",
    "from_smiles",
    "from_sdf",
]


def from_smiles(smiles,
                output_csv: str = None,
                descriptors: bool = True,
                fingerprints: bool = False,
                timeout: int = 60,
                maxruntime: int = -1,
                threads: int = -1
                ) -> OrderedDict:
    """ from_smiles: converts SMILES string to QSPR descriptors/fingerprints.

    The SMILES are written to a uniquely named temporary ``.smi`` file in the
    current working directory, PaDEL-Descriptor is run on it (up to three
    attempts), and the resulting CSV is parsed. The temporary ``.smi`` file is
    always removed; the CSV is removed too unless `output_csv` was supplied.

    Args:
        smiles (str, list): SMILES string for a given molecule, or a list of
            SMILES strings
        output_csv (str): if supplied, saves descriptors to this CSV file
        descriptors (bool): if `True`, calculates descriptors
        fingerprints (bool): if `True`, calculates fingerprints
        timeout (int): maximum time, in seconds, for conversion
        maxruntime (int): maximum running time per molecule in seconds. default=-1.
        threads (int): number of threads to use; defaults to -1 for max available

    Returns:
        list or OrderedDict: if multiple SMILES strings provided, returns a
            list of OrderedDicts, else single OrderedDict; each OrderedDict
            contains labels and values for each descriptor generated for each
            supplied molecule

    Raises:
        RuntimeError: if `smiles` is neither a string nor a list, if
            PaDEL-Descriptor still fails on the third attempt, or if the
            output does not contain a row for every supplied molecule
    """
    # Validate the input before touching the filesystem so that a bad call
    # does not leave a stray .smi file behind.
    if isinstance(smiles, str):
        single = True
        smiles_list = [smiles]
    elif isinstance(smiles, list):
        single = False
        smiles_list = smiles
    else:
        raise RuntimeError("Unknown input format for `smiles`: {}".format(
            type(smiles)
        ))

    # unit conversion for maximum running time per molecule
    # seconds -> milliseconds
    if maxruntime != -1:
        maxruntime = maxruntime * 1000

    # The random suffix keeps calls made within the same microsecond apart.
    # randint needs true integers: float bounds such as 1e8 are rejected by
    # newer Python versions.
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
    filename = timestamp + str(random.randint(10**8, 10**9))
    smi_path = "{}.smi".format(filename)

    save_csv = output_csv is not None
    if not save_csv:
        # The temporary CSV shares the randomised stem of the .smi file so
        # that concurrent calls cannot overwrite each other's output.
        output_csv = "{}.csv".format(filename)

    try:
        with open(smi_path, "w") as smi_file:
            # One SMILES per line; a single string is written unchanged.
            smi_file.write("\n".join(smiles_list))

        for attempt in range(3):
            try:
                padeldescriptor(
                    mol_dir=smi_path,
                    d_file=output_csv,
                    convert3d=True,
                    retain3d=True,
                    d_2d=descriptors,
                    d_3d=descriptors,
                    fingerprints=fingerprints,
                    sp_timeout=timeout,
                    retainorder=True,
                    maxruntime=maxruntime,
                    threads=threads
                )
                break
            except RuntimeError as exception:
                # Retry twice; on the third failure give up and re-raise.
                if attempt == 2:
                    if not save_csv:
                        # Short pause before the temporary CSV is deleted
                        # (kept from the original implementation).
                        sleep(0.5)
                    raise RuntimeError(exception) from exception

        with open(output_csv, "r", encoding="utf-8") as desc_file:
            rows = list(DictReader(desc_file))
    finally:
        # Runs on success, on failure and on KeyboardInterrupt alike, so no
        # temporary file outlives the call.
        try:
            remove(smi_path)
        except FileNotFoundError:
            # The .smi file was never created (e.g. open() itself failed).
            pass
        if not save_csv:
            try:
                remove(output_csv)
            except FileNotFoundError as e:
                # PaDEL never produced the file; report it without masking
                # whatever error is already propagating.
                warnings.warn(str(e), RuntimeWarning)

    if not single and len(rows) != len(smiles_list):
        raise RuntimeError("PaDEL-Descriptor failed on one or more mols." +
                           " Ensure the input structures are correct.")
    elif single and len(rows) == 0:
        raise RuntimeError(
            "PaDEL-Descriptor failed on {}.".format(smiles) +
            " Ensure input structure is correct."
        )

    for idx, row in enumerate(rows):
        if len(row) == 0:
            # Report the whole SMILES, not a single character of it, when a
            # lone string was passed in.
            failed = smiles if single else smiles_list[idx]
            raise RuntimeError(
                "PaDEL-Descriptor failed on {}.".format(failed) +
                " Ensure input structure is correct."
            )

    # The "Name" column is PaDEL's molecule label, not a descriptor.
    for row in rows:
        del row["Name"]

    if single:
        return rows[0]
    return rows



The cell below is the code that worked for calculating 1800 2D descriptors and 800 fingerprints with the PaDELPy library in Python, where the input is a list of canonical SMILES strings. 3D descriptors will not be generated because of the input type (SMILES); for 3D descriptors, separate code was written in a later cell.

In [None]:

import pandas as pd
# NOTE(review): this import rebinds `from_smiles` to the installed padelpy
# implementation, replacing the version defined in the earlier cell — confirm
# which one is intended.
from padelpy.functions import from_smiles

# Load the Excel file
excel_data = pd.read_excel('df.xlsx', sheet_name='Sheet1')

# Extract SMILES data from a specific column (e.g., 'SMILES').
# Blank cells are read as NaN (a float), which cannot be joined into the
# newline-separated .smi input, so drop them along with whitespace-only
# entries. The original row index is kept so results can be matched back
# to `excel_data`.
smiles_data_series = (
    excel_data['SMILES']
    .dropna()
    .astype(str)
    .str.strip()
    .loc[lambda s: s != ""]
)

# Convert the Pandas Series to a list of SMILES strings
smiles_data_list = smiles_data_series.tolist()
if not smiles_data_list:
    raise ValueError("No SMILES strings found in column 'SMILES' of df.xlsx")

# Run the code with the list of SMILES strings
result = from_smiles(smiles_data_list, output_csv="output.csv", descriptors=True, fingerprints=True, timeout=3000)


In [None]:
# to calculate 2-D and 3-D descriptors
# NOTE(review): this call passes neither `mol_dir` (input) nor `d_file`
# (output), unlike the later call on the 3D folder — confirm it is meant as
# an option example rather than a real run.
padeldescriptor(d_2d=True, d_3d=True)

In [None]:
import os

# Resolve the "3D" folder inside the current working directory.
cwd = os.getcwd()
folder_path = os.path.join(cwd, '3D')

if not (os.path.exists(folder_path) and os.path.isdir(folder_path)):
    print("The '3D' folder does not exist in the current working directory.")
else:
    # Keep only the directory entries whose name ends in ".sdf".
    sdf_files = list(filter(lambda name: name.endswith('.sdf'), os.listdir(folder_path)))

    # Print each SDF file name on its own line.
    for sdf_file in sdf_files:
        print(sdf_file)


The code below was used to calculate 3D descriptors from 3D SDF input files.

In [None]:
import os

# a path to a directory containing structural files can be supplied
# Build it from the working directory (the same "3D" folder the listing
# cells inspect) rather than a user-specific absolute path, so the notebook
# runs on any machine.
mol_dir_3d = os.path.join(os.getcwd(), '3D')
if not os.path.isdir(mol_dir_3d):
    raise FileNotFoundError(
        "The '3D' folder does not exist in the current working directory."
    )
padeldescriptor(mol_dir=mol_dir_3d, d_2d=False, d_3d=True, d_file='descriptors.csv')

In [20]:
import os

# Resolve the "3D" folder inside the current working directory.
cwd = os.getcwd()
folder_path = os.path.join(cwd, '3D')

if not (os.path.exists(folder_path) and os.path.isdir(folder_path)):
    print("The '3D' folder does not exist in the current working directory.")
else:
    # Keep only the directory entries whose name ends in ".sdf".
    sdf_files = list(filter(lambda name: name.endswith('.sdf'), os.listdir(folder_path)))

    # Print each SDF file name on its own line.
    for sdf_file in sdf_files:
        print(sdf_file)


OSI-027_3D.sdf
Uprosertib_3D.sdf
Doramapimod_3D.sdf
Lapatinib_3D.sdf
Palbociclib_3D.sdf
PF-4708671_3D.sdf
Nilotinib_3D.sdf
Luminespib_3D.sdf
Paclitaxel_3D.sdf
SCH772984_3D.sdf
JQ1_3D.sdf
Bortezomib_3D.sdf
Linsitinib_3D.sdf
KU-55933_3D.sdf
Axitini_3D.sdf
Cisplatin_3D.sdf
Navitoclax_3D.sdf
MK-2206_3D.sdf
Venetoclax_3D.sdf
Alpelisi_3D.sdf
Sapitinib_3D.sdf
Crizotinib_3D.sdf
Afatinib_3D.sdf
Alisertib_3D.sdf
NU7441_3D.sdf
MK-1775_3D.sdf
Temozolomide_3D.sdf
Taselisib_3D.sdf
AZD7762_3D.sdf
SB216763_3D.sdf
PD173074_3D.sdf
ZM447439_3D.sdf
Trametinib_3D.sdf
Ribociclib_3D.sdf
AZD6482_3D.sdf
