In [1]:
#Import libraries 
import requests# for web requests
import json #to work with json
open('Passed.txt', 'w').close()#To claear


In [2]:
json_url = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastsimilarity_2d/smiles/C1=NC2=C(N1)C(=O)N=C(N2)N/cids/json?Threshold=95&MaxRecords=100'

In [3]:

try:
    # Fetch the JSON content from the URL
    response = requests.get(json_url)
    response.raise_for_status()  # Raise an error for bad responses (4xx or 5xx)

    # Parse the JSON content
    data = response.json()

except requests.exceptions.RequestException as e:
    print(f"Error fetching JSON from {json_url}: {e}")

except json.JSONDecodeError as e:
    print(f"Error decoding JSON from {json_url}: {e}")

In [4]:
def check_lipinski_rule(pubchem_cid):
    # Define Lipinski's Rule of 5 criteria
    max_molecular_weight = 500
    max_logp = 5
    max_hydrogen_bond_donors = 5
    max_hydrogen_bond_acceptors = 10
    pubchem_api_url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{pubchem_cid}/property/CanonicalSMILES,MolecularWeight,XLogP,HBondDonorCount,HBondAcceptorCount/JSON"
    
    try:
        # Retrieve compound information from PubChem
        response = requests.get(pubchem_api_url)
        data = response.json()

        # Extract relevant properties
        molecular_weight = float(data['PropertyTable']['Properties'][0]['MolecularWeight'])
        logp = float(data['PropertyTable']['Properties'][0]['XLogP'])
        hydrogen_bond_donors = int(data['PropertyTable']['Properties'][0]['HBondDonorCount'])
        hydrogen_bond_acceptors = int(data['PropertyTable']['Properties'][0]['HBondAcceptorCount'])

        # Check Lipinski's Rule of 5
        if (molecular_weight <= max_molecular_weight and
            logp <= max_logp and
            hydrogen_bond_donors <= max_hydrogen_bond_donors and
            hydrogen_bond_acceptors <= max_hydrogen_bond_acceptors):
            print(f"The compound with PubChem CID {pubchem_cid} obeys Lipinski's Rule of 5.")
            with open('Passed.txt', 'a') as file:
                file.write(str(pubchem_cid)+"\n")
        else:
            print(f"The compound with PubChem CID {pubchem_cid} does not obey Lipinski's Rule of 5.")
    except Exception as e:
        print(f"Error: {e}")

In [5]:
# Extract CID values one by one
if 'IdentifierList' in data and 'CID' in data['IdentifierList']:
    cid_list = data['IdentifierList']['CID']
    for cid in cid_list:
        pubchem_cid = cid
        check_lipinski_rule(pubchem_cid)
else:
    print("Invalid JSON format or missing CID values.")

The compound with PubChem CID 135398634 obeys Lipinski's Rule of 5.
The compound with PubChem CID 135398638 obeys Lipinski's Rule of 5.
The compound with PubChem CID 9679 obeys Lipinski's Rule of 5.
The compound with PubChem CID 790 obeys Lipinski's Rule of 5.
Error: 'XLogP'
The compound with PubChem CID 70315 obeys Lipinski's Rule of 5.
The compound with PubChem CID 135426867 obeys Lipinski's Rule of 5.
The compound with PubChem CID 135413991 obeys Lipinski's Rule of 5.
The compound with PubChem CID 76292 obeys Lipinski's Rule of 5.
Error: 'XLogP'
The compound with PubChem CID 70765 obeys Lipinski's Rule of 5.
The compound with PubChem CID 135421879 obeys Lipinski's Rule of 5.
The compound with PubChem CID 135418794 obeys Lipinski's Rule of 5.
The compound with PubChem CID 247638 obeys Lipinski's Rule of 5.
Error: 'XLogP'
The compound with PubChem CID 87841 obeys Lipinski's Rule of 5.
The compound with PubChem CID 81485 obeys Lipinski's Rule of 5.
Error: 'XLogP'
The compound with PubC

In [1]:
import os
import requests

def download_molecule(pubchem_id, output_folder):
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{pubchem_id}/SDF"
    response = requests.get(url)
    if response.status_code == 200:
        with open(os.path.join(output_folder, f"{pubchem_id}.sdf"), "wb") as f:
            f.write(response.content)
        print(f"Downloaded molecule {pubchem_id}")
    else:
        print(f"Failed to download molecule {pubchem_id}")

def main():
    # Create the output folder if it doesn't exist
    output_folder = "ligands"
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Read PubChem IDs from the text file
    with open("passed.txt", "r") as file:
        pubchem_ids = file.readlines()
        pubchem_ids = [cid.strip() for cid in pubchem_ids if cid.strip()]  # remove empty lines and trailing whitespaces

    # Download molecules for each PubChem ID
    for pubchem_id in pubchem_ids:
        download_molecule(pubchem_id, output_folder)

if __name__ == "__main__":
    main()


Downloaded molecule 135398634
Downloaded molecule 135398638
Downloaded molecule 9679
Downloaded molecule 790
Downloaded molecule 70315
Downloaded molecule 135426867
Downloaded molecule 135413991
Downloaded molecule 76292
Downloaded molecule 70765
Downloaded molecule 135421879
Downloaded molecule 135418794
Downloaded molecule 247638
Downloaded molecule 87841
Downloaded molecule 81485
Downloaded molecule 136438278
Downloaded molecule 135421792
Downloaded molecule 192963
Downloaded molecule 141764386
Downloaded molecule 137183582
Downloaded molecule 136703207
Downloaded molecule 136335066
Downloaded molecule 136176309
Downloaded molecule 136166600
Downloaded molecule 135480515
Downloaded molecule 135457123
Downloaded molecule 135418579
Downloaded molecule 129678271
Downloaded molecule 129665671
Downloaded molecule 129636544
Downloaded molecule 87210822
Downloaded molecule 56991968
Downloaded molecule 10701851
Downloaded molecule 116262
Downloaded molecule 168007084
Downloaded molecule 154

In [3]:
!pip install rdkit 

Collecting rdkit
  Obtaining dependency information for rdkit from https://files.pythonhosted.org/packages/28/81/2f819fd40f42099e287b6d53dad38132415bf1b3469cf4e0f0ffee0743c0/rdkit-2023.9.5-cp311-cp311-win_amd64.whl.metadata
  Downloading rdkit-2023.9.5-cp311-cp311-win_amd64.whl.metadata (4.0 kB)
Downloading rdkit-2023.9.5-cp311-cp311-win_amd64.whl (21.1 MB)
   ---------------------------------------- 0.0/21.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/21.1 MB ? eta -:--:--
   ---------------------------------------- 0.1/21.1 MB 812.7 kB/s eta 0:00:26
   ---------------------------------------- 0.2/21.1 MB 1.1 MB/s eta 0:00:19
    --------------------------------------- 0.3/21.1 MB 1.5 MB/s eta 0:00:14
    --------------------------------------- 0.4/21.1 MB 1.9 MB/s eta 0:00:12
    --------------------------------------- 0.5/21.1 MB 1.9 MB/s eta 0:00:11
   - -------------------------------------- 0.6/21.1 MB 2.0 MB/s eta 0:00:11
   - -------------------------------

In [1]:
"""import os
from rdkit import Chem
from rdkit.Chem import AllChem

def sdf_to_pdb(sdf_file, pdb_folder):
    suppl = Chem.SDMolSupplier(sdf_file)
    
    for i, mol in enumerate(suppl):
        if mol is None:
            continue
        mol_name = os.path.splitext(os.path.basename(sdf_file))[0] + f"_{i}.pdb"
        pdb_file = os.path.join(pdb_folder, mol_name)
        AllChem.EmbedMolecule(mol)
        AllChem.MMFFOptimizeMolecule(mol)
        Chem.MolToPDBFile(mol, pdb_file)

def convert_ligands_to_pdb(ligands_folder, pdb_folder):
    if not os.path.exists(pdb_folder):
        os.makedirs(pdb_folder)
    
    for file_name in os.listdir(ligands_folder):
        if file_name.endswith(".sdf"):
            sdf_file = os.path.join(ligands_folder, file_name)
            sdf_to_pdb(sdf_file, pdb_folder)

# Example usage:
ligands_folder = "ligands"
pdb_folder = "pdb_ligands"
convert_ligands_to_pdb(ligands_folder, pdb_folder)
"""

[12:20:16] Molecule does not have explicit Hs. Consider calling AddHs()
[12:20:16] Molecule does not have explicit Hs. Consider calling AddHs()
[12:20:16] Molecule does not have explicit Hs. Consider calling AddHs()
[12:20:16] Molecule does not have explicit Hs. Consider calling AddHs()
[12:20:16] Molecule does not have explicit Hs. Consider calling AddHs()
[12:20:16] Molecule does not have explicit Hs. Consider calling AddHs()
[12:20:16] Molecule does not have explicit Hs. Consider calling AddHs()
[12:20:16] Molecule does not have explicit Hs. Consider calling AddHs()
[12:20:16] Molecule does not have explicit Hs. Consider calling AddHs()
[12:20:16] Molecule does not have explicit Hs. Consider calling AddHs()
[12:20:16] Molecule does not have explicit Hs. Consider calling AddHs()
[12:20:16] Molecule does not have explicit Hs. Consider calling AddHs()
[12:20:17] Molecule does not have explicit Hs. Consider calling AddHs()
[12:20:17] Molecule does not have explicit Hs. Consider calling 

KekulizeException: Can't kekulize mol.  Unkekulized atoms: 5 6 7

In [17]:
#!pip install pybel
!pip install wheel 
!pip install openbabel

Collecting openbabel
  Using cached openbabel-3.1.1.1.tar.gz (82 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: openbabel
  Building wheel for openbabel (setup.py): started
  Building wheel for openbabel (setup.py): finished with status 'error'
  Running setup.py clean for openbabel
Failed to build openbabel


  error: subprocess-exited-with-error
  
  python setup.py bdist_wheel did not run successfully.
  exit code: 1
  
  [15 lines of output]
  running bdist_wheel
  running build
  running build_ext
  Guessing Open Babel location:
  - include_dirs: ['C:\\Users\\preet\\anaconda3\\include', 'C:\\Users\\preet\\anaconda3\\Include', '/usr/local/include/openbabel3']
  - library_dirs: ['C:\\Users\\preet\\anaconda3\\libs', 'C:\\Users\\preet\\anaconda3', 'C:\\Users\\preet\\anaconda3\\PCbuild\\amd64', '/usr/local/lib']
  building 'openbabel._openbabel' extension
  swigging openbabel\openbabel-python.i to openbabel\openbabel-python_wrap.cpp
  swig.exe -python -c++ -small -O -templatereduce -naturalvar -IC:\Users\preet\anaconda3\include -IC:\Users\preet\anaconda3\Include -I/usr/local/include/openbabel3 -o openbabel\openbabel-python_wrap.cpp openbabel\openbabel-python.i
  
  Error: SWIG failed. Is Open Babel installed?
  You may need to manually specify the location of Open Babel include and library d

In [18]:
import pybel

def convert_sdf_to_pdbqt(input_sdf_file, output_pdbqt_file):
    # Open the SDF file
    with open(input_sdf_file, 'r') as f:
        # Read the molecules in the SDF file
        molecules = pybel.readfile("sdf", f)

        # Open the PDBQT file for writing
        with open(output_pdbqt_file, 'w') as pdbqt_file:
            # Convert each molecule and write to the output file
            for mol in molecules:
                # Add hydrogen atoms to the molecule (required for PDBQT format)
                mol.addh()
                # Write the molecule to the PDBQT file
                pdbqt_file.write(mol.write("pdbqt"))

if __name__ == "__main__":
    input_sdf_file = "sample.sdf"
    output_pdbqt_file = "output.pdbqt"
    convert_sdf_to_pdbqt(input_sdf_file, output_pdbqt_file)

'''
import pybel

# Open the SDF file
with open("sample.sdf", 'r') as f:
    # Read the molecules in the SDF file
    molecules = pybel.readfile("sdf", f)

    # Iterate over each molecule
    for mol in molecules:
        # Process each molecule here
        print(mol)

'''
'''def convert_sdf_to_pdbqt(input_sdf_file, output_pdbqt_file):
    # Open the SDF file
    molecules = pybel.readfile("sdf", input_sdf_file)
    for mol in readfile("sdf", "head.sdf"):

    # Open the PDBQT file for writing
    with open(output_pdbqt_file, 'w') as f:
        # Convert each molecule and write to the output file
        for mol in molecules:
            mol.addh()  # Add hydrogen atoms (required for PDBQT format)
            f.write(mol.write("pdbqt"))

if __name__ == "__main__":
    input_sdf_file = "sample.sdf"
    output_pdbqt_file = "output.pdbqt"
    convert_sdf_to_pdbqt(input_sdf_file, output_pdbqt_file)
'''

'def convert_sdf_to_pdbqt(input_sdf_file, output_pdbqt_file):\n    # Open the SDF file\n    molecules = pybel.readfile("sdf", input_sdf_file)\n    for mol in readfile("sdf", "head.sdf"):\n\n    # Open the PDBQT file for writing\n    with open(output_pdbqt_file, \'w\') as f:\n        # Convert each molecule and write to the output file\n        for mol in molecules:\n            mol.addh()  # Add hydrogen atoms (required for PDBQT format)\n            f.write(mol.write("pdbqt"))\n\nif __name__ == "__main__":\n    input_sdf_file = "sample.sdf"\n    output_pdbqt_file = "output.pdbqt"\n    convert_sdf_to_pdbqt(input_sdf_file, output_pdbqt_file)\n'