✅ Étape 1 : Import des bibliothèques

In [1]:
import os
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Descriptors
from rdkit.Chem import Draw
from openbabel import pybel
from tqdm.notebook import tqdm


✅ Étape 2 : Chargement de la base de données



In [3]:
df = pd.read_csv("smiles_select_GDB9.csv")  # Le fichier doit contenir une colonne "SMILES"
df["Molecule"] = [Chem.MolFromSmiles(sm) for sm in df["SMILES"]]
df = df.dropna(subset=["Molecule"]).reset_index(drop=True)


In [4]:
df

Unnamed: 0,mol_id,SMILES,formula,pce_pcbm(%),pce_pcdtbt(%),sas1(%),pce_pcbm_sas(%),pce_pcdtbt_sas(%),type,Molecule
0,977,O=O,O2,8.661863,0.0,3.373829,5.288034,-3.373829,accepteur,<rdkit.Chem.rdchem.Mol object at 0x7f6d4670e5e0>
1,1712,O=C1N=c2c(=C1CNc1ccc(cc1)S(=O)(=O)Nc1ccccn1)c1...,C21H15N5O3S2,8.503739,0.0,7.227077,1.276662,-7.227077,accepteur,<rdkit.Chem.rdchem.Mol object at 0x7f6d4670e650>
2,4550,O=NC1=c2cc(N(O)O)c3c(c2=NC1=O)CCCC3,C12H11N3O4,0.0,16.886417,6.955664,-6.955664,9.930753,donneur,<rdkit.Chem.rdchem.Mol object at 0x7f6d4670e6c0>
3,7801,Oc1ccc(cc1)O.O=C1C=CC(=O)C=C1,C12H10O4,7.447933,0.0,6.864235,0.583698,-6.864235,accepteur,<rdkit.Chem.rdchem.Mol object at 0x7f6d4670e730>
4,11029,[O-]C(=O)[O-].[Mg+2],CMgO3,0.0,8.546781,4.86129,-4.86129,3.685491,donneur,<rdkit.Chem.rdchem.Mol object at 0x7f6d4670e7a0>
5,17851,CC[N-]c1nc(N[N+]#N)nc(n1)NC(C)C,C8H14N8,3.984198,36.108642,7.622432,-3.638234,28.48621,donneur,<rdkit.Chem.rdchem.Mol object at 0x7f6d4670e810>
6,20778,N#[N+]Nc1nc([N-]C(C)C)nc(n1)SC,C7H11N7S,0.0,25.412759,7.206303,-7.206303,18.206457,donneur,<rdkit.Chem.rdchem.Mol object at 0x7f6d4670e880>


✅ Étape 3 : Génération des structures 3D (.pdbqt) pour le docking

In [5]:
def generate_3d_structure(smiles, mol_id, out_dir="ligands_pdbqt"):
    os.makedirs(out_dir, exist_ok=True)
    mol = Chem.MolFromSmiles(smiles)
    mol = Chem.AddHs(mol)
    AllChem.EmbedMolecule(mol)
    AllChem.UFFOptimizeMolecule(mol)
    tmp_sdf = os.path.join(out_dir, f"{mol_id}.sdf")
    tmp_pdbqt = os.path.join(out_dir, f"{mol_id}.pdbqt")

    writer = Chem.SDWriter(tmp_sdf)
    writer.write(mol)
    writer.close()

    # Convert SDF to PDBQT using Open Babel
    mol_ob = next(pybel.readfile("sdf", tmp_sdf))
    mol_ob.addh()
    mol_ob.make3D()
    mol_ob.write("pdbqt", tmp_pdbqt, overwrite=True)

    return tmp_pdbqt


✅ Étape 5 : Lancement du docking avec Smina (ou vina)

In [6]:
def run_smina_docking(ligand_pdbqt, output_dir="docking_results", receptor="receptor.pdbqt"):
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, os.path.basename(ligand_pdbqt).replace(".pdbqt", "_out.pdbqt"))
    log_file = os.path.join(output_dir, os.path.basename(ligand_pdbqt).replace(".pdbqt", ".log"))

    command = f"""
    smina --receptor {receptor} --ligand {ligand_pdbqt} \
    --center_x 0 --center_y 0 --center_z 0 --size_x 20 --size_y 20 --size_z 20 \
    --out {output_file} --log {log_file} --exhaustiveness 8
    """
    os.system(command)
    
    with open(log_file, 'r') as f:
        for line in f:
            if "Affinity:" in line:
                score = float(line.split()[1])
                return score
    return None



✅ Étape 6 : Boucle sur les molécules

In [10]:
results = []

for idx, row in tqdm(df.iterrows(), total=len(df)):
    ligand_file = generate_3d_structure(row["SMILES"], mol_id=row["mol_id"])
    score = run_smina_docking(ligand_file, receptor="receptor.pdbqt")  # Receptor fictif !
    results.append({
        "ID": row["mol_id"],
        "SMILES": row["SMILES"],
        "Affinity_kcal_mol": score
    })

docking_df = pd.DataFrame(results)
docking_df.to_csv("results_smina_docking.csv", index=False)


  0%|          | 0/7 [00:00<?, ?it/s]

   _______  _______ _________ _        _______ 
  (  ____ \(       )\__   __/( (    /|(  ___  )
  | (    \/| () () |   ) (   |  \  ( || (   ) |
  | (_____ | || || |   | |   |   \ | || (___) |
  (_____  )| |(_)| |   | |   | (\ \) ||  ___  |
        ) || |   | |   | |   | | \   || (   ) |
  /\____) || )   ( |___) (___| )  \  || )   ( |
  \_______)|/     \|\_______/|/    )_)|/     \|


smina is based off AutoDock Vina. Please cite appropriately.

Weights      Terms
-0.035579    gauss(o=0,_w=0.5,_c=8)
-0.005156    gauss(o=3,_w=2,_c=8)
0.840245     repulsion(o=0,_c=8)
-0.035069    hydrophobic(g=0.5,_b=1.5,_c=8)
-0.587439    non_dir_h_bond(g=-0.7,_b=0,_c=8)
1.923        num_tors_div





Error: could not open "receptor.pdbqt" for reading.


   _______  _______ _________ _        _______ 
  (  ____ \(       )\__   __/( (    /|(  ___  )
  | (    \/| () () |   ) (   |  \  ( || (   ) |
  | (_____ | || || |   | |   |   \ | || (___) |
  (_____  )| |(_)| |   | |   | (\ \) ||  ___  |
        ) || |   | |   | |   | | \   || (   ) |
  /\____) || )   ( |___) (___| )  \  || )   ( |
  \_______)|/     \|\_______/|/    )_)|/     \|


smina is based off AutoDock Vina. Please cite appropriately.

Weights      Terms
-0.035579    gauss(o=0,_w=0.5,_c=8)
-0.005156    gauss(o=3,_w=2,_c=8)
0.840245     repulsion(o=0,_c=8)
-0.035069    hydrophobic(g=0.5,_b=1.5,_c=8)
-0.587439    non_dir_h_bond(g=-0.7,_b=0,_c=8)
1.923        num_tors_div

   _______  _______ _________ _        _______ 
  (  ____ \(       )\__   __/( (    /|(  ___  )
  | (    \/| () () |   ) (   |  \  ( || (   ) |
  | (_____ | || || |   | |   |   \ | || (___) |
  (_____  )| |(_)| |   | |   | (\ \) ||  ___  |
        ) || |   | |   | |   | | \   || (   ) |
  /\____) || )   ( |___) 



Error: could not open "receptor.pdbqt" for reading.


Error: could not open "receptor.pdbqt" for reading.


   _______  _______ _________ _        _______ 
  (  ____ \(       )\__   __/( (    /|(  ___  )
  | (    \/| () () |   ) (   |  \  ( || (   ) |
  | (_____ | || || |   | |   |   \ | || (___) |
  (_____  )| |(_)| |   | |   | (\ \) ||  ___  |
        ) || |   | |   | |   | | \   || (   ) |
  /\____) || )   ( |___) (___| )  \  || )   ( |
  \_______)|/     \|\_______/|/    )_)|/     \|


smina is based off AutoDock Vina. Please cite appropriately.

Weights      Terms
-0.035579    gauss(o=0,_w=0.5,_c=8)
-0.005156    gauss(o=3,_w=2,_c=8)
0.840245     repulsion(o=0,_c=8)
-0.035069    hydrophobic(g=0.5,_b=1.5,_c=8)
-0.587439    non_dir_h_bond(g=-0.7,_b=0,_c=8)
1.923        num_tors_div

   _______  _______ _________ _        _______ 
  (  ____ \(       )\__   __/( (    /|(  ___  )
  | (    \/| () () |   ) (   |  \  ( || (   ) |
  | (_____ | || || |   | |   |   \ | || (___) |
  (_____  )| |(_)| |   | |   | (\ \) ||  ___  |
        ) || |   | |   | |   | | \   || (   ) |
  /\____) || )   ( |___) 



Error: could not open "receptor.pdbqt" for reading.
[17:55:29] UFFTYPER: Unrecognized charge state for atom: 0
[17:55:29] UFFTYPER: Unrecognized charge state for atom: 4


Error: could not open "receptor.pdbqt" for reading.


Error: could not open "receptor.pdbqt" for reading.


   _______  _______ _________ _        _______ 
  (  ____ \(       )\__   __/( (    /|(  ___  )
  | (    \/| () () |   ) (   |  \  ( || (   ) |
  | (_____ | || || |   | |   |   \ | || (___) |
  (_____  )| |(_)| |   | |   | (\ \) ||  ___  |
        ) || |   | |   | |   | | \   || (   ) |
  /\____) || )   ( |___) (___| )  \  || )   ( |
  \_______)|/     \|\_______/|/    )_)|/     \|


smina is based off AutoDock Vina. Please cite appropriately.

Weights      Terms
-0.035579    gauss(o=0,_w=0.5,_c=8)
-0.005156    gauss(o=3,_w=2,_c=8)
0.840245     repulsion(o=0,_c=8)
-0.035069    hydrophobic(g=0.5,_b=1.5,_c=8)
-0.587439    non_dir_h_bond(g=-0.7,_b=0,_c=8)
1.923        num_tors_div





Error: could not open "receptor.pdbqt" for reading.


✅ Étape 7 : Filtrage des bons candidats

In [11]:
candidates = docking_df[docking_df["Affinity_kcal_mol"] < -6.0]
print("Top candidate molecules with medicinal potential:\n", candidates.head())


Top candidate molecules with medicinal potential:
 Empty DataFrame
Columns: [ID, SMILES, Affinity_kcal_mol]
Index: []
