<a href="https://colab.research.google.com/github/Ivalomat/4-elements/blob/revisions/Docking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
import os
import subprocess

# Load the compound table; a "SMILES" column is required.
df = pd.read_csv("Enamine10k_scores.csv")
ligand_dir = "ligands"
os.makedirs(ligand_dir, exist_ok=True)

# IDs of ligands that could not be prepared, reported once the loop is done.
failed_ligands = []

for idx, smiles in df["SMILES"].items():
    mol_id = f"mol_{idx:04d}"

    # Empty cells are read as NaN (a float) and RDKit returns None for
    # SMILES it cannot parse; either case would otherwise abort the batch.
    mol = Chem.MolFromSmiles(smiles) if isinstance(smiles, str) else None
    if mol is None:
        print(f"{mol_id}: SMILES inválido ({smiles!r}); se omite.")
        failed_ligands.append(mol_id)
        continue

    mol = Chem.AddHs(mol)

    # EmbedMolecule returns -1 when no conformer could be generated; the
    # UFF optimization below needs a conformer to work on.
    if AllChem.EmbedMolecule(mol) == -1:
        print(f"{mol_id}: no se pudieron generar coordenadas 3D; se omite.")
        failed_ligands.append(mol_id)
        continue
    AllChem.UFFOptimizeMolecule(mol)

    sdf_path = os.path.join(ligand_dir, f"{mol_id}.sdf")
    pdbqt_path = os.path.join(ligand_dir, f"{mol_id}.pdbqt")

    # Save as SDF; close the writer even if writing fails so the file
    # handle is not leaked.
    writer = Chem.SDWriter(sdf_path)
    try:
        writer.write(mol)
    finally:
        writer.close()

    # Convert to PDBQT with Open Babel.
    # NOTE(review): --gen3d asks Open Babel to build 3D coordinates itself,
    # which presumably replaces the RDKit-optimized geometry written above;
    # confirm that this is intended.
    conversion = subprocess.run(["obabel", sdf_path, "-O", pdbqt_path, "--gen3d"])
    if conversion.returncode != 0 or not os.path.isfile(pdbqt_path):
        print(f"{mol_id}: falló la conversión a PDBQT con Open Babel.")
        failed_ligands.append(mol_id)

print("Conversión a PDBQT completada.")
if failed_ligands:
    print(f"Ligandos omitidos o con error: {len(failed_ligands)}")

In [None]:
import os
import subprocess

# Docking parameters, as given in the article.
CENTER_X = 33.19
CENTER_Y = 1.06
CENTER_Z = 15.62
SIZE_X = 30
SIZE_Y = 30
SIZE_Z = 30
EXHAUSTIVENESS = 8
NUM_MODES = 9
ENERGY_RANGE = 3

RECEPTOR = "receptor.pdbqt"
LIGANDS_DIR = "ligands"
OUTPUT_DIR = "results"
os.makedirs(OUTPUT_DIR, exist_ok=True)

LIGAND_SUFFIX = ".pdbqt"

# Ligand files whose vina run exited with a non-zero status.
failed_dockings = []

# Sorted so that the processing order is reproducible between runs
# (os.listdir returns entries in arbitrary order).
for ligand_file in sorted(os.listdir(LIGANDS_DIR)):
    if not ligand_file.endswith(LIGAND_SUFFIX):
        continue

    # Strip only the trailing extension; replacing every ".pdbqt" occurrence
    # could map two different ligand files to the same output name.
    ligand_name = ligand_file[:-len(LIGAND_SUFFIX)]
    ligand_path = os.path.join(LIGANDS_DIR, ligand_file)
    out_path = os.path.join(OUTPUT_DIR, f"{ligand_name}_out.pdbqt")
    log_path = os.path.join(OUTPUT_DIR, f"{ligand_name}_log.txt")

    # NOTE(review): "--log" may not be accepted by every AutoDock Vina
    # release; confirm against the installed version.
    command = [
        "vina",
        "--receptor", RECEPTOR,
        "--ligand", ligand_path,
        "--center_x", str(CENTER_X),
        "--center_y", str(CENTER_Y),
        "--center_z", str(CENTER_Z),
        "--size_x", str(SIZE_X),
        "--size_y", str(SIZE_Y),
        "--size_z", str(SIZE_Z),
        "--out", out_path,
        "--log", log_path,
        "--exhaustiveness", str(EXHAUSTIVENESS),
        "--num_modes", str(NUM_MODES),
        "--energy_range", str(ENERGY_RANGE),
    ]

    print(f"Docking {ligand_file} ...")
    completed = subprocess.run(command)

    # Keep going with the remaining ligands, but do not let a failed run
    # pass unnoticed.
    if completed.returncode != 0:
        print(f"Error: vina terminó con código {completed.returncode} para {ligand_file}.")
        failed_dockings.append(ligand_file)

print("Docking terminado.")
if failed_dockings:
    print(f"Ligandos con error en el docking: {len(failed_dockings)}")

In [None]:
from quick_dock import qvina

# Search-box and sampling settings handed to run_docking() below.
docking_config = {
    "center_x": 33.19,
    "center_y": 1.06,
    "center_z": 15.62,
    "size_x": 30,
    "size_y": 30,
    "size_z": 30,
    "cpu": 16,
    "exhaustiveness": 16,
    "num_modes": 3,
    "energy_range": 3,
}

# Run the quick_dock workflow step by step: preparation, format conversion,
# docking and, last, result analysis.
docking_instance = qvina.QVina(
    protein_file="proteinclean.pdb",
    ext_ligands="SMILES_id.csv",
)
docking_instance.prepare_protein(overwrite=False)
docking_instance.prepare_ligands(overwrite=True)
docking_instance.convert_all_pdbs_to_pdbqt()
docking_instance.run_docking(config_dict=docking_config, overwrite=True)

results = docking_instance.analyze_results()



In [None]:
import pandas as pd
import matplotlib.pyplot as plt

def load_csv(path, ligand_col: str, score_col: str, label: str) -> pd.DataFrame:
    """Read two columns of a CSV file and give them standard names.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.
        ligand_col: Name of the column identifying each ligand; it is
            renamed to ``'ligand_id'``.
        score_col: Name of the column holding the score; it is renamed
            to ``label``.
        label: New name for the score column.

    Returns:
        A DataFrame containing only the two renamed columns.
    """
    new_names = {ligand_col: 'ligand_id', score_col: label}
    return pd.read_csv(path, usecols=[ligand_col, score_col]).rename(columns=new_names)

# Each spec names a CSV file, the two columns to read from it and the label
# under which its score column appears in the merged table.
file1 = {
    'path': 'SMILES_id.csv',
    'ligand_col': 'SMILES',
    'score_col': 'SCORE',
    'label': 'run1'
}
file2 = {
    'path': 'ligands_curated.csv',
    'ligand_col': 'SMILES',
    'score_col': 'SCORE',
    'label': 'run2'
}

# Only the file loading can raise FileNotFoundError, so only that part is
# guarded; the analysis runs in the else branch.
try:
    df1 = load_csv(**file1)
    df2 = load_csv(**file2)
except FileNotFoundError as err:
    print("Error: No se encontró uno de los archivos. Verifica las rutas especificadas.")
    print(f"Detalle: {err}")
else:
    # Repeated keys on either side make the merge emit one row per key
    # combination, which would weight those ligands more than once.
    if df1['ligand_id'].duplicated().any() or df2['ligand_id'].duplicated().any():
        print("Advertencia: hay identificadores de ligando duplicados; "
              "el cruce generará filas repetidas.")

    # Keep only the ligands present in both runs.
    df = pd.merge(df1, df2, on='ligand_id', how='inner')

    if df.empty:
        # Nothing to correlate or plot without ligands in common.
        print("Error: los archivos no tienen ligandos en común; no hay datos que comparar.")
    else:
        corr = df[[file1['label'], file2['label']]].corr()
        print("Matriz de correlación:\n", corr, "\n")

        # Per-ligand score difference (first run minus second run).
        diff_col = f"diff_{file1['label']}_{file2['label']}"
        df[diff_col] = df[file1['label']] - df[file2['label']]
        print("Resumen de diferencias:\n", df[diff_col].describe(), "\n")

        plt.scatter(df[file1['label']], df[file2['label']], alpha=0.5)
        plt.xlabel(file1['label'])
        plt.ylabel(file2['label'])
        plt.title(f"{file1['label']} vs {file2['label']}")
        plt.grid(True)
        plt.show()