# Determinar todos los compuestos dentro de las carpetas
<hr>

In [1]:
import glob
import pandas as pd

In [2]:
# Root folders that the following cells search recursively for CSV files.
carpeta_bases = "Bases_conjuntas/Targets"
carpeta_inhibidores = "Inhibidores/Targets"

In [3]:
# glob returns matches in a filesystem-dependent order; sort them so that everything
# derived from the file order (row positions, generated IDs) is reproducible.
Csv_bases = sorted(glob.glob(f"{carpeta_bases}/**/*.csv", recursive=True))

In [4]:
# glob returns matches in a filesystem-dependent order; sort them so that everything
# derived from the file order (row positions, generated IDs) is reproducible.
Csv_inhibidores = sorted(glob.glob(f"{carpeta_inhibidores}/**/*.csv", recursive=True))

# Crear dataframes con los SMILES y la respectiva ruta
<hr>

In [5]:
# Display the list of CSV paths found under carpeta_bases.
Csv_bases

['Bases_conjuntas/Targets/EGFR_MAPK1/Mejores_predictivos_EGFR_MAPK1.csv',
 'Bases_conjuntas/Targets/EGFR_SRC/Mejores_predictivos_EGFR_SRC.csv',
 'Bases_conjuntas/Targets/EGFR_STAT3/Mejores_predictivos_EGFR_STAT3.csv',
 'Bases_conjuntas/Targets/MAPK1_SRC/Mejores_predictivos_MAPK1_SRC.csv']

In [6]:
import os

# Fail early with an explicit message instead of the generic error pd.concat
# raises when it receives an empty list.
if not Csv_bases:
    raise ValueError("No se encontraron archivos CSV en Csv_bases")

lista_dfs = []

# Iterate in sorted order: the row order of the concatenated frame must not depend
# on the filesystem-dependent order in which glob returned the paths.
for i in sorted(Csv_bases):
    df = pd.read_csv(i, index_col=0)
    # Keep only the folder that contains the CSV (drop the file name).
    carpeta = os.path.dirname(i)
    df["ruta"] = carpeta
    lista_dfs.append(df)

# One frame for every CSV, with a fresh 0..n-1 index.
df_bases = pd.concat(lista_dfs, ignore_index=True)


In [7]:
import os

# Fail early with an explicit message instead of the generic error pd.concat
# raises when it receives an empty list.
if not Csv_inhibidores:
    raise ValueError("No se encontraron archivos CSV en Csv_inhibidores")

lista_dfs = []

# Iterate in sorted order: the row order of the concatenated frame must not depend
# on the filesystem-dependent order in which glob returned the paths.
for i in sorted(Csv_inhibidores):
    df = pd.read_csv(i, index_col=0)
    # Keep only the folder that contains the CSV (drop the file name).
    carpeta = os.path.dirname(i)
    df["ruta"] = carpeta
    lista_dfs.append(df)

# One frame for every CSV, with a fresh 0..n-1 index.
df_inhibidores = pd.concat(lista_dfs, ignore_index=True)


# Establecer nombres temporales
<hr>

In [8]:
# Temporary identifier: the "Targets" label with spaces turned into underscores,
# followed by the row position in the concatenated frame.
df_bases["ID"] = (
    df_bases["Targets"].str.replace(" ", "_", regex=False)
    + "_"
    + df_bases.index.astype(str)
)


In [9]:
# Temporary identifier: the "Targets" label with spaces turned into underscores,
# followed by the row position in the concatenated frame.
df_inhibidores["ID"] = (
    df_inhibidores["Targets"].str.replace(" ", "_", regex=False)
    + "_"
    + df_inhibidores.index.astype(str)
)

In [10]:
# to_csv does not create missing parent folders, so make sure "IDS" exists first.
os.makedirs("IDS", exist_ok=True)
df_bases.to_csv("IDS/df_bases.csv", index=False)

In [11]:
# to_csv does not create missing parent folders, so make sure "IDS" exists first.
os.makedirs("IDS", exist_ok=True)
df_inhibidores.to_csv("IDS/df_inhibidores.csv", index=False)

In [12]:
# Show the (rows, columns) size of df_bases.
df_bases.shape

(593, 17)

In [13]:
# Show the (rows, columns) size of df_inhibidores.
df_inhibidores.shape

(320, 18)

In [14]:
# Count how many rows of df_bases fall under each "Targets_nuevo" value.
df_bases["Targets_nuevo"].value_counts()

Targets_nuevo
EGFR_MAPK1    555
EGFR_SRC       19
EGFR_STAT3     18
MAPK1_SRC       1
Name: count, dtype: int64

In [15]:
# Count how many rows of df_inhibidores fall under each "Targets_nuevo" value.
df_inhibidores["Targets_nuevo"].value_counts()

Targets_nuevo
MAPK1_P_EGFR        89
EGFR_P_STAT3        74
SRC_P_EGFR          66
SRC_P_MAPK1         29
EGFR_P_SRC          29
EGFR_P_MAPK1        17
SRC_P_STAT3          5
MAPK1_P_STAT3        3
SRC_EGFR_P_STAT3     3
SRC_MAPK1_P_EGFR     2
SRC_EGFR_P_MAPK1     2
SRC_P_EGFR_STAT3     1
Name: count, dtype: int64

# Preparar moléculas con RDKit y MGLTools (prepare_ligand4)
<hr>

In [16]:
import os
import subprocess
from rdkit import Chem
from rdkit.Chem import AllChem

# Locations of the MGLTools interpreter and of its prepare_ligand4.py script.
# The defaults are the original machine-specific paths; set the MGLTOOLS_PYTHONSH /
# MGLTOOLS_PREPARE_LIGAND4 environment variables to run the notebook elsewhere
# without editing this cell.
ruta_pythonsh = os.environ.get(
    "MGLTOOLS_PYTHONSH",
    "/home/eliud7720/mgltools/mgltools_x86_64Linux2_1.5.7/bin/pythonsh",
)
ruta_prepare_ligand4 = os.environ.get(
    "MGLTOOLS_PREPARE_LIGAND4",
    "/home/eliud7720/mgltools/mgltools_x86_64Linux2_1.5.7/MGLToolsPckgs/AutoDockTools/Utilities24/prepare_ligand4.py",
)

def smiles_to_pdb(smiles, pdb_filename):
    """Build a 3D structure from a SMILES string and write it to a PDB file.

    The molecule is parsed with RDKit, explicit hydrogens are added, a conformer
    is embedded with a fixed random seed and, when MMFF parameters exist for every
    atom, the geometry is MMFF-optimized before it is written.

    Args:
        smiles: SMILES string of the molecule.
        pdb_filename: Path of the PDB file to create.

    Returns:
        True when the PDB file was written; False when the molecule was skipped
        (invalid SMILES or no 3D conformer), in which case no file is created.
    """
    # Reject anything that is not a non-empty string (e.g. a NaN coming from an
    # empty CSV cell) before handing it to RDKit.
    if not isinstance(smiles, str) or not smiles.strip():
        print(f"❌ SMILES inválido o vacío: {smiles!r}")
        return False

    mol = Chem.MolFromSmiles(smiles)
    # MolFromSmiles signals a parse failure by returning None instead of raising.
    if mol is None:
        print(f"❌ No se pudo interpretar el SMILES: {smiles}")
        return False
    mol = Chem.AddHs(mol)

    result = AllChem.EmbedMolecule(mol, randomSeed=0xf00d)
    if result != 0:
        # Second attempt starting from random coordinates, same seed for reproducibility.
        result = AllChem.EmbedMolecule(mol, randomSeed=0xf00d, useRandomCoords=True)

    if result != 0:
        print(f"❌ No se pudo generar conformación 3D para: {smiles}")
        return False

    # MMFF cannot be set up for atoms without parameters; in that case keep the
    # embedded geometry and say so instead of skipping the optimization silently.
    if AllChem.MMFFHasAllMoleculeParams(mol):
        AllChem.MMFFOptimizeMolecule(mol)
    else:
        print(f"⚠️ Sin parámetros MMFF, se escribe la geometría sin optimizar: {smiles}")

    w = Chem.rdmolfiles.PDBWriter(pdb_filename)
    try:
        w.write(mol)
    finally:
        # Always release the file handle, even if writing fails.
        w.close()
    return True


def preparar_ligando_con_mgltools(pdb_file, pdbqt_file, ruta_pythonsh, ruta_prepare_ligand4):
    """Convert a ligand PDB file to PDBQT by running prepare_ligand4.py.

    The script is executed with the PDB's folder as working directory and receives
    only the base names of the input and output files, so the PDBQT is written in
    the same folder as the PDB. The PDB is deleted once the conversion succeeded.

    Args:
        pdb_file: Path of the input PDB file.
        pdbqt_file: Path of the output file; only its base name is used.
        ruta_pythonsh: Interpreter used to run the script.
        ruta_prepare_ligand4: Path of the prepare_ligand4.py script.

    Returns:
        True when the PDBQT was produced and the PDB removed, False otherwise.
    """
    if not os.path.isfile(pdb_file):
        print(f"Error preparando ligando para {pdb_file}: no existe el archivo PDB")
        return False

    # dirname() is "" for a bare file name, and a process cannot chdir to "".
    directorio = os.path.dirname(pdb_file) or "."
    nombre_salida = os.path.basename(pdbqt_file)

    cmd = [
        ruta_pythonsh,
        ruta_prepare_ligand4,
        "-l", os.path.basename(pdb_file),  # Base name only: it is resolved against cwd
        "-o", nombre_salida,
        "-A", "hydrogens"
    ]

    # Run inside the folder that holds the files.
    try:
        result = subprocess.run(cmd, cwd=directorio, capture_output=True, text=True)
    except OSError as error:
        # E.g. the interpreter path does not exist or is not executable.
        print(f"Error preparando ligando para {pdb_file}: {error}")
        return False
    print("STDOUT:\n", result.stdout)
    print("STDERR:\n", result.stderr)

    if result.returncode != 0:
        print(f"Error preparando ligando para {pdb_file}")
        return False

    # Do not delete the input unless the output really exists.
    if not os.path.isfile(os.path.join(directorio, nombre_salida)):
        print(f"Error preparando ligando para {pdb_file}: no se generó {nombre_salida}")
        return False

    # Only now remove the original PDB.
    os.remove(pdb_file)
    return True



In [17]:
longitud = len(df_bases)
# IDs whose PDB or PDBQT could not be produced, reported after the loop.
fallidos_bases = []

for i, (nombre, smiles, ruta) in enumerate(zip(df_bases["ID"], df_bases["SMILES"], df_bases["ruta"]), 1):

    print(f"Procesando {i} de {longitud}")

    nombre_final_pdb = os.path.abspath(os.path.join(ruta, nombre + ".pdb"))
    nombre_final_pdbqt = os.path.abspath(os.path.join(ruta, nombre + ".pdbqt"))

    print(nombre_final_pdbqt)

    # Drop a PDB left behind by an earlier failed run so it cannot be converted
    # in place of the molecule generated now.
    if os.path.exists(nombre_final_pdb):
        os.remove(nombre_final_pdb)

    # One problematic molecule must not abort the whole batch.
    try:
        smiles_to_pdb(smiles, nombre_final_pdb)
    except Exception as error:
        print(f"Error generando PDB para {nombre}: {error}")

    # smiles_to_pdb writes nothing when it fails; skip the MGLTools step then.
    if not os.path.isfile(nombre_final_pdb):
        fallidos_bases.append(nombre)
        continue

    if not preparar_ligando_con_mgltools(nombre_final_pdb, nombre_final_pdbqt, ruta_pythonsh, ruta_prepare_ligand4):
        fallidos_bases.append(nombre)

print(f"Ligandos no preparados: {len(fallidos_bases)} de {longitud}")

Procesando 1 de 593
/mnt/c/Users/Eliud/OneDrive/Escritorio/Docking/Compuestos/Bases_conjuntas/Targets/EGFR_MAPK1/EGFR_MAPK1_0.pdbqt
STDOUT:
 setting PYTHONHOME environment

STDERR:
 
Procesando 2 de 593
/mnt/c/Users/Eliud/OneDrive/Escritorio/Docking/Compuestos/Bases_conjuntas/Targets/EGFR_MAPK1/EGFR_MAPK1_1.pdbqt
STDOUT:
 setting PYTHONHOME environment

STDERR:
 
Procesando 3 de 593
/mnt/c/Users/Eliud/OneDrive/Escritorio/Docking/Compuestos/Bases_conjuntas/Targets/EGFR_MAPK1/EGFR_MAPK1_2.pdbqt
STDOUT:
 setting PYTHONHOME environment

STDERR:
 
Procesando 4 de 593
/mnt/c/Users/Eliud/OneDrive/Escritorio/Docking/Compuestos/Bases_conjuntas/Targets/EGFR_MAPK1/EGFR_MAPK1_3.pdbqt
STDOUT:
 setting PYTHONHOME environment

STDERR:
 
Procesando 5 de 593
/mnt/c/Users/Eliud/OneDrive/Escritorio/Docking/Compuestos/Bases_conjuntas/Targets/EGFR_MAPK1/EGFR_MAPK1_4.pdbqt
STDOUT:
 setting PYTHONHOME environment

STDERR:
 
Procesando 6 de 593
/mnt/c/Users/Eliud/OneDrive/Escritorio/Docking/Compuestos/Bases_c

[14:30:57] UFFTYPER: Unrecognized hybridization for atom: 1
[14:30:57] UFFTYPER: Unrecognized atom type: Ru (1)


STDOUT:
 setting PYTHONHOME environment
Unable to assign HAD type to atom Ru
Unable to assign valence to atom EGFR_MAPK1_520: :UNL1:RU1 type = Ru
Unable to assign MAP type to atom Ru
Sorry, there are no Gasteiger parameters available for atom EGFR_MAPK1_520: :UNL1:RU1
Unable to assign XYZ type to atom Ru
Unable to assign HYB type to atom Ru
Unable to assign HYB type to atom Ru
Unable to assign HYB type to atom Ru
Unable to assign HYB type to atom Ru
Unable to assign HYB type to atom Ru
Unable to assign HYB type to atom Ru
Unable to assign XYZ type to atom Ru
Unable to assign HYB type to atom Ru
Unable to assign HYB type to atom Ru
Unable to assign HYB type to atom Ru
Unable to assign HYB type to atom Ru
Unable to assign HYB type to atom Ru
Unable to assign HYB type to atom Ru
Unable to assign XYZ type to atom Ru
Unable to assign HYB type to atom Ru
Unable to assign HYB type to atom Ru
Unable to assign HYB type to atom Ru
Unable to assign HYB type to atom Ru
Unable to assign HYB type to

In [18]:
longitud = len(df_inhibidores)
# IDs whose PDB or PDBQT could not be produced, reported after the loop.
fallidos_inhibidores = []

for i, (nombre, smiles, ruta) in enumerate(zip(df_inhibidores["ID"], df_inhibidores["SMILES"], df_inhibidores["ruta"]), 1):

    print(f"Procesando {i} de {longitud}")

    nombre_final_pdb = os.path.abspath(os.path.join(ruta, nombre + ".pdb"))
    nombre_final_pdbqt = os.path.abspath(os.path.join(ruta, nombre + ".pdbqt"))

    print(nombre_final_pdbqt)

    # Drop a PDB left behind by an earlier failed run so it cannot be converted
    # in place of the molecule generated now.
    if os.path.exists(nombre_final_pdb):
        os.remove(nombre_final_pdb)

    # One problematic molecule must not abort the whole batch.
    try:
        smiles_to_pdb(smiles, nombre_final_pdb)
    except Exception as error:
        print(f"Error generando PDB para {nombre}: {error}")

    # smiles_to_pdb writes nothing when it fails; skip the MGLTools step then.
    if not os.path.isfile(nombre_final_pdb):
        fallidos_inhibidores.append(nombre)
        continue

    if not preparar_ligando_con_mgltools(nombre_final_pdb, nombre_final_pdbqt, ruta_pythonsh, ruta_prepare_ligand4):
        fallidos_inhibidores.append(nombre)

print(f"Ligandos no preparados: {len(fallidos_inhibidores)} de {longitud}")

Procesando 1 de 320
/mnt/c/Users/Eliud/OneDrive/Escritorio/Docking/Compuestos/Inhibidores/Targets/EGFR/MAPK1_P_EGFR_0.pdbqt
STDOUT:
 setting PYTHONHOME environment

STDERR:
 
Procesando 2 de 320
/mnt/c/Users/Eliud/OneDrive/Escritorio/Docking/Compuestos/Inhibidores/Targets/EGFR/MAPK1_P_EGFR_1.pdbqt
STDOUT:
 setting PYTHONHOME environment

STDERR:
 
Procesando 3 de 320
/mnt/c/Users/Eliud/OneDrive/Escritorio/Docking/Compuestos/Inhibidores/Targets/EGFR/MAPK1_P_EGFR_2.pdbqt
STDOUT:
 setting PYTHONHOME environment

STDERR:
 
Procesando 4 de 320
/mnt/c/Users/Eliud/OneDrive/Escritorio/Docking/Compuestos/Inhibidores/Targets/EGFR/MAPK1_P_EGFR_3.pdbqt
STDOUT:
 setting PYTHONHOME environment

STDERR:
 
Procesando 5 de 320
/mnt/c/Users/Eliud/OneDrive/Escritorio/Docking/Compuestos/Inhibidores/Targets/EGFR/MAPK1_P_EGFR_4.pdbqt
STDOUT:
 setting PYTHONHOME environment

STDERR:
 
Procesando 6 de 320
/mnt/c/Users/Eliud/OneDrive/Escritorio/Docking/Compuestos/Inhibidores/Targets/EGFR/MAPK1_P_EGFR_5.pdbqt
S

[14:31:55] UFFTYPER: Unrecognized charge state for atom: 16


STDOUT:
 setting PYTHONHOME environment

STDERR:
 
Procesando 15 de 320
/mnt/c/Users/Eliud/OneDrive/Escritorio/Docking/Compuestos/Inhibidores/Targets/EGFR/MAPK1_P_EGFR_14.pdbqt
STDOUT:
 setting PYTHONHOME environment

STDERR:
 
Procesando 16 de 320
/mnt/c/Users/Eliud/OneDrive/Escritorio/Docking/Compuestos/Inhibidores/Targets/EGFR/MAPK1_P_EGFR_15.pdbqt
STDOUT:
 setting PYTHONHOME environment

STDERR:
 
Procesando 17 de 320
/mnt/c/Users/Eliud/OneDrive/Escritorio/Docking/Compuestos/Inhibidores/Targets/EGFR/MAPK1_P_EGFR_16.pdbqt
STDOUT:
 setting PYTHONHOME environment

STDERR:
 
Procesando 18 de 320
/mnt/c/Users/Eliud/OneDrive/Escritorio/Docking/Compuestos/Inhibidores/Targets/EGFR/MAPK1_P_EGFR_17.pdbqt
STDOUT:
 setting PYTHONHOME environment

STDERR:
 
Procesando 19 de 320
/mnt/c/Users/Eliud/OneDrive/Escritorio/Docking/Compuestos/Inhibidores/Targets/EGFR/MAPK1_P_EGFR_18.pdbqt
STDOUT:
 setting PYTHONHOME environment

STDERR:
 
Procesando 20 de 320
/mnt/c/Users/Eliud/OneDrive/Escritorio/Dock

# Analizar IDS
<hr>

In [19]:
# Display df_inhibidores, including the "ruta" and "ID" columns added earlier in the notebook.
df_inhibidores

Unnamed: 0,Targets,Molecule ChEMBL ID,EGFR,MAPK1,SRC,STAT3,0,BBB,SMILES,peso_molecular,logP,donadores_H,aceptores_H,violaciones,Targets_nuevo,after_P_,ruta,ID
0,MAPK1 P EGFR,CHEMBL2408790,0.929761,0.285700,0.660000,0.198848,0.0,0.000000,COc1cccc(F)c1CN1CCC[C@@H](NC(=O)Nc2cc3[nH]nc(-...,489.555,4.26212,3.0,6.0,0.0,MAPK1_P_EGFR,EGFR,Inhibidores/Targets/EGFR,MAPK1_P_EGFR_0
1,MAPK1 P EGFR,CHEMBL4109433,0.934886,27.200000,0.580000,0.483336,0.0,0.080000,COC[C@@H](NC(=O)Nc1cc2[nH]nc(Nc3ccccc3Cl)c2cn1...,436.903,4.86410,4.0,5.0,0.0,MAPK1_P_EGFR,EGFR,Inhibidores/Targets/EGFR,MAPK1_P_EGFR_1
2,MAPK1 P EGFR,CHEMBL3658668,0.952426,1.344000,0.470914,0.496461,0.0,0.050000,COC[C@@H](NC(=O)Nc1cc2[nH]nc(-c3ccn4ncnc4c3)c2...,446.446,3.31590,3.0,7.0,0.0,MAPK1_P_EGFR,EGFR,Inhibidores/Targets/EGFR,MAPK1_P_EGFR_2
3,MAPK1 P EGFR,CHEMBL4108159,0.907507,0.500000,0.410000,0.271605,0.0,0.070000,COC[C@@H](NC(=O)Nc1cc2[nH]nc(NC3COC3)c2cn1)c1c...,382.424,2.27780,4.0,6.0,0.0,MAPK1_P_EGFR,EGFR,Inhibidores/Targets/EGFR,MAPK1_P_EGFR_3
4,MAPK1 P EGFR,CHEMBL3658735,0.901858,58.970000,0.440407,0.681730,0.0,0.583333,Cc1cc(-c2n[nH]c3cc(NC(=O)NCc4ccc(-c5ncc[nH]5)c...,424.468,4.04012,4.0,5.0,0.0,MAPK1_P_EGFR,EGFR,Inhibidores/Targets/EGFR,MAPK1_P_EGFR_4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
315,SRC P STAT3,CHEMBL4436086,0.936290,0.089450,897.000000,0.921986,0.0,0.060000,Oc1cccc(Nc2nc(/C=C/c3cc4c(cc3Br)OCO4)nc3ccccc2...,462.303,5.74060,2.0,6.0,1.0,SRC_P_STAT3,STAT3,Inhibidores/Targets/STAT3,SRC_P_STAT3_315
316,SRC P STAT3,CHEMBL261450,0.098713,0.050460,93.000000,0.937757,0.0,0.447500,NC(=O)c1c(Nc2ccccc2)nn2c(N)cc(C3CC3)nc12,308.345,2.03140,3.0,6.0,0.0,SRC_P_STAT3,STAT3,Inhibidores/Targets/STAT3,SRC_P_STAT3_316
317,SRC EGFR P STAT3,CHEMBL122182,8.000000,0.229674,400.000000,0.904747,0.0,0.200000,Oc1ccc(Nc2[nH]nc3ncnc(Nc4cccc(Cl)c4)c23)cc1,352.785,4.19910,4.0,6.0,0.0,SRC_EGFR_P_STAT3,STAT3,Inhibidores/Targets/STAT3,SRC_EGFR_P_STAT3_317
318,SRC EGFR P STAT3,CHEMBL157021,16000.000000,0.179411,1500.000000,0.920060,0.0,0.205000,NC(=O)CCc1c(SSc2[nH]c3ccccc3c2CCC(N)=O)[nH]c2c...,438.578,4.28440,4.0,4.0,0.0,SRC_EGFR_P_STAT3,STAT3,Inhibidores/Targets/STAT3,SRC_EGFR_P_STAT3_318


In [20]:
# Display df_bases, including the "ruta" and "ID" columns added earlier in the notebook.
df_bases

Unnamed: 0,SMILES,Nombre,EGFR,MAPK1,SRC,STAT3,mean,BBB,Targets,peso_molecular,logP,donadores_H,aceptores_H,violaciones,Targets_nuevo,ruta,ID
0,C/C(=C/CC[C@@H](C)[C@@H]1CC[C@]2(C)C3=CCC4C(C)...,,0.922429,0.915159,0.230000,0.135191,0.550695,0.185000,EGFR MAPK1,454.695,7.60790,1.0,2.0,1.0,EGFR_MAPK1,Bases_conjuntas/Targets/EGFR_MAPK1,EGFR_MAPK1_0
1,C/C(=C\CC[C@H](C)[C@@H]1CC[C@]2(C)C3=CCC4C(C)(...,"(E,6S)-2-methyl-6-[(10R,13S,14S,17S)-4,4,10,13...",0.922429,0.915159,0.230000,0.135191,0.550695,0.185000,EGFR MAPK1,454.695,7.60790,1.0,2.0,1.0,EGFR_MAPK1,Bases_conjuntas/Targets/EGFR_MAPK1,EGFR_MAPK1_1
2,C[C@H](CC[C@@H](O)C(C)(C)O)[C@@H]1CC[C@@]2(C(=...,,0.928582,0.926768,0.170000,0.053530,0.519720,0.100000,EGFR MAPK1,488.709,5.60740,3.0,4.0,1.0,EGFR_MAPK1,Bases_conjuntas/Targets/EGFR_MAPK1,EGFR_MAPK1_2
3,CC(C)=CCC[C@@H](C)[C@H]1CC[C@@]2(C)C3=CC[C@@]4...,,0.945426,0.944940,0.260000,0.033167,0.545883,0.265000,EGFR MAPK1,438.740,8.93330,0.0,1.0,1.0,EGFR_MAPK1,Bases_conjuntas/Targets/EGFR_MAPK1,EGFR_MAPK1_3
4,CC(C)=CCC[C@H](C)[C@@H]1CC[C@]2(C)C3=CC[C@H]4C...,,0.939520,0.919611,0.210000,0.069483,0.534654,0.170000,EGFR MAPK1,424.713,8.54320,0.0,1.0,1.0,EGFR_MAPK1,Bases_conjuntas/Targets/EGFR_MAPK1,EGFR_MAPK1_4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
588,O=C(NCC1=CC=CN=C1)[C@H]1CN2CC[C@H]1C[C@@H]2CN1...,,0.912304,0.071742,0.275000,0.901213,0.540065,0.010000,EGFR STAT3,422.577,2.82270,1.0,6.0,0.0,EGFR_STAT3,Bases_conjuntas/Targets/EGFR_STAT3,EGFR_STAT3_588
589,CCC(C(=O)OCCOCCN(CC)CC)c1ccccc1,butamirate citrate,0.923914,0.461465,0.165000,0.930930,0.620327,0.780000,EGFR STAT3,307.434,3.08180,0.0,4.0,0.0,EGFR_STAT3,Bases_conjuntas/Targets/EGFR_STAT3,EGFR_STAT3_589
590,COc1cc2c(NCCCCCO[N+](=O)[O-])ncnc2c(OC)c1OC,KT 1,0.977872,0.075074,0.885000,0.940082,0.719507,0.160000,EGFR STAT3,366.374,2.44610,1.0,9.0,0.0,EGFR_STAT3,Bases_conjuntas/Targets/EGFR_STAT3,EGFR_STAT3_590
591,CCCOc1cc(N)ccc1C(=O)OCCN(CC)CC,PROPOXYCAINE HYDROCHLORIDE,0.932808,0.148395,0.667581,0.904663,0.663362,1.000000,EGFR STAT3,294.395,2.55620,1.0,5.0,0.0,EGFR_STAT3,Bases_conjuntas/Targets/EGFR_STAT3,EGFR_STAT3_591
