# Crear carpetas para el docking
<hr>

In [1]:
import os

In [2]:
# Make sure the output folders for both docking campaigns exist
# (exist_ok=True makes this a no-op when they are already there).
for carpeta_salida in ('docking/bases_conjuntas', 'docking/inhibidores'):
    os.makedirs(carpeta_salida, exist_ok=True)

In [3]:
# Output directories where the docking cells write their .pdbqt and .log files.
bases, inhibidores = 'docking/bases_conjuntas', 'docking/inhibidores'

# Extraer todos los compuestos de las dos bases
<hr>

In [4]:
import glob

In [5]:
# Collect every prepared ligand (.pdbqt) from the per-target sub-folders of each library.
# glob.glob returns paths in arbitrary, filesystem-dependent order, so the lists are sorted
# to keep row order (and the progress indices printed while docking) reproducible.
bases_preparadas = sorted(glob.glob("Bases_conjuntas/Targets/*/*.pdbqt"))
inhibidores_preparados = sorted(glob.glob("Inhibidores/Targets/*/*.pdbqt"))

In [6]:
# Sanity check: number of prepared ligand files matched by each glob pattern.
len(bases_preparadas), len(inhibidores_preparados)

(590, 320)

# Cargar proteínas y ligandos
<hr>

In [7]:
# Prepared receptor structures (PDBQT), one per target; passed to gnina as --receptor.
EGFR_P, MAPK1_P, SRC_P, STAT3_P = (
    "Proteinas_preparadas/EGFR.pdbqt",
    "Proteinas_preparadas/MAPK1.pdbqt",
    "Proteinas_preparadas/SRC.pdbqt",
    "Proteinas_preparadas/STAT3.pdbqt",
)

In [8]:
# Reference ligand file for each target; the docking cells pass it to gnina as
# --autobox_ligand to position the search box.
EGFR_L, MAPK1_L, SRC_L, STAT3_L = (
    "Compuestos_preparados/EGFR.pdbqt",
    "Compuestos_preparados/MAPK1.pdbqt",
    "Compuestos_preparados/SRC.pdbqt",
    "Compuestos_preparados/STAT3.pdbqt",
)

# Crear tablas de datos para docking - inhibidores
<hr>

In [9]:
import pandas as pd

In [10]:
# Single-column mapping ("Ruta" -> list of inhibitor ligand paths) used to seed the table.
my_dict1 = dict(Ruta=inhibidores_preparados)

In [11]:
# One row per inhibitor ligand file; further columns are derived from "Ruta" below.
df_inhib = pd.DataFrame.from_dict(my_dict1)

In [12]:
# Ligand name = file name without its extension. os.path.splitext is used (instead of
# split(".")[0]) so a stem containing a dot is not truncated and the name matches the
# `nombre` the docking cell derives for its output files.
df_inhib["nombre"] = df_inhib["Ruta"].map(
    lambda ruta: os.path.splitext(os.path.basename(ruta))[0]
)

In [13]:
import re

# Drop the trailing "_<digits>" counter from each name, leaving the target label.
df_inhib["Targets"] = [
    re.sub(r'_\d+$', '', nombre_ligando) for nombre_ligando in df_inhib["nombre"]
]

In [14]:
# Preview the first rows to check the derived "nombre" and "Targets" columns.
df_inhib.head()

Unnamed: 0,Ruta,nombre,Targets
0,Inhibidores/Targets/EGFR/MAPK1_P_EGFR_0.pdbqt,MAPK1_P_EGFR_0,MAPK1_P_EGFR
1,Inhibidores/Targets/EGFR/MAPK1_P_EGFR_1.pdbqt,MAPK1_P_EGFR_1,MAPK1_P_EGFR
2,Inhibidores/Targets/EGFR/MAPK1_P_EGFR_10.pdbqt,MAPK1_P_EGFR_10,MAPK1_P_EGFR
3,Inhibidores/Targets/EGFR/MAPK1_P_EGFR_11.pdbqt,MAPK1_P_EGFR_11,MAPK1_P_EGFR
4,Inhibidores/Targets/EGFR/MAPK1_P_EGFR_12.pdbqt,MAPK1_P_EGFR_12,MAPK1_P_EGFR


In [15]:
# Keep only the text after the last "_P_" marker: the receptor label(s) to dock against.
df_inhib["R_Targets"] = [
    etiqueta.split("_P_")[-1] for etiqueta in df_inhib["Targets"]
]

In [16]:
# Count how many inhibitor ligands carry each receptor label.
df_inhib["R_Targets"].value_counts()

R_Targets
EGFR          157
STAT3          85
MAPK1          48
SRC            29
EGFR_STAT3      1
Name: count, dtype: int64

In [17]:
# Table dimensions as (rows, columns).
df_inhib.shape

(320, 4)

# Crear tablas de datos para docking - bases
<hr>

In [18]:
# Single-column mapping ("Ruta" -> list of joint-database ligand paths) used to seed the table.
my_dict2 = dict(Ruta=bases_preparadas)

In [19]:
# One row per joint-database ligand file; further columns are derived from "Ruta" below.
df_prep = pd.DataFrame.from_dict(my_dict2)

In [20]:
# Compound name = file name without its extension. os.path.splitext is used (instead of
# split(".")[0]) because the docking cell builds its output file names with splitext, and
# the resume filter compares this column against names parsed from those output files;
# a stem containing a dot would otherwise never match and be docked again on every run.
df_prep["nombre"] = df_prep["Ruta"].map(
    lambda ruta: os.path.splitext(os.path.basename(ruta))[0]
)

In [21]:
import re

# Drop the trailing "_<digits>" counter from each name, leaving the target label.
df_prep["Targets"] = [
    re.sub(r'_\d+$', '', nombre_compuesto) for nombre_compuesto in df_prep["nombre"]
]

In [22]:
# Count how many joint-database compounds carry each target label.
df_prep["Targets"].value_counts()

Targets
EGFR_MAPK1    554
EGFR_SRC       19
EGFR_STAT3     16
MAPK1_SRC       1
Name: count, dtype: int64

In [23]:
# Table dimensions as (rows, columns).
df_prep.shape

(590, 3)

In [24]:
# Keep only the text after the last "_P_" marker (labels without the marker pass through
# unchanged): the receptor label(s) to dock against.
df_prep["R_Targets"] = [
    etiqueta.split("_P_")[-1] for etiqueta in df_prep["Targets"]
]

In [25]:
# Count how many joint-database compounds carry each receptor label.
df_prep["R_Targets"].value_counts()

R_Targets
EGFR_MAPK1    554
EGFR_SRC       19
EGFR_STAT3     16
MAPK1_SRC       1
Name: count, dtype: int64

# Realizar docking para los inhibidores
<hr>

In [None]:
import os
import subprocess

# Receptor label -> (prepared receptor file, reference ligand used for the autobox).
# A lookup table replaces the if/elif chain, which had no fallback: an unrecognised label
# either raised NameError (first iteration) or silently reused the previous iteration's
# receptor files.
ARCHIVOS_POR_RECEPTOR = {
    "EGFR": (EGFR_P, EGFR_L),
    "MAPK1": (MAPK1_P, MAPK1_L),
    "SRC": (SRC_P, SRC_L),
    "STAT3": (STAT3_P, STAT3_L),
}

# Docking settings are identical for every run, so they are set once before the loop.
autobox_add = 18  # padding around the autobox ligand, in Ångströms
exhaustiveness = 16

# (ligand path, receptor label, reason) for every run that was skipped or failed.
fallos_inhibidores = []

# Dock each inhibitor against every receptor named in its "R_Targets" label.
for index, row in df_inhib.iterrows():

    print("-------------------------------------------------------------")
    print(f"Procesando compuesto {index} de {df_inhib.shape[0]}")

    # Path of the ligand to dock and the receptor labels it must be docked against
    ligando = row["Ruta"]
    receptores = row["R_Targets"].split("_")

    # Stem of the ligand file, used to name the output files
    nombre = os.path.splitext(os.path.basename(ligando))[0]

    for receptor in receptores:

        if receptor not in ARCHIVOS_POR_RECEPTOR:
            # Unknown label: report it and move on instead of docking against the wrong protein.
            print(f"Receptor desconocido '{receptor}' para {ligando}; se omite")
            fallos_inhibidores.append((ligando, receptor, "receptor desconocido"))
            continue

        receptor_p, receptor_l = ARCHIVOS_POR_RECEPTOR[receptor]
        autobox_ligand = receptor_l

        # Output files are named "<ligand>_L_<receptor>" inside the inhibitors folder
        log = os.path.join(inhibidores, nombre + "_L_" + receptor + ".log")
        out = os.path.join(inhibidores, nombre + "_L_" + receptor + ".pdbqt")

        # gnina invocation with CNN scoring turned off
        comando = [
            "gnina",
            "--receptor", receptor_p,
            "--ligand", ligando,
            "--autobox_ligand", autobox_ligand,
            "--autobox_add", str(autobox_add),
            "--exhaustiveness", str(exhaustiveness),
            "--cnn_scoring", "none",  # disable CNN scoring
            "--out", out,
            "--log", log
        ]

        resultado = subprocess.run(comando)

        # A non-zero exit status used to go unnoticed; record it without aborting the batch.
        if resultado.returncode != 0:
            print(f"gnina terminó con código {resultado.returncode} para {nombre} / {receptor}")
            fallos_inhibidores.append((ligando, receptor, f"código {resultado.returncode}"))

if fallos_inhibidores:
    print(f"Docking con incidencias: {len(fallos_inhibidores)} (ver fallos_inhibidores)")

# Realizar docking para bases
<hr>

In [26]:
import os
import glob
import pandas as pd

# Docking outputs already written for the joint-database compounds.
compuestos = glob.glob("docking/bases_conjuntas/*.pdbqt")

# Output files are named "<compound>_L_<receptor>.pdbqt". Record, for every compound,
# which receptors already have an output file.
nombres_izquierda = []
receptores_hechos = {}

for ruta_salida in compuestos:
    base_salida = os.path.splitext(os.path.basename(ruta_salida))[0]
    izquierda, separador, receptor_hecho = base_salida.rpartition("_L_")
    if not separador:
        continue  # file name does not follow the docking output naming scheme
    nombres_izquierda.append(izquierda)
    receptores_hechos.setdefault(izquierda, set()).add(receptor_hecho)

# A compound counts as finished only when EVERY receptor in its "R_Targets" label has an
# output. Previously a single output was enough, so a run interrupted between two receptors
# left the second receptor permanently undocked.
terminados = [
    nombre_compuesto
    for nombre_compuesto, etiqueta in zip(df_prep["nombre"], df_prep["R_Targets"])
    if set(etiqueta.split("_")) <= receptores_hechos.get(nombre_compuesto, set())
]

serie = pd.Series(terminados, dtype="object").drop_duplicates().reset_index(drop=True)

In [27]:
# Keep only the compounds whose name is not listed in `serie` (those still to be docked).
ya_procesado = df_prep["nombre"].isin(serie)
df_filtrado = df_prep.loc[~ya_procesado]

In [28]:
# Dimensions (rows, columns) of the table of compounds still pending.
df_filtrado.shape

(147, 4)

In [29]:
# Move the original df_prep row labels into an "index" column and renumber rows from 0.
# Guarded and non-inplace so re-running the cell is a no-op: repeating reset_index() would
# add a "level_0" column on the second run and raise ValueError on the third.
if "index" not in df_filtrado.columns:
    df_filtrado = df_filtrado.reset_index()

In [None]:
import subprocess

# Receptor label -> (prepared receptor file, co-crystallised ligand used for the autobox).
# A lookup table replaces the if/elif chain, which had no fallback: an unrecognised label
# either raised NameError (first iteration) or silently reused the previous iteration's
# receptor files.
ARCHIVOS_POR_RECEPTOR = {
    "EGFR": (EGFR_P, EGFR_L),
    "MAPK1": (MAPK1_P, MAPK1_L),
    "SRC": (SRC_P, SRC_L),
    "STAT3": (STAT3_P, STAT3_L),
}

# Docking settings are identical for every run, so they are set once before the loop.
autobox_add = 18  # padding around the autobox ligand, in Ångströms
exhaustiveness = 16

# (ligand path, receptor label, reason) for every run that was skipped or failed.
fallos_bases = []

# Dock each pending compound against every receptor named in its "R_Targets" label.
for index, row in df_filtrado.iterrows():

    print("-------------------------------------------------------------")
    print(f"Procesando compuesto {index} de {df_filtrado.shape[0]}")

    # Path of the ligand to dock and the receptor labels it must be docked against
    ligando = row["Ruta"]
    receptores = row["R_Targets"].split("_")

    # Stem of the ligand file, used to name the output files
    nombre = os.path.splitext(os.path.basename(ligando))[0]

    for receptor in receptores:

        if receptor not in ARCHIVOS_POR_RECEPTOR:
            # Unknown label: report it and move on instead of docking against the wrong protein.
            print(f"Receptor desconocido '{receptor}' para {ligando}; se omite")
            fallos_bases.append((ligando, receptor, "receptor desconocido"))
            continue

        receptor_p, receptor_l = ARCHIVOS_POR_RECEPTOR[receptor]
        autobox_ligand = receptor_l  # ligand that defines the autobox

        # Output files are named "<compound>_L_<receptor>" inside the joint-database folder
        log = os.path.join(bases, nombre + "_L_" + receptor + ".log")
        out = os.path.join(bases, nombre + "_L_" + receptor + ".pdbqt")

        # gnina invocation with CNN scoring turned off
        comando = [
            "gnina",
            "--receptor", receptor_p,
            "--ligand", ligando,
            "--autobox_ligand", autobox_ligand,
            "--autobox_add", str(autobox_add),
            "--exhaustiveness", str(exhaustiveness),
            "--cnn_scoring", "none",  # disable CNN scoring
            "--out", out,
            "--log", log
        ]

        resultado = subprocess.run(comando)

        # A non-zero exit status used to go unnoticed; record it without aborting the batch.
        if resultado.returncode != 0:
            print(f"gnina terminó con código {resultado.returncode} para {nombre} / {receptor}")
            fallos_bases.append((ligando, receptor, f"código {resultado.returncode}"))

if fallos_bases:
    print(f"Docking con incidencias: {len(fallos_bases)} (ver fallos_bases)")