In [None]:
from google.colab import drive
# Mount the user's Google Drive at /content/drive; the data paths used by the
# later cells of this notebook all live under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#@title Instalar librerías

# Install the third-party packages into the Colab runtime.
# NOTE(review): versions are not pinned, so a new session may pull different
# releases; consider `%pip install -q pkg==x.y.z` for reproducibility.
# NOTE(review): none of these packages is imported in the cells visible in
# this notebook — confirm they are still required.
!pip install rdkit
!pip install Biopython
!pip install pandarallel
!pip install py3Dmol

In [None]:
#@title Abrir el dataframe

import os
import pandas as pd

# Location of the filtered dataframe on the mounted Drive.
# NOTE: `input_folder` is reassigned by later cells of this notebook, so its
# value depends on which cell ran last.
input_folder = "/content/drive/MyDrive/TFM/T2/Analisis_dataframe_final"
input_file = os.path.join(input_folder, "df_filtro_binana_final.csv")
# Load the comma-separated file into `df_harm`
df_harm = pd.read_csv(input_file, sep = ',')
# Report the number of rows and the column names
print(df_harm.shape[0],df_harm.columns)

70378 Index(['level_0', 'index', 'PDB_entry_id', 'Classification', 'Organism',
       'Uniprot_id', 'Ligand_id', 'Ligand_InChi', 'Experimental_method',
       'Resolution', 'Adding_Classification', 'Affinity', 'Coordenadas',
       'Ligand_smiles', 'Mol_Weight', 'n_atoms', 'Nombre', 'Classification_y',
       'Count_general', 'Count_diff'],
      dtype='object')


In [None]:
# Preview the first rows of the loaded dataframe
df_harm.head()

Unnamed: 0,index,PDB_entry_id,Classification,Organism,Uniprot_id,Ligand_id,Ligand_InChi,Experimental_method,Resolution,Adding_Classification,Affinity,Coordenadas,Ligand_smiles,Mol_Weight,n_atoms,Nombre,Classification_y,Count_general,Count_diff
0,0,4GCP,MEMBRANE PROTEIN,Escherichia coli K-12,P02931,AIC,AVKUERGKIZMTKX-NJBDSQKTSA-N,X-RAY DIFFRACTION,1.98,ANTIBIOTIC,,"[70.12, 0, 'B', 401]",CC1(C)S[C@@H]2[C@H](NC(=O)[C@H](N)c3ccccc3)C(=...,349.412,24.0,4GCP_AIC_B_401,MEMBRANE PROTEIN,11,3
1,1,4GCX,TRANSFERASE,Zymomonas mobilis subsp. mobilis ZM4 = ATCC 31821,P28720,PRF,MEYMBLGOKYDGLZ-UHFFFAOYSA-N,X-RAY DIFFRACTION,1.42,,,"[100.0, 0, 'A', 401]",NCc1c[nH]c2N=C(N)NC(=O)c12,179.183,13.0,4GCX_PRF_A_401,TRANSFERASE,15,2
2,2,4GCY,HYDROLASE,Mycobacterium tuberculosis,P9WNS5,DUP,XZLLMTSKYYYJLH-SHYZEUOFSA-N,X-RAY DIFFRACTION,1.5,,,"[60.48, 0, 'A', 201]",O[C@H]1C[C@@H](O[C@@H]1CO[P](O)(=O)N[P](O)(=O)...,467.157,28.0,4GCY_DUP_A_201,HYDROLASE,129,3
3,3,4GDE,ISOMERASE,Aspergillus fumigatus,Q4W1X2,FDA,YPZRHBJKEMOYQH-UYBVJOGSSA-N,X-RAY DIFFRACTION,2.2,,,"[91.8, 0, 'A', 601]",Cc1cc2NC3=C(NC(=O)NC3=O)N(C[C@H](O)[C@H](O)[C@...,787.573,53.0,4GDE_FDA_A_601,ISOMERASE,187,7
4,4,4GDE,ISOMERASE,Aspergillus fumigatus,Q4W1X2,FDA,YPZRHBJKEMOYQH-UYBVJOGSSA-N,X-RAY DIFFRACTION,2.2,,,"[91.61, 0, 'B', 601]",Cc1cc2NC3=C(NC(=O)NC3=O)N(C[C@H](O)[C@H](O)[C@...,787.573,53.0,4GDE_FDA_B_601,ISOMERASE,187,7


In [None]:
#@title Expandir el Dataframe por la columna "Coordenadas"

import pandas as pd
import ast

# Parse the stringified lists stored in "Coordenadas" (e.g. "[70.12, 0, 'B', 401]")
# into real Python lists. Only str values are parsed: ast.literal_eval raises
# ValueError when handed an already-parsed list, so without this guard the cell
# could not be re-run on the same kernel.
df_harm['Coordenadas'] = df_harm['Coordenadas'].apply(
    lambda valor: ast.literal_eval(valor) if isinstance(valor, str) else valor
)



In [None]:
# Collect the instance identifiers (column "Nombre") as a plain list; a later
# cell derives the expected binding-site file names from it.
nombre_instancias = df_harm['Nombre'].tolist()
# Number of instances
print(len(nombre_instancias))

70378


El script está en este enlace: https://gist.github.com/anku255/03dc35c5233a3fc59d60fdf62c3cda24

In [None]:
#@title Función Ultrafast Shape Recognition

import math, os
from tqdm import tqdm

# --------- LECTOR PDB ---------
class Atom:
    """A single atom reduced to its Cartesian position."""

    def __init__(self, x, y, z):
        # Coerce every coordinate to float (float() also accepts numeric strings).
        self.x, self.y, self.z = float(x), float(y), float(z)

    def getXCoordinate(self):
        """Return the x coordinate."""
        return self.x

    def getYCoordinate(self):
        """Return the y coordinate."""
        return self.y

    def getZCoordinate(self):
        """Return the z coordinate."""
        return self.z

class Molecule:
    """Ordered collection of atoms, kept in insertion order."""

    def __init__(self):
        self.atoms = list()

    def addAtom(self, atom):
        """Append ``atom`` at the end of the molecule."""
        self.atoms += [atom]

    def getAtoms(self):
        """Return the underlying list of atoms (not a copy)."""
        return self.atoms

    def getAtom(self, i):
        """Return the atom stored at position ``i``."""
        return self.getAtoms()[i]

def readPDBLigand(filepath):
    """Read the atom coordinates of a PDB file into a ``Molecule``.

    Every line starting with ``ATOM`` or ``HETATM`` contributes one atom whose
    x, y and z are taken from the fixed-width slices ``[30:38]``, ``[38:46]``
    and ``[46:54]`` of the line. Records whose coordinate fields cannot be
    parsed as floats (including lines that are too short) are skipped.

    NOTE(review): no filtering by model, chain or alternate location is done,
    so every matching record in the file ends up in the molecule.

    Parameters
    ----------
    filepath : str
        Path of the PDB file to read.

    Returns
    -------
    Molecule
        The atoms in file order; empty when no record could be parsed.
    """
    molecule = Molecule()
    with open(filepath, 'r') as f:
        for line in f:
            if not line.startswith(("HETATM", "ATOM")):
                continue
            try:
                x = float(line[30:38])
                y = float(line[38:46])
                z = float(line[46:54])
            except ValueError:
                # Only a malformed coordinate field is tolerated; a bare
                # `except` would also hide KeyboardInterrupt and real bugs.
                continue
            molecule.addAtom(Atom(x, y, z))
    return molecule

# --------- CÓDIGO USR ---------
class Point:
    """A position in 3D space; defaults to the origin."""

    def __init__(self, x=0, y=0, z=0):
        self.xCoordinate, self.yCoordinate, self.zCoordinate = x, y, z

class AtomIndexAndDistance:
    """Pair of an atom's position in the molecule and a distance measured for it."""

    def __init__(self, index, distance):
        self.index, self.distance = index, distance

class USR:
    """Ultrafast Shape Recognition (USR) moment calculator.

    The class keeps no state: every method receives the molecule it works on.
    A molecule is any object exposing ``getAtoms()`` and ``getAtom(i)`` whose
    atoms expose ``getXCoordinate()``, ``getYCoordinate()`` and
    ``getZCoordinate()``.
    """

    def _distancesTo(self, aMolecule, x, y, z):
        """Return the distance from each atom (in molecule order) to (x, y, z)."""
        distances = []
        for atom in aMolecule.getAtoms():
            dx = atom.getXCoordinate() - x
            dy = atom.getYCoordinate() - y
            dz = atom.getZCoordinate() - z
            distances.append(math.sqrt(dx*dx + dy*dy + dz*dz))
        return distances

    def CalculateCentroid(self, aMolecule):
        """Return the mean atom position as a ``Point``.

        An empty molecule yields the default ``Point`` (the origin).
        """
        centroid = Point()
        atoms = aMolecule.getAtoms()
        if not atoms:
            return centroid
        sx, sy, sz = 0.0, 0.0, 0.0
        for atom in atoms:
            sx += atom.getXCoordinate()
            sy += atom.getYCoordinate()
            sz += atom.getZCoordinate()
        n = len(atoms)
        centroid.xCoordinate = sx / n
        centroid.yCoordinate = sy / n
        centroid.zCoordinate = sz / n
        return centroid

    def EuclideanDistanceMeasure(self, aMolecule, point, i):
        """Return the distance between atom ``i`` of the molecule and ``point``."""
        atom = aMolecule.getAtom(i)
        dx = atom.getXCoordinate() - point.xCoordinate
        dy = atom.getYCoordinate() - point.yCoordinate
        dz = atom.getZCoordinate() - point.zCoordinate
        return math.sqrt(dx*dx + dy*dy + dz*dz)

    def ClosestAndFurthestAtomToCentroid(self, aMolecule):
        """Return ``[closest, furthest]`` atoms relative to the centroid.

        Both entries are ``AtomIndexAndDistance`` objects. Ties keep the
        behaviour of the former stable sort: the closest atom is the lowest
        index among equals, the furthest atom the highest index among equals.

        Raises
        ------
        ValueError
            If the molecule has no atoms.
        """
        centroid = self.CalculateCentroid(aMolecule)
        distances = self._distancesTo(
            aMolecule,
            centroid.xCoordinate,
            centroid.yCoordinate,
            centroid.zCoordinate,
        )
        n = len(distances)
        if n == 0:
            raise ValueError("cannot compute USR reference atoms: molecule has no atoms")
        closest = min(range(n), key=distances.__getitem__)
        # Scan from the end so that the last of several equally distant atoms wins.
        furthest = max(range(n - 1, -1, -1), key=distances.__getitem__)
        return [
            AtomIndexAndDistance(closest, distances[closest]),
            AtomIndexAndDistance(furthest, distances[furthest]),
        ]

    def FurthestAtomFromFurthestAtom(self, aMolecule, furthestAtom):
        """Return the index of the atom furthest from atom ``furthestAtom.index``.

        When several atoms are equally far, the highest index is returned.
        """
        target = aMolecule.getAtom(furthestAtom.index)
        distances = self._distancesTo(
            aMolecule,
            target.getXCoordinate(),
            target.getYCoordinate(),
            target.getZCoordinate(),
        )
        return max(range(len(distances) - 1, -1, -1), key=distances.__getitem__)

    def MomentToCentroid(self, momentId, aMolecule, centroid):
        """Return moment ``momentId`` of the atom distances to ``centroid``."""
        distances = self._distancesTo(
            aMolecule,
            centroid.xCoordinate,
            centroid.yCoordinate,
            centroid.zCoordinate,
        )
        return self._moment(momentId, distances)

    def MomentToX(self, momentId, aMolecule, refIndex):
        """Return moment ``momentId`` of the atom distances to atom ``refIndex``."""
        ref = aMolecule.getAtom(refIndex)
        distances = self._distancesTo(
            aMolecule,
            ref.getXCoordinate(),
            ref.getYCoordinate(),
            ref.getZCoordinate(),
        )
        return self._moment(momentId, distances)

    def _moment(self, momentId, distances):
        """Return one statistical moment of ``distances``.

        ``momentId`` 1 is the mean, 2 the population variance, and 3 the mean
        absolute cubed deviation divided by ``variance ** 1.5`` (0.0 when the
        variance is zero).

        NOTE(review): because of ``abs()`` the third value is never negative,
        unlike a signed skewness — confirm this is the intended descriptor.

        Raises
        ------
        ValueError
            If ``momentId`` is not 1, 2 or 3, or if ``distances`` is empty.
        """
        if momentId not in (1, 2, 3):
            # Previously fell through and returned None silently.
            raise ValueError(f"unsupported momentId {momentId!r}; expected 1, 2 or 3")
        n = len(distances)
        if n == 0:
            raise ValueError("cannot compute a moment of an empty distance list")
        mean = sum(distances) / n
        if momentId == 1:
            return mean
        variance = sum((d - mean)**2 for d in distances) / n
        if momentId == 2:
            return variance
        skewness = sum(abs(d - mean)**3 for d in distances) / n
        return skewness / (variance ** 1.5) if variance > 0 else 0.0

def getUSRDescriptor(aMolecule):
    """Return the 12-value USR descriptor of ``aMolecule`` as a list.

    The list holds moments 1, 2 and 3 of the atom distances to four reference
    points, in this order: the centroid, the atom closest to the centroid, the
    atom furthest from the centroid, and the atom furthest from that one.
    """
    calc = USR()
    moment_ids = (1, 2, 3)
    centre = calc.CalculateCentroid(aMolecule)
    closest, furthest = calc.ClosestAndFurthestAtomToCentroid(aMolecule)
    far_from_far = calc.FurthestAtomFromFurthestAtom(aMolecule, furthest)

    descriptor = [calc.MomentToCentroid(k, aMolecule, centre) for k in moment_ids]
    for ref_index in (closest.index, furthest.index, far_from_far):
        descriptor.extend(calc.MomentToX(k, aMolecule, ref_index) for k in moment_ids)
    return descriptor


In [None]:
#@title Función USR para los sitios agrupados

import os
import glob
from collections import defaultdict
from tqdm import tqdm
import math

#############################
# LECTOR PDB Y DESCRIPTORES #
#############################

# --------- LECTOR PDB ---------
class Atom:
    """A single atom reduced to its Cartesian position."""

    def __init__(self, x, y, z):
        # Coerce every coordinate to float (float() also accepts numeric strings).
        self.x, self.y, self.z = float(x), float(y), float(z)

    def getXCoordinate(self):
        """Return the x coordinate."""
        return self.x

    def getYCoordinate(self):
        """Return the y coordinate."""
        return self.y

    def getZCoordinate(self):
        """Return the z coordinate."""
        return self.z

class Molecule:
    """Ordered collection of atoms, kept in insertion order."""

    def __init__(self):
        self.atoms = list()

    def addAtom(self, atom):
        """Append ``atom`` at the end of the molecule."""
        self.atoms += [atom]

    def getAtoms(self):
        """Return the underlying list of atoms (not a copy)."""
        return self.atoms

    def getAtom(self, i):
        """Return the atom stored at position ``i``."""
        return self.getAtoms()[i]

def readPDBLigand(filepath):
    """Read the atom coordinates of a PDB file into a ``Molecule``.

    Every line starting with ``ATOM`` or ``HETATM`` contributes one atom whose
    x, y and z are taken from the fixed-width slices ``[30:38]``, ``[38:46]``
    and ``[46:54]`` of the line. Records whose coordinate fields cannot be
    parsed as floats (including lines that are too short) are skipped.

    NOTE(review): no filtering by model, chain or alternate location is done,
    so every matching record in the file ends up in the molecule.

    Parameters
    ----------
    filepath : str
        Path of the PDB file to read.

    Returns
    -------
    Molecule
        The atoms in file order; empty when no record could be parsed.
    """
    molecule = Molecule()
    with open(filepath, 'r') as f:
        for line in f:
            if not line.startswith(("HETATM", "ATOM")):
                continue
            try:
                x = float(line[30:38])
                y = float(line[38:46])
                z = float(line[46:54])
            except ValueError:
                # Only a malformed coordinate field is tolerated; catching
                # every Exception here would also hide unrelated bugs.
                continue
            molecule.addAtom(Atom(x, y, z))
    return molecule

# --------- CÓDIGO USR ---------
class Point:
    """A position in 3D space; defaults to the origin."""

    def __init__(self, x=0, y=0, z=0):
        self.xCoordinate, self.yCoordinate, self.zCoordinate = x, y, z

class AtomIndexAndDistance:
    """Pair of an atom's position in the molecule and a distance measured for it."""

    def __init__(self, index, distance):
        self.index, self.distance = index, distance

class USR:
    """Ultrafast Shape Recognition (USR) moment calculator.

    The class keeps no state: every method receives the molecule it works on.
    A molecule is any object exposing ``getAtoms()`` and ``getAtom(i)`` whose
    atoms expose ``getXCoordinate()``, ``getYCoordinate()`` and
    ``getZCoordinate()``.
    """

    def _distancesTo(self, aMolecule, x, y, z):
        """Return the distance from each atom (in molecule order) to (x, y, z)."""
        distances = []
        for atom in aMolecule.getAtoms():
            dx = atom.getXCoordinate() - x
            dy = atom.getYCoordinate() - y
            dz = atom.getZCoordinate() - z
            distances.append(math.sqrt(dx*dx + dy*dy + dz*dz))
        return distances

    def CalculateCentroid(self, aMolecule):
        """Return the mean atom position as a ``Point``.

        An empty molecule yields the default ``Point`` (the origin).
        """
        centroid = Point()
        atoms = aMolecule.getAtoms()
        if not atoms:
            return centroid
        sx, sy, sz = 0.0, 0.0, 0.0
        for atom in atoms:
            sx += atom.getXCoordinate()
            sy += atom.getYCoordinate()
            sz += atom.getZCoordinate()
        n = len(atoms)
        centroid.xCoordinate = sx / n
        centroid.yCoordinate = sy / n
        centroid.zCoordinate = sz / n
        return centroid

    def EuclideanDistanceMeasure(self, aMolecule, point, i):
        """Return the distance between atom ``i`` of the molecule and ``point``."""
        atom = aMolecule.getAtom(i)
        dx = atom.getXCoordinate() - point.xCoordinate
        dy = atom.getYCoordinate() - point.yCoordinate
        dz = atom.getZCoordinate() - point.zCoordinate
        return math.sqrt(dx*dx + dy*dy + dz*dz)

    def ClosestAndFurthestAtomToCentroid(self, aMolecule):
        """Return ``[closest, furthest]`` atoms relative to the centroid.

        Both entries are ``AtomIndexAndDistance`` objects. Ties keep the
        behaviour of the former stable sort: the closest atom is the lowest
        index among equals, the furthest atom the highest index among equals.

        Raises
        ------
        ValueError
            If the molecule has no atoms.
        """
        centroid = self.CalculateCentroid(aMolecule)
        distances = self._distancesTo(
            aMolecule,
            centroid.xCoordinate,
            centroid.yCoordinate,
            centroid.zCoordinate,
        )
        n = len(distances)
        if n == 0:
            raise ValueError("cannot compute USR reference atoms: molecule has no atoms")
        closest = min(range(n), key=distances.__getitem__)
        # Scan from the end so that the last of several equally distant atoms wins.
        furthest = max(range(n - 1, -1, -1), key=distances.__getitem__)
        return [
            AtomIndexAndDistance(closest, distances[closest]),
            AtomIndexAndDistance(furthest, distances[furthest]),
        ]

    def FurthestAtomFromFurthestAtom(self, aMolecule, furthestAtom):
        """Return the index of the atom furthest from atom ``furthestAtom.index``.

        When several atoms are equally far, the highest index is returned.
        """
        target = aMolecule.getAtom(furthestAtom.index)
        distances = self._distancesTo(
            aMolecule,
            target.getXCoordinate(),
            target.getYCoordinate(),
            target.getZCoordinate(),
        )
        return max(range(len(distances) - 1, -1, -1), key=distances.__getitem__)

    def MomentToCentroid(self, momentId, aMolecule, centroid):
        """Return moment ``momentId`` of the atom distances to ``centroid``."""
        distances = self._distancesTo(
            aMolecule,
            centroid.xCoordinate,
            centroid.yCoordinate,
            centroid.zCoordinate,
        )
        return self._moment(momentId, distances)

    def MomentToX(self, momentId, aMolecule, refIndex):
        """Return moment ``momentId`` of the atom distances to atom ``refIndex``."""
        ref = aMolecule.getAtom(refIndex)
        distances = self._distancesTo(
            aMolecule,
            ref.getXCoordinate(),
            ref.getYCoordinate(),
            ref.getZCoordinate(),
        )
        return self._moment(momentId, distances)

    def _moment(self, momentId, distances):
        """Return one statistical moment of ``distances``.

        ``momentId`` 1 is the mean, 2 the population variance, and 3 the mean
        absolute cubed deviation divided by ``variance ** 1.5`` (0.0 when the
        variance is zero).

        NOTE(review): because of ``abs()`` the third value is never negative,
        unlike a signed skewness — confirm this is the intended descriptor.

        Raises
        ------
        ValueError
            If ``momentId`` is not 1, 2 or 3, or if ``distances`` is empty.
        """
        if momentId not in (1, 2, 3):
            # Previously fell through and returned None silently.
            raise ValueError(f"unsupported momentId {momentId!r}; expected 1, 2 or 3")
        n = len(distances)
        if n == 0:
            raise ValueError("cannot compute a moment of an empty distance list")
        mean = sum(distances) / n
        if momentId == 1:
            return mean
        variance = sum((d - mean)**2 for d in distances) / n
        if momentId == 2:
            return variance
        skewness = sum(abs(d - mean)**3 for d in distances) / n
        return skewness / (variance ** 1.5) if variance > 0 else 0.0

def getUSRDescriptor(aMolecule):
    """Return the 12-value USR descriptor of ``aMolecule`` as a list.

    The list holds moments 1, 2 and 3 of the atom distances to four reference
    points, in this order: the centroid, the atom closest to the centroid, the
    atom furthest from the centroid, and the atom furthest from that one.
    """
    calc = USR()
    moment_ids = (1, 2, 3)
    centre = calc.CalculateCentroid(aMolecule)
    closest, furthest = calc.ClosestAndFurthestAtomToCentroid(aMolecule)
    far_from_far = calc.FurthestAtomFromFurthestAtom(aMolecule, furthest)

    descriptor = [calc.MomentToCentroid(k, aMolecule, centre) for k in moment_ids]
    for ref_index in (closest.index, furthest.index, far_from_far):
        descriptor.extend(calc.MomentToX(k, aMolecule, ref_index) for k in moment_ids)
    return descriptor




In [None]:
# Expected binding-site file name for every instance: "<Nombre>_sitio.pdb"
nombre_instancias_sitios = ["{}_sitio.pdb".format(nombre) for nombre in nombre_instancias]


In [None]:
# Show only a short preview: displaying the whole list (one entry per
# dataframe row) floods the cell output and bloats the saved notebook.
nombre_instancias_sitios[:5]

In [None]:
# NOTE(review): within the visible notebook `pdb_files` is first assigned in a
# later cell (the glob over `input_folder`); on a fresh kernel executed top to
# bottom this cell raises NameError. Move it below that cell or remove it.
print(len(pdb_files))

71709


In [None]:
#################################
#   GROUPING AND PROCESSING     #
#################################

# Input folder containing the binding-site PDB files
input_folder = "/content/drive/MyDrive/TFM/T2/TOT_ligandos_sitios/TOTAL/TOTAL_PDB/sitios_pdb_proteina"

# Output folder for the USR CSV files (created if it does not exist)
output_folder = "/content/drive/MyDrive/TFM/T2/Ultrafast_Shape_Recognition/USR_sitios_de_union/Output_USR_sitios_1"
os.makedirs(output_folder, exist_ok=True)


# Mapping ligand name -> list of PDB paths; it is filled by a later cell
sitios_por_nombre = defaultdict(list)
# All *.pdb files of the input folder, in sorted order
pdb_files = sorted(glob.glob(os.path.join(input_folder, "*.pdb")))
print(len(pdb_files))

71709


In [None]:
# Show only a short preview: displaying every globbed path floods the cell
# output and bloats the saved notebook.
pdb_files[:5]

In [None]:


# Start from an empty grouping so that re-running this cell does not append
# the same paths a second time to the dict created in the previous cell.
sitios_por_nombre = defaultdict(list)

# Membership in a set is O(1). Testing each file name against the full list
# (tens of thousands of names for tens of thousands of files, per the printed
# sizes) made this loop quadratic.
nombres_validos = set(nombre_instancias_sitios)

# 1. Group the binding-site files that belong to the dataframe by ligand name,
#    which is the field at index [1] of the "_"-separated file name.
for filepath in pdb_files:
    base = os.path.basename(filepath)
    if base not in nombres_validos:
        continue
    parts = base.split("_")
    if len(parts) < 2:
        # The name cannot be split correctly; ignore the file
        continue
    ligand_name = parts[1]
    sitios_por_nombre[ligand_name].append(filepath)

# 2. Process every group and write one CSV per ligand
for ligand_name, archivos in sitios_por_nombre.items():
    output_path = os.path.join(output_folder, f"USR_{ligand_name}_sitio.csv")
    with open(output_path, 'w') as out:
        # Header: file name followed by the 12 USR descriptor columns
        out.write("conformation," + ",".join([f"USR_{i+1}" for i in range(12)]) + "\n")
        for pdb_file in tqdm(archivos, desc=f"Procesando sitios de {ligand_name}"):
            try:
                mol = readPDBLigand(pdb_file)
                descriptor = getUSRDescriptor(mol)
                desc_line = ",".join([f"{d:.5f}" for d in descriptor])
                out.write(f"{os.path.basename(pdb_file)},{desc_line}\n")
            except Exception as e:
                # Best effort: report the failing file and continue with the rest
                print(f"Error en {pdb_file}: {e}")

In [None]:
import os
import glob
from collections import defaultdict
from tqdm import tqdm
import math

#############################
# LECTOR PDB Y DESCRIPTORES #
#############################

# --------- LECTOR PDB ---------
class Atom:
    """A single atom reduced to its Cartesian position."""

    def __init__(self, x, y, z):
        # Coerce every coordinate to float (float() also accepts numeric strings).
        self.x, self.y, self.z = float(x), float(y), float(z)

    def getXCoordinate(self):
        """Return the x coordinate."""
        return self.x

    def getYCoordinate(self):
        """Return the y coordinate."""
        return self.y

    def getZCoordinate(self):
        """Return the z coordinate."""
        return self.z

class Molecule:
    """Ordered collection of atoms, kept in insertion order."""

    def __init__(self):
        self.atoms = list()

    def addAtom(self, atom):
        """Append ``atom`` at the end of the molecule."""
        self.atoms += [atom]

    def getAtoms(self):
        """Return the underlying list of atoms (not a copy)."""
        return self.atoms

    def getAtom(self, i):
        """Return the atom stored at position ``i``."""
        return self.getAtoms()[i]

def readPDBLigand(filepath):
    """Read the atom coordinates of a PDB file into a ``Molecule``.

    Every line starting with ``ATOM`` or ``HETATM`` contributes one atom whose
    x, y and z are taken from the fixed-width slices ``[30:38]``, ``[38:46]``
    and ``[46:54]`` of the line. Records whose coordinate fields cannot be
    parsed as floats (including lines that are too short) are skipped.

    NOTE(review): no filtering by model, chain or alternate location is done,
    so every matching record in the file ends up in the molecule.

    Parameters
    ----------
    filepath : str
        Path of the PDB file to read.

    Returns
    -------
    Molecule
        The atoms in file order; empty when no record could be parsed.
    """
    molecule = Molecule()
    with open(filepath, 'r') as f:
        for line in f:
            if not line.startswith(("HETATM", "ATOM")):
                continue
            try:
                x = float(line[30:38])
                y = float(line[38:46])
                z = float(line[46:54])
            except ValueError:
                # Only a malformed coordinate field is tolerated; catching
                # every Exception here would also hide unrelated bugs.
                continue
            molecule.addAtom(Atom(x, y, z))
    return molecule

# --------- CÓDIGO USR ---------
class Point:
    """A position in 3D space; defaults to the origin."""

    def __init__(self, x=0, y=0, z=0):
        self.xCoordinate, self.yCoordinate, self.zCoordinate = x, y, z

class AtomIndexAndDistance:
    """Pair of an atom's position in the molecule and a distance measured for it."""

    def __init__(self, index, distance):
        self.index, self.distance = index, distance

class USR:
    """Ultrafast Shape Recognition (USR) moment calculator.

    The class keeps no state: every method receives the molecule it works on.
    A molecule is any object exposing ``getAtoms()`` and ``getAtom(i)`` whose
    atoms expose ``getXCoordinate()``, ``getYCoordinate()`` and
    ``getZCoordinate()``.
    """

    def _distancesTo(self, aMolecule, x, y, z):
        """Return the distance from each atom (in molecule order) to (x, y, z)."""
        distances = []
        for atom in aMolecule.getAtoms():
            dx = atom.getXCoordinate() - x
            dy = atom.getYCoordinate() - y
            dz = atom.getZCoordinate() - z
            distances.append(math.sqrt(dx*dx + dy*dy + dz*dz))
        return distances

    def CalculateCentroid(self, aMolecule):
        """Return the mean atom position as a ``Point``.

        An empty molecule yields the default ``Point`` (the origin).
        """
        centroid = Point()
        atoms = aMolecule.getAtoms()
        if not atoms:
            return centroid
        sx, sy, sz = 0.0, 0.0, 0.0
        for atom in atoms:
            sx += atom.getXCoordinate()
            sy += atom.getYCoordinate()
            sz += atom.getZCoordinate()
        n = len(atoms)
        centroid.xCoordinate = sx / n
        centroid.yCoordinate = sy / n
        centroid.zCoordinate = sz / n
        return centroid

    def EuclideanDistanceMeasure(self, aMolecule, point, i):
        """Return the distance between atom ``i`` of the molecule and ``point``."""
        atom = aMolecule.getAtom(i)
        dx = atom.getXCoordinate() - point.xCoordinate
        dy = atom.getYCoordinate() - point.yCoordinate
        dz = atom.getZCoordinate() - point.zCoordinate
        return math.sqrt(dx*dx + dy*dy + dz*dz)

    def ClosestAndFurthestAtomToCentroid(self, aMolecule):
        """Return ``[closest, furthest]`` atoms relative to the centroid.

        Both entries are ``AtomIndexAndDistance`` objects. Ties keep the
        behaviour of the former stable sort: the closest atom is the lowest
        index among equals, the furthest atom the highest index among equals.

        Raises
        ------
        ValueError
            If the molecule has no atoms.
        """
        centroid = self.CalculateCentroid(aMolecule)
        distances = self._distancesTo(
            aMolecule,
            centroid.xCoordinate,
            centroid.yCoordinate,
            centroid.zCoordinate,
        )
        n = len(distances)
        if n == 0:
            raise ValueError("cannot compute USR reference atoms: molecule has no atoms")
        closest = min(range(n), key=distances.__getitem__)
        # Scan from the end so that the last of several equally distant atoms wins.
        furthest = max(range(n - 1, -1, -1), key=distances.__getitem__)
        return [
            AtomIndexAndDistance(closest, distances[closest]),
            AtomIndexAndDistance(furthest, distances[furthest]),
        ]

    def FurthestAtomFromFurthestAtom(self, aMolecule, furthestAtom):
        """Return the index of the atom furthest from atom ``furthestAtom.index``.

        When several atoms are equally far, the highest index is returned.
        """
        target = aMolecule.getAtom(furthestAtom.index)
        distances = self._distancesTo(
            aMolecule,
            target.getXCoordinate(),
            target.getYCoordinate(),
            target.getZCoordinate(),
        )
        return max(range(len(distances) - 1, -1, -1), key=distances.__getitem__)

    def MomentToCentroid(self, momentId, aMolecule, centroid):
        """Return moment ``momentId`` of the atom distances to ``centroid``."""
        distances = self._distancesTo(
            aMolecule,
            centroid.xCoordinate,
            centroid.yCoordinate,
            centroid.zCoordinate,
        )
        return self._moment(momentId, distances)

    def MomentToX(self, momentId, aMolecule, refIndex):
        """Return moment ``momentId`` of the atom distances to atom ``refIndex``."""
        ref = aMolecule.getAtom(refIndex)
        distances = self._distancesTo(
            aMolecule,
            ref.getXCoordinate(),
            ref.getYCoordinate(),
            ref.getZCoordinate(),
        )
        return self._moment(momentId, distances)

    def _moment(self, momentId, distances):
        """Return one statistical moment of ``distances``.

        ``momentId`` 1 is the mean, 2 the population variance, and 3 the mean
        absolute cubed deviation divided by ``variance ** 1.5`` (0.0 when the
        variance is zero).

        NOTE(review): because of ``abs()`` the third value is never negative,
        unlike a signed skewness — confirm this is the intended descriptor.

        Raises
        ------
        ValueError
            If ``momentId`` is not 1, 2 or 3, or if ``distances`` is empty.
        """
        if momentId not in (1, 2, 3):
            # Previously fell through and returned None silently.
            raise ValueError(f"unsupported momentId {momentId!r}; expected 1, 2 or 3")
        n = len(distances)
        if n == 0:
            raise ValueError("cannot compute a moment of an empty distance list")
        mean = sum(distances) / n
        if momentId == 1:
            return mean
        variance = sum((d - mean)**2 for d in distances) / n
        if momentId == 2:
            return variance
        skewness = sum(abs(d - mean)**3 for d in distances) / n
        return skewness / (variance ** 1.5) if variance > 0 else 0.0

def getUSRDescriptor(aMolecule):
    """Return the 12-value USR descriptor of ``aMolecule`` as a list.

    The list holds moments 1, 2 and 3 of the atom distances to four reference
    points, in this order: the centroid, the atom closest to the centroid, the
    atom furthest from the centroid, and the atom furthest from that one.
    """
    calc = USR()
    moment_ids = (1, 2, 3)
    centre = calc.CalculateCentroid(aMolecule)
    closest, furthest = calc.ClosestAndFurthestAtomToCentroid(aMolecule)
    far_from_far = calc.FurthestAtomFromFurthestAtom(aMolecule, furthest)

    descriptor = [calc.MomentToCentroid(k, aMolecule, centre) for k in moment_ids]
    for ref_index in (closest.index, furthest.index, far_from_far):
        descriptor.extend(calc.MomentToX(k, aMolecule, ref_index) for k in moment_ids)
    return descriptor

In [None]:
import os
import glob
from collections import defaultdict
from tqdm import tqdm

#################################
#   GROUPING AND PROCESSING     #
#################################

# Input folder containing the ligand PDB files
input_folder = "/content/drive/MyDrive/TFM/T2/Ultrafast_Shape_Recognition/ligandos_pdb"

# Output folder for the USR CSV files (created if it does not exist)
output_folder = "/content/drive/MyDrive/TFM/T2/Ultrafast_Shape_Recognition/Output_USR"
os.makedirs(output_folder, exist_ok=True)

# 1. Group by ligand name, which is the field at index [1] of the
#    "_"-separated file name.
ligandos_por_nombre = defaultdict(list)
pdb_files = sorted(glob.glob(os.path.join(input_folder, "*.pdb")))

for filepath in pdb_files:
    base = os.path.basename(filepath)
    parts = base.split("_")
    if len(parts) < 2:
        # The name cannot be split correctly; ignore the file
        continue
    ligand_name = parts[1]  # Example: "GNP"
    ligandos_por_nombre[ligand_name].append(filepath)

# 2. Process every group and write one CSV per ligand, skipping finished ones
for ligand_name, archivos in ligandos_por_nombre.items():
    output_path = os.path.join(output_folder, f"USR_{ligand_name}.csv")

    # A CSV that already exists is treated as finished and is not recomputed
    if os.path.exists(output_path):
        print(f"El archivo {output_path} ya existe. Saltando {ligand_name}.")
        continue

    # Write to a temporary name and rename only when the whole group is done.
    # Writing straight to `output_path` meant that an interrupted run left a
    # truncated CSV which the existence check above then skipped forever.
    tmp_path = output_path + ".tmp"
    with open(tmp_path, 'w') as out:
        # Header: file name followed by the 12 USR descriptor columns
        out.write("conformation," + ",".join([f"USR_{i+1}" for i in range(12)]) + "\n")
        for pdb_file in tqdm(archivos, desc=f"Procesando {ligand_name}"):
            try:
                mol = readPDBLigand(pdb_file)
                descriptor = getUSRDescriptor(mol)
                desc_line = ",".join([f"{d:.5f}" for d in descriptor])
                out.write(f"{os.path.basename(pdb_file)},{desc_line}\n")
            except Exception as e:
                # Best effort: report the failing file and continue with the rest
                print(f"Error en {pdb_file}: {e}")
    os.replace(tmp_path, output_path)
