In [None]:
from rdkit import Chem
from rdkit.Chem import rdFingerprintGenerator, DataStructs, Draw
from rdkit import RDLogger
import csv
import os
import pandas as pd
from IPython.display import SVG, display

# Turn off every RDKit log channel matching 'rdApp.*'.
# NOTE(review): presumably to keep per-record parser warnings from flooding
# the output while the SDF is scanned -- confirm this is still wanted.
RDLogger.DisableLog('rdApp.*')

# === USER PARAMETERS ===
# Query file: each non-blank line is split on whitespace and read as
# "<SMILES> <identifier>"; the identifier becomes the output CSV file name.
smiles_file = "/home/angel/Escritorio/smiles_antocianinas.txt"  # File with SMILES and IDs
sdf_file = "/home/angel/Escritorio/Jupyter/chebi.sdf"  # ChEBI SDF file (reference library)
output_folder = "/home/angel/Escritorio/Jupyter/antocianinas"  # Final output folder
# Minimum Tanimoto similarity (inclusive) a reference compound needs in order
# to be written to a query's CSV.
similarity_threshold = 0.5
# === END PARAMETERS ===

# Make sure the output folder exists (no error if it is already there).
os.makedirs(output_folder, exist_ok=True)

# Parse the query file. Every non-blank line is broken into its
# whitespace-separated tokens; downstream code expects two tokens per line
# (a SMILES string followed by an identifier).
smiles_list = []
with open(smiles_file, "r") as query_handle:
    for raw_line in query_handle:
        tokens = raw_line.split()
        if tokens:
            smiles_list.append(tokens)

# Open the reference SDF a single time, up front, and configure the Morgan
# fingerprint generator shared by query and reference molecules
# (radius 3, bond types taken into account, 2048-bit vectors).
supplier = Chem.SDMolSupplier(sdf_file)
morgan_gen = rdFingerprintGenerator.GetMorganGenerator(
    radius=3,
    useBondTypes=True,
    fpSize=2048,
)

def _index_reference_library(mol_supplier, fp_generator):
    """Fingerprint every usable record of the reference SDF exactly once.

    The reference SMILES and fingerprints do not depend on the query
    molecule, so they are computed in a single pass here instead of once per
    query. The price is holding one fingerprint and one SMILES string per
    reference compound in memory for the duration of the run.

    Args:
        mol_supplier: Iterable of RDKit molecules (``None`` entries, which
            RDKit yields for records it could not parse, are ignored).
        fp_generator: Object exposing ``GetFingerprint(mol)``.

    Returns:
        A 4-tuple ``(ids, smiles, fingerprints, failed)`` where the first
        three are parallel lists in supplier order and ``failed`` is the
        number of records dropped because RDKit raised while processing them.
    """
    ids = []
    smiles_strings = []
    fingerprints = []
    failed = 0

    for mol in mol_supplier:
        if mol is None:
            continue
        try:
            chebi_id = mol.GetProp("ChEBI ID") if mol.HasProp("ChEBI ID") else ""
            smiles = Chem.MolToSmiles(mol)
            if not chebi_id or not smiles:
                # Records without an identifier or a SMILES are not reportable.
                continue
            fingerprint = fp_generator.GetFingerprint(mol)
        except Exception:
            # Best effort: one bad record must not abort the whole screen,
            # but it is counted so the loss is visible to the user.
            failed += 1
            continue
        ids.append(chebi_id)
        smiles_strings.append(smiles)
        fingerprints.append(fingerprint)

    return ids, smiles_strings, fingerprints, failed


# Build the reference index once; every query below reuses it.
ref_ids, ref_smiles, ref_fps, ref_failed = _index_reference_library(supplier, morgan_gen)
if ref_failed:
    print(f"[!] {ref_failed} registros del SDF omitidos por errores de RDKit")

# Screen each query molecule against the indexed reference library.
for entry in smiles_list:
    # A line must hold exactly "<SMILES> <id>"; anything else is reported and
    # skipped instead of aborting the run with an unpacking error.
    if len(entry) != 2:
        print(f"[!] Línea ignorada (se esperaba 'SMILES ID'): {' '.join(entry)}")
        continue
    base_smiles, base_id = entry

    base_mol = Chem.MolFromSmiles(base_smiles)
    if base_mol is None:
        print(f"[!] SMILES inválido: {base_smiles} ({base_id})")
        continue
    base_fp = morgan_gen.GetFingerprint(base_mol)

    # One call scores the query against every reference fingerprint; the
    # result is a list of floats in the same order as ref_fps.
    similarities = DataStructs.BulkTanimotoSimilarity(base_fp, ref_fps)

    output_csv = os.path.join(output_folder, f"{base_id}.csv")
    count = 0

    with open(output_csv, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=["chebi_id", "smiles", "similarity"])
        writer.writeheader()

        # Rows are emitted in SDF order; the threshold is inclusive.
        for chebi_id, smiles, sim in zip(ref_ids, ref_smiles, similarities):
            if sim >= similarity_threshold:
                writer.writerow({
                    "chebi_id": chebi_id,
                    "smiles": smiles,
                    "similarity": sim
                })
                count += 1

    print(f"[{base_id}] Similitud >= {similarity_threshold}: {count} compuestos guardados en {output_csv}")
