In [1]:
!pip install pandas
import pandas as pd



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [None]:
import os
import gzip
import shutil
import requests
import pandas as pd
import numpy as np
import MDAnalysis as mda
from MDAnalysis.lib.distances import distance_array
from tqdm import tqdm
from pathlib import Path
from functools import lru_cache

import warnings, signal
# Ignore UserWarnings whose message begins with the pattern below.
# `message` is a regular expression matched against the start of the warning
# text, which is why the literal "^" in "A^3" is escaped.
# NOTE(review): presumably this is the placeholder-CRYST1 warning emitted while
# reading PDB files with MDAnalysis — confirm the emitting library.
warnings.filterwarnings(
    "ignore",
    message=r"1 A\^3 CRYST1 record, this is usually a placeholder",
    category=UserWarning
)

HAS_ALARM = hasattr(signal, "SIGALRM")
class _TimeLimit:
    def __init__(self, seconds):
        self.seconds = int(seconds)
    def __enter__(self):
        if HAS_ALARM:
            signal.signal(signal.SIGALRM, lambda *_: (_ for _ in ()).throw(TimeoutError()))
            signal.alarm(self.seconds)
    def __exit__(self, exc_type, exc, tb):
        if HAS_ALARM:
            signal.alarm(0)



# Base directory for all outputs: the folder containing this file when
# __file__ is defined, otherwise the current working directory (a notebook
# kernel normally has no __file__).
BASE = Path(__file__).resolve().parent if "__file__" in globals() else Path.cwd() 
PDB_DIR = BASE / "pdb_downloads" # path to the folder where the PDB files are stored
# Created as a side effect of running this cell; a no-op if it already exists.
PDB_DIR.mkdir(exist_ok=True)
RESULTS_FILE = BASE / "protein_ligand_dataset_ALL.csv"
# NOTE(review): presumably the column layout of RESULTS_FILE — the code that
# writes it is not visible here; confirm.
EXPECTED_COLUMNS = [
    "PDB_ID", "Protein_Name", "Ligand_ID", "Ligand_Name",
    "Binding_Residues", "Ordered_Binding_Residues", "Ordered_Residues_Coords",
    "Num_Binding_Residues", "Source_File"
]
# NOTE(review): the three settings below are not used in the visible part of
# the file; presumably they steer which structure formats are downloaded and
# how many entries are processed per ligand — confirm against the callers.
PREFERRED_FORMATS = ("pdb", "mmtf")
ALLOW_CIF_FALLBACK = False
MAX_PDBS_PER_LIGAND = 250             
# NOTE(review): the _S suffix suggests seconds, presumably passed to _TimeLimit — confirm.
ANALYZE_TIMEOUT_S = 90 
# Single requests.Session object shared at module level.
SESSION = requests.Session()


@lru_cache(maxsize=512)
def _pdb_het_index(pdb_path: str):
    het_set = set()
    aa_het_set = set()
    het_resnames = set()
    try:
        with open(pdb_path, "r", encoding="utf-8", errors="ignore") as f:
            for line in f:
                if not line.startswith("HETATM"):
                    continue
                rn = line[17:20].strip().upper()
                ch = line[21].strip()
                try:
                    ri = int(line[22:26])
                except ValueError:
                    continue
                het_set.add((rn, ch, ri))
                het_resnames.add(rn)
                if rn in AA_CODES:
                    aa_het_set.add((rn, ch, ri))
    except Exception:
        pass
    return het_set, het_resnames, aa_het_set

def load_universe(path):
    """Load a structure file into an ``mda.Universe``.

    Parameters
    ----------
    path:
        Location of the structure file; converted with ``str()`` before it is
        handed to ``mda.Universe``.

    Returns
    -------
    The ``mda.Universe`` built from ``path``.

    Raises
    ------
    RuntimeError
        If building the universe fails for any reason.  The message (in
        Russian: "Failed to load <path>: <error>") includes the path and the
        original error text, and the original exception is attached as
        ``__cause__``.
    """
    try:
        return mda.Universe(str(path)) 
    except Exception as e:
        # Chain explicitly so tracebacks show the loader error as the direct
        # cause rather than as an error "during handling" of another one.
        raise RuntimeError(f"Не удалось загрузить {path}: {e}") from e

def _is_nonpolymeric_ligand(res):
    name = (res.resname or "").strip().upper()
    if not name or name in {"HOH", "WAT"}:
        return False
    try:
        # Если любые атомы помечаются как protein/nucleic -  часть полимера
        if res.atoms.select_atoms("protein or nucleic").n_atoms > 0:
            return False
    except Exception:
        return False
    return True

def _is_aa_het_ligand(res, pdb_path):
    name = (res.resname or "").strip().upper()
    if name not in AA_CODES:
        return False
    if not str(pdb_path).lower().endswith(".pdb"):
        return False
    try:
        _, _, aa_het = _pdb_het_index(str(pdb_path))
        key = (name, _chain_label(res), int(res.resid))
        return key in aa_het
    except Exception:
        return False

def ligands_present_in(u, whitelist, pdb_path):
    """Return the whitelisted ligand codes that occur in a structure.

    ``whitelist`` is iterated once and each entry is stripped and upper-cased
    before comparison.

    For paths ending in ``.pdb`` (case-insensitive) the answer comes from the
    HETATM residue names collected by ``_pdb_het_index``; ``u`` is not used.
    For any other path the residue names of ``u.select_atoms("not protein and
    not nucleic")`` are used instead, and an empty set is returned if that
    lookup raises.
    """
    wanted = set()
    for code in whitelist:
        wanted.add(code.strip().upper())

    if str(pdb_path).lower().endswith(".pdb"):
        _records, het_names, _aa_records = _pdb_het_index(str(pdb_path))
        return het_names & wanted

    # Fallback path for non-.pdb inputs: ask the loaded structure itself.
    try:
        non_polymer = u.select_atoms("not protein and not nucleic")
        found = set()
        for residue in non_polymer.residues:
            if residue.resname:
                found.add(residue.resname.strip().upper())
        return found & wanted
    except Exception:
        return set()

LIGANDS = {
    "TAC": "Tetracycline",
    "TET": "Tetracycline",
    "DXT": "DOXYCYCLINE",
    "OTC": "OXYTETRACYCLINE",
    "ADP": "ADENOSINE-5'-DIPHOSPHATE",
    "CTC": "7-CHLOROTETRACYCLINE",
    "CFF": "CAFFEINE",
    "LSD": "Lasalocid A",
    "7LD": "(8alpha)-N,N-diethyl-6-methyl-9,10-didehydroergoline-8-carboxamide",
    "PRO": "PROLINE",
    "HEM": "PROTOPORPHYRIN IX CONTAINING FE",
    "TRP": "TRYPTOPHAN",
    "TCK": "Tosyl-L-lysine chloromethyl ketone",
    "FMN": "FLAVIN MONONUCLEOTIDE",
    "C": "CYTIDINE-5'-MONOPHOSPHATE",
    "G": "GUANOSINE-5'-MONOPHOSPHATE",
    "GCU": "alpha-D-glucopyranuronic acid",
    "BCL": "BACTERIOCHLOROPHYLL A",
    "FAD": "FLAVIN-ADENINE DINUCLEOTIDE",
    "NDP": "NADPH DIHYDRO-NICOTINAMIDE-ADENINE-DINUCLEOTIDE PHOSPHATE",
    "PHO": "PHEOPHYTIN A",
    "CMP": "ADENOSINE-3',5'-CYCLIC-MONOPHOSPHATE",
    "OCS": "CYSTEINESULFONIC ACID",
    "PGA": "2-PHOSPHOGLYCOLIC ACID",
    "GNP": "PHOSPHOAMINOPHOSPHONIC ACID-GUANYLATE ESTER",
    "HIC": "4-METHYL-HISTIDINE",
    "BTN": "BIOTIN",
    "G1P": "1-O-phosphono-alpha-D-glucopyranose",
    "CYC": "PHYCOCYANOBILIN",
    "C8E": "(HYDROXYETHYLOXY)TRI(ETHYLOXY)OCTANE",
    "E64": "N-[N-[1-HYDROXYCARBOXYETHYL-CARBONYL]LEUCYLAMINO-BUTYL]-GUANIDINE",
    "HBI": "7,8-DIHYDROBIOPTERIN",
    "IMP": "INOSINIC ACID",
    "SAM": "S-ADENOSYLMETHIONINE",
    "SIN": "SUCCINIC ACID",
    "5GP": "GUANOSINE-5'-MONOPHOSPHATE",
    "DAH": "3,4-DIHYDROXYPHENYLALANINE",
    "MAE": "MALEIC ACID",
    "MBN": "TOLUENE",
    "TRI": "1,2,4-TRIAZOLE",
    "AZM": "5-ACETAMIDO-1,3,4-THIADIAZOLE-2-SULFONAMIDE",
    "MUB": "N-acetyl-alpha-muramic acid",
    "PPI": "PROPANOIC ACID",
    "STE": "STEARIC ACID",
    "ANP": "PHOSPHOAMINOPHOSPHONIC ACID-ADENYLATE ESTER",
    "ADE": "ADENINE",
    "ASO": "1,5-anhydro-D-glucitol",
    "TMP": "THYMIDINE-5'-PHOSPHATE",
    "AKG": "2-OXOGLUTARIC ACID",
    "CHL": "CHLOROPHYLL B",
    "A2M": "2'-O-methyladenosine 5'-(dihydrogen phosphate)",
    "AC1": "4,6-dideoxy-4-{[(1S,4R,5S,6S)-4,5,6-trihydroxy-3-(hydroxymethyl)cyclohex-2-en-1-yl]amino}-alpha-D-glucopyranose",
    "UDP": "URIDINE-5'-DIPHOSPHATE",
    "BNG": "nonyl beta-D-glucopyranoside",
    "NLE": "NORLEUCINE",
    "MPB": "4-HYDROXY-BENZOIC ACID METHYL ESTER",
    "DGN": "D-GLUTAMINE",
    "SGN": "2-deoxy-6-O-sulfo-2-(sulfoamino)-alpha-D-glucopyranose",
    "FDA": "DIHYDROFLAVINE-ADENINE DINUCLEOTIDE",
    "SAH": "S-ADENOSYL-L-HOMOCYSTEINE",
    "ORN": "L-ornithine",
    "5IU": "5-IODO-2'-DEOXYURIDINE-5'-MONOPHOSPHATE",
    "BLA": "BILIVERDINE IX ALPHA",
    "IPT": "1-methylethyl 1-thio-beta-D-galactopyranoside",
    "HCI": "HYDROCINNAMIC ACID",
    "FUC": "alpha-L-fucopyranose",
    "NIN": "DINITROPHENYLENE",
    "MQ7": "MENAQUINONE-7",
    "FRU": "beta-D-fructofuranose",
    "FC0": "N-CARBOXY-L-PHENYLALANINE",
    "NAI": "1,4-DIHYDRONICOTINAMIDE ADENINE DINUCLEOTIDE",
    "AMH": "TRANS-4-AMINOMETHYLCYCLOHEXANE-1-CARBOXYLIC ACID",
    "C5P": "CYTIDINE-5'-MONOPHOSPHATE",
    "SAL": "2-HYDROXYBENZOIC ACID",
    "APC": "DIPHOSPHOMETHYLPHOSPHONIC ACID ADENOSYL ESTER",
    "OMY": "(betaR)-3-chloro-beta-hydroxy-L-tyrosine",
    "OMZ": "(betaR)-3-CHLORO-BETA-HYDROXY-D-TYROSINE",
    "CRO": "{2-[(1R,2R)-1-amino-2-hydroxypropyl]-4-(4-hydroxybenzylidene)-5-oxo-4,5-dihydro-1H-imidazol-1-yl}acetic acid",
    "DUD": "DEOXYURIDINE-5'-DIPHOSPHATE",
    "FFO": "N-[4-({[(6S)-2-amino-5-formyl-4-oxo-3,4,5,6,7,8-hexahydropteridin-6-yl]methyl}amino)benzoyl]-L-glutamic acid",
    "DGT": "2'-DEOXYGUANOSINE-5'-TRIPHOSPHATE",
    "SCS": "3-(ethyldisulfanyl)-L-alanine",
    "DIO": "1,4-DIETHYLENE DIOXIDE",
    "B12": "COBALAMIN",
    "EST": "ESTRADIOL",
    "SFG": "SINEFUNGIN",
    "LLP": "(2S)-2-amino-6-[[3-hydroxy-2-methyl-5-(phosphonooxymethyl)pyridin-4-yl]methylideneamino]hexanoic acid",
    "PLS": "[3-HYDROXY-2-METHYL-5-PHOSPHONOOXYMETHYL-PYRIDIN-4-YLMETHYL]-SERINE",
    "SMC": "S-METHYLCYSTEINE",
    "HTO": "HEPTANE-1,2,3-TRIOL",
    "C2F": "5-METHYL-5,6,7,8-TETRAHYDROFOLIC ACID",
    "PUB": "PHYCOUROBILIN",
    "DGD": "DIGALACTOSYL DIACYL GLYCEROL (DGDG)",
    "MGD": "2-AMINO-5,6-DIMERCAPTO-7-METHYL-3,7,8A,9-TETRAHYDRO-8-OXA-1,3,9,10-TETRAAZA-ANTHRACEN-4-ONE GUANOSINE DINUCLEOTIDE",
    "PID": "PERIDININ",
    "T6A": "N-[N-(9-B-D-RIBOFURANOSYLPURIN-6-YL)CARBAMOYL]THREONINE-5'-MONOPHOSPHATE",
    "AMT": "2-AMINOTHIAZOLE",
    "MLZ": "N-METHYL-LYSINE",
    "PC1": "1,2-DIACYL-SN-GLYCERO-3-PHOSPHOCHOLINE",
    "PEB": "PHYCOERYTHROBILIN",
    "I": "INOSINIC ACID",
    "AGS": "PHOSPHOTHIOPHOSPHORIC ACID-ADENYLATE ESTER",
    "BE2": "2-AMINOBENZOIC ACID",
    "SGC": "4-thio-beta-D-glucopyranose",
    "STU": "STAUROSPORINE",
    "BTB": "2-[BIS-(2-HYDROXY-ETHYL)-AMINO]-2-HYDROXYMETHYL-PROPANE-1,3-DIOL",
    "HDF": "8-HYDROXY-10-(D-RIBO-2,3,4,5-TETRAHYDROXYPENTYL)-5-DEAZAISOALLOXAZINE",
    "PGE": "TRIETHYLENE GLYCOL",
    "DAO": "LAURIC ACID",
    "NAL": "BETA-(2-NAPHTHYL)-ALANINE",
    "AG2": "AGMATINE",
    "2NO": "NITROGEN DIOXIDE",
    "POL": "N-PROPANOL",
    "ETF": "TRIFLUOROETHANOL",
    "CME": "S,S-(2-HYDROXYETHYL)THIOCYSTEINE",
    "PH2": "2-AMINO-6-HYDROXYMETHYL-7,8-DIHYDRO-3H-PTERIDIN-4-ONE",
    "PQN": "PHYLLOQUINONE",
    "GPE": "L-ALPHA-GLYCEROPHOSPHORYLETHANOLAMINE",
    "PAO": "N-(PHOSPHONOACETYL)-L-ORNITHINE",
    "SME": "METHIONINE SULFOXIDE",
    "DAS": "TOLD-ASPARTIC ACIDUENE",
    "PGR": "R-1,2-PROPANEDIOL",
    "FLF": "2-[[3-(TRIFLUOROMETHYL)PHENYL]AMINO] BENZOIC ACID",
    "SNC": "S-NITROSO-CYSTEINE",
    "1PG": "2-(2-{2-[2-(2-METHOXY-ETHOXY)-ETHOXY]-ETHOXY}-ETHOXY)-ETHANOL",
    "CAS": "S-(DIMETHYLARSENIC)CYSTEINE",
    "SPN": "TOLUENE",
    "MBN": "SPEROIDENONE",
    "HP1": "~{N}-[5-[[5-[[5-[[3-[3-(dimethylamino)propylamino]-3-oxidanylidene-propyl]carbamoyl]-1-methyl-pyrrol-3-yl]carbamoyl]-1-methyl-pyrrol-3-yl]carbamoyl]-1-methyl-4-oxidanyl-pyrrol-3-yl]-1-methyl-imidazole-2-carboxamide",
    "PUT": "1,4-DIAMINOBUTANE",
    "A3P": "ADENOSINE-3'-5'-DIPHOSPHATE",
    "AGM": "5-METHYL-ARGININE",
    "BCN": "BICINE",
    "MGN": "2-METHYL-GLUTAMINE",
    "MHS": "N1-METHYLATED HISTIDINE",
    "0JY": "4-methyl-L-leucine",
    "3CJ": "6-propyl-2-thioxo-2,3-dihydropyrimidin-4(1H)-one",
    "486": "11-(4-DIMETHYLAMINO-PHENYL)-17-HYDROXY-13-METHYL-17-PROP-1-YNYL-1,2,6,7,8,11,12,13,14,15,16,17-DODEC AHYDRO-CYCLOPENTA[A]PHENANTHREN-3-ONE",
    "4HY": "[4-(4-HYDROXY-3-IODO-PHENOXY)-3,5-DIIODO-PHENYL]-ACETIC ACID",
    "9CR": "(9cis)-retinoic acid",
    "ABU": "GAMMA-AMINO-BUTANOIC ACID",
    "AS4": "ALDOSTERONE",
    "BOG": "octyl beta-D-glucopyranoside",
    "CL6": "1-[(2-CHLOROPHENYL)(DIPHENYL)METHYL]-1H-IMIDAZOLE",
    "D10": "DECANE",
    "DMS": "DIMETHYL SULFOXIDE",
    "EPE": "4-(2-HYDROXYETHYL)-1-PIPERAZINE ETHANESULFONIC ACID",
    "F6Y": "3',6'-DIHYDROXY-3-OXO-3H-SPIRO[2-BENZOFURAN-1,9'-XANTHENE]-6-CARBOXYLIC ACID",
    "FLC": "CITRATE ANION",
    "FMN": "FLAVIN MONONUCLEOTIDE",
    "H43": "2-[4-[(3S)-3-[[(1R)-1-naphthalen-1-ylethyl]amino]pyrrolidin-1-yl]phenyl]ethanoic acid",
    "KLN": "1-ACETYL-4-(4-{[(2S,4R)-2-(2,4-DICHLOROPHENYL)-2-(1H-IMIDAZOL-1-YLMETHYL)-1,3-DIOXOLAN-4-YL]METHOXY}PHENYL)PIPERAZINE",
    "1II": "3-[1-[4,4-bis(4-fluorophenyl)butyl]piperidin-4-yl]-1~{H}-benzimidazol-2-one",
    "1WE": "(2S)-1-phenylpropan-2-amine",
    "2J3": "(2R)-2-{[(2R)-2-{[(2R)-2-hydroxypropyl]oxy}propyl]oxy}propan-1-ol",
    "2J9": "4-cyclopropyl-7-fluoro-3,4-dihydro-2H-1,2,4-benzothiadiazine 1,1-dioxide",
    "2PN": "IMIDODIPHOSPHORIC ACID",
    "3PE": "1,2-Distearoyl-sn-glycerophosphoethanolamine",
    "3TD": "(1S)-1,4-anhydro-1-(3-methyl-2,4-dioxo-1,2,3,4-tetrahydropyrimidin-5-yl)-5-O-phosphono-D-ribitol",
    "4D4": "(2S,3R)-2-azanyl-5-carbamimidamido-3-oxidanyl-pentanoic acid",
    "4DY": "(6E)-N-(4-hydroxy-3-methoxybenzyl)-8-methylnon-6-enamide",
    "4WI": "(1R,2S,5S)-N-{(1E,2S)-1-imino-3-[(3S)-2-oxopyrrolidin-3-yl]propan-2-yl}-6,6-dimethyl-3-[3-methyl-N-(trifluoroacetyl)-L-valyl]-3-azabicyclo[3.1.0]hexane-2-carboxamide",
    "5MC": "5-METHYLCYTIDINE-5'-MONOPHOSPHATE",
    "657": "6-(trifluoromethoxy)-1,3-benzothiazol-2-amine",
    "69D": "(1S)-1-(4-bromophenyl)-1-[3-(dimethylamino)propyl]-1,3-dihydro-2-benzofuran-5-carbonitrile",
    "6C7": "S-(3-methylbut-2-en-1-yl) trihydrogen thiodiphosphate",
    "6LR": "morpholine",
    "6NA": "HEXANOIC ACID",
    "6ZP": "2-(6'-oxo-1'-phenyl[1',6'-dihydro[2,3'-bipyridine]]-5'-yl)benzonitrile",
    "08Y": "bromoergocryptine",
    "11A": "UNDECANOIC ACID",
    "718": "6-hydroxy-3,4-dihydronaphthalen-1(2H)-one",
    "7RU": "3-[4-[2-[4-[2,3-bis(chloranyl)phenyl]piperazin-1-yl]ethyl]cyclohexyl]-1,1-dimethyl-urea",
    "87X": "5-cyclohexylpentanoic acid",
    "8PR": "Paroxetine",
    "8X9": "2-[(1R,5S,6S)-6-(aminomethyl)-3-ethyl-6-bicyclo[3.2.0]hept-3-enyl]acetic acid",
    "9PL": "(3S,4R)-3-ethyl-4-[(1-methyl-1H-imidazol-5-yl)methyl]dihydrofuran-2(3H)-one",
    "9KL": "(2S)-2-[3-(benzenecarbonyl)phenyl]propanoic acid",
    "9R9": "1-(6-chloro-1H-indazol-4-yl)cyclohexan-1-ol",
    "9Z9": "(3beta,14beta,17beta,25R)-3-[4-methoxy-3-(methoxymethyl)butoxy]spirost-5-en",
    "MRD": "(4R)-2-METHYLPENTANE-2,4-DIOL",
    "PNT": "1,5-BIS(4-AMIDINOPHENOXY)PENTANE",
    "TMQ": "TRIMETREXATE",
    "B1T": "2,2'-sulfanediylbis(4,6-dichlorophenol)",
    "PTK": "pyrene-1,3,6,8-tetrasulfonic acid",
    "FAH": "fluoroacetic acid",
    "SPM": "SPERMINE",
    "RTL": "RETINOL",
    "MES": "2-(N-MORPHOLINO)-ETHANESULFONIC ACID",
    "HPA": "HYPOXANTHINE",
    "HEA": "HEME-A",
    "PEB": "PHYCOERYTHROBILIN",
    "PRL": "PROFLAVIN",
    "EQU": "EQUILENIN",
    "URF": "5-FLUOROURACIL",
    "TRD": "TRIDECANE",
    "HY1": "PHENYLACETALDEHYDE",
    "TBP": "2,4,6-TRIBROMOPHENOL",
    "DIF": "2-[2,6-DICHLOROPHENYL)AMINO]BENZENEACETIC ACID",
    "LNL": "ALPHA-LINOLENIC ACID",
    "BLB": "BLEOMYCIN B2",
    "BES": "2-(3-AMINO-2-HYDROXY-4-PHENYL-BUTYRYLAMINO)-4-METHYL-PENTANOIC ACID",
    "PNS": "4'-PHOSPHOPANTETHEINE",
    "SRL": "[2-(3,5-DI-TERT-BUTYL-4-HYDROXY-PHENYL)-1-(DIETHOXY-PHOSPHORYL)-VINYL]-PHOSPHONIC ACID DIETHLYL ESTER",
    "BCR": "BETA-CAROTENE",
    "NEA": "5'-DEOXY-5'-[2-(AMINO)ETHYLTHIO]ADENOSINE",
    "DXC": "(3ALPHA,5BETA,12ALPHA)-3,12-DIHYDROXYCHOLAN-24-OIC ACID",
    "RFP": "RIFAMPICIN",
    "EPH": "L-ALPHA-PHOSPHATIDYL-BETA-OLEOYL-GAMMA-PALMITOYL-PHOSPHATIDYLETHANOLAMINE",
    "AKR": "ACRYLIC ACID",
    "570": "2-(2-BENZOYL-PHENYLAMINO)-3-{4-[2-(5-METHYL-2-PHENYL-OXAZOL-4-YL)-ETHOXY]-PHENYL}-PROPIONIC ACID",
    "HAS": "HEME-AS",
    "IDR": "alpha-L-idopyranuronic acid",
    "LHG": "1,2-DIPALMITOYL-PHOSPHATIDYL-GLYCEROLE",
    "LMG": "1,2-DISTEAROYL-MONOGALACTOSYL-DIGLYCERIDE",
    "NRG": "N-OMEGA-NITRO-L-ARGININE",
    "MSO": "SELENOMETHIONINE SELENOXIDE",
    "GKR": "D-GLUCARATE",
    "KTN": "CIS-1-ACETYL-4-(4-((2-(2,4-DICHLOROPHENYL)-2-(1H-IMIDAZOL-1-YLMETHYL)-1,3-DIOXOLAN-4-YL)METHOXY)PHENYL)PIPERAZINE",
    "HCS": "2-AMINO-4-MERCAPTO-BUTYRIC ACID",
    "CLY": "CLINDAMYCIN",
    "CTY": "CLARITHROMYCIN",
    "ERY": "ERYTHROMYCIN A",
    "DGA": "DIACYL GLYCEROL",
    "ING": "D-[(AMINO)CARBONYL]PHENYLALANINE",
    "CHT": "CHOLINE ION",
    "544": "2-(1-METHYL-3-OXO-3-PHENYL-PROPYLAMINO)-3-{4-[2-(5-METHYL-2-PHENYL-OXAZOL-4-YL)-ETHOXY]-PHENYL}-PROPIONIC ACID",
    "147": "4-nitrophenyl beta-D-galactopyranoside",
    "CVI": "CRYSTAL VIOLET",
    "DEQ": "DEQUALINIUM",
    "EIC": "LINOLEIC ACID",
    "10A": "DIDECYL-DIMETHYL-AMMONIUM",
    "2AS": "(2S,3S)-3-methyl-aspartic acid",
    "UN1": "2-AMINOHEXANEDIOIC ACID",
    "DTU": "(2R,3S)-1,4-DIMERCAPTOBUTANE-2,3-DIOL",
    "ZIT": "AZITHROMYCIN",
    "5NI": "5-NITROINDAZOLE",
    "AXT": "ASTAXANTHIN",
    "RIO": "RIBOSTAMYCIN",
    "UMQ": "UNDECYL-MALTOSIDE",
    "NOG": "13-BETA-ETHYL-17-ALPHA-ETHYNYL-17-BETA-HYDROXYGON-4-EN-3-ONE",
    "YCM": "S-(2-AMINO-2-OXOETHYL)-L-CYSTEINE",
    "G3D": "GUANOSINE-3'-MONOPHOSPHATE-5'-DIPHOSPHATE",
    "AE3": "2-(2-ETHOXYETHOXY)ETHANOL",
    "JEF": "O-(O-(2-AMINOPROPYL)-O'-(2-METHOXYETHYL)POLYPROPYLENE GLYCOL 500)",
    "EDT": "{[-(BIS-CARBOXYMETHYL-AMINO)-ETHYL]-CARBOXYMETHYL-AMINO}-ACETIC ACID",
    "FEC": "1,3,5,8-TETRAMETHYL-PORPHINE-2,4,6,7-TETRAPROPIONIC ACID FERROUS COMPLEX",
    "4HY": "[4-(4-HYDROXY-3-IODO-PHENOXY)-3,5-DIIODO-PHENYL]-ACETIC ACID",
    "PAF": "PANTOATE",
    "NOC": "3-(6-AMINO-PURIN-9-YL)-5-HYDROXYMETHYL-CYCLOPENTANE-1,2-DIOL",
    "MEQ": "N5-METHYLGLUTAMINE",
    "HMU": "5-HYDROXYMETHYL URACIL",
    "965": "[3-(3-{[2-chloro-3-(trifluoromethyl)benzyl](2,2-diphenylethyl)amino}propoxy)phenyl]acetic acid",
    "NCT": "(S)-3-(1-METHYLPYRROLIDIN-2-YL)PYRIDINE",
    "VIV": "(2R)-2,5,7,8-TETRAMETHYL-2-[(4R,8R)-4,8,12-TRIMETHYLTRIDECYL]CHROMAN-6-OL",
    "DMU": "DECYL-BETA-D-MALTOPYRANOSIDE",
    "PE0": "PTERINE",
    "MTI": "3,4-DIHYDROXY-2-[(METHYLSULFANYL)METHYL]-5-(4-OXO-4,5-DIHYDRO-3H-PYRROLO[3,2-D]PYRIMIDIN-7-YL)PYRROLIDINIUM",
    "POD": "9-HYDROXY-5-(3,4,5-TRIMETHOXYPHENYL)-5,8,8A,9-TETRAHYDROFURO[3',4':6,7]NAPHTHO[2,3-D][1,3]DIOXOL-6(5AH)-ONE",
    "LUT": "(3R,3'R,6S)-4,5-DIDEHYDRO-5,6-DIHYDRO-BETA,BETA-CAROTENE-3,3'-DIOL",
    "AIN": "2-(ACETYLOXY)BENZOIC ACID",
    "FSI": "5-acetamido-3,5-dideoxy-3-fluoro-D-erythro-alpha-L-manno-non-2-ulopyranosonic acid",
    "DID": "4,4'[1,6-HEXANEDIYLBIS(OXY)]BISBENZENECARBOXIMIDAMIDE",
    "UR3": "3-METHYLURIDINE-5'-MONOPHOSHATE",
    "WBU": "5-AMINO-1H-PYRIMIDINE-2,4-DIONE",
    "YOF": "3-FLUOROTYROSINE",
    "RCO": "RESORCINOL",
    "2AN": "8-ANILINO-1-NAPHTHALENE SULFONATE",
    "PCD": "(MOLYBDOPTERIN-CYTOSINE DINUCLEOTIDE-S,S)-DIOXO-AQUA-MOLYBDENUM(V)",
    "PXG": "3-[O-PHOSPHONOPYRIDOXYL]--AMINO-BENZOIC ACID",
    "VK3": "MENADIONE",
    "NEN": "1-ETHYL-PYRROLIDINE-2,5-DIONE",
    "B3P": "2-[3-(2-HYDROXY-1,1-DIHYDROXYMETHYL-ETHYLAMINO)-PROPYLAMINO]-2-HYDROXYMETHYL-PROPANE-1,3-DIOL",
    "CCC": "CYTIDINE-5'-PHOSPHATE-2',3'-CYCLIC PHOSPHATE",
    "DIF": "2-[2,6-DICHLOROPHENYL)AMINO]BENZENEACETIC ACID",
    "BPY": "BIPHENYL-2,3-DIOL",
    "QUA": "8-HYDROXY-4-(1-HYDROXYETHYL)QUINOLINE-2-CARBOXYLIC ACID",
    "DHC": "CAFFEIC ACID",
    "NLG": "N-ACETYL-L-GLUTAMATE",
    "9DA": "9-DEAZAADENINE",
    "MQD": "2-METHYLPENTANE-1,2,4-TRIOL",
    "ANM": "ANISOMYCIN",
    "TEL": "TELITHROMYCIN",
    "LAZ": "N-(2-AMINOETHYL)-P-CHLOROBENZAMIDE",
    "RP5": "5-O-phosphono-beta-D-ribofuranose",
    "GUL": "(2R,3S,4R,5S)-2,6-difluoro-2-(hydroxymethyl)oxane-3,4,5-triol",
    "SQD": "1,2-DI-O-ACYL-3-O-[6-DEOXY-6-SULFO-ALPHA-D-GLUCOPYRANOSYL]-SN-GLYCEROL",
    "PRT": "PHOSPHORIBOSYL ATP",
    "XAT": "(3S,5R,6S,3'S,5'R,6'S)-5,6,5',6'-DIEPOXY-5,6,5',6'- TETRAHYDRO-BETA,BETA-CAROTENE-3,3'-DIOL",
    "TBE": "TAZOBACTAM INTERMEDIATE",
    "PIH": "iodobenzene",
    "SKM": "(3R,4S,5R)-3,4,5-TRIHYDROXYCYCLOHEX-1-ENE-1-CARBOXYLIC ACID",
    "2OP": "(2S)-2-HYDROXYPROPANOIC ACID",
    "SA8": "S-5'-AZAMETHIONINE-5'-DEOXYADENOSINE",
    "XED": "DEXTROFLOXACINE",
    "4AA": "4-CHLORO-3-HYDROXYANTHRANILIC ACID",
    "COU": "COUMARIN",
    "CNY": "13,15-DIAMINO-2-(AMINOMETHYL)-3,4,9,12-TETRAHYDROXYHEXADECAHYDRO-2H-7,10-EPOXYPYRANO[2,3-B][1,10,4]BENZODIOXAZACYCLODODECIN-8-YL 2,6-DIAMINO-2,6-DIDEOXYHEXOPYRANOSIDE",
    "POG": "(20S)-2,5,8,11,14,17-HEXAMETHYL-3,6,9,12,15,18-HEXAOXAHENICOSANE-1,20-DIOL",
    "5HA": "N-[(1S,2R)-1-BENZYL-3-(CYCLOPROPYLAMINO)-2-HYDROXYPROPYL]-5-[METHYL(METHYLSULFONYL)AMINO]-N'-[(1R)-1-PHENYLETHYL]ISOPHTHALAMIDE",
    "HP6": "HEPTANE",
    "NEH": "ETHANAMINE",
    "FNR": "1-DEOXY-1-(7,8-DIMETHYL-2,4-DIOXO-3,4-DIHYDRO-2H-BENZO[G]PTERIDIN-1-ID-10(5H)-YL)-5-O-PHOSPHONATO-D-RIBITOL",
    "BTI": "5-(HEXAHYDRO-2-OXO-1H-THIENO[3,4-D]IMIDAZOL-6-YL)PENTANAL",
    "4BF": "4-BROMO-L-PHENYLALANINE",
    "23D": "N2-[(1R,2S)-2-AMINOCYCLOHEXYL]-N6-(3-CHLOROPHENYL)-9-ETHYL-9H-PURINE-2,6-DIAMINE",
    "CM5": "5-CYCLOHEXYL-1-PENTYL-BETA-D-MALTOSIDE",
    "CA2": "(1S,3R,4R,5S)-1,3,4-TRIHYDROXY-5-(3-PHENOXYPROPYL)CYCLOHEXANECARBOXYLIC ACID",
    "GBL": "GAMMA-BUTYROLACTONE",
    "LG3": "PYRIMIDINE-2,4-DIAMINE",
    "HSX": "5-O-phosphono-alpha-D-ribofuranose",
    "1DO": "1-DODECANOL",
    "HDB": "(R)-1-(4-(4-(HYDROXYMETHYL)-1,3,2-DIOXABOROLAN-2-YL)BENZYL)GUANIDINE",
    "XY2": "N,N'-DIMETHYL-N-(ACETYL)-N'-(7-NITROBENZ-2-OXA-1,3-DIAZOL-4-YL)ETHYLENEDIAMINE",
    "COK": "[(2R,3S,4R,5R)-5-(6-AMINO-9H-PURIN-9-YL)-4-HYDROXY-3-(PHOSPHONOOXY)TETRAHYDROFURAN-2-YL]METHYL (3R)-3-HYDROXY-4-{[3-({2-[(2-HYDROXYETHYL)DITHIO]ETHYL}AMINO)-3-OXOPROPYL]AMINO}-2,2-DIMETHYL-4-OXOBUTYL DIHYDROGEN DIPHOSPHATE",
    "7PE": "2-(2-(2-(2-(2-(2-ETHOXYETHOXY)ETHOXY)ETHOXY)ETHOXY)ETHOXY)ETHANOL",
    "AC6": "P-HYDROXYACETOPHENONE",
    "HY3": "3-HYDROXYPROLINE",
    "GIM": "GLUCOIMIDAZOLE",
    "TOX": "1-hydroperoxy-L-tryptophan",
    "ARF": "FORMAMIDE",
    "LPD": "L-PROLINAMIDE",
    "DON": "6-DIAZENYL-5-OXO-L-NORLEUCINE",
    "2AQ": "QUINOLIN-2-AMINE",
    "RP4": "(1S,4S,5S)-1,4,5-TRIHYDROXY-3-[3-(PHENYLTHIO)PHENYL]CYCLOHEX-2-ENE-1-CARBOXYLIC ACID",
    "SBG": "O-[(S)-HYDROXY(METHYL)PHOSPHORYL]-L-SERINE",
    "H35": "N-(FURAN-2-YLMETHYL)-7H-PURIN-6-AMINE",
    "GVH": "1H-PYRROLO[2,3-B]PYRIDINE",
    "CM0": "5-(CARBOXYMETHOXY) URIDINE-5'-MONOPHOSPHATE",
    "ALE": "L-EPINEPHRINE",
    "UFT": "2'-deoxy-2'-fluorouridine 5'-(dihydrogen phosphate)",
    "EAH": "(5S,7E,9E,11Z,14Z)-5-hydroxyicosa-7,9,11,14-tetraenoic acid",
    "TCE": "3,3',3''-phosphanetriyltripropanoic acid",
    "3FD": "4-[[(2R,3S,4R,5R)-5-[6-amino-8-[(3,4-dichlorophenyl)methylamino]purin-9-yl]-3,4-dihydroxy-oxolan-2-yl]methoxymethyl]benzonitrile",
    "55V": "6-methyl-5-[3-methyl-3-(3,4,5-trimethoxyphenyl)but-1-yn-1-yl]pyrimidine-2,4-diamine",
    "FUB": "beta-L-arabinofuranose",
    "TMO": "trimethylamine oxide",
    "ZZU": "(2S,3S)-3-HYDROXYARGININE",
    "GD8": "(1R)-1,2,2-TRIMETHYLPROPYL (S)-METHYLPHOSPHINATE",
    "717": "imidazo[2,1-b][1,3]thiazol-6-ylmethanol",
    "DD9": "nonane",
    "PRQ": "(3S)-3-amino-3-(2-nitrophenyl)propanoic acid",
    "MVL": "(5R,6R,7S,8R)-5-(HYDROXYMETHYL)-5,6,7,8-TETRAHYDROIMIDAZO[1,2-A]PYRIDINE-6,7,8-TRIOL",
    "LGA": "PYRIMIDIN-2-AMINE",
    "GG2": "1-(4-METHOXYPHENYL)-7-OXO-6-[4-(2-OXOPIPERIDIN-1-YL)PHENYL]-4,5,6,7-TETRAHYDRO-1H-PYRAZOLO[3,4-C]PYRIDINE-3-CARBOXAMIDE",
    "PLQ": "1,4-benzoquinone",
    "C03": "(2R)-2-(4-CHLORO-3-{[3-(6-METHOXY-1,2-BENZISOXAZOL-3-YL)-2-METHYL-6-(TRIFLUOROMETHOXY)-1H-INDOL-1-YL]METHYL}PHENOXY)PROPANOIC ACID",
    "CYT": "6-AMINOPYRIMIDIN-2(1H)-ONE",
    "LAB": "LATRUNCULIN B",
    "SRO": "SEROTONIN",
    "LZ1": "1H-indazole",
    "TH5": "O-acetyl-L-threonine",
    "8AN": "3'-amino-3'-deoxyadenosine 5'-(dihydrogen phosphate)",
    "RIV": "5-chloro-N-({(5S)-2-oxo-3-[4-(3-oxomorpholin-4-yl)phenyl]-1,3-oxazolidin-5-yl}methyl)thiophene-2-carboxamide",
    "RSM": "(2S)-2-(acetylamino)-N-methyl-4-[(R)-methylsulfinyl]butanamide",
    "0LI": "3-(imidazo[1,2-b]pyridazin-3-ylethynyl)-4-methyl-N-{4-[(4-methylpiperazin-1-yl)methyl]-3-(trifluoromethyl)phenyl}benzam ide",
    "A8S": "(2Z,4E)-5-[(1S)-1-hydroxy-2,6,6-trimethyl-4-oxocyclohex-2-en-1-yl]-3-methylpenta-2,4-dienoic acid",
    "J0Z": "benzyl acetate",
    "VG9": "6-{[(2,5-DIETHOXYPHENYL)AMINO]METHYL}-5-METHYLPYRIDO[2,3-D]PYRIMIDINE-2,4-DIAMINE",
    "4AZ": "1,5-anhydro-D-fructose",
    "HOX": "4-amino-L-phenylalanine",
    "MP8": "(4R)-4-methyl-L-proline",
    "KTL": "(1S,2R,3R,4S)-1-{(1S)-2-[(2R,3S,4S)-3,4-dihydroxy-2-(hydroxymethyl)tetrahydrothiophenium-1-yl]-1-hydroxyethyl}-2,3,4,5-tetrahydroxypentyl sulfate",
    "VMX": "5'-{[(3-aminopropyl)sulfonyl]amino}-5'-deoxyadenosine",
    "KDN": "deamino-beta-neuraminic acid",
    "ABV": "1,3-benzothiazol-2-amine",
    "Q88": "9H-purine-6,8-diamine",
    "BYC": "benzoyl coenzyme A",
    "NKP": "(2R)-2-hydroxy-3-(phosphonooxy)propyl (9E)-octadec-9-enoate",
    "2HG": "(2R)-2-hydroxypentanedioic acid",
    "JKE": "2-sulfanylbenzoic acid",
    "TOF": "1,6-dimethylpyrimido[5,4-e][1,2,4]triazine-5,7(1H,6H)-dione",
    "Y01": "CHOLESTEROL HEMISUCCINATE",
    "HTH": "(2S,3R)-heptane-1,2,3-triol",
    "XFJ": "6,6'-(pyridine-3,5-diyldiethane-2,1-diyl)bis(4-methylpyridin-2-amine)",
    "XFN": "6,6'-[(5-aminobenzene-1,3-diyl)diethane-2,1-diyl]bis(4-methylpyridin-2-amine)",
    "3CX": "(2S)-3-(cyclohexylamino)-2-hydroxypropane-1-sulfonic acid",
    "BZX": "1,3-benzodioxol-5-ol",
    "CGE": "Clopidogrel",
    "CH0": "2-{[(3alpha,5alpha,7alpha,8alpha,10alpha,12alpha,17alpha)-3,12-bis{2-[(4-O-alpha-D-glucopyranosyl-beta-D-glucopyranosyl)oxy]ethoxy}cholan-7-yl]oxy}ethyl 4-O-alpha-D-glucopyranosyl-beta-D-glucopyranoside",
    "I2M": "3-methyl-L-alloisoleucine",
    "TIC": "ticlopidine",
    "X2W": "N-ACETYL-L-GLUTAMYL 5-PHOSPHATE",
    "8TX": "8-thioxo-3,7,8,9-tetrahydro-1H-purine-2,6-dione",
    "TCI": "(6aR,10aR)-6,6,9-trimethyl-3-pentyl-6a,7,8,10a-tetrahydro-6H-benzo[c]chromen-1-ol",
    "DZ4": "2'-deoxy-5'-O-[(R)-hydroxy{[(R)-hydroxy(phosphonooxy)phosphoryl]amino}phosphoryl]adenosine",
    "XNW": "(1R,2R,4S,5R)-1,4,5-TRIHYDROXY-2-(4-METHOXYBENZYL)-3-OXOCYCLOHEXANECARBOXYLIC ACID",
    "ES5": "quinolin-6-amine",
    "ES7": "1-methyl-1H-benzimidazol-2-amine",
    "MPV": "1-methyl-3-(thiophen-2-yl)-1H-pyrazol-5-amine",
    "3AU": "3-[(3S)-3-amino-3-carboxypropyl]uridine 5'-(dihydrogen phosphate)",
    "CVX": "O-BUTLYLMETHYLPHOSPHONIC ACID ESTER GROUP",
    "L8H": "4-methoxynaphthalen-2-amine",
    "VR": "2-METHYLPROPYL HYDROGEN (R)-METHYLPHOSPHONATE",
    "WGZ": "5-{6-[(1-METHYLPIPERIDIN-4-YL)OXY]-1H-BENZIMIDAZOL-1-YL}-3-{(1R)-1-[2-(TRIFLUOROMETHYL)PHENYL]ETHOXY}THIOPHENE-2-CARBOXAMIDE",
    "022": "3-{1-(4-carbamoyl-2-methylphenyl)-5-[4-(1H-imidazol-1-yl)phenyl]-1H-pyrrol-2-yl}propanoic acid",
    "FNU": "6-amino-5-fluorouridine 5'-(dihydrogen phosphate)",
    "IVM": "(2aE,4E,5'S,6S,6'R,7S,8E,11R,13R,15S,17aR,20R,20aR,20bS)-6'-[(2S)-butan-2-yl]-20,20b-dihydroxy-5',6,8,19-tetramethyl-17 -oxo-3',4',5',6,6',10,11,14,15,17,17a,20,20a,20b-tetradecahydro-2H,7H-spiro[11,15-methanofuro[4,3,2-pq][2,6]benzodioxacy clooctadecine-13,2'-pyran]-7-yl 2,6-dideoxy-4-O-(2,6-dideoxy-3-O-methyl-alpha-L-arabino-hexopyranosyl)-3-O-methyl-alpha-L-arabino-hexopyranoside",
    "QEL": "4-[(1R,2S)-2-(4-benzylpiperidin-1-yl)-1-hydroxypropyl]phenol",
    "QEM": "4-[(1R,2S)-3-(4-benzylpiperidin-1-yl)-1-hydroxy-2-methylpropyl]phenol",
    "RI5": "(1aR,2aR,3S,6R,6aS,8aS,8bR,9R)-2a-hydroxy-8b-methyl-9-(prop-1-en-2-yl)hexahydro-3,6-methano-1,5,7-trioxacyclopenta[ij]c yclopropa[a]azulene-4,8(3H)-dione",
    "QLR": "6-methyl-5-[(3R)-3-(3,4,5-trimethoxyphenyl)pent-1-yn-1-yl]pyrimidine-2,4-diamine",
    "PZA": "PYRAZINE-2-CARBOXAMIDE",
    "NBO": "nabumetone",
    "2M8": "(2R)-3-hydroxy-2-(alpha-D-mannopyranosyloxy)propanoic acid",
    "EVH": "1H-imidazole-2-sulfonamide",
    "NPX": "(2R)-2-(6-methoxynaphthalen-2-yl)propanoic acid",
    "0FX": "dTDP-4-amino-4,6-dideoxyglucose",
    "EPZ": "(2R)-2-{[(2R,3R,4R,5S,6R)-3-(acetylamino)-2-{[(S)-{[(R)-{[(2R,3S,4R,5R)-5-(2,4-dioxo-3,4-dihydropyrimidin-1(2H)-yl)-3,4-dihydroxytetrahydrofuran-2-yl]methoxy}(hydroxy)phosphoryl]oxy}(hydroxy)phosphoryl]oxy}-5-hydroxy-6-(hydroxymethyl)tetrahydro-2H-pyran-4-yl]oxy}propanoic acid",
    "KNA": "nonanoic acid",
    "RWZ": "N-[(2Z)-3,7-dimethylocta-2,6-dien-1-yl]-N'-[(1R,3S,5R,7R)-tricyclo[3.3.1.1~3,7~]dec-2-yl]ethane-1,2-diamine",
    "BMN": "(1R)-1,4-anhydro-2-deoxy-1-(3-methoxynaphthalen-2-yl)-5-O-phosphono-D-erythro-pentitol",
    "LHO": "2-(2-deoxy-5-O-phosphono-beta-D-erythro-pentofuranosyl)-6-methylisoquinoline-1(2H)-thione",
    "NXL": "(2S,5R)-1-formyl-5-[(sulfooxy)amino]piperidine-2-carboxamide",
    "0V5": "(2R)-2-(phosphonooxy)propanoic acid",
    "TIY": "2,3,4,6-tetrahydroxy-5H-benzo[7]annulen-5-one",
    "0R0": "2-hydroxybenzonitrile",
    "13X": "benzene-1,3,5-triol",
    "0TD": "(3S)-3-(methylsulfanyl)-L-aspartic acid",
    "T1C": "TIGECYCLINE",
    "ZAE": "N-methyl-D-phenylalanine",
    "PRD_000204": "Vancomycin",
    "HFG": "7-bromo-6-chloro-3-{3-[(2R,3S)-3-hydroxypiperidin-2-yl]-2-oxopropyl}quinazolin-4(3H)-one",
    "FBB": "6-fluoro-1,3-benzothiazol-2-amine",
    "78M": "(2S)-2,3-DIHYDROXYPROPYL(7Z)-PENTADEC-7-ENOATE",
    "SYN": "ethenylbenzene",
    "TD6": "(4S)-4-{3-[(4-amino-2-methylpyrimidin-5-yl)methyl]-5-(2-{[(S)-hydroxy(phosphonooxy)phosphoryl]oxy}ethyl)-4-methyl-1,3lambda~5~-thiazol-2-yl}-4-hydroxybutanoic acid",
    "QD0": "(2S,4R)-N-methyl-1-[2-(3-methyl-1,2-oxazol-5-yl)ethanoyl]-4-oxidanyl-pyrrolidine-2-carboxamide",
    "1VU": "propionyl Coenzyme A",
    "O4B": "1,4,7,10,13,16-HEXAOXACYCLOOCTADECANE",
    "PYJ": "PHENYLETHANE",
    "2YR": "2'-deoxy-N-(2-sulfanylethyl)cytidine 5'-(dihydrogen phosphate)",
    "U2X": "O-(cyclohexylmethyl)-L-tyrosine",
    "KRS": "cladosporin",
    "CM5": "5-CYCLOHEXYL-1-PENTYL-BETA-D-MALTOSIDE",
    "BTI": "5-(HEXAHYDRO-2-OXO-1H-THIENO[3,4-D]IMIDAZOL-6-YL)PENTANAL",
    "4BF": "4-BROMO-L-PHENYLALANINE",
    "OSM": "1-(OXIDOSULFANYL)METHANAMINE",
    "HSX": "5-O-phosphono-alpha-D-ribofuranose",
    "BE7": "(4-CARBOXYPHENYL)(CHLORO)MERCURY",
    "MIY": "(4S,4AS,5AR,12AS)-4,7-BIS(DIMETHYLAMINO)-3,10,12,12A-TETRAHYDROXY-1,11-DIOXO-1,4,4A,5,5A,6,11,12A-OCTAHYDROTETRACENE-2- CARBOXAMIDE",
    "AM2": "APRAMYCIN",
    "SVR": "SER8,8'-[CARBONYLBIS[IMINO-3,1-PHENYLENECARBONYLIMINO(4-METHYL-3,1-PHENYLENE)CARBONYLIMINO]]BIS-1,3,5-NAPHTHALENETRISULFON IC ACIDOTONIN",
    "R16": "HEXADECANE",
    "CM0": "5-(CARBOXYMETHOXY) URIDINE-5'-MONOPHOSPHATE",
    "LAB": "LATRUNCULIN B",
    "PIT": "PICEATANNOL",
    "HCY": "(11alpha,14beta)-11,17,21-trihydroxypregn-4-ene-3,20-dione",
    "CAU": "(2S)-1-(9H-Carbazol-4-yloxy)-3-(isopropylamino)propan-2-ol",
    "NPS": "(2S)-2-(6-methoxynaphthalen-2-yl)propanoic acid",
    "B3Q": "(3S)-3,6-diamino-6-oxohexanoic acid",
    "BIL": "(3R,4S)-3-amino-4-methylhexanoic acid",
    "XCP": "(1S,2S)-2-aminocyclopentanecarboxylic acid",
    "4OC": "4N,O2'-METHYLCYTIDINE-5'-MONOPHOSPHATE",
    "B3A": "(3S)-3-AMINOBUTANOIC ACID",
    "B3D": "3-AMINOPENTANEDIOIC ACID",
    "B3E": "(3S)-3-AMINOHEXANEDIOIC ACID",
    "6MZ": "N6-METHYLADENOSINE-5'-MONOPHOSPHATE",
    "8AN": "3'-amino-3'-deoxyadenosine 5'-(dihydrogen phosphate)",
    "53H": "5'-O-[(3-methyl-D-valyl)sulfamoyl]adenosine",
    "GW9": "2-chloro-5-nitro-N-phenylbenzamide",
    "NGC": "N-glycolyl-alpha-neuraminic acid",
    "NYH": "1-nitrocyclohexene",
    "TH8": "THIAMPHENICOL",
    "XAX": "{[(5aR,8R,9aR)-2-amino-4-oxo-6,7-di(sulfanyl-kappaS)-3,5,5a,8,9a,10-hexahydro-4H-pyrano[3,2-g]pteridin-8-yl]methyl dihydrogenato(2-) phosphate}(hydroxy)oxo(thioxo)molybdenum",
    "CBW": "(3BETA,5BETA,14BETA)-3-HYDROXY-11-OXOOLEAN-12-EN-29-OIC ACID",
    "TCE": "3,3',3''-phosphanetriyltripropanoic acid",
    "HBX": "benzaldehyde",
    "3AB": "3-aminobenzamide",
    "LO2": "2-{4-[butyl(3-chloro-4,5-dimethoxybenzyl)amino]phenyl}-1,1,1,3,3,3-hexafluoropropan-2-ol",
    "25D": "2-cyclohexyl-N-[(3-{[(2,4,6-trimethylphenyl)carbamoyl]amino}naphthalen-2-yl)carbonyl]-D-alanine",
    "YTT": "(3S,6S)-3,6-bis(4-hydroxybenzyl)piperazine-2,5-dione",
    "LPX": "(2S)-3-{[(R)-(2-aminoethoxy)(hydroxy)phosphoryl]oxy}-2-hydroxypropyl hexadecanoate",
    "B1T": "2,2'-sulfanediylbis(4,6-dichlorophenol)",
    "FIT": "(4aR,4bS,6aS,7S,9aS,9bS,11aR)-N-tert-butyl-4a,6a-dimethyl-2-oxo-2,4a,4b,5,6,6a,7,8,9,9a,9b,10,11,11a-tetradecahydro-1H-indeno[5,4-f]quinoline-7-carboxamide",
    "H3P": "2,2'-methanediylbis(3,4,6-trichlorophenol)  ",
    "5OH": "(2S)-amino[(4R,6S)-2-amino-6-hydroxy-3,4,5,6-tetrahydropyrimidin-4-yl]ethanoic acid",
    "KBE": "beta-lysine",
    "DPV": "dodecyl 2-(trimethylammonio)ethyl phosphate",
    "Y01": "CHOLESTEROL HEMISUCCINATE",
    "A8S": "(2Z,4E)-5-[(1S)-1-hydroxy-2,6,6-trimethyl-4-oxocyclohex-2-en-1-yl]-3-methylpenta-2,4-dienoic acid",
    "HAO": "{[3-(hydrazinocarbonyl)-4-methoxyphenyl]amino}(oxo)acetic acid",
    "0TD": "(3S)-3-(methylsulfanyl)-L-aspartic acid",
    "PRD_000204": "Vancomycin",
    "PRD_000226": "Viomycin",
    "9CR": "(9cis)-retinoic acid",
    "1N7": "CHAPSO",
    "1VU": "propionyl Coenzyme A",
    "PRD_000150": "GRAMICIDIN A",
    "31H": "3'-deoxy-3'-[(N-formyl-L-methionyl)amino]adenosine 5'-(dihydrogen phosphate)",
    "DI8": "(3S)-1,2,3,4-tetrahydroisoquinoline-3-carboxylic acid",
    "4D4": "(2S,3R)-2-azanyl-5-carbamimidamido-3-oxidanyl-pentanoic acid",
    "CL0": "CHLOROPHYLL A ISOMER",
    "ZEX": "(1R,2S)-4-{(1E,3E,5E,7E,9E,11E,13E,15E,17E)-18-[(4S)-4-hydroxy-2,6,6-trimethylcyclohex-1-en-1-yl]-3,7,12,16-tetramethyloctadeca-1,3,5,7,9,11,13,15,17-nonaen-1-yl}-2,5,5-trimethylcyclohex-3-en-1-ol",
    "45D": "beta,beta-carotene-4,4'-dione",
    "D2T": "(3R)-3-(methylsulfanyl)-L-aspartic acid",
    "AJP": "Digitonin",
    "B8T": "4-methyl, cytidine-5'-monophosphate",
    "PRD_900001": "alpha-maltose",
    "PRD_900003": "sucrose",
    "PRD_900007": "alpha-acarbose",
    "PRD_900067": "3'-sialyl-N-acetyllactosamine",
    "MS6": "(2S)-2-amino-4-(methylsulfanyl)butane-1-thiol",
    "DD6": "(3S,3'R,5R,6S,7cis)-7',8'-didehydro-5,6-dihydro-5,6-epoxy-beta,beta-carotene-3,3'-diol",
    "KC1": "Chlorophyll c1",
    "KC2": "Chlorophyll c2",
    "4WI": "(1R,2S,5S)-N-{(1E,2S)-1-imino-3-[(3S)-2-oxopyrrolidin-3-yl]propan-2-yl}-6,6-dimethyl-3-[3-methyl-N-(trifluoroacetyl)-L-valyl]-3-azabicyclo[3.1.0]hexane-2-carboxamide",
    "CCN": "ACETONITRILE",
    "OXM": "OXAMIC ACID",
    "IMD": "IMIDAZOLE",
    "TRS": "2-AMINO-2-HYDROXYMETHYL-PROPANE-1,3-DIOL",
    "SEP": "PHOSPHOSERINE",
    "IPA": "ISOPROPYL ALCOHOL",
    "GOL": "GLYCEROL",
    "GSH": "GLUTATHIONE",
    "NDG": "2-acetamido-2-deoxy-alpha-D-glucopyranose",
    "CRS": "M-CRESOL",
    "DGL": "D-GLUTAMIC ACID",
    "GTP": "GUANOSINE-5'-TRIPHOSPHATE",
    "OLA": "OLEIC ACID",
    "CRT": "SPIRILLOXANTHIN",
    "PQQ": "PYRROLOQUINOLINE QUINONE",
    "G3H": "GLYCERALDEHYDE-3-PHOSPHATE",
    "NEP": "N1-PHOSPHONOHISTIDINE",
    "HED": "2-HYDROXYETHYL DISULFIDE",
    "KCX": "LYSINE NZ-CARBOXYLIC ACID",
    "PAM": "PALMITOLEIC ACID",
    "THM": "THYMIDINE",
    "TAM": "TRIS(HYDROXYETHYL)AMINOMETHANE",
    "IMN": "INDOMETHACIN",
    "GMP": "GUANOSINE",
    "AYA": "N-ACETYLALANINE",
    "CNA": "CARBA-NICOTINAMIDE-ADENINE-DINUCLEOTIDE",
    "MEA": "N-METHYLPHENYLALANINE",
    "PEE": "1,2-dioleoyl-sn-glycero-3-phosphoethanolamine",
    "UQ2": "UBIQUINONE-2",
    "NCN": "NICOTINATE MONONUCLEOTIDE",
    "CXS": "3-CYCLOHEXYL-1-PROPYLSULFONIC ACID",
    "BU1": "1,4-BUTANEDIOL",
    "LDP": "L-DOPAMINE",
    "LNR": "L-NOREPINEPHRINE",
    "MTA": "5'-DEOXY-5'-METHYLTHIOADENOSINE",
    "IYR": "3-IODO-TYROSINE",
    "LMT": "DODECYL-BETA-D-MALTOSIDE",
    "PCR": "P-CRESOL",
    "CDL": "CARDIOLIPIN",
    "PG4": "TETRAETHYLENE GLYCOL",
    "CHD": "CHOLIC ACID",
    "PEF": "DI-PALMITOYL-3-SN-PHOSPHATIDYLETHANOLAMINE",
    "GTA": "P1-7-METHYLGUANOSINE-P3-ADENOSINE-5',5'-TRIPHOSPHATE",
    "CYZ": "CYCLOTHIAZIDE",
    "X8Z": "L-CAPTOPRIL",
    "P33": "3,6,9,12,15,18-HEXAOXAICOSANE-1,20-DIOL",
    "M77": "5-(1,4-DIAZEPAN-1-SULFONYL)ISOQUINOLINE",
    "SOG": "octyl 1-thio-beta-D-glucopyranoside",
    "CTN": "4-AMINO-1-BETA-D-RIBOFURANOSYL-2(1H)-PYRIMIDINONE",
    "PCW": "1,2-DIOLEOYL-SN-GLYCERO-3-PHOSPHOCHOLINE",
    "G2P": "PHOSPHOMETHYLPHOSPHONIC ACID GUANYLATE ESTER",
    "P4G": "1-ETHOXY-2-(2-ETHOXYETHOXY)ETHANE",
    "PEV": "(1S)-2-{[(2-AMINOETHOXY)(HYDROXY)PHOSPHORYL]OXY}-1-[(PALMITOYLOXY)METHYL]ETHYL STEARATE",
    "PGW": "(1R)-2-{[(S)-{[(2S)-2,3-dihydroxypropyl]oxy}(hydroxy)phosphoryl]oxy}-1-[(hexadecanoyloxy)methyl]ethyl (9Z)-octadec-9-enoate",
    "ZMP": "S-[2-({N-[(2S)-2-hydroxy-3,3-dimethyl-4-(phosphonooxy)butanoyl]-beta-alanyl}amino)ethyl] tetradecanethioate",
    "H56": "(3~{R},4~{S})-1-(4-fluorophenyl)-3-[(3~{S})-3-(4-fluorophenyl)-3-oxidanyl-propyl]-4-(4-hydroxyphenyl)azetidin-2-one",
    "HEZ": "HEXANE-1,6-DIOL",
    "HSM": "HISTAMINE",
    "HT3": "(2R,3S)-heptane-1,2,3-triol",
    "I7Y": "(2R)-2-(methoxymethyl)-4-{[(25R)-spirost-5-en-3beta-yl]oxy}butyl 4-O-alpha-D-glucopyranosyl-beta-D-glucopyranoside",
    "I7D": "(6~{E},8~{E},10~{E},12~{E},14~{E},16~{E},18~{E},20~{E},22~{E},24~{E},26~{E},28~{E})-2,31-dimethoxy-2,6,10,14,19,23,27,31-octamethyl-dotriaconta-6,8,10,12,14,16,18,20,22,24,26,28-dodecaen-5-one",
    "IVA": "ISOVALERIC ACID",
    "IX8": "7-methyl-N-[(2R)-1-phenoxypropan-2-yl]-3-(4-propan-2-ylphenyl)pyrazolo[1,5-a]pyrimidine-6-carboxamide",
    "K1S": "N,N-diethyl-5-methyl[1,2,4]triazolo[1,5-a]pyrimidin-7-amine",
    "K86": "4-tert-butyl-N-[6-(2-hydroxyethyloxy)-5-(2-methoxyphenoxy)-2-pyrimidin-2-yl-pyrimidin-4-yl]benzenesulfonamide",
    "LSN": "[2-butyl-5-chloranyl-3-[[4-[2-(2H-1,2,3,4-tetrazol-5-yl)phenyl]phenyl]methyl]imidazol-4-yl]methanol",
    "MAH": "3-HYDROXY-3-METHYL-GLUTARIC ACID",
    "12M": "(2-ETHYLPHENYL)METHANOL",
    "BBI": "(2-butyl-1-benzofuran-3-yl){4-[2-(diethylamino)ethoxy]-3,5-diiodophenyl}methanone",
    "COH": "PROTOPORPHYRIN IX CONTAINING CO",
    "F2T": "2'-deoxy-2'-fluoro-5'-O-thiophosphonouridine",
    "FBI": "7-[4-(4-FLUORO-PHENYL)-6-ISOPROPYL-2-(METHANESULFONYL-METHYL-AMINO)-PYRIMIDIN-5-YL] -3,5-DIHYDROXY-HEPTANOIC ACID",
    "FME": "N-FORMYLMETHIONINE",
    "FUN": "5-(AMINOSULFONYL)-4-CHLORO-2-[(2-FURYLMETHYL)AMINO]BENZOIC ACID",
    "GAL": "beta-D-galactopyranose",
    "GDP": "GUANOSINE-5'-DIPHOSPHATE",
    "GP7": "(1R)-2-{[(S)-(2-aminoethoxy)(hydroxy)phosphoryl]oxy}-1-[(pentadecanoyloxy)methyl]ethyl (12E)-hexadeca-9,12-dienoate",
    "P6G": "HEXAETHYLENE GLYCOL",
    "P7F": "(2~{R})-2-[3-[[1,3-benzoxazol-2-yl-[3-(4-methoxyphenoxy)propyl]amino]methyl]phenoxy]butanoic acid",
    "PCA": "PYROGLUTAMIC ACID",
    "PCF": "1,2-DIACYL-SN-GLYCERO-3-PHOSHOCHOLINE",
    "PDF": "4,4-difluoro-L-proline",
    "PDO": "1,3-PROPANDIOL",
    "PEM": "2-[P-[2-P-CHLOROBENZAMIDO)ETHYL]PHENOXY]-2-METHYLPROPIONIC ACID",
    "MLA": "MALONIC ACID",
    "MNR": "PROTOPORPHYRIN IX CONTAINING MN",
    "MOH": "METHANOL",
    "NIO": "NICOTINIC ACID",
    "NIM": "4-NITRO-2-PHENOXYMETHANESULFONANILIDE",
    "NRH": "Norharmane",
    "NRO": "3-[5-(2-nitropent-1-en-1-yl)furan-2-yl]benzoic acid",
    "NVA": "NORVALINE",
    "OBN": "OUABAIN",
    "OCT": "N-OCTANE",
    "OGA": "N-OXALYLGLYCINE",
    "OLC": "(2R)-2,3-dihydroxypropyl (9Z)-octadec-9-enoate",
    "ORO": "OROTIC ACID",
    "P1Z": "4-BUTYL-1,2-DIPHENYL-PYRAZOLIDINE-3,5-DIONE",
    "ME2": "1-ETHOXY-2-(2-METHOXYETHOXY)ETHANE",
    "MCN": "PTERIN CYTOSINE DINUCLEOTIDE",
    "MCM": "7-AMINO-4-METHYL-CHROMEN-2-ONE",
    "MC3": "1,2-DIMYRISTOYL-RAC-GLYCERO-3-PHOSPHOCHOLINE",
    "PTR": "O-PHOSPHOTYROSINE",
    "PX2": "1,2-DILAUROYL-SN-GLYCERO-3-PHOSPHATE",
    "PX9": "Pramocaine",
    "RBZ": "ALPHA-RIBAZOLE-5'-PHOSPHATE",
    "S12": "O-[(S)-hydroxy{[(2S)-2-hydroxy-3-(octadec-9-enoyloxy)propyl]oxy}phosphoryl]-L-serine",
    "SAC": "N-ACETYL-SERINE",
    "PLC": "DIUNDECYL PHOSPHATIDYL CHOLINE",
    "PLM": "PALMITIC ACID",
    "PLP": "PYRIDOXAL-5'-PHOSPHATE",
    "POV": "(2S)-3-(hexadecanoyloxy)-2-[(9Z)-octadec-9-enoyloxy]propyl 2-(trimethylammonio)ethyl phosphate",
    "PPV": "PYROPHOSPHATE",
    "PRD_000557": "Pepstatin",
    "PRD_900017": "triacetyl-beta-chitotriose",
    "PRP": "1-O-pyrophosphono-5-O-phosphono-alpha-D-ribofuranose",
    "PRS": "THIOPROLINE",
    "PHD": "ASPARTYL PHOSPHATE",
    "TIM": "(2S)-1-(tert-butylamino)-3-[(4-morpholin-4-yl-1,2,5-thiadiazol-3-yl)oxy]propan-2-ol",
    "TPQ": "5-(2-CARBOXY-2-AMINOETHYL)-2-HYDROXY-1,4-BENZOQUINONE",
    "TTP": "THYMIDINE-5'-TRIPHOSPHATE",
    "TYI": "3,5-DIIODOTYROSINE",
    "U3P": "3'-URIDINEMONOPHOSPHATE",
    "UQ2": "UBIQUINONE-1",
    "UQ6": "5-(3,7,11,15,19,23-HEXAMETHYL-TETRACOSA-2,6,10,14,18,22-HEXAENYL)-2,3-DIMETHOXY-6-METHYL-BENZENE-1,4-DIOL",
    "UQ9": "Ubiquinone-9",
    "SFE": "(3S)-3-amino-3-phenylpropanoic acid",
    "SNL": "SPIRONOLACTONE",
    "SP2": "3,4-DIHYDROSPHEROIDENE",
    "SEC": "SELENOCYSTEINE",
    "STA": "STATINE",
    "T7X": "Phosphatidylinositol",
    "YHR": "reserpine",
    "YNM": "N-METHYL-L-TYROSINE",
    "1MG": "1N-METHYLGUANOSINE-5'-MONOPHOSPHATE",
    "1PE": "PENTAETHYLENE GLYCOL",
    "1UN": "2-[2-HYDROXY-3-(3-HYDROXY-2-METHYL-BENZOYLAMINO)-4-PHENYL SULFANYL-BUTYL]-DECAHYDRO-ISOQUINOLINE-3-CARBOXYLIC ACID TERT-BUTYLAMIDE",
    "2MG": "2N-METHYLGUANOSINE-5'-MONOPHOSPHATE",
    "2MU": "2',5-DIMETHYLURIDINE-5'-MONOPHOSPHATE",
    "3DR": "1',2'-DIDEOXYRIBOFURANOSE-5'-PHOSPHATE",
    "478": "{3-[(4-AMINO-BENZENESULFONYL)-ISOBUTYL-AMINO]-1-BENZYL-2-HYDROXY-PROPYL}-CARBAMIC ACID TETRAHYDRO-FURAN-3-YL ESTER",
    "4AC": "N(4)-ACETYLCYTIDINE-5'-MONOPHOSPHATE",
    "4D6": "Vaborbactam",
    "4J6": "(4R,5S)-5-[(2S,3R)-3-hydroxy-1-oxobutan-2-yl]-4-methyl-3-({(3S,5S)-5-[(sulfamoylamino)methyl]pyrrolidin-3-yl}sulfanyl)-4,5-dihydro-1H-pyrrole-2-carboxylic acid",
    "2MA": "2-METHYLADENOSINE-5'-MONOPHOSPHATE",
    "4SU": "4-THIOURIDINE-5'-MONOPHOSPHATE",
    "5BU": "5-BROMO-URIDINE-5'-MONOPHOSPHATE",
    "7YY": "6-[(6-chloranyl-2-methyl-indazol-5-yl)amino]-3-[(1-methyl-1,2,4-triazol-3-yl)methyl]-1-[[2,4,5-tris(fluoranyl)phenyl]methyl]-1,3,5-triazine-2,4-dione",
    "AB1": "N-{1-BENZYL-4-[2-(2,6-DIMETHYL-PHENOXY)-ACETYLAMINO]-3-HYDROXY-5-PHENYL-PENTYL}-3-METHYL-2-(2-OXO-TETRAHYDRO-PYRIMIDIN-1-YL)-BUTYRAMIDE",
    "ACP": "PHOSPHOMETHYLPHOSPHONIC ACID ADENYLATE ESTER",
    "ACT": "ACETATE ION",
    "AMP": "ADENOSINE MONOPHOSPHATE",
    "AR6": "[(2R,3S,4R,5R)-5-(6-AMINOPURIN-9-YL)-3,4-DIHYDROXY-OXOLAN-2-YL]METHYL[HYDROXY-[[(2R,3S,4R,5S)-3,4,5-TRIHYDROXYOXOLAN-2-YL]METHOXY]PHOSPHORYL] HYDROGEN PHOSPHATE",
    "ASN": "ASPARAGINE",
    "ATM": "3'-AZIDO-3'-DEOXYTHYMIDINE-5'-MONOPHOSPHATE",
    "BCT": "BICARBONATE ION",
    "BCZ": "3-(1-ACETYLAMINO-2-ETHYL-BUTYL)-4-GUANIDINO-2-HYDROXY-CYCLOPENTANECARBOXYLIC ACID",
    "BE7": "(4-CARBOXYPHENYL)(CHLORO)MERCURY",
    "4UH": "Amphotericin B",
    "BMA": "beta-D-mannopyranose",
    "BME": "BETA-MERCAPTOETHANOL",
    "BML": "4-BROMOPHENOL",
    "C14": "TETRADECANE",
    "CLM": "CHLORAMPHENICOL",
    "CLR": "CHOLESTEROL",
    "CPS": "3-[(3-CHOLAMIDOPROPYL)DIMETHYLAMMONIO]-1-PROPANESULFONATE",
    "CSS": "S-MERCAPTOCYSTEINE",
    "D2T": "(3R)-3-(methylsulfanyl)-L-aspartic acid",
    "DDQ": "DECYLAMINE-N,N-DIMETHYL-N-OXIDE",
    "DTT": "2,3-DIHYDROXY-1,4-DITHIOBUTANE",
    "ETE": "2-{2-[2-2-(METHOXY-ETHOXY)-ETHOXY]-ETHOXY}-ETHANOL",
    "FMT": "FORMIC ACID",
    "G47": "N2-ETHANETHIOL-2'-DEOXY-GUANOSINE-5'-MONOPHOSPHATE",
    "IHP": "INOSITOL HEXAKISPHOSPHATE",
    "KAN": "KANAMYCIN A",
    "LBN": "1-palmitoyl-2-oleoyl-sn-glycero-3-phosphocholine",
    "LFX": "(3S)-9-fluoro-3-methyl-10-(4-methylpiperazin-1-yl)-7-oxo-2,3-dihydro-7H-[1,4]oxazino[2,3,4-ij]quinoline-6-carboxylic acid",
    "LMR": "(2S)-2-hydroxybutanedioic acid",
    "LMU": "DODECYL-ALPHA-D-MALTOSIDE",
    "M2G": "N2-DIMETHYLGUANOSINE-5'-MONOPHOSPHATE",
    "MA4": "CYCLOHEXYL-HEXYL-BETA-D-MALTOSIDE",
    "MPD": "(4S)-2-METHYL-2,4-PENTANEDIOL",
    "MRG": "N2-(3-MERCAPTOPROPYL)-2'-DEOXYGUANOSINE-5'-MONOPHOSPHATE",
    "N": "ANY 5'-MONOPHOSPHATE NUCLEOTIDE",
    "NAG": "2-acetamido-2-deoxy-beta-D-glucopyranose",
    "NAP": "NADP NICOTINAMIDE-ADENINE-DINUCLEOTIDE PHOSPHATE",
    "NMY": "NEOMYCIN",
    "NVP": "11-CYCLOPROPYL-5,11-DIHYDRO-4-METHYL-6H-DIPYRIDO[3,2-B:2',3'-E][1,4]DIAZEPIN-6-ONE",
    "O31": "Glecaprevir",
    "OMU": "O2'-METHYLURIDINE 5'-MONOPHOSPHATE",
    "PAR": "PAROMOMYCIN",
    "PEG": "DI(HYDROXYETHYL)ETHER",
    "PG6": "1-(2-METHOXY-ETHOXY)-2-{2-[2-(2-METHOXY-ETHOXY]-ETHOXY}-ETHANE",
    "QUE": "3,5,7,3',4'-PENTAHYDROXYFLAVONE",
    "SRY": "STREPTOMYCIN",
    "SUE": "(1aR,5S,8S,10R,22aR)-5-tert-butyl-N-{(1R,2S)-1-[(cyclopropylsulfonyl)carbamoyl]-2-ethenylcyclopropyl}-14-methoxy-3,6-di oxo-1,1a,3,4,5,6,9,10,18,19,20,21,22,22a-tetradecahydro-8H-7,10-methanocyclopropa[18,19][1,10,3,6]dioxadiazacyclononadec ino[11,12-b]quinoxaline-8-carboxamide",
    "T27": "4-{[4-({4-[(E)-2-cyanoethenyl]-2,6-dimethylphenyl}amino)pyrimidin-2-yl]amino}benzonitrile",
    "TLA": "L(+)-TARTARIC ACID",
    "TOP": "TRIMETHOPRIM",
    "RSQ": "5-formylcytidine 5'-(dihydrogen phosphate)",
    "SCM": "SPECTINOMYCIN",
    "SPD": "SPERMIDINE",
    "TPF": "2-(2,4-DIFLUOROPHENYL)-1,3-DI(1H-1,2,4-TRIAZOL-1-YL)PROPAN-2-OL",
    "TSL": "TRANS-ENAMINE INTERMEDIATE OF SULBACTAM",
    "U8U": "5-METHYLAMINOMETHYL-2-THIOURIDINE-5'-MONOPHOSPHATE",
    "UAL": "(2Z)-2-amino-3-(carbamoylamino)prop-2-enoic acid",
    "UMP": "2'-DEOXYURIDINE 5'-MONOPHOSPHATE",
    "UMS": "2'-METHYLSELENYL-2'-DEOXYURIDINE-5'-PHOSPHATE",
    "UNK": "UNKNOWN",
    "VIB": "PA3-(4-AMINO-2-METHYL-PYRIMIDIN-5-YLMETHYL)-5-(2-HYDROXY-ETHYL)-4-METHYL-THIAZOL-3-IUMROMOMYCIN",
    "VIR": "VIRGINIAMYCIN M1",
    "VOR": "Voriconazole",
    "XPE": "3,6,9,12,15,18,21,24,27-NONAOXANONACOSANE-1,29-DIOL",
    "YYG": "4-(3-[5-O-PHOSPHONORIBOFURANOSYL]-4,6-DIMETHYL-8-OXO-4,8-DIHYDRO-3H-1,3,4,5,7A-PENTAAZA-S-INDACEN-YLAMINO-BUTYRIC ACID METHYL ESTER",
    "ZMR": "ZANAMIVIR",
    "O8J": "(3R,4R,5E,10E,12E,14S,26aR)-14-hydroxy-4,12-dimethyl-3-(propan-2-yl)-8,9,14,15,24,25,26,26a-octahydro-1H,3H,22H-21,18-(azeno)pyrrolo[2,1-c][1,8,4,19]dioxadiazacyclotetracosine-1,7,16,22(4H,17H)-tetrone",
    "O8D": "(3R,4R,5E,10E,12E,14S,26aR)-14-hydroxy-12-methyl-3-(propan-2-yl)-4-(prop-2-en-1-yl)-8,9,14,15,24,25,26,26a-octahydro-1H,3H,22H-21,18-(azeno)pyrrolo[2,1-c][1,8,4,19]dioxadiazacyclotetracosine-1,7,16,22(4H,17H)-tetrone",
    "VIF": "Flopristin",
    "H8T": "Virginiamycin M",
    "M2D": "Madumycin II",
    "00S": "4-(aminomethyl)benzenecarboximidamide",
    "017": "(3R,3AS,6AR)-HEXAHYDROFURO[2,3-B]FURAN-3-YL(1S,2R)-3-[[(4-AMINOPHENYL)SULFONYL](ISOBUTYL)AMINO]-1-BENZYL-2-HYDROXYPROPYLCARBAMATE",
    "02A": "(2S)-azetidine-2-carboxylic acid",
    "065": "(3R,3AS,6AR)-HEXAHYDROFURO[2,3-B]FURAN-3-YL(2S,3R)-3-HYDROXY-4-(N-ISOBUTYLBENZO[D][1,3]DIOXOLE-5-SULFONAMIDO)-1-PHENYLBUTAN-2-YLCARBAMATE"

}

# Three-letter residue names treated as amino acids: the 20 standard codes
# plus SEC, PYL and MSE.
AA_CODES = set(
    "ALA ARG ASN ASP CYS GLN GLU GLY HIS ILE LEU "
    "LYS MET PHE PRO SER THR TRP TYR VAL SEC PYL MSE".split()
)
# Exactly the ligands needed for this sample (currently the whole LIGANDS table).
TARGET_LIGANDS = LIGANDS

# Settings for the ligand search run from main():
#   base_url    - URL the search query is POSTed to
#   timeout     - timeout value passed to the HTTP request
#   max_retries - number of attempts made per ligand before it is recorded as failed
SEARCH_PARAMS = {
    "base_url": "https://search.rcsb.org/rcsbsearch/v2/query",
    "timeout": 30,
    "max_retries": 3
}



def _chain_label(res):              # в пдб цепи помечают через segid, chainID
    seg = (getattr(res, "segid", "") or "").strip()
    if seg:
        return seg
    try:
        chs = [c for c in set(str(c).strip() for c in set(res.atoms.chainIDs)) if c]
        if chs:
            return chs[0]
    except Exception:
        pass
    return ""  # если совсем нет метки, оставляем пусто

def _label_res(res):
    """Build a readable residue label such as ``GLU81A``.

    The label is the upper-cased residue name, the integer residue id and the
    chain label from ``_chain_label`` joined without separators.
    """
    name = res.resname.upper()
    number = int(res.resid)
    chain = _chain_label(res)
    return "".join((name, str(number), chain))



def ligand_centroid_from_ag(ag):
    """Return the centroid of an atom group, preferring non-hydrogen atoms.

    Args:
        ag: atom-group-like object exposing ``select_atoms``, ``n_atoms`` and
            ``positions``.

    Returns:
        Mean position of the ``"not element H"`` selection when that selection
        works and is non-empty; otherwise the mean position of all atoms; a
        zero vector of length 3 when the group has no atoms.
    """
    centroid = None
    try:
        non_hydrogen = ag.select_atoms("not element H")
        if non_hydrogen.n_atoms:
            centroid = non_hydrogen.positions.mean(axis=0)
    except Exception:
        # The selection can fail (e.g. no element data); fall back to all atoms.
        centroid = None

    if centroid is not None:
        return centroid
    if not ag.n_atoms:
        return np.array([0.0, 0.0, 0.0])
    return ag.positions.mean(axis=0)


def res_representative_coord(res):
    """Return one representative coordinate for a residue.

    Preference order:
      1. position of the first atom selected by ``"name CA"``;
      2. mean position of the ``"not element H"`` selection;
      3. mean position of all atoms of the residue.

    A selection that raises or comes back empty is skipped. The returned array
    is always a copy.
    """
    strategies = (
        ("name CA", lambda group: group.positions[0]),
        ("not element H", lambda group: group.positions.mean(axis=0)),
    )
    for selection, pick in strategies:
        try:
            group = res.atoms.select_atoms(selection)
            if group.n_atoms:
                return pick(group).copy()
        except Exception:
            # Best effort: move on to the next, less specific strategy.
            continue
    return res.atoms.positions.mean(axis=0).copy()


def greedy_route_from_coords(coords, start_idx=0):
    """Order points by a greedy nearest-neighbour walk.

    Starting at ``start_idx``, repeatedly move to the closest point that has
    not been visited yet (Euclidean distance).

    Args:
        coords: array-like of points, one per row. NumPy arrays as well as
            plain lists/tuples of coordinates are accepted.
        start_idx: index of the point the walk starts from.

    Returns:
        List of Python ints: every index of ``coords`` exactly once, in visit
        order. Empty input yields ``[]``.

    Raises:
        KeyError: if ``start_idx`` is not a valid index of a non-empty
            ``coords``.
    """
    points = np.asarray(coords, dtype=float)
    n = len(points)
    if n == 0:
        return []
    # One row per point, so scalar (1-D) coordinates are handled like vectors.
    points = points.reshape(n, -1)

    start = int(start_idx)
    if not 0 <= start < n:
        raise KeyError(start)

    visited = np.zeros(n, dtype=bool)
    visited[start] = True
    order = [start]
    for _ in range(n - 1):
        # Candidates are in ascending index order and argmin returns the first
        # minimum, so distance ties are resolved towards the lowest index.
        candidates = np.flatnonzero(~visited)
        dists = np.linalg.norm(points[candidates] - points[order[-1]], axis=1)
        nxt = int(candidates[np.argmin(dists)])
        visited[nxt] = True
        order.append(nxt)
    return order

#  СКАЧИВАНИЕ СТРУКТУР 

def _local_structure_path(pdb_id: str) -> str | None:
    """Return the path of an already downloaded structure file, if any.

    Looks in ``PDB_DIR`` for ``<PDB_ID>.<ext>`` using the extensions from
    ``PREFERRED_FORMATS`` first and, only when ``ALLOW_CIF_FALLBACK`` is set,
    ``cif`` last. Empty files are ignored.

    Returns:
        The path of the first non-empty match as a string, or ``None``.
    """
    code = str(pdb_id).upper().strip()

    extensions = list(PREFERRED_FORMATS)
    if ALLOW_CIF_FALLBACK:
        extensions.append("cif")

    for extension in extensions:
        candidate = PDB_DIR / f"{code}.{extension}"
        if candidate.exists() and candidate.stat().st_size > 0:
            return str(candidate)
    return None

def download_structure(pdb_id):
    """Fetch a structure file for ``pdb_id`` into ``PDB_DIR`` and return its path.

    A non-empty local copy (see ``_local_structure_path``) is returned without
    any network access. Otherwise the candidates are tried in order:
    ``.pdb.gz``, ``.pdb``, ``.mmtf.gz``, ``.mmtf`` and, only when
    ``ALLOW_CIF_FALLBACK`` is set, ``.cif.gz`` and ``.cif``.

    Each candidate is downloaded (and, for ``.gz``, decompressed) into ``.part``
    files and moved to its final name only after it is complete, so an
    interrupted transfer or a corrupt archive never leaves a truncated file
    that would later be mistaken for a valid cached structure. Any failure for
    one candidate moves on to the next one.

    Args:
        pdb_id: PDB entry identifier; it is upper-cased and stripped.

    Returns:
        Path of the local structure file as a string, or ``None`` when nothing
        could be downloaded.
    """
    pdb_id = str(pdb_id).upper().strip()

    # Cache: reuse a file that is already on disk.
    local = _local_structure_path(pdb_id)
    if local:
        return local

    base_url = "https://files.rcsb.org/download/"
    candidates = [
        (f"{pdb_id}.pdb.gz",  "pdb"),
        (f"{pdb_id}.pdb",     "pdb"),
        (f"{pdb_id}.mmtf.gz", "mmtf"),
        (f"{pdb_id}.mmtf",    "mmtf"),
    ]
    if ALLOW_CIF_FALLBACK:
        candidates += [
            (f"{pdb_id}.cif.gz", "cif"),
            (f"{pdb_id}.cif",    "cif"),
        ]

    for remote_name, fmt in candidates:
        url = base_url + remote_name
        try:
            head = SESSION.head(url, timeout=20)
        except Exception:
            continue
        if head.status_code != 200:
            continue

        final_path = PDB_DIR / f"{pdb_id}.{fmt}"
        # Raw bytes as served; for uncompressed candidates this is the same
        # file as staged_path.
        download_path = PDB_DIR / f"{remote_name}.part"
        # Complete, decompressed content waiting to be published.
        staged_path = PDB_DIR / f"{pdb_id}.{fmt}.part"

        try:
            # The context manager releases the streamed connection even when
            # the transfer fails half-way.
            with SESSION.get(url, stream=True, timeout=60) as resp:
                resp.raise_for_status()
                with open(download_path, "wb") as f:
                    for chunk in resp.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)

            if remote_name.endswith(".gz"):
                with gzip.open(download_path, "rb") as f_in, open(staged_path, "wb") as f_out:
                    shutil.copyfileobj(f_in, f_out)

            # Publish the finished file in a single step.
            os.replace(staged_path, final_path)
        except Exception:
            # Network, disk or gzip problem: try the next candidate.
            continue
        finally:
            # Remove whatever temporary files are still around.
            for leftover in (download_path, staged_path):
                try:
                    os.remove(leftover)
                except OSError:
                    pass

        return str(final_path)

    return _local_structure_path(pdb_id)




# АНАЛИЗ 

def _select_ligand_atoms(u, lig_id, pdb_path):
    lig_id = (lig_id or "").strip().upper()
    parts = []
    for r in u.residues:
        name = (r.resname or "").strip().upper()
        if name != lig_id:
            continue
        if _is_nonpolymeric_ligand(r) or _is_aa_het_ligand(r, pdb_path):
            parts.append(r.atoms)
    if not parts:
        return u.atoms[[]]  
    ag = parts[0]
    for p in parts[1:]:
        ag = ag + p          
    return ag

def analyze_structure(u, pdb_id, path, ligand_ids, radius=4.0):
    """Describe the protein residues surrounding each requested ligand.

    For every ligand id with at least one atom in ``u`` (as selected by
    ``_select_ligand_atoms``) the protein residues having atoms within
    ``radius`` of the ligand are collected and one result row is produced.

    Args:
        u: loaded structure exposing ``select_atoms``.
        pdb_id: entry identifier, copied into each row and used in messages.
        path: path of the structure file; for ``.pdb`` files the first
            ``TITLE`` record is used as the protein name.
        ligand_ids: iterable of ligand ids to analyse.
        radius: distance cut-off for the ``around`` selection (formatted with
            one decimal place).

    Returns:
        List of dicts with the keys ``PDB_ID``, ``Protein_Name``,
        ``Ligand_ID``, ``Ligand_Name``, ``Binding_Residues`` (labels sorted by
        chain and residue id), ``Ordered_Binding_Residues`` (labels in greedy
        nearest-neighbour order), ``Ordered_Residues_Coords`` (same order, with
        coordinates relative to the ligand centroid), ``Num_Binding_Residues``
        and ``Source_File``. Any error other than a timeout is printed and
        results in ``[]``.

    Raises:
        TimeoutError: propagated unchanged so that a time limit installed by
            the caller is reported by the caller instead of being swallowed
            here as a generic analysis error.
    """
    try:
        protein_name = "Unknown"
        try:
            if str(path).endswith(".pdb"):
                with open(path, "r", encoding="utf-8", errors="ignore") as f:
                    for line in f:
                        if line.startswith("TITLE"):
                            protein_name = line[10:].strip()
                            break
        except TimeoutError:
            raise
        except Exception:
            # The title is optional; keep "Unknown" when it cannot be read.
            pass

        results = []

        for lig_id in ligand_ids:
            ligand = _select_ligand_atoms(u, lig_id, path)
            if ligand.n_atoms == 0:
                continue

            # All protein atoms within the radius of the ligand.
            protein = u.select_atoms(f"protein and around {radius:.1f} group ligand", ligand=ligand)
            # `.residues` is already the collection of distinct residues; the
            # former round-trip through a set only discarded its order.
            residues = list(protein.residues)
            if not residues:
                continue

            L = ligand_centroid_from_ag(ligand)  # ligand centroid

            entries = []
            for r in residues:
                rep_abs = res_representative_coord(r)
                entries.append({
                    "res": r,
                    "label": _label_res(r),
                    "rep_abs": rep_abs,
                    # Coordinates relative to the ligand centroid.
                    "rep_rel": rep_abs - L,
                })

            sorted_idx = sorted(
                range(len(entries)),
                key=lambda i: (_chain_label(entries[i]["res"]), int(entries[i]["res"].resid))
            )
            binding_residues_str = ", ".join(entries[i]["label"] for i in sorted_idx)

            # Deterministic start: the first residue in (chain, resid) order.
            start_idx = sorted_idx[0]

            coords_abs = np.array([e["rep_abs"] for e in entries], dtype=float)
            order = greedy_route_from_coords(coords_abs, start_idx=start_idx)

            ordered_labels = " - ".join(entries[i]["label"] for i in order)
            ordered_with_rel_coords = " - ".join(
                f'{entries[i]["label"]}'
                f'({entries[i]["rep_rel"][0]:.3f},{entries[i]["rep_rel"][1]:.3f},{entries[i]["rep_rel"][2]:.3f})'
                for i in order
            )

            results.append({
                "PDB_ID": pdb_id,
                "Protein_Name": protein_name,
                "Ligand_ID": lig_id,
                "Ligand_Name": LIGANDS.get(lig_id, lig_id),

                "Binding_Residues": binding_residues_str,

                "Ordered_Binding_Residues": ordered_labels,
                "Ordered_Residues_Coords": ordered_with_rel_coords,

                "Num_Binding_Residues": len(entries),
                "Source_File": os.path.basename(path),
            })

        return results

    except TimeoutError:
        # Raised by the caller's time limit; must not be reported as an
        # ordinary analysis error.
        raise
    except Exception as e:
        print(f"Ошибка при анализе {pdb_id}: {str(e)}")
        return []

# Main Function

def process_single_pdb_collect(pdb_id):
    """Download, load and analyse one PDB entry.

    Steps: obtain the structure file, skip mmCIF files unless
    ``ALLOW_CIF_FALLBACK`` is set, load the structure, determine which target
    ligands it contains and analyse them under a time limit of
    ``ANALYZE_TIMEOUT_S`` seconds.

    Returns:
        The result rows from ``analyze_structure``, or ``[]`` when the entry
        cannot be downloaded or loaded, contains no target ligand, or the
        analysis times out.
    """
    structure_path = download_structure(pdb_id)
    if not structure_path:
        return []

    is_cif = str(structure_path).lower().endswith(".cif")
    if is_cif and not ALLOW_CIF_FALLBACK:
        print(f"[{pdb_id}] доступен только mmCIF - пропускаю (ALLOW_CIF_FALLBACK=False).")
        return []

    try:
        universe = load_universe(structure_path)
    except Exception as e:
        print(f"Ошибка при загрузке {pdb_id}: {e}")
        return []

    present_ligands = sorted(ligands_present_in(universe, TARGET_LIGANDS.keys(), structure_path))
    if not present_ligands:
        return []

    try:
        with _TimeLimit(ANALYZE_TIMEOUT_S):
            rows = analyze_structure(universe, pdb_id, structure_path, present_ligands, radius=4.0)
    except TimeoutError:
        print(f"[{pdb_id}] анализ превысил {ANALYZE_TIMEOUT_S}s — пропускаю.")
        return []

    return rows if rows else []

def _search_pdb_ids(lig_id):
    """Ask the RCSB search service which entries contain ligand ``lig_id``.

    The request asks explicitly for up to ``MAX_PDBS_PER_LIGAND`` hits (or for
    all hits when that limit is unset); without ``request_options`` the service
    only returns its first, short default page of results.

    Returns:
        List of entry identifiers (empty for HTTP 204, i.e. no matches), or
        ``None`` when no usable response was obtained after
        ``SEARCH_PARAMS["max_retries"]`` attempts.
    """
    if MAX_PDBS_PER_LIGAND:
        request_options = {"paginate": {"start": 0, "rows": int(MAX_PDBS_PER_LIGAND)}}
    else:
        request_options = {"return_all_hits": True}

    query = {
        "query": {
            "type": "terminal",
            "service": "text",
            "parameters": {
                "attribute": "rcsb_nonpolymer_entity_container_identifiers.nonpolymer_comp_id",
                "operator": "exact_match",
                "value": lig_id
            }
        },
        "request_options": request_options,
        "return_type": "entry"
    }

    max_retries = SEARCH_PARAMS["max_retries"]
    for attempt in range(1, max_retries + 1):
        try:
            # Shared session: reuses the connection across the many ligand queries.
            r = SESSION.post(SEARCH_PARAMS["base_url"], json=query, timeout=SEARCH_PARAMS["timeout"])
            if r.status_code == 204:
                return []  # no entries for this ligand
            if r.status_code != 200:
                raise requests.RequestException(f"HTTP {r.status_code}")
            # A malformed body raises here and is retried like any other failure.
            return [hit["identifier"] for hit in r.json().get("result_set", [])]
        except Exception as e:
            if attempt == max_retries:
                print(f"[{lig_id}] Ошибка запроса после {attempt} попыток: {e}")
    return None


def main():
    """Build the protein-ligand binding-site table and write it to ``RESULTS_FILE``.

    An existing results file is removed first. For every ligand in
    ``TARGET_LIGANDS`` the matching PDB entries are looked up, at most
    ``MAX_PDBS_PER_LIGAND`` of them are processed (each entry only once across
    all ligands), and the collected rows are written as CSV with the columns
    ``EXPECTED_COLUMNS``. A short summary is printed at the end.
    """
    if RESULTS_FILE.exists():
        try:
            RESULTS_FILE.unlink()
            print("Удалил старый CSV:", RESULTS_FILE)
        except Exception as e:
            print("Не смог удалить старый CSV:", e)

    print("BASE:", BASE.resolve())
    print("PDB_DIR:", PDB_DIR.resolve())
    print("RESULTS_FILE:", RESULTS_FILE.resolve())

    all_results = []
    processed_pdbs = set()
    invalid_response_ligands = []

    # Walk over all ligands and collect the PDB IDs for each of them.
    all_ligand_ids = list(TARGET_LIGANDS.keys())
    for lig_id in tqdm(all_ligand_ids, desc="Processing ligands"):
        pdb_ids = _search_pdb_ids(lig_id)
        if pdb_ids is None:
            invalid_response_ligands.append(lig_id)
            continue
        if not pdb_ids:
            # No entries: nothing to do for this ligand.
            continue

        # Process every PDB entry only once.
        for pdb_id in tqdm(pdb_ids[:MAX_PDBS_PER_LIGAND], desc=f"Processing {lig_id}", leave=False):
            if pdb_id in processed_pdbs:
                continue
            rows = process_single_pdb_collect(pdb_id)
            if rows:
                all_results.extend(rows)
            processed_pdbs.add(pdb_id)

    if all_results:
        df_out = pd.DataFrame(all_results)
        df_out = df_out.reindex(columns=EXPECTED_COLUMNS)
    else:
        df_out = pd.DataFrame(columns=EXPECTED_COLUMNS)

    df_out.to_csv(RESULTS_FILE, index=False)
    print(f"\nDone! Результаты сохранены в {RESULTS_FILE.resolve()}")
    print(f"Всего записей: {len(df_out)}")
    if len(df_out):
        print(f"Уникальных структур: {df_out['PDB_ID'].nunique()}")
    if invalid_response_ligands:
        print(f"Лиганды с ошибкой ответа от RCSB (пропущены): {len(invalid_response_ligands)} шт.")

# Entry point: run the whole pipeline when this code is executed directly.
if __name__ == "__main__":
    main()



  from .autonotebook import tqdm as notebook_tqdm


BASE: /Users/strateford/Desktop/MyPythonProjects/НИРСИИ_DEC_25
PDB_DIR: /Users/strateford/Desktop/MyPythonProjects/НИРСИИ_DEC_25/pdb_downloads
RESULTS_FILE: /Users/strateford/Desktop/MyPythonProjects/НИРСИИ_DEC_25/protein_ligand_dataset_ALL.csv


Processing ligands:   0%|          | 0/706 [00:00<?, ?it/s]

In [None]:
# Load a saved results table and display it.
# NOTE(review): this reads 'protein_ligand_dataset_ALL_final.csv', while RESULTS_FILE
# written by main() is "protein_ligand_dataset_ALL.csv" - confirm which file is intended.
# NOTE(review): the displayed frame has an 'Unnamed: 0' column, which suggests the CSV
# was saved together with its index; consider index_col=0 if nothing downstream relies on it.
df_5 = pd.read_csv('protein_ligand_dataset_ALL_final.csv')
df_5

Unnamed: 0,PDB_ID,Protein_Name,Ligand_ID,Ligand_Name,Binding_Residues,Ordered_Binding_Residues,Ordered_Residues_Coords,Num_Binding_Residues,Source_File
0,1HNW,STRUCTURE OF THE THERMUS THERMOPHILUS 30S RIBO...,TAC,Tetracycline,ARG19L,ARG19L,"ARG19L(-26.749,-6.897,-1.939)",1,1HNW.pdb
1,1I97,CRYSTAL STRUCTURE OF THE 30S RIBOSOMAL SUBUNIT...,TAC,Tetracycline,"GLU81D, ALA82D, LYS85D, VAL92D, PHE93D, GLY95D...",GLU81D - ALA82D - LYS85D - VAL92D - PHE93D - G...,"GLU81D(47.341,31.470,24.713) - ALA82D(48.184,2...",12,1I97.pdb
2,2HCJ,TRYPSIN-MODIFIED ELONGATION FACTOR TU IN COMPL...,GDP,GUANOSINE-5'-DIPHOSPHATE,"HIS19A, VAL20A, ASP21A, HIS22A, GLY23A, LYS24A...",HIS19A - VAL20A - ASP21A - HIS22A - GLY23A - L...,"HIS19A(-0.139,-4.435,-10.046) - VAL20A(2.478,-...",15,2HCJ.pdb
3,2HCJ,TRYPSIN-MODIFIED ELONGATION FACTOR TU IN COMPL...,TAC,Tetracycline,"THR25A, THR64B, SER65B, ASP80B, PRO82B",THR25A - ASP80B - SER65B - THR64B - PRO82B,"THR25A(-5.995,2.424,-1.106) - ASP80B(-7.894,-3...",5,2HCJ.pdb
4,2HDN,TRYPSIN-MODIFIED ELONGATION FACTOR TU IN COMPL...,GDP,GUANOSINE-5'-DIPHOSPHATE,"HIS19A, VAL20A, ASP21A, HIS22A, GLY23A, LYS24A...",HIS19A - VAL20A - ASP21A - HIS22A - GLY23A - L...,"HIS19A(30.814,7.758,59.337) - VAL20A(30.153,7....",90,2HDN.pdb
...,...,...,...,...,...,...,...,...,...
8041,2Z4O,WILD TYPE HIV-1 PROTEASE WITH POTENT ANTIVIRAL...,ACT,ACETATE ION,"PRO139B, GLY140B, ARG141B, TYR159B, ASP160B",PRO139B - GLY140B - ARG141B - TYR159B - ASP160B,"PRO139B(4.557,0.069,4.073) - GLY140B(2.184,-2....",5,2Z4O.pdb
8042,3ECG,HIGH RESOLUTION HIV-2 PROTEASE STRUCTURE IN CO...,065,"(3R,3AS,6AR)-HEXAHYDROFURO[2,3-B]FURAN-3-YL(2S...","LEU23A, ASP25A, GLY27A, ALA28A, ASP29A, ASP30A...",LEU23A - ILE84A - ILE32A - ASP30A - ASP29A - A...,"LEU23A(-4.144,0.220,-8.931) - ILE84A(-6.643,-0...",25,3ECG.pdb
8043,3ECG,HIGH RESOLUTION HIV-2 PROTEASE STRUCTURE IN CO...,IMD,IMIDAZOLE,"ARG8A, TYR14A, GLU65A, PRO81A, ILE82A, LEU99A,...",ARG8A - ASP130B - MET176B - ASN155B - ILE146B ...,"ARG8A(0.104,12.835,-6.295) - ASP130B(7.603,12....",16,3ECG.pdb
8044,7LDZ,HIV-1 PROTEASE WT (NL4-3) IN COMPLEX WITH GRL-...,065,"(3R,3AS,6AR)-HEXAHYDROFURO[2,3-B]FURAN-3-YL(2S...","LEU23A, ASP25A, GLY27A, ALA28A, ASP29A, ASP30A...",LEU23A - ILE84A - VAL32A - ASP30A - ASP29A - A...,"LEU23A(7.448,-0.872,-8.260) - ILE84A(8.383,0.1...",29,7LDZ.pdb
