In [9]:
import glycosylator as gl
from pathlib import Path

def get_components(
    file_path: Path,
    max_bond_length: float = 1.622,
    connection_bond_length: float = 1.62,
) -> dict:
    """
    Parse a GlycoShape structure file and return a dict of component counts.

    The loader is picked from the file extension, compared case-insensitively:
    ``.cif`` uses ``gl.Glycan.from_cif`` and ``.pdb`` uses
    ``gl.Glycan.from_pdb``. Bonds, residue connections and the glycan tree are
    then inferred (in that order) before the residue histogram is read.

    Parameters
    ----------
    file_path : Path
        Location of the structure file. It is converted with ``str()`` before
        being handed to the loader.
    max_bond_length : float, optional
        Forwarded to ``infer_bonds(max_bond_length=...)``. Defaults to 1.622.
    connection_bond_length : float, optional
        Forwarded to ``infer_residue_connections(bond_length=...)``.
        Defaults to 1.62.

    Returns
    -------
    dict
        Maps a component name to its summed count. The component name is the
        text after the last ``-`` of each histogram residue label, so labels
        that differ only in their prefix are merged into one entry.

    Raises
    ------
    AssertionError
        If the path ends in neither ``.cif`` nor ``.pdb``.
    """
    path_str = str(file_path)

    # Match on a lower-cased copy so ".PDB" / ".CIF" are recognised, but pass
    # the untouched path to the loader (the file system may be case-sensitive).
    lowered = path_str.lower()
    if lowered.endswith(".cif"):
        glycan = gl.Glycan.from_cif(path_str)
    elif lowered.endswith(".pdb"):
        glycan = gl.Glycan.from_pdb(path_str)
    else:
        # AssertionError is kept (rather than ValueError) so existing callers
        # that catch it keep working.
        raise AssertionError(f"Uncertain which filetype this is: {path_str}")

    # Call order is preserved from the original: bonds, then residue
    # connections, then the glycan tree.
    glycan.infer_bonds(max_bond_length=max_bond_length, restrict_residues=True)
    glycan.infer_residue_connections(bond_length=connection_bond_length)
    glycan.infer_glycan_tree()

    # hist() is expected to expose "residue" and "count" columns; after
    # indexing by residue, the "count" entry maps residue label -> count.
    residue_counts = glycan.hist().set_index("residue").to_dict()["count"]

    components = {}
    for residue, count in residue_counts.items():
        # Keep only the trailing segment of the label (text after last "-").
        key = residue.split("-")[-1]
        components[key] = components.get(key, 0) + count

    return components


In [10]:
# Both structure files sit in the same GS00276 directory, so the directory is
# spelled once here and only the file names differ below. Change this single
# constant to point the notebook at a different location.
GS00276_DIR = Path(
    "/data/rbg/users/dkwabiad/temp_while_cp_rsg/glycan_comp_restructure/"
    "output/ingest/glycoshape/glycoshape_pdbs/GS00276"
)

# Two cluster files of the same entry, used below to compare their components.
a = GS00276_DIR / "cluster0_alpha.pdb"
b = GS00276_DIR / "cluster2_alpha.pdb"

In [11]:
# Component counts for the structure file bound to `a` (cluster0_alpha.pdb).
get_components(a)

{'Glc': 1, 'Gal': 2, 'GlcNAc': 2, 'Fuc': 1}

In [6]:
# Component counts for the structure file bound to `b` (cluster2_alpha.pdb),
# shown for comparison with the result for `a`.
# NOTE(review): this cell's execution count (In [6]) is lower than that of the
# cell defining get_components (In [9]); re-run with Restart & Run All to
# confirm the displayed output comes from the current definition.
get_components(b)

{'Glc': 1, 'Gal': 2, 'GlcNAc': 2, 'Fuc': 1}