In [1]:
# Colab cell ①  ─────────────────────────────────────────
# Core library
!pip install -q biopython
!pip -q install pandas    # <- run once per fresh runtime

# Optional: DSSP binary for full-fledged analysis
# (takes ~10 s; skip if you’re happy with the quick HELIX/SHEET route)
!apt-get -qq update
!apt-get -qq install -y dssp

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)


In [2]:
# Colab cell ② ────────────────────────────────────────
from google.colab import drive
drive.mount('/content/drive')

import os
folder_name=os.getcwd()
os.chdir('/content/drive/MyDrive/Samuel/')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Colab cell ③  ─────────────────────────────────────────
import pandas as pd
from pathlib import Path
from collections import Counter
from Bio.PDB import PDBParser, DSSP, PDBList

def ss_from_records(structure):
    helix = sum(len(h) for h in structure.header.get("helix", []))
    sheet = sum(len(s) for s in structure.header.get("sheet", []))
    total = sum(1 for _ in structure.get_residues() if _.id[0] == " ")
    return Counter(helix=helix, sheet=sheet, coil=total-helix-sheet)

def ss_from_dssp(structure, pdb_path, exe="mkdssp"):
    """Run DSSP and tally helix/sheet/coil counts (robust to API quirks)."""
    model = structure[0]
    dssp  = DSSP(model, pdb_path, dssp=exe)

    code_map = {"H":"helix","G":"helix","I":"helix",
                "E":"sheet","B":"sheet",
                "-":"coil","T":"coil","S":"coil"," ":"coil", None:"coil"}

    counts = Counter()

    for feat in dssp.property_dict.values():

        # --- Get the DSSP secondary-structure letter safely ---
        if isinstance(feat, dict):                 # Biopython ≥ 1.83
            ss = feat.get("ss", " ")
        else:                                      # tuple / namedtuple style
            # empirical: aa at [1], ss at [2]  (works back to 1.79)
            ss = feat[2] if len(feat) > 2 else " "

        counts[code_map.get(ss, "coil")] += 1

    return counts

In [4]:
def analyse_folder(folder="Samples",
                   use_dssp=True,
                   show_by_subfolder=True):

    parser = PDBParser(QUIET=True)
    out_csv = f"{folder[-2:]}-secondary_structure_summary.csv"

    # 🚨 NEW LINE: ignore files such as “o-something.pdb”
    pdb_paths = [p for p in Path(folder).rglob("*.pdb")
                if not p.name.startswith("o-")]

    if not pdb_paths:
        print("Nothing to process—every .pdb began with 'o-' or folder was empty.")
        return pd.DataFrame()

    grand   = Counter()
    rows    = []
    last_dir = None

    for pdb_path in pdb_paths:
        this_dir = pdb_path.parent
        if show_by_subfolder and this_dir != last_dir:
            print(f"\n📁  {this_dir.relative_to(folder)}/")
            last_dir = this_dir

        structure = parser.get_structure(pdb_path.stem, pdb_path)
        counts = (ss_from_dssp if use_dssp else ss_from_records)(structure, pdb_path)
        grand += counts

        total  = sum(counts.values()) or 1
        h_pct, e_pct, c_pct = (counts[k]/total for k in ("helix","sheet","coil"))
        print(f"  {pdb_path.name:25s}  H {h_pct:6.1%}   E {e_pct:6.1%}   C {c_pct:6.1%}")

        rows.append({
            "file":   str(pdb_path.relative_to(folder)),
            "helix":  counts["helix"],  "sheet": counts["sheet"], "coil":  counts["coil"],
            "total":  total,
            "helix_pct": h_pct,         "sheet_pct": e_pct,      "coil_pct": c_pct})

    # ⬇️  grand totals row
    total = sum(grand.values())
    rows.append({"file":"ALL_FILES", "helix":grand["helix"], "sheet":grand["sheet"],
                 "coil":grand["coil"], "total":total,
                 "helix_pct":grand["helix"]/total,
                 "sheet_pct":grand["sheet"]/total,
                 "coil_pct": grand["coil"]/total})

    print("\n" + "—"*28)
    print(f"ALL FILES             "
          f"H {grand['helix']/total:6.1%}   "
          f"E {grand['sheet']/total:6.1%}   "
          f"C {grand['coil']/total:6.1%}")

    df = pd.DataFrame(rows)
    df.to_csv(out_csv, index=False)
    print(f"\n📄  Results written to {out_csv}")
    return df

In [None]:
df_results = analyse_folder("Samples-Ca", use_dssp=True)


📁  6197/
  6197-4.pdb                 H   8.5%   E  40.6%   C  50.9%
  6197-1.pdb                 H   9.1%   E  41.2%   C  49.7%
  6197-3.pdb                 H   9.1%   E  40.0%   C  50.9%
  6197-2.pdb                 H  31.5%   E  24.8%   C  43.6%

📁  6200/
  6200-4.pdb                 H  19.9%   E  28.7%   C  51.4%
  6200-1.pdb                 H  19.4%   E  31.5%   C  49.1%
  6200-3.pdb                 H  17.6%   E  25.9%   C  56.5%
  6200-2.pdb                 H  18.1%   E  27.8%   C  54.2%

📁  6201/
  6201-4.pdb                 H  31.9%   E  26.4%   C  41.7%
  6201-1.pdb                 H  30.6%   E  25.0%   C  44.4%
  6201-3.pdb                 H  30.6%   E  16.7%   C  52.8%
  6201-2.pdb                 H  31.9%   E  13.9%   C  54.2%

📁  6203/
  6203-4.pdb                 H  57.7%   E   0.0%   C  42.3%
  6203-3.pdb                 H  57.7%   E   0.0%   C  42.3%
  6203-2.pdb                 H  57.7%   E   0.0%   C  42.3%
  6203-1.pdb                 H  53.8%   E   0.0%   C  46.2%


In [None]:
df_results = analyse_folder("Samples-Mg", use_dssp=True)


📁  6348/
  6348-3.pdb                 H  54.8%   E   4.3%   C  40.9%
  6348-4.pdb                 H  55.9%   E   4.3%   C  39.8%
  6348-2.pdb                 H  55.9%   E   4.3%   C  39.8%

📁  6350/
  6350-3.pdb                 H  52.2%   E   0.0%   C  47.8%
  6350-4.pdb                 H  56.5%   E   0.0%   C  43.5%
  6350-2.pdb                 H  21.7%   E   0.0%   C  78.3%

📁  6349/
  6349-3.pdb                 H  23.9%   E  29.9%   C  46.2%
  6349-4.pdb                 H  23.1%   E  28.2%   C  48.7%
  6349-2.pdb                 H  27.0%   E  27.6%   C  45.4%

📁  6353/
  6353-3.pdb                 H  32.2%   E  23.5%   C  44.3%
  6353-4.pdb                 H  31.8%   E  23.3%   C  44.9%
  6353-2.pdb                 H  21.5%   E  27.6%   C  50.9%
  temp-6353-4.pdb            H  31.8%   E  23.3%   C  44.9%
  solvated-6353-4.pdb        H  31.8%   E  23.3%   C  44.9%

📁  6354/
  6354-1.pdb                 H  16.0%   E  29.9%   C  54.0%

📁  6355/
  6355-3.pdb                 H  14.3%   

In [None]:
df_results = analyse_folder("Samples-Zn", use_dssp=True)


📁  6896/
  6896-4.pdb                 H  41.1%   E  11.5%   C  47.4%
  6896-1.pdb                 H  35.9%   E  17.4%   C  46.7%
  6896-3.pdb                 H  40.8%   E  15.5%   C  43.8%
  6896-2.pdb                 H  41.8%   E  13.8%   C  44.4%

📁  6897/
  6897-4.pdb                 H  58.7%   E   1.4%   C  39.9%
  6897-1.pdb                 H  69.9%   E   0.0%   C  30.1%
  6897-3.pdb                 H  66.4%   E   2.8%   C  30.8%
  6897-2.pdb                 H  67.1%   E   0.0%   C  32.9%
  temp-6897-3.pdb            H  66.4%   E   2.8%   C  30.8%
  solvated-6897-3.pdb        H  66.4%   E   2.8%   C  30.8%

📁  6898/
  6898-4.pdb                 H  25.5%   E  40.4%   C  34.0%
  6898-1.pdb                 H  26.6%   E  22.3%   C  51.1%
  6898-3.pdb                 H  29.8%   E  35.1%   C  35.1%
  6898-2.pdb                 H  27.7%   E  35.1%   C  37.2%

📁  6899/
  6899-4.pdb                 H   0.0%   E  42.9%   C  57.1%
  6899-1.pdb                 H   0.0%   E  42.9%   C  57.1%


In [None]:
import numpy as np
from typing import Tuple, Dict, List
import warnings

def ss_from_records(structure, pdb_path: Path) -> Counter:
    """
    Fallback secondary-structure counter using HELIX / SHEET records
    already present in the PDB file.  Counts every residue; anything
    not on a HELIX/SHEET line is classified as 'coil'.
    Replace or extend if you have fancier needs.
    """
    helix_residues = {(h.chain_id, *h.residue_range) for h in structure.header.get("helices", [])}
    sheet_residues = {(s.chain_id, *s.residue_range) for s in structure.header.get("sheets", [])}

    cnt = Counter()
    for model in structure:
        for chain in model:
            for residue in chain:
                key = (chain.id, residue.id[1])
                if key in helix_residues:
                    cnt["helix"] += 1
                elif key in sheet_residues:
                    cnt["sheet"] += 1
                else:
                    cnt["coil"] += 1
    return cnt


def ss_from_dssp(structure, pdb_path: Path) -> Counter:
    """
    DSSP-based classification (BioPython’s built-in wrapper).
    Needs mkdssp in PATH.  Handles helices/sheets more rigorously.
    """
    dssp = DSSP(structure[0], str(pdb_path))
    cnt = Counter()
    for _idx, record in dssp.property_dict.items():
        code = record[1]  # DSSP single-letter code
        if code in "HGI":          # α, 3₁₀, π helices → "helix"
            cnt["helix"] += 1
        elif code in "EB":         # β sheet / β bridge → "sheet"
            cnt["sheet"] += 1
        else:                      # everything else → "coil"
            cnt["coil"] += 1
    return cnt

In [None]:
# ------------------------------------------------------------------------
# Main one-stop function -------------------------------------------------
# ------------------------------------------------------------------------
warnings.filterwarnings("ignore", message="Unknown or untrusted program in REMARK 3")

def analyse_id_folder(
    id_folder: str,
    cache_dir: str = "pdb_cache",
    use_dssp: bool = True,
    out_csv: str | None = None,
    ping_every: int | None = 500,
) -> Tuple[pd.DataFrame, Dict[str, float]]:
    """
    Parameters
    ----------
    id_folder : str
        Path to folder containing *.txt files with PDB IDs.
    cache_dir : str
        Folder to store downloaded PDB files.
    use_dssp  : bool
        True → classify via DSSP; False → use HELIX/SHEET records.
    out_csv   : str | None
        If given, save per-structure table to this CSV.
    ping_every: int | None
        Print progress every N structures.  None → silent.

    Returns
    -------
    df_struct : DataFrame
        One row per PDB: raw counts + proportions.
    summary   : dict
        Mean and sample SD of the proportions across all PDBs.
    """

    # ── 1. Collect unique 4-letter PDB IDs ───────────────────────────────
    txt_files = list(Path(id_folder).glob("*.txt"))
    if not txt_files:
        raise FileNotFoundError(f"No *.txt files found in ‘{id_folder}’")

    def first_id(txt: Path) -> str | None:
        stem = txt.stem.strip().lower()
        first_line = next((l.strip() for l in txt.read_text().splitlines() if l.strip()), "")
        for cand in (stem, first_line):
            if len(cand) == 4 and cand.isalnum():
                return cand.lower()
        return None

    pdb_ids = {pid for f in txt_files if (pid := first_id(f))}
    if not pdb_ids:
        raise ValueError("No valid 4-letter PDB IDs discovered.")

    # ── 2. Prep I/O helpers ───────────────────────────────────────────────
    cache    = Path(cache_dir)
    cache.mkdir(exist_ok=True)
    pdbl     = PDBList()                      # downloader
    parser   = PDBParser(QUIET=True)          # PDB parser
    per_rows: List[Dict] = []
    grand    = Counter()

    # ── 3. Main loop ──────────────────────────────────────────────────────
    for idx, pid in enumerate(sorted(pdb_ids), start=1):
        if ping_every and idx % ping_every == 0:
            print(f"[{idx:>6}/{len(pdb_ids):>6}]  Now processing {pid}")

        try:
            local_path = cache / f"{pid}.pdb"
            if not local_path.exists():
                tmp = pdbl.retrieve_pdb_file(
                    pid, pdir=str(cache), file_format="pdb", obsolete=False
                )
                Path(tmp).rename(local_path)

            structure = parser.get_structure(pid, local_path)
            counter = ss_from_dssp(structure, local_path) if use_dssp \
                      else ss_from_records(structure, local_path)

            total = sum(counter.values()) or 1  # guard /0
            per_rows.append({
                "pdb": pid,
                "helix": counter["helix"],
                "sheet": counter["sheet"],
                "coil":  counter["coil"],
                "helix_prop": counter["helix"] / total,
                "sheet_prop": counter["sheet"] / total,
                "coil_prop":  counter["coil"]  / total,
            })
            grand += counter

        except Exception as e:
            print(f"⚠️  Skipping {pid}: {e}")

    if not per_rows:
        raise RuntimeError("No structures processed successfully.")

    # ── 4. Per-structure DataFrame ────────────────────────────────────────
    df_struct = pd.DataFrame(per_rows)
    prop_cols = ["helix_prop", "sheet_prop", "coil_prop"]

    # ── 5. Mean ± SD across dataset ───────────────────────────────────────
    mean_props = df_struct[prop_cols].mean()
    std_props  = df_struct[prop_cols].std(ddof=1)

    summary = {f"{c}_mean": mean_props[c] for c in prop_cols} | \
              {f"{c}_std":  std_props[c]  for c in prop_cols}

    # ── 6. Optional CSV output ────────────────────────────────────────────
    if out_csv:
        df_struct.to_csv(out_csv, index=False)

    return df_struct, summary

In [None]:
# ------------------------------------------------------------------------
# If run directly, demonstrate on an example folder ----------------------
# ------------------------------------------------------------------------

df, stats = analyse_id_folder("Ca_bind", out_csv="Ca_bind.csv",)
print("\nFirst few proteins:\n", df.head())
print("\nSummary (mean ± SD):\n", stats)












Invalid mmCIF file use --verbose option to see errors

Invalid mmCIF file use --verbose option to see errors

Invalid mmCIF file use --verbose option to see errors

Invalid mmCIF file use --verbose option to see errors

Invalid mmCIF file use --verbose option to see errors














Invalid mmCIF file use --verbose option to see errors



[   500/ 13279]  Now processing 1f8e


Invalid mmCIF file use --verbose option to see errors











Downloading PDB structure '1fzp'...
Desired structure doesn't exist
⚠️  Skipping 1fzp: [Errno 2] No such file or directory: 'pdb_cache/pdb1fzp.ent' -> 'pdb_cache/1fzp.pdb'














Downloading PDB structure '1gq3'...
Desired structure doesn't exist
⚠️  Skipping 1gq3: [Errno 2] No such file or directory: 'pdb_cache/pdb1gq3.ent' -> 'pdb_cache/1gq3.pdb'

























[  1000/ 13279]  Now processing 1k1p



























Invalid mmCIF file use --verbose option to see errors







Invalid mmCIF file use --verbose option to see errors







[  1500/ 13279]  Now processing 1oyt


























































Invalid mmCIF file use --verbose option to see errors































[  2000/ 13279]  Now processing 1ujc





















Invalid mmCIF file use --verbose option to see errors

Invalid mmCIF file use --verbose option to see errors

Invalid mmCIF file use --verbose option to see errors









Downloading PDB structure '1vlf'...
Desired structure doesn't exist
⚠️  Skipping 1vlf: [Errno 2] No such file or directory: 'pdb_cache/pdb1vlf.ent' -> 'pdb_cache/1vlf.pdb'






Downloading PDB structure '1vtz'...
Desired structure doesn't exist
⚠️  Skipping 1vtz: [Errno 2] No such file or directory: 'pdb_cache/pdb1vtz.ent' -> 'pdb_cache/1vtz.pdb'














Invalid mmCIF file use --verbose option to see errors

Invalid mmCIF file use --verbose option to see errors

Invalid mmCIF file use --verbose option to see errors













Invalid mmCIF file use --verbose option to see errors








Resulting mmCIF file is not valid!
Invalid mmCIF file use --verbose option to see errors

















[  2500/ 13279]  Now processing 2ayk






Invalid mmCIF file use --verbose option to see errors




Resulting mmCIF file is not valid!
Invalid mmCIF file use --verbose option to see errors
















Downloading PDB structure '2cmz'...
Desired structure doesn't exist
⚠️  Skipping 2cmz: [Errno 2] No such file or directory: 'pdb_cache/pdb2cmz.ent' -> 'pdb_cache/2cmz.pdb'




























Error trying to load file "pdb_cache/2f03.pdb"
Residue E401  could not be mapped
 >> map::at



⚠️  Skipping 2f03: DSSP failed to produce an output





⚠️  Skipping 2fmd: Structure/DSSP mismatch at <Residue ASX het=  resseq=204 icode= >


















Invalid mmCIF file use --verbose option to see errors









[  3000/ 13279]  Now processing 2i4b






























Error trying to load file "pdb_cache/2k60.pdb"
Residue A656  could not be mapped
 >> map::at



⚠️  Skipping 2k60: DSSP failed to produce an output
































































Downloading PDB structure '2q23'...
Desired structure doesn't exist
⚠️  Skipping 2q23: [Errno 2] No such file or directory: 'pdb_cache/pdb2q23.ent' -> 'pdb_cache/2q23.pdb'




Invalid mmCIF file use --verbose option to see errors





[  3500/ 13279]  Now processing 2qwi







Invalid mmCIF file use --verbose option to see errors
dssp: ./src/Structure.cpp:836: mmcif::Residue::Residue(const mmcif::Structure&, const string&, const string&, int, const string&): Assertion `mCompoundID != "HOH"' failed.



⚠️  Skipping 2rjp: DSSP failed to produce an output


Invalid mmCIF file use --verbose option to see errors
dssp: ./src/Structure.cpp:836: mmcif::Residue::Residue(const mmcif::Structure&, const string&, const string&, int, const string&): Assertion `mCompoundID != "HOH"' failed.



⚠️  Skipping 2rjq: DSSP failed to produce an output




Resulting mmCIF file is not valid!
Invalid mmCIF file use --verbose option to see errors






Invalid mmCIF file use --verbose option to see errors












Resulting mmCIF file is not valid!
Invalid mmCIF file use --verbose option to see errors


Invalid mmCIF file use --verbose option to see errors



⚠️  Skipping 2vqr: Structure/DSSP mismatch at <Residue DDZ het=H_DDZ resseq=57 icode= >


Invalid mmCIF file use --verbose option to see errors




Invalid mmCIF file use --verbose option to see errors

Invalid mmCIF file use --verbose option to see errors








Downloading PDB structure '2x54'...
Desired structure doesn't exist
⚠️  Skipping 2x54: [Errno 2] No such file or directory: 'pdb_cache/pdb2x54.ent' -> 'pdb_cache/2x54.pdb'






Downloading PDB structure '2ydn'...
Desired structure doesn't exist
⚠️  Skipping 2ydn: [Errno 2] No such file or directory: 'pdb_cache/pdb2ydn.ent' -> 'pdb_cache/2ydn.pdb'
Downloading PDB structure '2yf1'...
Desired structure doesn't exist
⚠️  Skipping 2yf1: [Errno 2] No such file or directory: 'pdb_cache/pdb2yf1.ent' -> 'pdb_cache/2yf1.pdb'





Downloading PDB structure '2z2q'...
Desired structure doesn't exist
⚠️  Skipping 2z2q: [Errno 2] No such file or directory: 'pdb_cache/pdb2z2q.ent' -> 'pdb_cache/2z2q.pdb'


Invalid mmCIF file use --verbose option to see errors



[  4000/ 13279]  Now processing 2zal











Invalid mmCIF file use --verbose option to see errors
dssp: ./src/Structure.cpp:836: mmcif::Residue::Residue(const mmcif::Structure&, const string&, const string&, int, const string&): Assertion `mCompoundID != "HOH"' failed.



⚠️  Skipping 3ak5: DSSP failed to produce an output
Downloading PDB structure '3arc'...
Desired structure doesn't exist
⚠️  Skipping 3arc: [Errno 2] No such file or directory: 'pdb_cache/pdb3arc.ent' -> 'pdb_cache/3arc.pdb'





Invalid mmCIF file use --verbose option to see errors
dssp: ./src/Structure.cpp:836: mmcif::Residue::Residue(const mmcif::Structure&, const string&, const string&, int, const string&): Assertion `mCompoundID != "HOH"' failed.



⚠️  Skipping 3b2z: DSSP failed to produce an output







Downloading PDB structure '3bz1'...
Desired structure doesn't exist
⚠️  Skipping 3bz1: [Errno 2] No such file or directory: 'pdb_cache/pdb3bz1.ent' -> 'pdb_cache/3bz1.pdb'





Downloading PDB structure '3c68'...
Desired structure doesn't exist
⚠️  Skipping 3c68: [Errno 2] No such file or directory: 'pdb_cache/pdb3c68.ent' -> 'pdb_cache/3c68.pdb'











Error trying to load file "pdb_cache/3cye.pdb"
Residue A3  could not be mapped
 >> map::at



⚠️  Skipping 3cye: DSSP failed to produce an output





Invalid mmCIF file use --verbose option to see errors










Invalid mmCIF file use --verbose option to see errors

Invalid mmCIF file use --verbose option to see errors



[  4500/ 13279]  Now processing 3f19












Downloading PDB structure '3hho'...
Desired structure doesn't exist
⚠️  Skipping 3hho: [Errno 2] No such file or directory: 'pdb_cache/pdb3hho.ent' -> 'pdb_cache/3hho.pdb'








Downloading PDB structure '3hx2'...
Desired structure doesn't exist
⚠️  Skipping 3hx2: [Errno 2] No such file or directory: 'pdb_cache/pdb3hx2.ent' -> 'pdb_cache/3hx2.pdb'



This file contains data that won't fit in the original DSSP format






Invalid mmCIF file use --verbose option to see errors





























[  5000/ 13279]  Now processing 3mhb



Invalid mmCIF file use --verbose option to see errors
dssp: ./src/Structure.cpp:836: mmcif::Residue::Residue(const mmcif::Structure&, const string&, const string&, int, const string&): Assertion `mCompoundID != "HOH"' failed.



⚠️  Skipping 3mn5: DSSP failed to produce an output





Invalid mmCIF file use --verbose option to see errors

Invalid mmCIF file use --verbose option to see errors
dssp: ./src/Structure.cpp:836: mmcif::Residue::Residue(const mmcif::Structure&, const string&, const string&, int, const string&): Assertion `mCompoundID != "HOH"' failed.



⚠️  Skipping 3o0x: DSSP failed to produce an output











Downloading PDB structure '3p0d'...
Desired structure doesn't exist
⚠️  Skipping 3p0d: [Errno 2] No such file or directory: 'pdb_cache/pdb3p0d.ent' -> 'pdb_cache/3p0d.pdb'








Invalid mmCIF file use --verbose option to see errors
dssp: ./src/Structure.cpp:836: mmcif::Residue::Residue(const mmcif::Structure&, const string&, const string&, int, const string&): Assertion `mCompoundID != "HOH"' failed.



⚠️  Skipping 3pos: DSSP failed to produce an output


Invalid mmCIF file use --verbose option to see errors
dssp: ./src/Structure.cpp:836: mmcif::Residue::Residue(const mmcif::Structure&, const string&, const string&, int, const string&): Assertion `mCompoundID != "HOH"' failed.



⚠️  Skipping 3pow: DSSP failed to produce an output










Invalid mmCIF file use --verbose option to see errors





Downloading PDB structure '3q9m'...
Desired structure doesn't exist
⚠️  Skipping 3q9m: [Errno 2] No such file or directory: 'pdb_cache/pdb3q9m.ent' -> 'pdb_cache/3q9m.pdb'






Downloading PDB structure '3qnh'...
Desired structure doesn't exist
⚠️  Skipping 3qnh: [Errno 2] No such file or directory: 'pdb_cache/pdb3qnh.ent' -> 'pdb_cache/3qnh.pdb'





Downloading PDB structure '3r78'...
Desired structure doesn't exist
⚠️  Skipping 3r78: [Errno 2] No such file or directory: 'pdb_cache/pdb3r78.ent' -> 'pdb_cache/3r78.pdb'











Downloading PDB structure '3rqv'...
Desired structure doesn't exist
⚠️  Skipping 3rqv: [Errno 2] No such file or directory: 'pdb_cache/pdb3rqv.ent' -> 'pdb_cache/3rqv.pdb'
[  5500/ 13279]  Now processing 3rza
Downloading PDB structure '3s4f'...
Desired structure doesn't exist
⚠️  Skipping 3s4f: [Errno 2] No such file or directory: 'pdb_cache/pdb3s4f.ent' -> 'pdb_cache/3s4f.pdb'




Invalid mmCIF file use --verbose option to see errors

Invalid mmCIF file use --verbose option to see errors









Invalid mmCIF file use --verbose option to see errors
dssp: ./src/Structure.cpp:836: mmcif::Residue::Residue(const mmcif::Structure&, const string&, const string&, int, const string&): Assertion `mCompoundID != "HOH"' failed.



⚠️  Skipping 3u24: DSSP failed to produce an output












Downloading PDB structure '3wmn'...
Desired structure doesn't exist
⚠️  Skipping 3wmn: [Errno 2] No such file or directory: 'pdb_cache/pdb3wmn.ent' -> 'pdb_cache/3wmn.pdb'








[  6000/ 13279]  Now processing 4abg


Resulting mmCIF file is not valid!
Invalid mmCIF file use --verbose option to see errors



Downloading PDB structure '4ahb'...
Desired structure doesn't exist
⚠️  Skipping 4ahb: [Errno 2] No such file or directory: 'pdb_cache/pdb4ahb.ent' -> 'pdb_cache/4ahb.pdb'


Invalid mmCIF file use --verbose option to see errors


Invalid mmCIF file use --verbose option to see errors








Resulting mmCIF file is not valid!
Invalid mmCIF file use --verbose option to see errors

Resulting mmCIF file is not valid!
Invalid mmCIF file use --verbose option to see errors

Resulting mmCIF file is not valid!
Invalid mmCIF file use --verbose option to see errors

Resulting mmCIF file is not valid!
Invalid mmCIF file use --verbose option to see errors



Downloading PDB structure '4fy1'...
Desired structure doesn't exist
⚠️  Skipping 4fy1: [Errno 2] No such file or directory: 'pdb_cache/pdb4fy1.ent' -> 'pdb_cache/4fy1.pdb'
[  6500/ 13279]  Now processing 4gez


























Downloading PDB structure '4k1m'...
Desired structure doesn't exist
⚠️  Skipping 4k1m: [Errno 2] No such file or directory: 'pdb_cache/pdb4k1m.ent' -> 'pdb_cache/4k1m.pdb'





Downloading PDB structure '4k4l'...
Desired structure doesn't exist
⚠️  Skipping 4k4l: [Errno 2] No such file or directory: 'pdb_cache/pdb4k4l.ent' -> 'pdb_cache/4k4l.pdb'






Resulting mmCIF file is not valid!
Invalid mmCIF file use --verbose option to see errors



[  7000/ 13279]  Now processing 4m1l






Invalid mmCIF file use --verbose option to see errors

Invalid mmCIF file use --verbose option to see errors

Error trying to load file "pdb_cache/4p3q.pdb"
Residue A384  could not be mapped
 >> map::at



⚠️  Skipping 4p3q: DSSP failed to produce an output



This file contains data that won't fit in the original DSSP format





Invalid mmCIF file use --verbose option to see errors

Invalid mmCIF file use --verbose option to see errors

Invalid mmCIF file use --verbose option to see errors



Resulting mmCIF file is not valid!
Invalid mmCIF file use --verbose option to see errors



[  7500/ 13279]  Now processing 4tnc


This file contains data that won't fit in the original DSSP format

This file contains data that won't fit in the original DSSP format

This file contains data that won't fit in the original DSSP format

This file contains data that won't fit in the original DSSP format



This file contains data that won't fit in the original DSSP format






Downloading PDB structure '4wbn'...
Desired structure doesn't exist
⚠️  Skipping 4wbn: [Errno 2] No such file or directory: 'pdb_cache/pdb4wbn.ent' -> 'pdb_cache/4wbn.pdb'









Downloading PDB structure '4yza'...
Desired structure doesn't exist
⚠️  Skipping 4yza: [Errno 2] No such file or directory: 'pdb_cache/pdb4yza.ent' -> 'pdb_cache/4yza.pdb'





[  8000/ 13279]  Now processing 5b5o







Downloading PDB structure '5c3v'...
Desired structure doesn't exist
⚠️  Skipping 5c3v: [Errno 2] No such file or directory: 'pdb_cache/pdb5c3v.ent' -> 'pdb_cache/5c3v.pdb'









Downloading PDB structure '5dpk'...
Desired structure doesn't exist
⚠️  Skipping 5dpk: [Errno 2] No such file or directory: 'pdb_cache/pdb5dpk.ent' -> 'pdb_cache/5dpk.pdb'














[  8500/ 13279]  Now processing 5i89














Error parsing PDB at line 678
Error trying to load file "pdb_cache/5lpx.pdb"
When validating _struct_conn.ptnr2_symmetry: Value '31_059' does not match type expression for type symop



⚠️  Skipping 5lpx: DSSP failed to produce an output










Downloading PDB structure '5ncx'...
Desired structure doesn't exist
⚠️  Skipping 5ncx: [Errno 2] No such file or directory: 'pdb_cache/pdb5ncx.ent' -> 'pdb_cache/5ncx.pdb'


Invalid mmCIF file use --verbose option to see errors





[  9000/ 13279]  Now processing 5olq








Invalid mmCIF file use --verbose option to see errors








[  9500/ 13279]  Now processing 5xnd






Invalid mmCIF file use --verbose option to see errors




This file contains data that won't fit in the original DSSP format

Invalid mmCIF file use --verbose option to see errors












[ 10000/ 13279]  Now processing 6eq8


Invalid mmCIF file use --verbose option to see errors





Error parsing PDB at line 1188
Error trying to load file "pdb_cache/6frn.pdb"
When validating _struct_conn.ptnr1_symmetry: Value '26_410' does not match type expression for type symop



⚠️  Skipping 6frn: DSSP failed to produce an output



Invalid mmCIF file use --verbose option to see errors










Invalid mmCIF file use --verbose option to see errors

Invalid mmCIF file use --verbose option to see errors

Invalid mmCIF file use --verbose option to see errors
















Downloading PDB structure '6lco'...
Desired structure doesn't exist
⚠️  Skipping 6lco: [Errno 2] No such file or directory: 'pdb_cache/pdb6lco.ent' -> 'pdb_cache/6lco.pdb'





[ 10500/ 13279]  Now processing 6lxs





Invalid mmCIF file use --verbose option to see errors

Invalid mmCIF file use --verbose option to see errors





filesystem error: status: Transport endpoint is not connected [mmcif_pdbx_v50]



⚠️  Skipping 6o61: DSSP failed to produce an output


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



⚠️  Skipping 6o6j: [Errno 107] Transport endpoint is not connected: 'pdb_cache/6o6j.pdb'
⚠️  Skipping 6o77: [Errno 107] Transport endpoint is not connected: 'pdb_cache/6o77.pdb'
⚠️  Skipping 6o7c: [Errno 107] Transport endpoint is not connected: 'pdb_cache/6o7c.pdb'
⚠️  Skipping 6o83: [Errno 107] Transport endpoint is not connected: 'pdb_cache/6o83.pdb'
⚠️  Skipping 6o9n: [Errno 107] Transport endpoint is not connected: 'pdb_cache/6o9n.pdb'
⚠️  Skipping 6oad: [Errno 107] Transport endpoint is not connected: 'pdb_cache/6oad.pdb'
⚠️  Skipping 6oae: [Errno 107] Transport endpoint is not connected: 'pdb_cache/6oae.pdb'
⚠️  Skipping 6oau: [Errno 107] Transport endpoint is not connected: 'pdb_cache/6oau.pdb'
⚠️  Skipping 6oaz: [Errno 107] Transport endpoint is not connected: 'pdb_cache/6oaz.pdb'
⚠️  Skipping 6ob0: [Errno 107] Transport endpoint is not connected: 'pdb_cache/6ob0.pdb'
⚠️  Skipping 6obx: [Errno 107] Transport endpoint is not connected: 'pdb_cache/6obx.pdb'
⚠️  Skipping 6ocn: [E

In [None]:
df, stats = analyse_id_folder("Mg_bind", out_csv="Mg_bind.csv")

In [None]:
df, stats = analyse_id_folder("Zn_bind", out_csv="Zn_bind.csv")