### Fetch phospho peptides from Scop3P and map onto protein structures
> 1. Enter the protein ID (ex: P07949) and click 'Load'
> 2. Choose how peptides are listed: every peptide-modification row ('All rows') or 'Unique peptide spans' (rows that share the same sequence, start and end are merged into one entry)
> 3. Map all listed peptides onto the AlphaFold (AF) structure using 'Map all (filtered)' (shows the mass spec coverage of your protein)
> 4. Alternatively, click one or more peptides in the peptide panel to see their structural mapping
> 5. Hint:
> > Explore what the search function does!

In [3]:
import requests, tempfile, json, sys
import pandas as pd
from b2bTools import SingleSeq, constants
import py3Dmol
import ipywidgets as widgets

import urllib.request
from urllib.error import HTTPError, URLError
import re

from IPython.display import display, clear_output
import nglview as nv

from pathlib import Path


def scop3p_ngl_mapper_app(default_accession=""):
    # -------------------------
    # 1) Scop3P API fetch
    # -------------------------
    def fetch_scop3p_peptides(accession: str) -> pd.DataFrame:
        """Fetch the peptide/modification rows of one protein from the Scop3P API.

        Args:
            accession: Protein accession, sent as the ``accession`` query parameter.

        Returns:
            A DataFrame built from the ``"peptides"`` list of the JSON response.
            The position columns that are present are converted to nullable
            ``Int64`` and a human-readable ``label`` column is added. An empty
            DataFrame is returned unchanged when the response has no peptides.

        Raises:
            requests.HTTPError: If the API answers with an error status.
        """
        url = "https://iomics.ugent.be/scop3p/api/get-peptides-modifications"
        # Let requests build the query string so the accession is URL-encoded.
        r = requests.get(url, params={"accession": accession}, timeout=30)
        r.raise_for_status()
        data = r.json()

        df = pd.DataFrame(data.get("peptides", []))
        if df.empty:
            return df

        for c in ["peptideStart", "peptideEnd", "peptideModificationPosition", "uniprotPosition"]:
            if c in df.columns:
                df[c] = pd.to_numeric(df[c], errors="coerce").astype("Int64")

        def _fmt_pos(value) -> str:
            # errors="coerce" above turns unparseable positions into <NA>, and
            # int(<NA>) raises; show a placeholder instead of failing the load.
            return "?" if pd.isna(value) else str(int(value))

        df["label"] = df.apply(
            lambda x: (
                f'{x["peptideSequence"]} ({_fmt_pos(x["peptideStart"])}-{_fmt_pos(x["peptideEnd"])}) '
                f'@{x.get("modifiedResidue","")}{_fmt_pos(x["uniprotPosition"])} score={x.get("score","")}'
            ),
            axis=1
        )
        return df

    # -------------------------
    # 2) AlphaFold download (fallback v6 -> v4)
    # -------------------------
    def download_alphafold_pdb(accession: str, versions=("v6", "v4")) -> str:
        """Download the AlphaFold model of ``accession`` into the local cache dir.

        The model versions are tried in the given order; the first download
        that yields a file of at least 1000 bytes wins.

        Args:
            accession: Accession used in the AlphaFold file name
                (``AF-<accession>-F1-model_<version>.pdb``).
            versions: Model version suffixes to try, in order of preference.

        Returns:
            Path (as ``str``) of the downloaded PDB file inside ``af_cache``.

        Raises:
            RuntimeError: If no version could be downloaded; the message
                carries the last error seen.
        """
        base = "https://alphafold.ebi.ac.uk/files"
        last_err = None

        # keep files in a predictable writable cache dir (nice for Docker/Voila)
        cache_dir = Path("af_cache")
        cache_dir.mkdir(exist_ok=True)
        target = cache_dir / f"{accession}.pdb"
        # Download next to the final file and move it into place only after
        # validation, so a failed attempt never leaves a truncated model at
        # the path that callers use.
        partial = cache_dir / f"{accession}.pdb.part"

        for v in versions:
            pdb_name = f"AF-{accession}-F1-model_{v}.pdb"
            url = f"{base}/{pdb_name}"
            try:
                urllib.request.urlretrieve(url, str(partial))

                if partial.stat().st_size < 1000:
                    raise RuntimeError(f"Downloaded file too small from {url}")

                partial.replace(target)
                return str(target)
            except (HTTPError, URLError, RuntimeError) as e:
                last_err = e
            finally:
                # Runs on every exit path; after a successful replace() the
                # partial file no longer exists and nothing is removed.
                if partial.exists():
                    partial.unlink()

        raise RuntimeError(f"Could not download AlphaFold PDB for {accession}. Last error: {last_err}")

    # -------------------------
    # 3) NGL helpers
    # -------------------------
    def positions_to_ranges(pos_list):
        """Collapse positions into sorted, inclusive ``(start, end)`` runs.

        Duplicates are ignored and every value is converted with ``int``.
        Consecutive integers are merged into one run; an empty input gives
        an empty list.
        """
        if not pos_list:
            return []

        ordered = sorted({int(p) for p in pos_list})
        ranges = []
        run_start = run_end = ordered[0]
        for pos in ordered[1:]:
            if pos != run_end + 1:
                # Gap found: close the current run and open a new one.
                ranges.append((run_start, run_end))
                run_start = pos
            run_end = pos
        ranges.append((run_start, run_end))
        return ranges

    def add_cartoon_selection(view, ranges, color, name):
        """Add one cartoon representation covering all ``(start, end)`` ranges.

        Does nothing when ``ranges`` is empty. The ranges are joined into a
        single selection string passed to ``view.add_representation``.
        """
        if ranges:
            # NOTE(review): "resi a-b" looks like PyMOL-style syntax; NGL's
            # documented form for a residue range is plain "a-b" - confirm.
            terms = []
            for start, end in ranges:
                terms.append(f"resi {start}-{end}")
            view.add_representation(
                "cartoon",
                selection=" or ".join(terms),
                color=color,
                name=name,
            )

    def add_positions(view, positions, color, name, repr_type="ball+stick"):
        """Add one representation for individual residue positions.

        Positions are converted with ``int``, de-duplicated and sorted before
        being joined into a single selection string. Does nothing when
        ``positions`` is empty.
        """
        if not positions:
            return

        unique_sorted = sorted({int(p) for p in positions})
        selection = " or ".join(f"resi {pos}" for pos in unique_sorted)
        view.add_representation(repr_type, selection=selection, color=color, name=name)

    # -------------------------
    # 4) Filter logic
    # -------------------------
    def filter_peptides(df: pd.DataFrame, query: str) -> pd.DataFrame:
        """Filter peptide rows by a free-text search query.

        Supported query forms (surrounding whitespace is ignored):

        * ``A-B``  - peptides overlapping the residue range (either order).
        * ``>=N``  - peptides whose end is at or after position N.
        * ``<=N``  - peptides whose start is at or before position N.
        * ``N``    - peptides covering position N.
        * anything else - case-insensitive literal substring of the sequence.

        Args:
            df: Peptide table with ``peptideStart``, ``peptideEnd`` and
                ``peptideSequence`` columns; may be ``None`` or empty.
            query: Search text; an empty query disables filtering.

        Returns:
            The matching rows, or ``df`` itself when there is nothing to filter.
        """
        if df is None or df.empty:
            return df
        if not query:
            return df

        q = query.strip()

        m = re.fullmatch(r"(\d+)\s*-\s*(\d+)", q)
        if m:
            # Accept the bounds in either order ("90-70" means "70-90").
            a, b = sorted((int(m.group(1)), int(m.group(2))))
            return df[(df["peptideStart"] <= b) & (df["peptideEnd"] >= a)]

        m = re.fullmatch(r">=\s*(\d+)", q)
        if m:
            p = int(m.group(1))
            return df[df["peptideEnd"] >= p]

        m = re.fullmatch(r"<=\s*(\d+)", q)
        if m:
            p = int(m.group(1))
            return df[df["peptideStart"] <= p]

        # isdecimal() only accepts characters that int() can parse, unlike
        # isdigit(), which also accepts e.g. superscript digits.
        if q.isdecimal():
            p = int(q)
            return df[(df["peptideStart"] <= p) & (df["peptideEnd"] >= p)]

        # Literal substring match: with the default regex=True, typing "(" or
        # "[" into the search box raises re.error inside the widget callback.
        return df[df["peptideSequence"].astype(str).str.contains(q, case=False, na=False, regex=False)]

    # -------------------------
    # 5) UI (Voila-safe: no display here)
    # -------------------------
    # Static title / one-line usage hint shown at the top of the app.
    header = widgets.HTML(
        "<b>Scop3P → AlphaFold → NGLView peptide mapper</b><br>"
        "Enter accession → Load → (optional) Search → select peptides (auto-renders)."
    )

    # Accession entry and the button that triggers the Scop3P fetch.
    acc_input = widgets.Text(value=default_accession, description="ACC_ID:", layout=widgets.Layout(width="260px"))
    load_btn = widgets.Button(description="Load", button_style="primary")

    # How rows are listed: one option per unique (sequence, start, end) span,
    # or one option per DataFrame row.
    mode = widgets.ToggleButtons(
        options=["Unique peptide spans", "All rows"],
        value="Unique peptide spans",
        description="List:"
    )

    # Free-text filter; the accepted query forms are listed in the placeholder.
    search_box = widgets.Text(
        value="",
        placeholder="Filter: substring (SSFG), range (70-90), >=150, <=300, or single pos (154)",
        description="Search:",
        layout=widgets.Layout(width="750px")
    )

    # Multi-select list of peptides; its options are filled after loading.
    peptide_multi = widgets.SelectMultiple(
        description="Peptides:",
        options=[],
        layout=widgets.Layout(width="980px", height="240px")
    )

    # Whether modified sites are drawn, and which set of sites is used.
    show_mods_chk = widgets.Checkbox(value=True, description="Show modified sites (magenta)")
    show_mods_mode = widgets.ToggleButtons(
        options=["Selected peptides only", "All protein mods"],
        value="Selected peptides only",
        description="Mods:"
    )

    # Selects every option currently listed and renders them together.
    map_all_btn = widgets.Button(description="Map all (filtered)", button_style="warning")

    # Writes the last rendered view to a standalone HTML file.
    export_html_btn = widgets.Button(description="Export styled HTML", button_style="info")

    # Output area that receives the viewer and all status messages.
    out = widgets.Output()

    controls_row1 = widgets.HBox([acc_input, load_btn, mode])
    controls_row2 = widgets.HBox([map_all_btn, show_mods_chk, show_mods_mode, export_html_btn])

    # Top-level widget that Voila will render
    app = widgets.VBox([header, controls_row1, search_box, peptide_multi, controls_row2, out])

    # -------------------------
    # 6) State
    # -------------------------
    # Mutable state shared by the nested helpers and event handlers below.
    STATE = {
        # Full peptide table of the loaded accession, and its filtered view.
        "df": pd.DataFrame(),
        "df_filtered": pd.DataFrame(),
        # Local path of the downloaded structure and the accession it belongs to.
        "pdb_path": None,
        "acc_loaded": None,
        # True while the selection is changed programmatically, so the
        # selection observer does not trigger a render.
        "suspend_autorender": False,
        "last_action": None,  # "map_all" or "select"

        # Snapshot of the most recent render; read by the HTML export.
        "last_union_ranges": [],
        "last_inter_pos": [],
        "last_mod_pos": [],
        "last_pdb_path": None,
    }

    def build_peptide_options(df: pd.DataFrame, mode_value: str):
        """Build the ``(label, value)`` option pairs for the peptide list.

        In "Unique peptide spans" mode the rows are grouped by sequence, start
        and end; each option value is the ``(sequence, start, end)`` tuple and
        the label reports the number of distinct modified positions and the
        maximum score of the group. In any other mode there is one option per
        row, labelled with the row's ``label`` and valued with its index.

        Returns an empty list when ``df`` is ``None`` or empty.
        """
        if df is None or df.empty:
            return []

        if mode_value != "Unique peptide spans":
            return [(r["label"], int(idx)) for idx, r in df.iterrows()]

        span_columns = ["peptideSequence", "peptideStart", "peptideEnd"]
        grouped = df.groupby(span_columns, as_index=False).agg(
            n_mod_sites=("uniprotPosition", "nunique"),
            max_score=("score", "max"),
        )

        options = []
        for _, row in grouped.iterrows():
            sequence = row["peptideSequence"]
            start = int(row["peptideStart"])
            end = int(row["peptideEnd"])
            text = f'{sequence} ({start}-{end}) | modSites={int(row["n_mod_sites"])} maxScore={row["max_score"]}'
            options.append((text, (sequence, start, end)))
        return options

    def ensure_loaded_assets(acc: str):
        """Return the local structure path for ``acc``, downloading if needed.

        The path is remembered in ``STATE["pdb_path"]``; it is discarded and
        fetched again whenever ``acc`` differs from ``STATE["acc_loaded"]``.
        """
        accession_changed = STATE["acc_loaded"] != acc
        if accession_changed:
            STATE["acc_loaded"] = acc
            STATE["pdb_path"] = None

        cached_path = STATE["pdb_path"]
        if cached_path is None:
            cached_path = download_alphafold_pdb(acc)
            STATE["pdb_path"] = cached_path

        return cached_path

    def refresh_filtered_and_options(keep_selection=True):
        """Re-apply the search filter and rebuild the peptide option list.

        Stores the filtered table in ``STATE["df_filtered"]``. When
        ``keep_selection`` is true, previously selected values that are still
        listed are re-selected with auto-rendering suspended.
        """
        filtered = filter_peptides(STATE["df"], search_box.value)
        STATE["df_filtered"] = filtered

        previous = set(peptide_multi.value) if keep_selection else set()
        peptide_multi.options = build_peptide_options(filtered, mode.value)

        if not (keep_selection and previous):
            return

        still_listed = tuple(value for _, value in peptide_multi.options if value in previous)
        # Suspend the selection observer while the value is set from code.
        STATE["suspend_autorender"] = True
        try:
            peptide_multi.value = still_listed
        finally:
            STATE["suspend_autorender"] = False

    def render_current_selection():
        """Render the selected peptides on the structure inside ``out``.

        Clears the output area, resolves the selected options to residue
        spans and modified positions, draws them in a new NGL widget
        (silver: whole protein, blue: union of the spans, red: residues common
        to all selected spans, magenta: modified sites) and prints a short
        summary. The rendered ranges/positions are stored in the ``last_*``
        entries of ``STATE``. Problems are reported as printed messages; the
        function returns ``None``.
        """
        with out:
            clear_output()

            # Use the accession the peptide table was loaded for. The text
            # box may already hold a different, not yet loaded accession, and
            # using it would pair these peptides with another protein's model.
            acc = STATE.get("acc_loaded") or acc_input.value.strip()
            df_all = STATE["df"]
            df_filt = STATE["df_filtered"]

            if df_all is None or df_all.empty:
                print("No data loaded. Click Load.")
                return

            selected = list(peptide_multi.value)
            if not selected:
                print("Select at least one peptide (or click 'Map all (filtered)').")
                return

            try:
                pdb_path = ensure_loaded_assets(acc)
            except Exception as e:
                print("AlphaFold download error:", e)
                return

            spans = []
            mod_positions = []

            if mode.value == "Unique peptide spans":
                # Option values are (sequence, start, end) tuples in this mode.
                spans = [(int(s), int(e)) for _, s, e in selected]

                if show_mods_mode.value == "Selected peptides only":
                    for pepSeq, s, e in selected:
                        sub = df_all[
                            (df_all["peptideSequence"] == pepSeq)
                            & (df_all["peptideStart"] == int(s))
                            & (df_all["peptideEnd"] == int(e))
                        ]
                        mod_positions.extend(sub["uniprotPosition"].dropna().astype(int).tolist())
                else:
                    mod_positions = df_all["uniprotPosition"].dropna().astype(int).tolist()

            else:
                # Option values are row index labels of the filtered table.
                sub = df_filt.loc[selected].copy()
                spans = [(int(r["peptideStart"]), int(r["peptideEnd"])) for _, r in sub.iterrows()]

                if show_mods_mode.value == "Selected peptides only":
                    mod_positions = sub["uniprotPosition"].dropna().astype(int).tolist()
                else:
                    mod_positions = df_all["uniprotPosition"].dropna().astype(int).tolist()

            # union/intersection
            pos_lists = [list(range(a, b + 1)) for a, b in spans]
            union_pos = sorted(set(p for L in pos_lists for p in L))
            inter_pos = sorted(set(pos_lists[0]).intersection(*map(set, pos_lists[1:]))) if len(pos_lists) > 1 else []
            union_ranges = positions_to_ranges(union_pos)

            # --- Big NGL panel ---
            view = nv.NGLWidget()
            view.add_component(pdb_path)
            view.clear_representations()
            view.add_cartoon(color="silver")

            add_cartoon_selection(view, union_ranges, color="blue", name="peptide_union")

            if inter_pos:
                add_positions(view, inter_pos, color="red", name="peptide_intersection", repr_type="ball+stick")

            if show_mods_chk.value and mod_positions:
                add_positions(view, mod_positions, color="magenta", name="mods", repr_type="ball+stick")

            view.center()
            view.layout = widgets.Layout(width="1100px", height="700px")

            display(view)

            # store last render state
            STATE["last_union_ranges"] = union_ranges
            STATE["last_inter_pos"] = sorted(set(int(x) for x in inter_pos))
            STATE["last_mod_pos"] = sorted(set(int(x) for x in mod_positions))
            STATE["last_pdb_path"] = pdb_path

            # The span that starts last is not necessarily the one that ends
            # last, so the coverage end must be the maximum end over all spans.
            first_start = min(start for start, _ in spans)
            last_end = max(end for _, end in spans)

            print(f"\nACC_ID: {acc}")
            print(f"AlphaFold model: {pdb_path}")
            print(f"Selected peptide spans: {len(spans)}")
            print(f"Coverage: {first_start} → {last_end}")

            if inter_pos:
                print(f"Intersection (red): {len(inter_pos)} residues")
            elif len(spans) > 1:
                print("Intersection: none (selected peptides share no common residue)")
            else:
                print("Intersection: none (only one peptide)")

            if show_mods_chk.value:
                print(f"Modified sites (magenta): {len(set(mod_positions))} unique positions")

    def _write_styled_ngl_html(acc, pdb_path, union_ranges, inter_pos, mod_pos, out_html_path, auto_download_png=False):
        pdb_text = Path(pdb_path).read_text(errors="ignore")

        payload = {
            "acc": acc,
            "union_ranges": union_ranges,
            "intersection": inter_pos,
            "mods": mod_pos
        }

        auto_png_js = """
      stage.makeImage({ factor: 2, antialias: true, trim: false }).then(function (blob) {
        var a = document.createElement("a");
        a.href = URL.createObjectURL(blob);
        a.download = payload.acc + "_snapshot.png";
        document.body.appendChild(a);
        a.click();
        a.remove();
      });
        """ if auto_download_png else ""

        html = f"""<!doctype html>
<html>
<head>
  <meta charset="utf-8"/>
  <title>{acc} styled NGL session</title>
  <style>
    body {{ margin: 0; font-family: sans-serif; }}
    #viewport {{ width: 100vw; height: 100vh; }}
    #panel {{
      position: absolute; top: 10px; left: 10px; z-index: 10;
      background: rgba(255,255,255,0.9); padding: 10px; border-radius: 8px;
      max-width: 520px;
    }}
    code {{ font-size: 12px; }}
  </style>
  <script src="https://unpkg.com/ngl@latest/dist/ngl.js"></script>
</head>
<body>
  <div id="panel">
    <b>{acc}</b><br/>
    <div>Grey: protein | Blue: peptides | Red: intersection | Magenta: mods</div>
    <div style="margin-top:6px;"><code>union ranges: {len(union_ranges)} | mods: {len(mod_pos)} | intersection: {len(inter_pos)}</code></div>
  </div>
  <div id="viewport"></div>

  <script>
    const pdbText = {json.dumps(pdb_text)};
    const payload = {json.dumps(payload)};

    function rangesToSelection(ranges) {{
      if (!ranges || ranges.length === 0) return "";
      return ranges.map(r => `resi ${{r[0]}}-${{r[1]}}`).join(" OR ");
    }}

    function positionsToSelection(pos) {{
      if (!pos || pos.length === 0) return "";
      return pos.map(p => `resi ${{p}}`).join(" OR ");
    }}

    const stage = new NGL.Stage("viewport", {{ backgroundColor: "white" }});
    window.addEventListener("resize", () => stage.handleResize(), false);

    const blob = new Blob([pdbText], {{type: "text/plain"}});
    stage.loadFile(blob, {{ ext: "pdb" }}).then(comp => {{
      comp.addRepresentation("cartoon", {{ color: "grey" }});

      const pepSel = rangesToSelection(payload.union_ranges);
      if (pepSel) {{
        comp.addRepresentation("cartoon", {{ sele: pepSel, color: "blue" }});
      }}

      const interSel = positionsToSelection(payload.intersection);
      if (interSel) {{
        comp.addRepresentation("ball+stick", {{ sele: interSel, color: "red" }});
      }}

      const modSel = positionsToSelection(payload.mods);
      if (modSel) {{
        comp.addRepresentation("ball+stick", {{ sele: modSel, color: "magenta" }});
      }}

      comp.autoView();
      {auto_png_js}
    }});
  </script>
</body>
</html>
"""
        Path(out_html_path).write_text(html, encoding="utf-8")

    def on_export_html(_):
        """Button handler: export the last rendered view as standalone HTML.

        Messages are appended to ``out`` without clearing it. The file is
        written to ``exports/<accession>_styled_session.html`` using the
        render snapshot kept in ``STATE``.
        """
        with out:
            accession = acc_input.value.strip()
            if not accession:
                print("Enter an accession first.")
                return

            last_pdb = STATE.get("last_pdb_path")
            if not last_pdb:
                print("Render a selection first (so peptides/mods exist) before exporting.")
                return

            target_dir = Path("exports")
            target_dir.mkdir(exist_ok=True)
            destination = target_dir / f"{accession}_styled_session.html"

            snapshot = {
                "union_ranges": STATE["last_union_ranges"],
                "inter_pos": STATE["last_inter_pos"],
                "mod_pos": STATE["last_mod_pos"],
            }
            _write_styled_ngl_html(
                acc=accession,
                pdb_path=STATE["last_pdb_path"],
                out_html_path=destination,
                auto_download_png=False,
                **snapshot
            )

            print(f"Exported styled HTML to: {destination.resolve()}")
            print("Open it in a browser — peptide/mod colors are preserved.")

    # -------------------------
    # 7) Events
    # -------------------------
    def on_load(_):
        """Button handler: fetch the peptides for the typed accession.

        Clears the output area, downloads the Scop3P rows, stores them in
        ``STATE`` and rebuilds the peptide list. API failures and empty
        results are reported as printed messages and leave the list empty.
        """
        with out:
            clear_output()

            # The previous view was just cleared from the output area; also
            # drop its snapshot so "Export styled HTML" cannot write the old
            # protein's structure under the newly typed accession.
            STATE.update(
                last_union_ranges=[],
                last_inter_pos=[],
                last_mod_pos=[],
                last_pdb_path=None,
            )

            acc = acc_input.value.strip()
            if not acc:
                print("Enter an accession (e.g., O00571) then click Load.")
                return

            try:
                df = fetch_scop3p_peptides(acc)
            except Exception as e:
                # Top-level UI boundary: report the failure instead of raising.
                print("API error:", e)
                STATE["df"] = pd.DataFrame()
                STATE["df_filtered"] = pd.DataFrame()
                peptide_multi.options = []
                return

            if df.empty:
                print(f"No peptides returned for {acc}.")
                STATE["df"] = df
                STATE["df_filtered"] = df
                peptide_multi.options = []
                return

            STATE["df"] = df
            STATE["acc_loaded"] = acc
            # Force a fresh structure download for the newly loaded accession.
            STATE["pdb_path"] = None
            STATE["last_action"] = None

            refresh_filtered_and_options(keep_selection=False)

            print(f"Loaded {len(df)} Scop3P peptide-mod rows for {acc}.")
            print("Use Search to filter; select peptides to map (auto-renders).")

    def on_mode_change(_):
        """Observer: rebuild the list for the new mode and re-render if needed."""
        refresh_filtered_and_options(keep_selection=True)
        if not peptide_multi.value:
            return
        STATE["last_action"] = "select"
        render_current_selection()

    def on_search_change(_):
        """Observer: re-filter the list for the new query and re-render if needed."""
        refresh_filtered_and_options(keep_selection=True)
        if not peptide_multi.value:
            return
        STATE["last_action"] = "select"
        render_current_selection()

    def on_selection_change(_):
        """Observer: render the new selection unless auto-render is suspended."""
        # Nothing to do while the selection is being set from code, or when
        # the selection is empty.
        if STATE["suspend_autorender"] or not peptide_multi.value:
            return
        STATE["last_action"] = "select"
        render_current_selection()

    def on_mods_toggle(_):
        """Observer: re-render when a modification display option changes."""
        if not peptide_multi.value:
            return
        STATE["last_action"] = "select"
        render_current_selection()

    def on_map_all(_):
        """Button handler: select every listed peptide and render them together."""
        STATE["last_action"] = "map_all"
        # Keep the selection observer quiet so the view is rendered only once,
        # by the explicit call below.
        STATE["suspend_autorender"] = True
        try:
            peptide_multi.value = tuple(value for _label, value in peptide_multi.options)
        finally:
            STATE["suspend_autorender"] = False
        render_current_selection()

    # Connect the widgets to their handlers. Buttons use click callbacks;
    # the other widgets are observed on their "value" trait only.
    load_btn.on_click(on_load)
    mode.observe(on_mode_change, names="value")
    search_box.observe(on_search_change, names="value")
    peptide_multi.observe(on_selection_change, names="value")
    # Both modification controls share one handler that just re-renders.
    show_mods_chk.observe(on_mods_toggle, names="value")
    show_mods_mode.observe(on_mods_toggle, names="value")
    map_all_btn.on_click(on_map_all)
    export_html_btn.on_click(on_export_html)

    # IMPORTANT: return the widget tree (Voila-compatible)
    return app


# Notebook usage: build the app with an empty default accession and display
# the widget tree returned by the factory.
app = scop3p_ngl_mapper_app("")
display(app)


VBox(children=(HTML(value='<b>Scop3P → AlphaFold → NGLView peptide mapper</b><br>Enter accession → Load → (opt…