# Scop3P

A comprehensive database of human phosphosites within their full context. Scop3P integrates sequences (UniProtKB/Swiss-Prot), structures (PDB), and uniformly reprocessed phosphoproteomics data (PRIDE) to annotate all known human phosphosites. 

Scop3P, available at https://iomics.ugent.be/scop3p, presents a unique resource for visualization and analysis of phosphosites and for understanding of phosphosite structure–function relationships.

Please cite: https://doi.org/10.1021/acs.jproteome.0c00306


### Install Dependencies

In [2]:
%%capture
!jupyter labextension install jupyterlab_3dmol
!jupyter labextension install @jupyter-widgets/jupyterlab-manager
!pip install pandas matplotlib py3Dmol nglview

### Import required packages

In [3]:
%%capture
import requests, tempfile,json,sys
import pandas as pd 
from b2bTools import SingleSeq, constants
import py3Dmol
import ipywidgets as widgets

### Fetch phospho peptides from Scop3P and map onto protein structures
> 1. Enter the protein ID (ex: P07949) and click 'Load'
> 2. The app will let you choose between all peptides ('All rows') and unique spans (the peptide ranges are merged)
> 3. Map all peptides onto the AlphaFold (AF) structure using 'Map all (filtered)' (shows the mass-spec coverage of your protein)
> 4. Alternatively click one or multiple peptides on peptide panel to see their structural mapping
> 5. Hint:
> > Explore what the search function does!

In [4]:
import pandas as pd
import requests
import urllib.request
from urllib.error import HTTPError, URLError
import re

import ipywidgets as widgets
from IPython.display import display, clear_output
import nglview as nv

# --- added (export only) ---
from pathlib import Path
import json


def scop3p_ngl_mapper_app(default_accession=""):
    # -------------------------
    # 1) Scop3P API fetch
    # -------------------------
    def fetch_scop3p_peptides(accession: str) -> pd.DataFrame:
        """Fetch the Scop3P peptide/modification rows for one accession.

        Parameters:
            accession: protein accession passed as the ``accession`` query
                parameter of the Scop3P ``get-peptides-modifications`` endpoint.

        Returns:
            A DataFrame built from the ``"peptides"`` list of the JSON reply
            (empty when the list is missing or empty). Position columns are
            converted to nullable ``Int64`` and a human-readable ``label``
            column is added.

        Raises:
            requests.HTTPError: when the API answers with an error status.
        """
        # Let requests build the query string so the accession is URL-encoded
        # instead of being pasted verbatim into the URL.
        r = requests.get(
            "https://iomics.ugent.be/scop3p/api/get-peptides-modifications",
            params={"accession": accession},
            timeout=30,
        )
        r.raise_for_status()
        data = r.json()

        df = pd.DataFrame(data.get("peptides", []))
        if df.empty:
            return df

        for c in ["peptideStart", "peptideEnd", "peptideModificationPosition", "uniprotPosition"]:
            if c in df.columns:
                df[c] = pd.to_numeric(df[c], errors="coerce").astype("Int64")

        def _num(row, col):
            # errors="coerce" above yields <NA> for unparsable values and
            # int(<NA>) raises; show "?" so one bad row cannot abort the load.
            v = row.get(col)
            return "?" if v is None or pd.isna(v) else str(int(v))

        df["label"] = df.apply(
            lambda x: (
                f'{x["peptideSequence"]} ({_num(x, "peptideStart")}-{_num(x, "peptideEnd")}) '
                f'@{x.get("modifiedResidue","")}{_num(x, "uniprotPosition")} score={x.get("score","")}'
            ),
            axis=1
        )
        return df

    # -------------------------
    # 2) AlphaFold download (fallback v6 -> v4)
    # -------------------------
    def download_alphafold_pdb(accession: str, versions=("v6", "v4")) -> str:
        """Download the AlphaFold F1 model for ``accession`` into the working directory.

        Each entry of ``versions`` is tried in order and the first one that
        yields a plausible file wins.

        Parameters:
            accession: accession used in the AlphaFold file name.
            versions: model version suffixes to try, in order of preference.

        Returns:
            The local path ``"<accession>.pdb"``.

        Raises:
            RuntimeError: when no version could be downloaded; the last
                underlying error is chained as the cause.
        """
        import os
        from urllib.parse import quote

        base = "https://alphafold.ebi.ac.uk/files"
        out = f"{accession}.pdb"
        # Download next to the target and rename on success, so a failed or
        # truncated download never overwrites (or leaves behind) `out`.
        tmp = f"{out}.part"
        last_err = None

        for v in versions:
            pdb_name = f"AF-{accession}-F1-model_{v}.pdb"
            url = f"{base}/{quote(pdb_name)}"
            try:
                urllib.request.urlretrieve(url, tmp)

                # Files under 1000 bytes are treated as failed downloads.
                if os.path.getsize(tmp) < 1000:
                    raise RuntimeError(f"Downloaded file too small from {url}")

                os.replace(tmp, out)
                return out
            except (HTTPError, URLError, RuntimeError) as e:
                last_err = e
                if os.path.exists(tmp):
                    os.remove(tmp)

        raise RuntimeError(
            f"Could not download AlphaFold PDB for {accession}. Last error: {last_err}"
        ) from last_err

    # -------------------------
    # 3) NGL helpers
    # -------------------------
    def positions_to_ranges(pos_list):
        """Collapse positions into sorted, inclusive ``(start, end)`` runs.

        Values are converted with ``int`` and de-duplicated first; consecutive
        integers are merged into one run. An empty input gives ``[]``.
        """
        if not pos_list:
            return []

        ordered = sorted({int(p) for p in pos_list})
        runs = [[ordered[0], ordered[0]]]
        for pos in ordered[1:]:
            current = runs[-1]
            if pos - current[1] == 1:
                # Still contiguous: extend the open run.
                current[1] = pos
            else:
                runs.append([pos, pos])
        return [tuple(run) for run in runs]

    def add_cartoon_selection(view, ranges, color, name):
        """Add one cartoon representation covering every ``(start, end)`` range.

        Does nothing when ``ranges`` is empty. The ranges are joined into a
        single selection string passed to ``view.add_representation``.
        """
        if ranges:
            clauses = []
            for start, end in ranges:
                clauses.append(f"resi {start}-{end}")
            view.add_representation(
                "cartoon",
                selection=" or ".join(clauses),
                color=color,
                name=name,
            )

    def add_positions(view, positions, color, name, repr_type="ball+stick"):
        """Add one representation highlighting individual residue positions.

        Positions are converted with ``int``, de-duplicated and sorted before
        being joined into a single selection string. Does nothing when
        ``positions`` is empty.
        """
        if positions:
            unique_sorted = sorted({int(p) for p in positions})
            selection = " or ".join(f"resi {int(p)}" for p in unique_sorted)
            view.add_representation(repr_type, selection=selection, color=color, name=name)

    # -------------------------
    # 4) Filter logic
    # -------------------------
    def filter_peptides(df: pd.DataFrame, query: str) -> pd.DataFrame:
        """Filter peptide rows by the search-box query.

        Supported query forms (after stripping whitespace):
            ``A-B``  peptides overlapping the inclusive range A..B
            ``>=P``  peptides ending at or after P
            ``<=P``  peptides starting at or before P
            ``P``    peptides covering position P
            other    case-insensitive literal substring of ``peptideSequence``

        Parameters:
            df: peptide table with ``peptideStart``, ``peptideEnd`` and
                ``peptideSequence`` columns; ``None``/empty is returned as is.
            query: raw text from the search box; empty or blank means no filter.

        Returns:
            The matching rows (the input itself when no filter applies).
        """
        if df is None or df.empty:
            return df
        if not query:
            return df

        q = query.strip()
        if not q:
            # Whitespace-only input is the same as "no filter".
            return df

        m = re.fullmatch(r"(\d+)\s*-\s*(\d+)", q)
        if m:
            a, b = int(m.group(1)), int(m.group(2))
            return df[(df["peptideStart"] <= b) & (df["peptideEnd"] >= a)]

        m = re.fullmatch(r">=\s*(\d+)", q)
        if m:
            p = int(m.group(1))
            return df[df["peptideEnd"] >= p]

        m = re.fullmatch(r"<=\s*(\d+)", q)
        if m:
            p = int(m.group(1))
            return df[df["peptideStart"] <= p]

        # \d+ only matches characters int() can parse; str.isdigit() also
        # accepts e.g. superscript digits, which int() rejects.
        if re.fullmatch(r"\d+", q):
            p = int(q)
            return df[(df["peptideStart"] <= p) & (df["peptideEnd"] >= p)]

        # regex=False: the query is a literal substring, so characters such as
        # "(" or "[" typed by the user cannot raise a regex compile error.
        return df[df["peptideSequence"].astype(str).str.contains(q, case=False, na=False, regex=False)]

    # -------------------------
    # 5) UI
    # -------------------------
    acc_input = widgets.Text(value=default_accession, description="ACC_ID:", layout=widgets.Layout(width="260px"))
    load_btn = widgets.Button(description="Load", button_style="primary")

    mode = widgets.ToggleButtons(
        options=["Unique peptide spans", "All rows"],
        value="Unique peptide spans",
        description="List:"
    )

    search_box = widgets.Text(
        value="",
        placeholder="Filter: substring (SSFG), range (70-90), >=150, <=300, or single pos (154)",
        description="Search:",
        layout=widgets.Layout(width="750px")
    )

    peptide_multi = widgets.SelectMultiple(
        description="Peptides:",
        options=[],
        layout=widgets.Layout(width="980px", height="240px")
    )

    show_mods_chk = widgets.Checkbox(value=True, description="Show modified sites (magenta)")
    show_mods_mode = widgets.ToggleButtons(
        options=["Selected peptides only", "All protein mods"],
        value="Selected peptides only",
        description="Mods:"
    )

    map_all_btn = widgets.Button(description="Map all (filtered)", button_style="warning")

    # --- added (export only) ---
    export_html_btn = widgets.Button(description="Export styled HTML", button_style="info")
    # export_png_btn  = widgets.Button(description="Export PNG (via HTML)", button_style="info")

    out = widgets.Output()

    display(widgets.HTML(
        "<b>Scop3P → AlphaFold → NGLView peptide mapper</b><br>"
        "Enter accession → Load → (optional) Search → select peptides (auto-renders)."
    ))
    display(widgets.HBox([acc_input, load_btn, mode]))
    display(search_box)
    display(peptide_multi)
    display(widgets.HBox([map_all_btn, show_mods_chk, show_mods_mode, export_html_btn])) #export_png_btn
    display(out)

    # -------------------------
    # 6) State
    # -------------------------
    # Mutable state shared by the nested callbacks of this app.
    STATE = {
        "df": pd.DataFrame(),           # all peptide rows from the last successful Load
        "df_filtered": pd.DataFrame(),  # "df" after applying the search-box filter
        "pdb_path": None,               # cached local model path; None forces a new download
        "acc_loaded": None,             # accession the cached data/model belong to
        "suspend_autorender": False,    # True while code sets the selection, so the observer skips rendering
        "last_action": None,  # "map_all" or "select"

        # --- added (export only) ---
        # Snapshot of the most recent render, read by the export handlers.
        "last_union_ranges": [],
        "last_inter_pos": [],
        "last_mod_pos": [],
        "last_pdb_path": None,
    }

    def build_peptide_options(df: pd.DataFrame, mode_value: str):
        """Build the ``(label, value)`` options for the peptide multi-select.

        Parameters:
            df: (filtered) peptide table; ``None``/empty gives ``[]``.
            mode_value: ``"Unique peptide spans"`` groups rows by
                (sequence, start, end) and uses that tuple as the option
                value; any other value lists every row, using the row's
                ``label`` and its integer index as the value.

        Returns:
            A list of ``(label, value)`` tuples.
        """
        if df is None or df.empty:
            return []

        if mode_value == "Unique peptide spans":
            # "score" is read as optional when the row labels are built, so do
            # not require it here either.
            has_score = "score" in df.columns
            agg_spec = {"n_mod_sites": ("uniprotPosition", "nunique")}
            if has_score:
                agg_spec["max_score"] = ("score", "max")

            g = (
                df.groupby(["peptideSequence", "peptideStart", "peptideEnd"], as_index=False)
                  .agg(**agg_spec)
            )
            opts = []
            for _, row in g.iterrows():
                key = (row["peptideSequence"], int(row["peptideStart"]), int(row["peptideEnd"]))
                max_score = row["max_score"] if has_score else "NA"
                label = f'{key[0]} ({key[1]}-{key[2]}) | modSites={int(row["n_mod_sites"])} maxScore={max_score}'
                opts.append((label, key))
            return opts

        # Only the label column and the index are needed; avoid building a
        # full row Series per peptide.
        return [(label, int(idx)) for idx, label in df["label"].items()]

    def ensure_loaded_assets(acc: str):
        """Return the local model path for ``acc``, downloading it when not cached.

        A change of accession invalidates the cached path before the lookup.
        """
        accession_changed = STATE["acc_loaded"] != acc
        if accession_changed:
            STATE.update(pdb_path=None, acc_loaded=acc)

        cached = STATE["pdb_path"]
        if cached is None:
            cached = download_alphafold_pdb(acc)
            STATE["pdb_path"] = cached
        return cached

    def refresh_filtered_and_options(keep_selection=True):
        """Re-apply the search filter and rebuild the peptide list.

        With ``keep_selection`` the previously selected values that are still
        offered are re-selected; autorender is suspended while doing so.
        """
        filtered = filter_peptides(STATE["df"], search_box.value)
        STATE["df_filtered"] = filtered

        # Capture the selection before replacing the options.
        previous = set(peptide_multi.value) if keep_selection else set()
        peptide_multi.options = build_peptide_options(filtered, mode.value)

        if not (keep_selection and previous):
            return

        still_offered = tuple(val for _, val in peptide_multi.options if val in previous)
        STATE["suspend_autorender"] = True
        try:
            peptide_multi.value = still_offered
        finally:
            STATE["suspend_autorender"] = False

    def render_current_selection():
        """Render the selected peptides (and optional mod sites) on the AlphaFold model.

        Reads the widget values and ``STATE``, writes into the ``out`` output
        widget, and stores the rendered ranges/positions in ``STATE`` for the
        export handlers. Prints a hint and returns early when no data is
        loaded, nothing is selected, or the model cannot be downloaded.
        """
        with out:
            clear_output()

            # Use the accession whose peptides are actually loaded. The text box
            # may already hold a different accession that was typed but not yet
            # loaded, which would map these peptides onto the wrong structure.
            acc = STATE["acc_loaded"] or acc_input.value.strip()
            df_all = STATE["df"]
            df_filt = STATE["df_filtered"]

            if df_all is None or df_all.empty:
                print("No data loaded. Click Load.")
                return

            selected = list(peptide_multi.value)
            if not selected:
                print("Select at least one peptide (or click 'Map all (filtered)').")
                return

            try:
                pdb_path = ensure_loaded_assets(acc)
            except Exception as e:
                print("AlphaFold download error:", e)
                return

            spans = []
            mod_positions = []

            if mode.value == "Unique peptide spans":
                # Option values are (sequence, start, end) tuples in this mode.
                spans = [(int(s), int(e)) for _, s, e in selected]

                if show_mods_mode.value == "Selected peptides only":
                    for pepSeq, s, e in selected:
                        sub = df_all[
                            (df_all["peptideSequence"] == pepSeq)
                            & (df_all["peptideStart"] == int(s))
                            & (df_all["peptideEnd"] == int(e))
                        ]
                        mod_positions.extend(sub["uniprotPosition"].dropna().astype(int).tolist())
                else:
                    mod_positions = df_all["uniprotPosition"].dropna().astype(int).tolist()

            else:
                # Option values are row index labels in "All rows" mode.
                sub = df_filt.loc[selected].copy()
                spans = [(int(r["peptideStart"]), int(r["peptideEnd"])) for _, r in sub.iterrows()]

                if show_mods_mode.value == "Selected peptides only":
                    mod_positions = sub["uniprotPosition"].dropna().astype(int).tolist()
                else:
                    mod_positions = df_all["uniprotPosition"].dropna().astype(int).tolist()

            # union/intersection
            pos_lists = [list(range(a, b + 1)) for a, b in spans]
            union_pos = sorted(set(p for L in pos_lists for p in L))
            inter_pos = sorted(set(pos_lists[0]).intersection(*map(set, pos_lists[1:]))) if len(pos_lists) > 1 else []
            union_ranges = positions_to_ranges(union_pos)

            # --- Big NGL panel ---
            view = nv.NGLWidget()
            view.add_component(pdb_path)
            view.clear_representations()
            view.add_cartoon(color="silver")

            add_cartoon_selection(view, union_ranges, color="blue", name="peptide_union")

            if inter_pos:
                add_positions(view, inter_pos, color="red", name="peptide_intersection", repr_type="ball+stick")

            if show_mods_chk.value and mod_positions:
                add_positions(view, mod_positions, color="magenta", name="mods", repr_type="ball+stick")

            view.center()

            # Make structure panel bigger (tweak as you like)
            view.layout = widgets.Layout(width="1100px", height="700px")

            display(view)

            # --- added (export only): store last render state ---
            STATE["last_union_ranges"] = union_ranges
            STATE["last_inter_pos"] = sorted(set(int(x) for x in inter_pos))
            STATE["last_mod_pos"] = sorted(set(int(x) for x in mod_positions))
            STATE["last_pdb_path"] = pdb_path

            # --- Summary printing ---
            # The furthest end is the maximum over all spans, not the end of
            # the span that happens to start last.
            first_start = min(s for s, _ in spans)
            last_end = max(e for _, e in spans)

            print(f"\nACC_ID: {acc}")
            print(f"AlphaFold model: {pdb_path}")
            print(f"Selected peptide spans: {len(spans)}")

            # When map-all clicked, show compact coverage (also useful generally)
            if STATE["last_action"] == "map_all":
                print(f"Coverage (first peptide start → last peptide end): {first_start} → {last_end}")
            else:
                # For manual selection, still show compact coverage (less spammy)
                print(f"Coverage: {first_start} → {last_end}")

            if inter_pos:
                print(f"Intersection (red): {len(inter_pos)} residues")
            elif len(spans) > 1:
                # Several peptides were selected but no residue is common to all.
                print("Intersection: none (selected peptides share no common residue)")
            else:
                print("Intersection: none (only one peptide)")

            if show_mods_chk.value:
                print(f"Modified sites (magenta): {len(set(mod_positions))} unique positions")

    # -------------------------
    # --- added (export only): standalone styled HTML writer ---
    # -------------------------
    def _write_styled_ngl_html(acc, pdb_path, union_ranges, inter_pos, mod_pos, out_html_path, auto_download_png=False):
        pdb_text = Path(pdb_path).read_text(errors="ignore")

        payload = {
            "acc": acc,
            "union_ranges": union_ranges,
            "intersection": inter_pos,
            "mods": mod_pos
        }

        # If auto_download_png=True, the HTML will immediately trigger a PNG download via stage.makeImage()
        auto_png_js = """
      // Auto-download PNG snapshot
      stage.makeImage({ factor: 2, antialias: true, trim: false }).then(function (blob) {
        var a = document.createElement("a");
        a.href = URL.createObjectURL(blob);
        a.download = payload.acc + "_snapshot.png";
        document.body.appendChild(a);
        a.click();
        a.remove();
      });
        """ if auto_download_png else ""

        html = f"""<!doctype html>
<html>
<head>
  <meta charset="utf-8"/>
  <title>{acc} styled NGL session</title>
  <style>
    body {{ margin: 0; font-family: sans-serif; }}
    #viewport {{ width: 100vw; height: 100vh; }}
    #panel {{
      position: absolute; top: 10px; left: 10px; z-index: 10;
      background: rgba(255,255,255,0.9); padding: 10px; border-radius: 8px;
      max-width: 520px;
    }}
    code {{ font-size: 12px; }}
  </style>
  <script src="https://unpkg.com/ngl@latest/dist/ngl.js"></script>
</head>
<body>
  <div id="panel">
    <b>{acc}</b><br/>
    <div>Grey: protein | Blue: peptides | Red: intersection | Magenta: mods</div>
    <div style="margin-top:6px;"><code>union ranges: {len(union_ranges)} | mods: {len(mod_pos)} | intersection: {len(inter_pos)}</code></div>
  </div>
  <div id="viewport"></div>

  <script>
    const pdbText = {json.dumps(pdb_text)};
    const payload = {json.dumps(payload)};

    function rangesToSelection(ranges) {{
      if (!ranges || ranges.length === 0) return "";
      return ranges.map(r => `resi ${{r[0]}}-${{r[1]}}`).join(" OR ");
    }}

    function positionsToSelection(pos) {{
      if (!pos || pos.length === 0) return "";
      return pos.map(p => `resi ${{p}}`).join(" OR ");
    }}

    const stage = new NGL.Stage("viewport", {{ backgroundColor: "white" }});
    window.addEventListener("resize", () => stage.handleResize(), false);

    const blob = new Blob([pdbText], {{type: "text/plain"}});
    stage.loadFile(blob, {{ ext: "pdb" }}).then(comp => {{
      comp.addRepresentation("cartoon", {{ color: "grey" }});

      const pepSel = rangesToSelection(payload.union_ranges);
      if (pepSel) {{
        comp.addRepresentation("cartoon", {{ sele: pepSel, color: "blue" }});
      }}

      const interSel = positionsToSelection(payload.intersection);
      if (interSel) {{
        comp.addRepresentation("ball+stick", {{ sele: interSel, color: "red" }});
      }}

      const modSel = positionsToSelection(payload.mods);
      if (modSel) {{
        comp.addRepresentation("ball+stick", {{ sele: modSel, color: "magenta" }});
      }}

      comp.autoView();
      {auto_png_js}
    }});
  </script>
</body>
</html>
"""
        Path(out_html_path).write_text(html, encoding="utf-8")

    # -------------------------
    # --- added (export only): export button actions ---
    # -------------------------
    def on_export_html(_):
        """Button handler: write the last rendered view to ``exports/<acc>_styled_session.html``.

        Prints a hint instead when the accession box is empty or nothing has
        been rendered yet.
        """
        with out:
            accession = acc_input.value.strip()
            if accession == "":
                print("Enter an accession first.")
                return

            rendered_pdb = STATE.get("last_pdb_path")
            if not rendered_pdb:
                print("Render a selection first (so peptides/mods exist) before exporting.")
                return

            target_dir = Path("exports")
            target_dir.mkdir(exist_ok=True)
            target = target_dir / f"{accession}_styled_session.html"

            _write_styled_ngl_html(
                acc=accession,
                pdb_path=rendered_pdb,
                union_ranges=STATE["last_union_ranges"],
                inter_pos=STATE["last_inter_pos"],
                mod_pos=STATE["last_mod_pos"],
                out_html_path=target,
                auto_download_png=False,
            )

            print(f"Exported styled HTML to: {target.resolve()}")
            print("Open it in a browser — peptide/mod colors are preserved.")

    def on_export_png(_):
        """Button handler: write ``exports/<acc>_snapshot_downloader.html``.

        The generated page is written with ``auto_download_png=True``. Prints a
        hint instead when the accession box is empty or nothing has been
        rendered yet.
        """
        with out:
            accession = acc_input.value.strip()
            if accession == "":
                print("Enter an accession first.")
                return

            rendered_pdb = STATE.get("last_pdb_path")
            if not rendered_pdb:
                print("Render a selection first (so peptides/mods exist) before exporting.")
                return

            target_dir = Path("exports")
            target_dir.mkdir(exist_ok=True)
            target = target_dir / f"{accession}_snapshot_downloader.html"

            _write_styled_ngl_html(
                acc=accession,
                pdb_path=rendered_pdb,
                union_ranges=STATE["last_union_ranges"],
                inter_pos=STATE["last_inter_pos"],
                mod_pos=STATE["last_mod_pos"],
                out_html_path=target,
                auto_download_png=True,
            )

            print(f"Created PNG downloader HTML: {target.resolve()}")
            print("Open this HTML in a browser — it will auto-download the PNG snapshot with styling.")

    # -------------------------
    # 7) Events
    # -------------------------
    def on_load(_):
        """Button handler: fetch peptides for the typed accession and fill the list.

        On an API error or an empty result the peptide list is cleared and a
        message is printed. On success the cached model path and last action
        are reset and the options are rebuilt without keeping the selection.
        """
        with out:
            clear_output()
            accession = acc_input.value.strip()
            if accession == "":
                print("Enter an accession (e.g., O00571) then click Load.")
                return

            try:
                peptides = fetch_scop3p_peptides(accession)
            except Exception as err:
                print("API error:", err)
                STATE["df"] = pd.DataFrame()
                STATE["df_filtered"] = pd.DataFrame()
                peptide_multi.options = []
                return

            if peptides.empty:
                print(f"No peptides returned for {accession}.")
                STATE["df"] = peptides
                STATE["df_filtered"] = peptides
                peptide_multi.options = []
                return

            STATE.update(df=peptides, acc_loaded=accession, pdb_path=None, last_action=None)

            refresh_filtered_and_options(keep_selection=False)

            print(f"Loaded {len(peptides)} Scop3P peptide-mod rows for {accession}.")
            print("Use Search to filter; select peptides to map (auto-renders).")

    def on_mode_change(_):
        """Observer: rebuild the peptide list for the new list mode, then re-render if anything is selected."""
        refresh_filtered_and_options(keep_selection=True)
        if not peptide_multi.value:
            return
        STATE["last_action"] = "select"
        render_current_selection()

    def on_search_change(_):
        """Observer: re-filter the peptide list for the new query, then re-render if anything is selected."""
        refresh_filtered_and_options(keep_selection=True)
        if not peptide_multi.value:
            return
        STATE["last_action"] = "select"
        render_current_selection()

    def on_selection_change(_):
        """Observer: render a non-empty selection unless autorender is suspended."""
        should_render = (not STATE["suspend_autorender"]) and bool(peptide_multi.value)
        if should_render:
            STATE["last_action"] = "select"
            render_current_selection()

    def on_mods_toggle(_):
        """Observer: re-render the current selection after a mod-display option changed."""
        if not peptide_multi.value:
            return
        STATE["last_action"] = "select"
        render_current_selection()

    def on_map_all(_):
        """Button handler: select every listed peptide and render once.

        Autorender is suspended while the selection is set, so the single
        render below is the only one triggered.
        """
        STATE["last_action"] = "map_all"
        STATE["suspend_autorender"] = True
        try:
            every_value = [val for (_, val) in peptide_multi.options]
            peptide_multi.value = tuple(every_value)
        finally:
            STATE["suspend_autorender"] = False
        render_current_selection()

    # Connect the widgets to their handlers: buttons via on_click, value
    # widgets via observe on their "value" trait.
    load_btn.on_click(on_load)
    mode.observe(on_mode_change, names="value")
    search_box.observe(on_search_change, names="value")
    peptide_multi.observe(on_selection_change, names="value")
    # Both mod-display controls share one handler.
    show_mods_chk.observe(on_mods_toggle, names="value")
    show_mods_mode.observe(on_mods_toggle, names="value")
    map_all_btn.on_click(on_map_all)

    # --- added (export only) ---
    export_html_btn.on_click(on_export_html)
    # The PNG export button is disabled, so on_export_png is not wired up.
    # export_png_btn.on_click(on_export_png)

    # Return None rather than the state dict, so calling the app as the last
    # statement of a notebook cell does not echo a value.
    return None


# Run the app (empty default forces user to type accession)
scop3p_ngl_mapper_app("")




HTML(value='<b>Scop3P → AlphaFold → NGLView peptide mapper</b><br>Enter accession → Load → (optional) Search →…

HBox(children=(Text(value='', description='ACC_ID:', layout=Layout(width='260px')), Button(button_style='prima…

Text(value='', description='Search:', layout=Layout(width='750px'), placeholder='Filter: substring (SSFG), ran…

SelectMultiple(description='Peptides:', layout=Layout(height='240px', width='980px'), options=(), value=())



Output()

### Upload your own data (Peptide table)
#### Takes a peptide file and visualizes the peptides and modifications on protein structures
> 1. Choose a peptide file (TSV / tab-delimited text file) and click 'Load file'
> 2. The app will autofill the columns using keywords; otherwise, select the column headers yourself
>> (protein ID column name [ACC_ID], peptide sequence column name [pep_seq], peptide start/end column names [pep_start, pep_end] and the modified position column name [UP_POS])
> 3. Click 'Build mapping'
> 4. Select the protein from list for which you want to map the peptides (note: these proteins are identified in your sample)
> 5. Map all peptides onto the AlphaFold (AF) structure using 'Map all peptides' (shows the mass-spec coverage of your protein)
> 6. Alternatively click one or multiple peptides on peptide panel to see their structural mapping
> 7. Hint:
>> Explore what the search function does!


In [6]:
import pandas as pd
import urllib.request
from urllib.error import HTTPError, URLError
from pathlib import Path
import shutil
import json

import ipywidgets as widgets
from IPython.display import display, clear_output
import nglview as nv


def peptide_upload_ngl_mapper_app():
    # -------------------------
    # AlphaFold download
    # -------------------------
    def download_alphafold_pdb(accession: str, versions=("v6", "v4")) -> str:
        """Download the AlphaFold F1 model for ``accession`` into the working directory.

        Each entry of ``versions`` is tried in order and the first one that
        yields a plausible file wins.

        Parameters:
            accession: accession used in the AlphaFold file name.
            versions: model version suffixes to try, in order of preference.

        Returns:
            The local path ``"<accession>.pdb"``.

        Raises:
            RuntimeError: when no version could be downloaded; the last
                underlying error is chained as the cause.
        """
        import os
        from urllib.parse import quote

        base = "https://alphafold.ebi.ac.uk/files"
        out = f"{accession}.pdb"
        # Download next to the target and rename on success, so a failed or
        # truncated download never overwrites (or leaves behind) `out`.
        tmp = f"{out}.part"
        last_err = None
        for v in versions:
            pdb_name = f"AF-{accession}-F1-model_{v}.pdb"
            url = f"{base}/{quote(pdb_name)}"
            try:
                urllib.request.urlretrieve(url, tmp)
                # Files under 1000 bytes are treated as failed downloads.
                if os.path.getsize(tmp) < 1000:
                    raise RuntimeError("Downloaded file too small")
                os.replace(tmp, out)
                return out
            except (HTTPError, URLError, RuntimeError) as e:
                last_err = e
                if os.path.exists(tmp):
                    os.remove(tmp)
        raise RuntimeError(f"No AlphaFold structure for {accession}: {last_err}") from last_err

    # -------------------------
    # NGL helpers
    # -------------------------
    def positions_to_ranges(pos_list):
        """Collapse positions into sorted, inclusive ``(start, end)`` runs.

        Values are converted with ``int`` and de-duplicated first; consecutive
        integers are merged into one run. An empty input gives ``[]``.
        """
        if not pos_list:
            return []

        ordered = sorted({int(p) for p in pos_list})
        runs = [[ordered[0], ordered[0]]]
        for pos in ordered[1:]:
            current = runs[-1]
            if pos - current[1] == 1:
                # Still contiguous: extend the open run.
                current[1] = pos
            else:
                runs.append([pos, pos])
        return [tuple(run) for run in runs]

    def add_cartoon_selection(view, ranges, color="blue", name="peptides"):
        """Add one cartoon representation covering every ``(start, end)`` range.

        Does nothing when ``ranges`` is empty.
        """
        if ranges:
            clauses = []
            for start, end in ranges:
                clauses.append(f"resi {start}-{end}")
            view.add_representation(
                "cartoon",
                selection=" or ".join(clauses),
                color=color,
                name=name,
            )

    def add_positions(view, positions, color="magenta", name="sites"):
        """Add one ball+stick representation highlighting individual positions.

        Positions are converted with ``int``, de-duplicated and sorted before
        being joined into a single selection string. Does nothing when
        ``positions`` is empty.
        """
        if positions:
            unique_sorted = sorted({int(p) for p in positions})
            selection = " or ".join(f"resi {int(p)}" for p in unique_sorted)
            view.add_representation("ball+stick", selection=selection, color=color, name=name)

    # -------------------------
    # Robust upload reader
    # -------------------------
    def read_uploaded_table(upload_widget):
        """Read the first uploaded file into a DataFrame.

        Two shapes of ``upload_widget.value`` are handled: a dict of items
        (file name under ``metadata.name``) and a list/tuple of items (file
        name under ``name``, bytes under ``content`` or ``data``).

        The file is parsed as tab-separated first. If that fails, or yields a
        single column while a comma-separated parse yields several, the
        comma-separated result is used instead.

        Returns:
            ``(df, filename)`` on success, or ``(None, message)`` when nothing
            usable was uploaded.
        """
        import io

        if not upload_widget.value:
            return None, "No file uploaded."

        v = upload_widget.value
        if isinstance(v, dict):
            item = next(iter(v.values()))
            filename = item.get("metadata", {}).get("name", "uploaded")
            content = item.get("content", None)
        elif isinstance(v, (list, tuple)) and len(v) > 0:
            item = v[0]
            filename = item.get("name", "uploaded")
            content = item.get("content", None) or item.get("data", None)
        else:
            return None, "Unrecognized upload payload."

        if content is None:
            return None, "Uploaded file content not found."

        try:
            df = pd.read_csv(io.BytesIO(content), sep="\t")
        except Exception:
            # Best-effort fallback kept from the original behavior.
            return pd.read_csv(io.BytesIO(content), sep=","), filename

        if df.shape[1] == 1:
            # Parsing a CSV with sep="\t" does not fail, it just produces one
            # wide column, so the exception fallback alone never catches it.
            try:
                comma_df = pd.read_csv(io.BytesIO(content), sep=",")
            except Exception:
                comma_df = None
            if comma_df is not None and comma_df.shape[1] > 1:
                df = comma_df

        return df, filename

    # -------------------------
    # UI
    # -------------------------
    upload = widgets.FileUpload(accept=".tsv,.txt,.csv", multiple=False, description="Choose file")
    load_file_btn = widgets.Button(description="Load file", button_style="primary")

    protein_col = widgets.Dropdown(description="Protein ID:", options=[])
    pepseq_col  = widgets.Dropdown(description="Pep seq:", options=[])
    pepstart_col = widgets.Dropdown(description="pep_start:", options=[])
    pepend_col   = widgets.Dropdown(description="pep_end:", options=[])
    uppos_col   = widgets.Dropdown(description="UP_POS:", options=[])

    build_btn = widgets.Button(description="Build mapping", button_style="success")

    protein_dd = widgets.Dropdown(description="Protein:", options=[])
    list_mode = widgets.ToggleButtons(options=["Unique peptide spans", "All rows"], value="Unique peptide spans", description="List:")

    peptide_multi = widgets.SelectMultiple(
        description="Peptides:",
        options=[],
        layout=widgets.Layout(width="980px", height="240px")
    )

    map_all_btn = widgets.Button(description="Map all peptides", button_style="warning")

    export_pdb_btn = widgets.Button(description="Export PDB", button_style="info")
    export_html_btn = widgets.Button(description="Export styled HTML session", button_style="info")
    export_sel_btn = widgets.Button(description="Export selections (TSV)", button_style="info")

    show_mods_chk = widgets.Checkbox(value=True, description="Show mods (magenta)")
    show_mods_mode = widgets.ToggleButtons(
        options=["Selected peptides only", "All protein mods"],
        value="Selected peptides only",
        description="Mods:"
    )

    viewer_size = widgets.Dropdown(
        options=[("Large", ("1100px","700px")), ("Medium", ("900px","550px")), ("Huge", ("1300px","850px"))],
        value=("1100px","700px"),
        description="Viewer:"
    )

    out = widgets.Output()

    display(widgets.HTML("<b>Upload peptide table (protein-mapped pep_start/pep_end) → map to AlphaFold with NGLView</b>"))
    display(widgets.HBox([upload, load_file_btn]))
    display(widgets.HBox([protein_col, pepseq_col, pepstart_col, pepend_col, uppos_col, build_btn]))
    display(widgets.HBox([protein_dd, list_mode, viewer_size]))
    display(peptide_multi)
    display(widgets.HBox([
        map_all_btn,
        export_pdb_btn,
        export_html_btn,
        export_sel_btn,
        show_mods_chk,
        show_mods_mode
    ]))
    display(out)

    # -------------------------
    # State
    # -------------------------
    STATE = {
        "raw_df": pd.DataFrame(),
        "map_df": pd.DataFrame(),
        "suspend": False,
        "last_action": None,

        # last render info for exports
        "last_acc": None,
        "last_pdb_path": None,
        "last_union_ranges": [],
        "last_inter_positions": [],
        "last_mod_positions": [],
    }

    # -------------------------
    # Utilities
    # -------------------------
    def guess_column(cols, candidates):
        """Return the first column whose name matches a candidate, ignoring case.

        Candidates are tried in order; the original spelling of the matching
        column is returned, or ``None`` when nothing matches.
        """
        by_lower = {}
        for col in cols:
            by_lower[col.lower()] = col

        hits = (by_lower[c.lower()] for c in candidates if c.lower() in by_lower)
        return next(hits, None)

    def build_mapping_df():
        """Build the normalized mapping table from the raw upload and the chosen columns.

        Returns:
            A DataFrame with columns ``ACC_ID``, ``Pep_seq``, ``pep_start``,
            ``pep_end`` and ``UP_POS`` (the last three as ``int``), keeping
            only complete rows with ``pep_start >= 1`` and
            ``pep_end >= pep_start``. An empty DataFrame is returned when no
            file is loaded or a column dropdown is unset.
        """
        df = STATE["raw_df"]
        if df is None or df.empty:
            return pd.DataFrame()

        chosen = {
            "ACC_ID": protein_col.value,
            "Pep_seq": pepseq_col.value,
            "pep_start": pepstart_col.value,
            "pep_end": pepend_col.value,
            "UP_POS": uppos_col.value,
        }
        if not all(chosen.values()):
            return pd.DataFrame()

        m = df[list(chosen.values())].copy()
        m.columns = list(chosen.keys())

        # Drop rows without an ID or sequence BEFORE the string cast:
        # astype(str) turns NaN into the literal "nan", which a later dropna
        # cannot remove and which would show up as a protein called "nan".
        m = m.dropna(subset=["ACC_ID", "Pep_seq"]).copy()
        m["ACC_ID"] = m["ACC_ID"].astype(str).str.strip()
        m["Pep_seq"] = m["Pep_seq"].astype(str).str.strip()
        m = m[(m["ACC_ID"] != "") & (m["Pep_seq"] != "")].copy()

        for c in ["pep_start", "pep_end", "UP_POS"]:
            m[c] = pd.to_numeric(m[c], errors="coerce")

        m = m.dropna(subset=["pep_start", "pep_end", "UP_POS"]).copy()
        m["pep_start"] = m["pep_start"].astype(int)
        m["pep_end"] = m["pep_end"].astype(int)
        m["UP_POS"] = m["UP_POS"].astype(int)

        m = m[(m["pep_start"] >= 1) & (m["pep_end"] >= m["pep_start"])].copy()
        return m

    def peptide_options_for_acc(acc):
        """Build the ``(label, value)`` peptide options for one protein.

        In "Unique peptide spans" mode rows are grouped by
        (sequence, start, end) and that tuple is the option value; otherwise
        every row is listed with its index label as the value.
        """
        table = STATE["map_df"]
        rows = table[table["ACC_ID"] == acc].copy()
        if rows.empty:
            return []

        if list_mode.value != "Unique peptide spans":
            per_row = []
            for idx, r in rows.iterrows():
                per_row.append(
                    (f'{r["Pep_seq"]} ({int(r["pep_start"])}-{int(r["pep_end"])}) @UP_POS={int(r["UP_POS"])}', idx)
                )
            return per_row

        grouped = rows.groupby(["Pep_seq", "pep_start", "pep_end"], as_index=False).agg(
            n_mod_sites=("UP_POS", "nunique"),
            n_rows=("UP_POS", "size"),
        )
        per_span = []
        for _, r in grouped.iterrows():
            key = (r["Pep_seq"], int(r["pep_start"]), int(r["pep_end"]))
            per_span.append(
                (f'{key[0]} ({key[1]}-{key[2]}) | modSites={int(r["n_mod_sites"])} rows={int(r["n_rows"])}', key)
            )
        return per_span

    def refresh_peptides(acc, keep_selection=False):
        """Reload the peptide list for ``acc``, optionally restoring selection.

        With ``keep_selection`` set, previously selected values that are still
        among the new options are re-selected while ``STATE["suspend"]`` is
        raised, so the selection observer does not react to the restore.
        """
        previous = set(peptide_multi.value) if keep_selection else set()
        new_options = peptide_options_for_acc(acc)
        peptide_multi.options = new_options

        if not (keep_selection and previous):
            return

        # Keep the order of the new option list, not of the old selection.
        survivors = tuple(value for _, value in new_options if value in previous)
        STATE["suspend"] = True
        try:
            peptide_multi.value = survivors
        finally:
            STATE["suspend"] = False

    # -------------------------
    # Renderer
    # -------------------------
    def render_current():
        """Render the selected peptides on the AlphaFold model inside ``out``.

        Steps, all performed with ``out`` as the active output:
        clear it; bail out with a hint when the mapping table, the protein or
        the peptide selection is missing; download the model; derive peptide
        spans and modification positions from the selection; show the
        structure with the union of spans in blue, residues common to all
        selected spans in red and (when enabled) modification sites in
        magenta; finally store the result in ``STATE`` for the export buttons
        and print a short summary.
        """
        with out:
            clear_output()

            if STATE["map_df"] is None or STATE["map_df"].empty:
                print("Build the mapping table first.")
                return

            acc = protein_dd.value
            if not acc:
                print("Select a protein.")
                return

            picked = list(peptide_multi.value)
            if not picked:
                print("Select one or more peptides, or click 'Map all peptides'.")
                return

            try:
                pdb_path = download_alphafold_pdb(acc)
                STATE["last_pdb_path"] = pdb_path
            except Exception as err:
                print(f"[AlphaFold not available for {acc}]")
                print(err)
                return

            mapping = STATE["map_df"]
            acc_rows = mapping[mapping["ACC_ID"] == acc].copy()

            selected_mods_only = show_mods_mode.value == "Selected peptides only"

            if list_mode.value == "Unique peptide spans":
                # Selection values are (Pep_seq, pep_start, pep_end) tuples.
                spans = [(int(start), int(end)) for _, start, end in picked]

                if selected_mods_only:
                    mod_positions = []
                    for pep_seq, start, end in picked:
                        same_span = (
                            (acc_rows["Pep_seq"] == pep_seq)
                            & (acc_rows["pep_start"] == int(start))
                            & (acc_rows["pep_end"] == int(end))
                        )
                        mod_positions.extend(acc_rows.loc[same_span, "UP_POS"].tolist())
                else:
                    mod_positions = acc_rows["UP_POS"].tolist()
            else:
                # Selection values are index labels of the mapping rows.
                picked_rows = acc_rows.loc[picked].copy()
                starts = picked_rows["pep_start"].astype(int).tolist()
                ends = picked_rows["pep_end"].astype(int).tolist()
                spans = list(zip(starts, ends))

                mod_source = picked_rows if selected_mods_only else acc_rows
                mod_positions = mod_source["UP_POS"].astype(int).tolist()

            # Residues covered by any span (union) and by every span
            # (intersection); the intersection is left empty for one span.
            covered = [set(range(first, last + 1)) for first, last in spans]
            union_pos = sorted(set().union(*covered))
            if len(covered) > 1:
                inter_pos = sorted(set.intersection(*covered))
            else:
                inter_pos = []
            union_ranges = positions_to_ranges(union_pos)

            view = nv.NGLWidget()
            view.add_component(pdb_path)
            view.clear_representations()
            view.add_cartoon(color="silver")

            add_cartoon_selection(view, union_ranges, color="blue", name="peptide_union")
            if inter_pos:
                add_positions(view, inter_pos, color="red", name="intersection")
            if show_mods_chk.value and mod_positions:
                add_positions(view, mod_positions, color="magenta", name="mods")

            width, height = viewer_size.value
            view.layout = widgets.Layout(width=width, height=height)
            view.center()
            display(view)

            # Remember what was drawn so the export handlers can reuse it.
            STATE["last_acc"] = acc
            STATE["last_union_ranges"] = union_ranges
            STATE["last_inter_positions"] = sorted({int(p) for p in inter_pos})
            STATE["last_mod_positions"] = sorted({int(p) for p in mod_positions})

            print(f"ACC_ID: {acc}")
            print(f"AlphaFold: {pdb_path}")
            print(f"Union ranges: {len(union_ranges)}")
            if show_mods_chk.value:
                print(f"Unique mod sites: {len(set(mod_positions))}")

    # -------------------------
    # Export: raw PDB copy
    # -------------------------
    def on_export_pdb(_):
        """Copy the AlphaFold PDB of the selected protein into ``exports/``.

        Click handler (the button argument is ignored). Messages are printed
        into ``out``: a hint when no protein is selected, the error when the
        model cannot be obtained, otherwise the path of the exported file.
        """
        # Imported here so the handler does not rely on a notebook-level
        # ``import shutil`` having been executed in another cell.
        import shutil

        with out:
            acc = protein_dd.value
            if not acc:
                print("No protein selected to export.")
                return
            try:
                pdb_path = download_alphafold_pdb(acc)
            except Exception as e:
                print(f"Cannot export: AlphaFold not available for {acc}")
                print(e)
                return

            export_dir = Path("exports")
            export_dir.mkdir(exist_ok=True)
            export_path = export_dir / f"{acc}_alphafold.pdb"
            shutil.copyfile(pdb_path, export_path)
            print(f"Exported raw PDB to: {export_path.resolve()}")

    # -------------------------
    # Export: selections TSV
    # -------------------------
    def on_export_selections(_):
        """Write the last rendered selection to ``exports/<acc>_mapped_residues.tsv``.

        Click handler (the button argument is ignored). The table has the
        columns ``type``, ``start``, ``end`` and ``pos``: union ranges fill
        ``start``/``end``, modification and intersection sites fill ``pos``.
        Prints a hint into ``out`` when nothing has been rendered yet.
        """
        with out:
            acc = STATE.get("last_acc")
            if not acc:
                print("Nothing to export yet — render a protein first.")
                return

            target_dir = Path("exports")
            target_dir.mkdir(exist_ok=True)
            tsv_path = target_dir / f"{acc}_mapped_residues.tsv"

            # Row order in the file: ranges, then mod sites, then intersection.
            range_rows = [
                {"type": "union_range", "start": first, "end": last, "pos": ""}
                for first, last in STATE.get("last_union_ranges", [])
            ]
            mod_rows = [
                {"type": "mod_site", "start": "", "end": "", "pos": site}
                for site in STATE.get("last_mod_positions", [])
            ]
            inter_rows = [
                {"type": "intersection_site", "start": "", "end": "", "pos": site}
                for site in STATE.get("last_inter_positions", [])
            ]

            table = pd.DataFrame(range_rows + mod_rows + inter_rows)
            table.to_csv(tsv_path, sep="\t", index=False)
            print(f"Exported selections TSV to: {tsv_path.resolve()}")

    # -------------------------
    # Export: TRUE standalone styled NGL HTML
    # -------------------------
    def on_export_styled_html(_):
        with out:
            acc = STATE.get("last_acc")
            pdb_path = STATE.get("last_pdb_path")
            if not acc or not pdb_path:
                print("Nothing to export yet — render a protein first.")
                return

            union_ranges = STATE.get("last_union_ranges", [])
            inter = STATE.get("last_inter_positions", [])
            mods = STATE.get("last_mod_positions", [])

            export_dir = Path("exports")
            export_dir.mkdir(exist_ok=True)
            html_path = export_dir / f"{acc}_styled_session.html"

            # Embed PDB text directly into HTML
            pdb_text = Path(pdb_path).read_text(errors="ignore")

            payload = {
                "acc": acc,
                "union_ranges": union_ranges,   # list of [start,end]
                "intersection": inter,          # list of positions
                "mods": mods                    # list of positions
            }

            # Standalone HTML uses NGL from CDN and applies your same styling
            html = f"""<!doctype html>
<html>
<head>
  <meta charset="utf-8"/>
  <title>{acc} styled NGL session</title>
  <style>
    body {{ margin: 0; font-family: sans-serif; }}
    #viewport {{ width: 100vw; height: 100vh; }}
    #panel {{
      position: absolute; top: 10px; left: 10px; z-index: 10;
      background: rgba(255,255,255,0.9); padding: 10px; border-radius: 8px;
      max-width: 520px;
    }}
    code {{ font-size: 12px; }}
  </style>
  <script src="https://unpkg.com/ngl@latest/dist/ngl.js"></script>
</head>
<body>
  <div id="panel">
    <b>{acc}</b><br/>
    <div>Grey: protein | Blue: peptides | Red: intersection | Magenta: mods</div>
    <div style="margin-top:6px;"><code>union ranges: {len(union_ranges)} | mods: {len(mods)} | intersection: {len(inter)}</code></div>
  </div>
  <div id="viewport"></div>

  <script>
    const pdbText = {json.dumps(pdb_text)};
    const payload = {json.dumps(payload)};

    function rangesToSelection(ranges) {{
      if (!ranges || ranges.length === 0) return "";
      return ranges.map(r => `resi ${{r[0]}}-${{r[1]}}`).join(" OR ");
    }}

    function positionsToSelection(pos) {{
      if (!pos || pos.length === 0) return "";
      return pos.map(p => `resi ${{p}}`).join(" OR ");
    }}

    const stage = new NGL.Stage("viewport", {{ backgroundColor: "white" }});
    window.addEventListener("resize", () => stage.handleResize(), false);

    // Load from a blob so this HTML is standalone
    const blob = new Blob([pdbText], {{type: "text/plain"}});
    stage.loadFile(blob, {{ ext: "pdb" }}).then(comp => {{
      // Base grey cartoon
      comp.addRepresentation("cartoon", {{ color: "grey" }});

      // Blue peptide union
      const pepSel = rangesToSelection(payload.union_ranges);
      if (pepSel) {{
        comp.addRepresentation("cartoon", {{ sele: pepSel, color: "blue" }});
      }}

      // Red intersection sites
      const interSel = positionsToSelection(payload.intersection);
      if (interSel) {{
        comp.addRepresentation("ball+stick", {{ sele: interSel, color: "red" }});
      }}

      // Magenta mod sites
      const modSel = positionsToSelection(payload.mods);
      if (modSel) {{
        comp.addRepresentation("ball+stick", {{ sele: modSel, color: "magenta" }});
      }}

      comp.autoView();
    }});
  </script>
</body>
</html>
"""
            html_path.write_text(html, encoding="utf-8")
            print(f"Exported TRUE styled session HTML to: {html_path.resolve()}")
            print("Open it in a browser — colors & selections are preserved (no Jupyter dependencies).")

    # -------------------------
    # Callbacks
    # -------------------------
    def on_load_file(_):
        """Load the uploaded table and pre-fill the five column selectors.

        Click handler (the button argument is ignored). When the reader
        returns no frame, the second item it returned is printed and the
        handler stops. Otherwise the frame is stored in ``STATE["raw_df"]``,
        every selector is offered all columns, and each selector is preset to
        the first matching well-known column name, falling back to the first
        column.
        """
        with out:
            clear_output()

            df, filename = read_uploaded_table(upload)
            if df is None:
                print(filename)
                return

            STATE["raw_df"] = df
            cols = list(df.columns)

            # Selector widget -> header names to try, in priority order.
            presets = [
                (protein_col, ["ACC_ID", "Protein", "protein", "Uniprot", "UniProt", "accession"]),
                (pepseq_col, ["Pep_seq", "peptideSequence", "peptide", "Peptide"]),
                (pepstart_col, ["pep_start", "PeptideStart", "peptideStart", "start"]),
                (pepend_col, ["pep_end", "PeptideEnd", "peptideEnd", "end"]),
                (uppos_col, ["UP_POS", "uniprotPosition", "UniprotPosition", "modpos_prot"]),
            ]

            for selector, _names in presets:
                selector.options = cols
            for selector, names in presets:
                selector.value = guess_column(cols, names) or cols[0]

            n_rows, n_cols = df.shape[0], df.shape[1]
            print(f"Loaded file: {filename}")
            print(f"Shape: {n_rows} rows × {n_cols} cols")
            print("Now choose the 5 columns and click 'Build mapping'.\n")
            display(df.head(5))

    def on_build_mapping(_):
        """Build the mapping table and populate the protein and peptide lists.

        Click handler (the button argument is ignored). On failure a hint is
        printed into ``out``; on success the table is stored in
        ``STATE["map_df"]``, the protein dropdown receives the sorted
        accessions with the first one selected, and the peptide list is
        refreshed for it.
        """
        with out:
            clear_output()

            mapping = build_mapping_df()
            if mapping.empty:
                print("Failed to build mapping table. Check that pep_start/pep_end/UP_POS are numeric and columns are correct.")
                return

            STATE["map_df"] = mapping
            accessions = sorted(set(mapping["ACC_ID"]))
            protein_dd.options = accessions
            if accessions:
                protein_dd.value = accessions[0]
            else:
                protein_dd.value = None

            refresh_peptides(protein_dd.value, keep_selection=False)
            print(f"Mapping built: {len(mapping)} rows across {len(accessions)} proteins.")
            print("Select peptides to render (auto-refresh).")

    def on_protein_change(_):
        """Reload the peptide list after the protein dropdown changed.

        Does nothing while no mapping table exists or no protein is selected.
        Otherwise resets ``STATE["last_action"]``, refreshes the peptide list
        without keeping the old selection and prints a short notice.
        """
        mapping = STATE["map_df"]
        if mapping is None or mapping.empty:
            return

        acc = protein_dd.value
        if acc:
            STATE["last_action"] = None
            refresh_peptides(acc, keep_selection=False)
            with out:
                clear_output()
                print(f"Protein switched to {acc}. Peptides refreshed.")
                print("Select peptides or click 'Map all peptides'.")

    def on_list_mode_change(_):
        """Rebuild the peptide list for the new list mode and re-render.

        The previous selection is kept where its values still exist among the
        new options; the view is redrawn only if something remains selected.
        """
        acc = protein_dd.value
        if acc:
            refresh_peptides(acc, keep_selection=True)
            if peptide_multi.value:
                STATE["last_action"] = "select"
                render_current()

    def on_peptide_select(_):
        """Re-render when the user changes the peptide selection.

        Ignored while ``STATE["suspend"]`` is set (programmatic selection
        updates) and when the selection is empty.
        """
        if STATE["suspend"] or not peptide_multi.value:
            return
        STATE["last_action"] = "select"
        render_current()

    def on_map_all(_):
        """Select every listed peptide and render them all at once.

        Click handler (the button argument is ignored). The selection is
        replaced while ``STATE["suspend"]`` is raised so the selection
        observer stays quiet; a single render follows afterwards.
        """
        if peptide_multi.options:
            STATE["last_action"] = "map_all"
            STATE["suspend"] = True
            try:
                peptide_multi.value = tuple(value for _, value in peptide_multi.options)
            finally:
                STATE["suspend"] = False
            render_current()

    def on_any_toggle(_):
        """Re-render after a display option changed, if peptides are selected."""
        if not peptide_multi.value:
            return
        STATE["last_action"] = "select"
        render_current()

    # wire up
    load_file_btn.on_click(on_load_file)
    build_btn.on_click(on_build_mapping)

    protein_dd.observe(on_protein_change, names="value")
    list_mode.observe(on_list_mode_change, names="value")
    peptide_multi.observe(on_peptide_select, names="value")
    map_all_btn.on_click(on_map_all)

    export_pdb_btn.on_click(on_export_pdb)
    export_html_btn.on_click(on_export_styled_html)
    export_sel_btn.on_click(on_export_selections)

    show_mods_chk.observe(on_any_toggle, names="value")
    show_mods_mode.observe(on_any_toggle, names="value")
    viewer_size.observe(on_any_toggle, names="value")

    return None


# Run it: start the upload-and-map app (called with no arguments); the
# widgets it displays appear in the cell output that follows.
peptide_upload_ngl_mapper_app()


HTML(value='<b>Upload peptide table (protein-mapped pep_start/pep_end) → map to AlphaFold with NGLView</b>')

HBox(children=(FileUpload(value=(), accept='.tsv,.txt,.csv', description='Choose file'), Button(button_style='…

HBox(children=(Dropdown(description='Protein ID:', options=(), value=None), Dropdown(description='Pep seq:', o…

HBox(children=(Dropdown(description='Protein:', options=(), value=None), ToggleButtons(description='List:', op…

SelectMultiple(description='Peptides:', layout=Layout(height='240px', width='980px'), options=(), value=())



Output()