In [47]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import requests
from pathlib import Path
import plotly.io as pio

In [None]:
def get_data(url: str) -> pd.DataFrame:
    """Download a JSON document from ``url`` and flatten it into a DataFrame.

    The record list is located as follows:

    * a top-level list is used directly;
    * for a top-level dict, the ``"data"`` entry is used when it is a list,
      otherwise the first dict value that is a list, otherwise the dict itself
      (which normalises to a single row).

    Failures are reported with ``print`` rather than raised: an HTTP/transport
    error, a non-JSON body, or an unsupported top-level JSON type all yield an
    empty DataFrame.

    Args:
        url: Address of the JSON endpoint.

    Returns:
        The normalised records with a fresh 0..n-1 index, or an empty
        DataFrame on any of the failures listed above.
    """
    # Stage 1: transport. (connect timeout, read timeout) in seconds.
    try:
        response = requests.get(url, timeout=(5, 30))
        response.raise_for_status()
    except requests.RequestException as err:
        print(f"HTTP error for {url}: {err}")
        return pd.DataFrame()

    # Stage 2: decoding.
    try:
        document = response.json()
    except ValueError as err:
        print(f"Invalid JSON from {url}: {err}")
        return pd.DataFrame()

    # Stage 3: pick the object to normalise.
    if isinstance(document, list):
        records = document
    elif isinstance(document, dict):
        data_field = document.get("data")
        if isinstance(data_field, list):
            records = data_field
        else:
            # First list-valued entry wins; with none, normalise the dict itself.
            records = next(
                (value for value in document.values() if isinstance(value, list)),
                document,
            )
    else:
        print("Unrecognized JSON structure; returning empty DataFrame.")
        return pd.DataFrame()

    return pd.json_normalize(records).reset_index(drop=True)


In [30]:
# Download the "pgee2025" endpoint into a DataFrame. get_data() returns an
# empty frame on any HTTP/JSON failure, so an empty df_pgee means the fetch failed.
url = "https://node.gurustats.usermd.net:60519/pgee2025"
df_pgee = get_data(url)

In [31]:
# Download the "pgee2025dru" endpoint; the KLUB and MSCTABELA columns are
# read from it below. NOTE(review): presumably the team standings - confirm.
url_standings = "https://node.gurustats.usermd.net:60519/pgee2025dru"
df_standings = get_data(url=url_standings)

In [32]:
# Keep only the club name and its MSCTABELA value; this two-column frame is
# later joined onto the rider rows by KLUB.
df_standings_msc = df_standings.loc[:,["KLUB","MSCTABELA"]]

In [33]:
# Display the club -> MSCTABELA lookup.
df_standings_msc

Unnamed: 0,KLUB,MSCTABELA
0,Wrocław,3
1,Zielona Góra,5
2,Lublin,1
3,Częstochowa,7
4,Grudziądz,4
5,Toruń,2
6,Rybnik,8
7,Gorzów,6


In [34]:
# Display the frame downloaded from the pgee2025 endpoint.
df_pgee

Unnamed: 0,_id,MSC,ZAWODNIK,KLUB,SREDNIA,%POJ,PKT,BON,BIEGI,SST,...,TORB,TORC,TORD,DOM,WYJAZD,mecze,KLASYFIKOWANY?,id,wychowanek,status PL 2025
0,689b1da3027e2d6989ce8d1c,1,Bartosz Zmarzlik,Lublin,2.730159,0.873016,167,5,63,0.725806,...,2.571429,2.416667,2.954545,2.888889,2.611111,14,1,95,,SP
1,689b1da3027e2d6989ce8d1d,2,Patryk Dudek,Toruń,2.333333,0.77305,146,15,69,0.727941,...,2.111111,2.5,2,2.484848,2.194444,14,1,692,,SP
2,689b1da3027e2d6989ce8d1e,7,Wiktor Przyjemski,Lublin,2.2,0.727273,110,11,55,0.747475,...,2,2.307692,2.181818,2.259259,2.142857,14,1,3579,Polonia Bydgoszcz,JP
3,689b1da3027e2d6989ce8d1f,8,Mikkel Michelsen,Toruń,2.166667,0.686567,135,8,66,0.70229,...,2.263158,2,2.1875,2.21875,2.117647,14,1,155,,SO
4,689b1da3027e2d6989ce8d20,9,Maksym Drabik,Rybnik,2.134328,0.62963,134,9,67,0.609023,...,2,2.117647,1.9375,2.1875,2.085714,12,1,3112,,SP
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70,689b1da3027e2d6989ce8d62,71,Krystian Gręda,Wrocław,0,0,0,0,4,0,...,,,0,,0,1,0,,,JP
71,689b1da3027e2d6989ce8d63,72,Damian Miller,Grudziądz,0,0,0,0,2,0,...,0,,,,0,1,0,,,JP
72,689b1da3027e2d6989ce8d64,73,Mitchell McDiarmid,Zielona Góra,0,0,0,0,2,0,...,0,0,,0,,1,0,,,JO
73,689b1da3027e2d6989ce8d65,75,Krzysztof Lewandowski,Wrocław,,,0,0,0,,...,0.6,0,0.666667,0.428571,0.833333,7,0,3561,Apator Toruń,JP


In [35]:
# Column dtypes and non-null counts of the downloaded frame.
df_pgee.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 75 entries, 0 to 74
Data columns (total 37 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   _id             75 non-null     object 
 1   MSC             75 non-null     int64  
 2   ZAWODNIK        75 non-null     object 
 3   KLUB            75 non-null     object 
 4   SREDNIA         75 non-null     object 
 5   %POJ            75 non-null     object 
 6   PKT             75 non-null     int64  
 7   BON             75 non-null     int64  
 8   BIEGI           75 non-null     int64  
 9   SST             75 non-null     object 
 10  SDY             75 non-null     float64
 11  DYSTBILANS      75 non-null     int64  
 12  P3              75 non-null     int64  
 13  P2              75 non-null     int64  
 14  P1              75 non-null     int64  
 15  P0              75 non-null     int64  
 16  D               75 non-null     int64  
 17  W               75 non-null     int64

In [36]:
# Independent copy of the columns the Sankey builder needs: rider, club, total
# points (PKT) and the P3/P2/P1/P0 counts, plus MSC.
df_selected = df_pgee.loc[:,["MSC","ZAWODNIK","KLUB","PKT","P3","P2","P1","P0"]].copy()

In [37]:
# Attach each rider's club MSCTABELA value. `validate="many_to_one"` makes the
# merge raise if a club appears more than once in the lookup; without it a
# duplicated club would silently duplicate rider rows and inflate every flow
# in the Sankey diagram.
df_data = df_selected.merge(
    df_standings_msc, on="KLUB", how="left", validate="many_to_one"
)

In [None]:
import pandas as pd
import plotly.graph_objects as go

def _rgba(rgb_tuple, alpha: float) -> str:
    """Format a 3-item colour and an opacity as a CSS ``rgba(r,g,b,a)`` string.

    ``rgb_tuple`` is unpacked into exactly three components, so any other
    length raises ``ValueError``.
    """
    red, green, blue = rgb_tuple
    return "rgba({},{},{},{})".format(red, green, blue, alpha)

def _as_rgba(color, alpha: float = 1.0) -> str:
    """Normalise a colour specification to a CSS colour string carrying ``alpha``.

    Handling by input form:

    * ``(r, g, b)`` tuple or list      -> ``"rgba(r,g,b,alpha)"``
    * ``"#RRGGBB"`` / ``"#RRGGBBAA"``  -> ``"rgba(r,g,b,alpha)"`` (the alpha
      digits of the 8-digit form are ignored in favour of ``alpha``)
    * ``"#RGB"`` shorthand             -> expanded, then as above
    * ``"rgb(...)"`` / ``"hsl(...)"``  -> ``"rgba(...,alpha)"`` / ``"hsla(...,alpha)"``
    * ``"rgba(...)"`` / ``"hsla(...)"`` and any other string (e.g. a named
      colour) -> returned stripped but otherwise unchanged, because they either
      carry their own alpha or cannot be given one here
    * anything else, including ``None`` -> light grey ``rgba(245,245,245,alpha)``

    Raises:
        ValueError: if a ``#`` colour of a recognised length contains
            non-hexadecimal digits.
    """
    if isinstance(color, (tuple, list)) and len(color) == 3:
        return f"rgba({color[0]},{color[1]},{color[2]},{alpha})"

    if isinstance(color, str):
        c = color.strip()

        if c.startswith("#") and len(c) in (4, 7, 9):
            hx = c[1:]
            if len(hx) == 3:
                # "#abc" is shorthand for "#aabbcc".
                hx = "".join(ch * 2 for ch in hx)
            r, g, b = (int(hx[i:i + 2], 16) for i in (0, 2, 4))
            return f"rgba({r},{g},{b},{alpha})"

        low = c.lower()
        # Functional notations without an alpha channel get one appended.
        for prefix, with_alpha in (("rgb(", "rgba("), ("hsl(", "hsla(")):
            if low.startswith(prefix) and c.endswith(")"):
                inner = c[len(prefix):-1].strip()
                return f"{with_alpha}{inner},{alpha})"

        return c

    return f"rgba(245,245,245,{alpha})"

def make_sankey_rider_3210_club_with_menu(
    df: pd.DataFrame,
    value_mode: str = "counts",            # "counts" = number of heats, "points" = points scored
    show_zero_as_counts: bool = True,      # only relevant when value_mode="points"
    cat_rgb: dict | None = None,           # {"3":(R,G,B), "2":..., "1":..., "0":...}
    alpha_rc: float = 0.35,                # opacity of rider -> category links
    alpha_ck: float = 0.70,                # opacity of category -> club links
    alpha_mid_node: float = 0.90,          # opacity of the category nodes ("3/2/1/0")
    # Colours of the club NODES (not of the links).
    club_color_map: dict | None = None,    # e.g. {"Wrocław":"#da251c","Toruń":"#020381",...}
    alpha_club_node: float = 1.0,          # opacity of the club nodes
    other_node_color: str | tuple | None = "#f5f5f5",
    height: int | None = 1100,
    width: int | None = 1600,
    title_prefix: str = "Zawodnik → (3/2/1/0) → Klub"
) -> go.Figure:
    """Build a rider -> heat result (3/2/1/0) -> club Sankey figure with a range menu.

    Riders are ranked by ``PKT`` (descending) and split into the buckets
    1-20, 21-40, 41-60 and 61+. One Sankey state is built per bucket and a menu
    switches between them; the first bucket is shown initially.

    Args:
        df: One row per rider. Must contain ``ZAWODNIK``, ``KLUB``, ``PKT``,
            ``P3``, ``P2``, ``P1``, ``P0`` and ``MSCTABELA``. The input frame
            is not modified.
        value_mode: ``"counts"`` sizes links by the P3..P0 counts;
            ``"points"`` sizes them by count x points value.
        show_zero_as_counts: In ``"points"`` mode, size the "0" category by its
            count instead of dropping it (its point value is always 0).
        cat_rgb: Optional ``{"3"|"2"|"1"|"0": (r, g, b)}`` overrides; keys that
            are not supplied fall back to the built-in palette.
        alpha_rc: Opacity of rider -> category links.
        alpha_ck: Opacity of category -> club links.
        alpha_mid_node: Opacity of the category nodes.
        club_color_map: Optional club -> colour mapping for club nodes. Matched
            by exact name first, then by case-insensitive substring in either
            direction.
        alpha_club_node: Opacity applied to colours from ``club_color_map``.
        other_node_color: Colour of every node not coloured otherwise.
        height: Figure height passed to the layout.
        width: Figure width passed to the layout.
        title_prefix: Text placed at the start of every title.

    Returns:
        The assembled ``go.Figure``.

    Raises:
        ValueError: for an unknown ``value_mode``, missing required columns,
            or riders without a club.
    """
    # Previously any value other than "counts" was silently treated as "points".
    if value_mode not in ("counts", "points"):
        raise ValueError(
            f"Nieobsługiwany value_mode: {value_mode!r} (dozwolone: 'counts', 'points')"
        )

    # MSCTABELA is needed below to order the clubs, so validate it up front
    # instead of failing later with a bare KeyError inside groupby.
    required = {"ZAWODNIK","KLUB","PKT","P3","P2","P1","P0","MSCTABELA"}
    missing = required - set(df.columns)
    if missing:
        raise ValueError(f"Brakuje kolumn: {missing}")

    # A rider without a club cannot be routed to a club node; without this
    # check the aggregation below would fail with an opaque `KeyError: nan`.
    if df["KLUB"].isna().any():
        raise ValueError("Kolumna KLUB zawiera braki (NaN) - każdy zawodnik musi mieć klub")

    df = df.copy()
    for c in ["P3","P2","P1","P0"]:
        df[c] = pd.to_numeric(df[c], errors="coerce").fillna(0).astype(int)
    df["PKT"] = pd.to_numeric(df["PKT"], errors="coerce").fillna(0)

    # Stable sort: riders tied on PKT keep their input order, so the bucket a
    # tied rider lands in does not depend on the sort algorithm.
    df = df.sort_values("PKT", ascending=False, kind="mergesort").reset_index(drop=True)
    df["RANK"] = df.index + 1
    ranges = [("1–20",1,20), ("21–40",21,40), ("41–60",41,60), ("61+",61,None)]

    # Caller-supplied colours override the defaults key by key, so a partial
    # mapping no longer raises KeyError.
    default_cat_rgb = {"3":(33,150,243), "2":(0,200,83), "1":(255,193,7), "0":(244,67,54)}
    cat_rgb = {**default_cat_rgb, **(cat_rgb or {})}

    CAT_COLS   = {"P3":"3","P2":"2","P1":"1","P0":"0"}
    mid_map    = {"3":"3 punkty","2":"2 punkty","1":"1 punkt","0":"0 punktów"}
    points_val = {"3":3,"2":2,"1":1,"0":0}

    def weight(base_k: str, count: int) -> float:
        """Link size contributed by ``count`` results of category ``base_k``."""
        if value_mode == "counts":
            return float(count)
        if base_k in ("3","2","1"):
            return float(count * points_val[base_k])
        return float(count) if show_zero_as_counts else 0.0

    # Clubs ordered by their smallest MSCTABELA value, ascending.
    clubs_order = (
        df.groupby("KLUB")["MSCTABELA"].min().sort_values(ascending=True).index.tolist()
    )

    def _resolve_club_color(club_name: str):
        """Colour for a club node, or None when the map has no match."""
        if not club_color_map:
            return None
        if club_name in club_color_map:
            return _as_rgba(club_color_map[club_name], alpha_club_node)
        # Fallback: substring match (e.g. "Wrocław" inside "Sparta Wrocław").
        low = club_name.lower()
        for k, col in club_color_map.items():
            if k.lower() in low or low in k.lower():
                return _as_rgba(col, alpha_club_node)
        return None

    def build_state(df_slice: pd.DataFrame):
        """Return the ``(node, link)`` dicts of the Sankey trace for one bucket."""
        labels, idx = [], {}

        def add_node(name: str):
            if name not in idx:
                idx[name] = len(labels)
                labels.append(name)

        # Left: riders. Middle: categories. Right: clubs.
        for rider in df_slice["ZAWODNIK"]:
            add_node(rider)
        for mid_label in mid_map.values():
            add_node(mid_label)
        for club in clubs_order:
            add_node(club)

        # Node colours: default first, then categories, then clubs.
        default_node = _as_rgba(other_node_color, 1.0)
        node_colors = [default_node] * len(labels)

        for base_k, mid_label in mid_map.items():
            node_colors[idx[mid_label]] = _rgba(cat_rgb[base_k], alpha_mid_node)

        for club in clubs_order:
            col = _resolve_club_color(club)
            if col:
                node_colors[idx[club]] = col

        sources, targets, values, hovers, link_colors = [], [], [], [], []

        # One pass over the slice: emit rider -> category links and accumulate
        # the per-club totals used for the category -> club links.
        agg = {club: {"3":0.0,"2":0.0,"1":0.0,"0":0.0} for club in clubs_order}
        for rider, club, *counts in zip(
            df_slice["ZAWODNIK"], df_slice["KLUB"], *(df_slice[col] for col in CAT_COLS)
        ):
            for base_k, cnt in zip(CAT_COLS.values(), counts):
                cnt = int(cnt)
                val = weight(base_k, cnt)
                agg[club][base_k] += val
                if val <= 0:
                    continue

                mid_label = mid_map[base_k]
                pts = cnt * points_val[base_k]
                if value_mode == "counts":
                    h = f"{rider} → {mid_label}: {cnt} biegów (={pts} pkt)"
                elif base_k == "0" and show_zero_as_counts:
                    h = f"{rider} → {mid_label}: {cnt} biegów (0 pkt)"
                else:
                    h = f"{rider} → {mid_label}: {cnt} × {points_val[base_k]} = {pts} pkt"

                sources.append(idx[rider])
                targets.append(idx[mid_label])
                values.append(val)
                hovers.append(h)
                link_colors.append(_rgba(cat_rgb[base_k], alpha_rc))

        # Category -> club links use the category colour (not the club colour)
        # so that a category reads the same on both sides of the diagram.
        for base_k in ("3","2","1","0"):
            mid_label = mid_map[base_k]
            for club in clubs_order:
                val = agg[club][base_k]
                if val <= 0:
                    continue

                if value_mode == "counts":
                    h = f"{mid_label} → {club}: {val:.0f} biegów"
                elif base_k == "0" and show_zero_as_counts:
                    h = f"{mid_label} → {club}: {val:.0f} biegów (0 pkt)"
                else:
                    h = f"{mid_label} → {club}: {val:.0f} pkt"

                sources.append(idx[mid_label])
                targets.append(idx[club])
                values.append(val)
                hovers.append(h)
                link_colors.append(_rgba(cat_rgb[base_k], alpha_ck))

        node = dict(label=labels, pad=18, thickness=20,
                    color=node_colors, line=dict(color="rgba(0,0,0,0.15)", width=1), hoverinfo="skip")
        link = dict(source=sources, target=targets, value=values,
                    color=link_colors, hovertemplate="%{customdata}<extra></extra>",
                    customdata=hovers)
        return node, link

    # Build one (node, link) state and one title per rank bucket.
    states, titles = [], []
    for label, start, end in ranges:
        in_bucket = df["RANK"] >= start
        if end is not None:
            in_bucket &= df["RANK"] <= end
        sl = df[in_bucket]
        if sl.empty:
            # Placeholder state so the menu entry still works.
            node = dict(label=["Brak danych"], pad=18, thickness=20, color=["rgba(245,245,245,1)"])
            link = dict(source=[], target=[], value=[])
        else:
            node, link = build_state(sl)
        states.append((node, link))
        titles.append(
            f"{title_prefix} Przepływ punktów: zawodnicy → punkty → kluby | Zawodnicy: {label}"
            f"<br><span style='font-size:12px;color:#666'>Dane: gurustats.pl | Autor: Paweł Jaszczerski</span>"
        )

    base_node, base_link = states[0]
    fig = go.Figure(data=[go.Sankey(node=base_node, link=base_link)])
    fig.update_layout(
        title=dict(
            text=titles[0],   # contains <br>, so it renders on two lines
            x=0.01, xanchor="left"
        ),
        margin=dict(l=10, r=10, t=110, b=10),
        width=width, height=height,
        updatemenus=[dict(
            type="dropdown",
            x=0.52, y=1.085, xanchor="left", yanchor="top",
            direction="right", showactive=True,
            buttons=[
                dict(
                    label=lbl, method="update",
                    args=[{"node":[node], "link":[link]}, {"title": {"text": t}}]
                )
                for (lbl,_,_), (node,link), t in zip(ranges, states, titles)
            ]
        )]
    )

    return fig


In [None]:
# Build the figure with link sizes taken from the P3..P0 counts and
# club-specific node colours.
fig = make_sankey_rider_3210_club_with_menu(
    df_data,
    value_mode="counts",
    cat_rgb={"3":(30,136,229), "2":(0,172,91), "1":(255,179,0), "0":(229,57,53)},
    alpha_rc=0.30, alpha_ck=0.75, alpha_mid_node=0.95,
    club_color_map={
        "Częstochowa":"#36af6b",
        "Gorzów":"#2ea2cc",
        "Grudziądz":"#000b99",
        "Lublin":"#caab53",        
        "Rybnik":"#008332",
        "Toruń":"#020381",
        "Wrocław":"#da251c",
        "Zielona Góra":"#0b7e45"
    },
    alpha_club_node=1.0,
    other_node_color="#f5f5f5",
    width=1400, height=1000,
    title_prefix="PGEE 2025"
)
fig.show()

# Standalone HTML export. "cdn" makes plotly emit a <script> tag for the
# plotly.js release that matches the installed plotly.py. The previous
# ".../plotly-latest.min.js" URL is frozen at plotly.js v1.58.5 and does not
# track newer releases, so the exported page could be rendered by a mismatched
# library version.
html = fig.to_html(
    include_plotlyjs="cdn",
    full_html=True
)

out = Path.cwd() / "output" / "index.html"
out.parent.mkdir(parents=True, exist_ok=True)
out.write_text(html, encoding="utf-8")
    
# Target path for a PNG export; nothing in the visible part of this cell
# writes to it yet.
png_path = Path.cwd() / "output" / "sankey.png"
png_path.parent.mkdir(parents=True, exist_ok=True)