In [1]:
import numpy as np
import pandas as pd
import json
from sklearn.decomposition import PCA
import plotly.express as px

# --- 1) Carica la matrice di co-occorrenza ---
cxc = pd.read_csv("cxc.csv", header=None).values

# --- 2) Carica dizionario carte ---
with open('./royale/cards_dict.json', "r", encoding="utf-8") as f:
    cards_dict = json.load(f)

for i, (cid, c) in enumerate(cards_dict.items()):
    c['loc'] = i   # aggiungi la posizione
    
with open('./royale/cards_dict.json', "w", encoding="utf-8") as f:
    json.dump(cards_dict, f, indent=2, ensure_ascii=False)

# creo un mapping indice -> nome carta
idx_to_label = [None] * len(cards_dict)
for cid, info in cards_dict.items():
    idx = info['loc']   # se nel tuo dict hai la posizione della carta
    idx_to_label[idx] = info.get('name', cid)

# --- 3) PCA in 3 dimensioni ---
pca = PCA(n_components=3)
coords = pca.fit_transform(cxc)

# --- 4) Etichette ---
labels = idx_to_label

# --- 5) Visualizza ---
fig = px.scatter_3d(
    x=coords[:,0],
    y=coords[:,1],
    z=coords[:,2],
    text=labels,
    color=coords[:,0],
    title="Embedding 3D delle carte (PCA su co-occorrenze)"
)
fig.update_traces(marker=dict(size=5))
fig.show()


In [5]:
import json
import numpy as np
import pandas as pd
from pathlib import Path

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import plotly.express as px

# ======================
# Config
# ======================
CXC_PATH = Path("cxc.csv")
CARD_DICT_PATH = Path("./royale/cards_dict.json")
OUT_CLUSTER_JSON = Path("./royale/card_clusters.json")

N_CLUSTERS = 5          # numero atteso di archetipi (modifica a piacere)
RANDOM_STATE = 42

# ======================
# Utils
# ======================
def load_cxc(path: Path) -> np.ndarray:
    cxc = pd.read_csv(path, header=None).values
    if cxc.shape[0] != cxc.shape[1]:
        raise ValueError(f"cxc deve essere quadrata, trovata {cxc.shape}")
    return cxc.astype(float)

def load_cards_dict(path: Path) -> dict:
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

def build_idx_to_label(cards_dict: dict, n: int) -> list[str]:
    """Restituisce un array labels di lunghezza n con i nomi in posizione idx.
    Usa 'loc' se presente e consistente; altrimenti crea un mapping 0..n-1 in ordine di iterazione."""
    # raccogli eventuali loc
    locs = []
    for cid, info in cards_dict.items():
        if isinstance(info, dict) and "loc" in info:
            locs.append(info["loc"])

    use_loc = (
        len(locs) == len(cards_dict) and
        set(locs) == set(range(n))
    )

    labels = [None] * n
    if use_loc:
        for cid, info in cards_dict.items():
            idx = int(info["loc"])
            labels[idx] = info.get("name", cid)
    else:
        # fallback: assegna 0..n-1 in ordine, avvisa
        print("[AVVISO] 'loc' mancante o incoerente: associo 0..N-1 in ordine di cards_dict.")
        for i, (cid, info) in enumerate(cards_dict.items()):
            if i >= n:
                break
            labels[i] = info.get("name", cid)
    # riempi eventuali None con l'id carta
    for i, val in enumerate(labels):
        if val is None:
            labels[i] = f"card_{i}"
    return labels

def row_normalize_no_diag(C: np.ndarray) -> np.ndarray:
    """Rende la diagonale 0 e normalizza ogni riga a somma 1 (se > 0)."""
    C = C.copy().astype(float)
    np.fill_diagonal(C, 0.0)
    row_sum = C.sum(axis=1, keepdims=True)
    P = np.divide(C, row_sum, out=np.zeros_like(C), where=row_sum > 0)
    return P

def center_columns(X: np.ndarray) -> np.ndarray:
    """Rimuove la media colonna per colonna."""
    return X - X.mean(axis=0, keepdims=True)

# ======================
# 1) Carica dati
# ======================
C = load_cxc(CXC_PATH)                          # (n, n)
n_cards = C.shape[0]
cards_dict = load_cards_dict(CARD_DICT_PATH)    # dict
labels = build_idx_to_label(cards_dict, n_cards)

# ======================
# 2) Costruisci feature che enfatizzano le co-occorrenze (non la frequenza)
#    - normalizzazione per riga: P(j|i)
#    - centering colonne per togliere bias di carte molto popolari
# ======================
P = row_normalize_no_diag(C)    # (n, n)
X = center_columns(P)           # (n, n)

# ======================
# 3) PCA in 3D
# ======================
pca = PCA(n_components=3, random_state=RANDOM_STATE)
coords = pca.fit_transform(X)   # (n, 3)

# ======================
# 4) Clustering (KMeans) nello spazio PCA
# ======================
kmeans = KMeans(n_clusters=N_CLUSTERS, n_init="auto", random_state=RANDOM_STATE)
cluster_labels = kmeans.fit_predict(coords)     # (n,)

# ======================
# 5) Visualizza 3D
# ======================
fig = px.scatter_3d(
    x=coords[:, 0], y=coords[:, 1], z=coords[:, 2],
    text=labels,
    color=cluster_labels.astype(str),
    title="Carte: PCA 3D su P(j|i) normalizzato + KMeans"
)
fig.update_traces(marker=dict(size=5))
fig.show()

# ======================
# 6) Stampa carte per cluster per interpretazione
# ======================
print("\n=== Carte per cluster (per interpretare gli archetipi) ===")
for cl in range(N_CLUSTERS):
    cards_in_cluster = [labels[i] for i, lab in enumerate(cluster_labels) if lab == cl]
    print(f"\nCluster {cl}  (n={len(cards_in_cluster)}):")
    # stampa in righe larghe
    line = []
    for name in cards_in_cluster:
        line.append(name)
        if len(line) >= 8:
            print(", ".join(line))
            line = []
    if line:
        print(", ".join(line))

# ======================
# 7) Salva mappa carta -> cluster (per classificare i deck in seguito)
# ======================
cid_to_cluster = {}
# dobbiamo ricostruire l'indice per ogni cid in base a 'loc' coerente con labels
# creiamo una mappa name->idx dalle labels per associare i cluster
name_to_idx = {labels[i]: i for i in range(n_cards)}

for cid, info in cards_dict.items():
    name = info.get("name", cid)
    idx = name_to_idx.get(name, None)
    if idx is not None:
        cid_to_cluster[cid] = int(cluster_labels[idx])

OUT_CLUSTER_JSON.parent.mkdir(parents=True, exist_ok=True)
with open(OUT_CLUSTER_JSON, "w", encoding="utf-8") as f:
    json.dump(cid_to_cluster, f, indent=2, ensure_ascii=False)

print(f"\nSalvato mapping carta -> cluster in: {OUT_CLUSTER_JSON.resolve()}")

# ======================
# 8) Funzioni di supporto (facoltative) per classificare un deck
# ======================
def classify_deck_by_majority(deck_ids, cid2cluster: dict) -> int | None:
    cl = [cid2cluster[cid] for cid in deck_ids if cid in cid2cluster]
    if not cl:
        return None
    # cluster più frequente
    return max(set(cl), key=cl.count)

def deck_centroid(coords_3d: np.ndarray, card_indices: list[int]) -> np.ndarray:
    if not card_indices:
        return np.zeros(3)
    return coords_3d[card_indices].mean(axis=0)

# Esempio d'uso (commentato):
# my_deck_ids = ["26000021", "26000049", ...]  # 8 carte
# archetipo = classify_deck_by_majority(my_deck_ids, cid_to_cluster)
# print("Archetipo cluster:", archetipo)



=== Carte per cluster (per interpretare gli archetipi) ===

Cluster 0  (n=36):
P.E.K.K.A, Balloon, Witch, Barbarians, Valkyrie, Skeleton Army, Prince, Wizard
Mini P.E.K.K.A, Spear Goblins, Giant Skeleton, Minion Horde, Royal Giant, Guards, Lumberjack, Inferno Dragon
Electro Wizard, Elite Barbarians, Executioner, Bandit, Bats, Ram Rider, Mega Knight, Magic Archer
Golden Knight, Electro Giant, Rune Giant, Boss Bandit, Tombstone, Arrows, Freeze, Mirror
Lightning, Zap, Clone, Void

Cluster 1  (n=10):
Skeletons, Ice Spirit, Ice Golem, Mighty Miner, Electro Spirit, Cannon, X-Bow, Goblin Drill
Earthquake, Royal Delivery

Cluster 2  (n=29):
Knight, Archers, Goblins, Musketeer, Hog Rider, Princess, Fire Spirit, Miner
Dart Goblin, Goblin Gang, Hunter, Rascals, Skeleton Barrel, Wall Breakers, Firecracker, Archer Queen
Monk, Little Prince, Goblin Demolisher, Suspicious Bush, Goblinstein, Mortar, Inferno Tower, Bomb Tower
Tesla, Fireball, Rocket, Goblin Barrel, The Log

Cluster 3  (n=17):
Ice Wiza

In [15]:
from sklearn.cluster import KMeans
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import plotly.io as pio

# === Clustering come prima ===
N_DECK_CLUSTERS = 20
mask_valid = ~np.isnan(deck_points).any(axis=1)
deck_points_valid = deck_points[mask_valid]

kmeans_decks = KMeans(n_clusters=N_DECK_CLUSTERS, n_init="auto", random_state=RANDOM_STATE)
deck_clusters = np.full(len(decks), -1)
deck_clusters[mask_valid] = kmeans_decks.fit_predict(deck_points_valid)

df_out["deck_cluster"] = deck_clusters
df_out.to_csv(OUT_CSV, index=False)
print(f"Coordinate + cluster deck salvate in: {OUT_CSV.resolve()}")

# === Preparazione palette e mapping ===
palette = px.colors.qualitative.Set1
unique_clusters = sorted([c for c in np.unique(deck_clusters) if c != -1])
cluster_to_color = {c: palette[i % len(palette)] for i, c in enumerate(unique_clusters)}
cluster_to_color[-1] = "rgba(150,150,150,0.35)"

# === Crea figura e trace “sfondo carte” ===
fig2 = go.Figure()
fig2.add_trace(go.Scatter3d(
    x=coords[:, 0], y=coords[:, 1], z=coords[:, 2],
    mode="markers",
    marker=dict(size=3, opacity=0.08),
    showlegend=False,
    hoverinfo="skip",
    name="Carte"
))

# === Per ogni cluster, traccia i deck che appartengono a quel cluster ===
for cl in unique_clusters + [-1]:  # includi -1 se vuoi mostrare “non assegnati”
    mask = (deck_clusters == cl)
    if not mask.any():
        continue
    clr = cluster_to_color[cl]
    name = f"Cluster {cl}" if cl != -1 else "Non assegnati"
    fig2.add_trace(go.Scatter3d(
        x=deck_points[mask, 0],
        y=deck_points[mask, 1],
        z=deck_points[mask, 2],
        mode="markers",
        marker=dict(size=6, color=clr),
        text=[f"Deck {i}" for i in np.where(mask)[0]],
        hovertext=[deck_labels[i] for i in np.where(mask)[0]],
        hoverinfo="text",
        name=name
    ))

fig2.update_layout(
    title="Clustering dei deck nello spazio PCA (con legenda)",
    margin=dict(l=0, r=0, b=0, t=40),
    legend=dict(
        itemsizing="trace",
        bgcolor="rgba(255,255,255,0.8)"
    )
)
fig2.show()

# === Salva HTML ===
pio.write_html(fig2, OUT_HTML.with_name("decks_clusters_legend.html").as_posix(),
               include_plotlyjs="cdn", full_html=True)
print(f"Grafico cluster con legenda salvato in: {OUT_HTML.with_name('decks_clusters_legend.html').resolve()}")


Coordinate + cluster deck salvate in: /home/simonetto/project_royale/Royale_Predict/decks/deck_coords.csv


Grafico cluster con legenda salvato in: /home/simonetto/project_royale/Royale_Predict/decks/decks_clusters_legend.html


In [7]:
from sklearn.cluster import KMeans

# ======================
# 7) Clustering dei deck nello spazio PCA
# ======================
N_DECK_CLUSTERS = 6   # numero atteso di archetipi di deck (cambialo se vuoi)

# rimuovi eventuali deck senza coordinate valide (NaN)
mask_valid = ~np.isnan(deck_points).any(axis=1)
deck_points_valid = deck_points[mask_valid]

kmeans_decks = KMeans(n_clusters=N_DECK_CLUSTERS, n_init="auto", random_state=RANDOM_STATE)
deck_clusters = np.full(len(decks), -1)   # default -1 = non assegnato
deck_clusters[mask_valid] = kmeans_decks.fit_predict(deck_points_valid)

df_out["deck_cluster"] = deck_clusters
df_out.to_csv(OUT_CSV, index=False)
print(f"Coordinate + cluster deck salvate in: {OUT_CSV.resolve()}")

# ======================
# 8) Plot interattivo con cluster dei deck
# ======================
fig2 = go.Figure()

# sfondo: carte (trasparenti)
fig2.add_trace(go.Scatter3d(
    x=coords[:, 0], y=coords[:, 1], z=coords[:, 2],
    mode="markers",
    marker=dict(size=3, opacity=0.08),
    showlegend=False,
    hoverinfo="skip",
    name="Carte"
))

# deck colorati per cluster
fig2.add_trace(go.Scatter3d(
    x=deck_points[:, 0], y=deck_points[:, 1], z=deck_points[:, 2],
    mode="markers",
    marker=dict(size=6, color=deck_clusters, colorscale="Set1"),
    text=[f"Deck {i}" for i in range(len(decks))],
    hovertext=deck_labels,
    hoverinfo="text",
    name="Deck (cluster)"
))

fig2.update_layout(
    title="Clustering dei deck nello spazio PCA",
    margin=dict(l=0, r=0, b=0, t=40)
)

fig2.show()
pio.write_html(fig2, OUT_HTML.with_name("decks_clusters.html").as_posix(),
               include_plotlyjs="cdn", full_html=True)
print(f"Grafico cluster dei deck salvato in: {OUT_HTML.with_name('decks_clusters.html').resolve()}")


Coordinate + cluster deck salvate in: /home/simonetto/project_royale/Royale_Predict/decks/deck_coords.csv


ValueError: 
    Invalid value of type 'builtins.str' received for the 'colorscale' property of scatter3d.marker
        Received value: 'Set1'

    The 'colorscale' property is a colorscale and may be
    specified as:
      - A list of colors that will be spaced evenly to create the colorscale.
        Many predefined colorscale lists are included in the sequential, diverging,
        and cyclical modules in the plotly.colors package.
      - A list of 2-element lists where the first element is the
        normalized color level value (starting at 0 and ending at 1),
        and the second item is a valid color string.
        (e.g. [[0, 'green'], [0.5, 'red'], [1.0, 'rgb(0, 0, 255)']])
      - One of the following named colorscales:
            ['aggrnyl', 'agsunset', 'algae', 'amp', 'armyrose', 'balance',
             'blackbody', 'bluered', 'blues', 'blugrn', 'bluyl', 'brbg',
             'brwnyl', 'bugn', 'bupu', 'burg', 'burgyl', 'cividis', 'curl',
             'darkmint', 'deep', 'delta', 'dense', 'earth', 'edge', 'electric',
             'emrld', 'fall', 'geyser', 'gnbu', 'gray', 'greens', 'greys',
             'haline', 'hot', 'hsv', 'ice', 'icefire', 'inferno', 'jet',
             'magenta', 'magma', 'matter', 'mint', 'mrybm', 'mygbm', 'oranges',
             'orrd', 'oryel', 'oxy', 'peach', 'phase', 'picnic', 'pinkyl',
             'piyg', 'plasma', 'plotly3', 'portland', 'prgn', 'pubu', 'pubugn',
             'puor', 'purd', 'purp', 'purples', 'purpor', 'rainbow', 'rdbu',
             'rdgy', 'rdpu', 'rdylbu', 'rdylgn', 'redor', 'reds', 'solar',
             'spectral', 'speed', 'sunset', 'sunsetdark', 'teal', 'tealgrn',
             'tealrose', 'tempo', 'temps', 'thermal', 'tropic', 'turbid',
             'turbo', 'twilight', 'viridis', 'ylgn', 'ylgnbu', 'ylorbr',
             'ylorrd'].
        Appending '_r' to a named colorscale reverses it.


In [4]:
import plotly.io as pio

# salva in HTML
pio.write_html(fig, "cluster_viewer.html", include_plotlyjs="cdn", full_html=True)

print("Grafico salvato in cluster_viewer.html")


Grafico salvato in cluster_viewer.html
