#CLUSTER PLOT#

In [1]:
#%pip install umap-learn

In [2]:
import json
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import plotly_express as px 


# Load the opponent decks from disk. The encoding is pinned to UTF-8 (the
# encoding JSON is specified in) instead of relying on the platform default,
# which is not UTF-8 on every OS and would garble any non-ASCII text.
with open("opponent_decks.json", "r", encoding="utf-8") as f:
    data = json.load(f)

In [3]:
# Vocabulary: every card that appears in at least one deck, alphabetically.
all_cards = sorted({card for deck in data.values() for card in deck})

# One-hot encode the decks: one row per entry in `data`, one column per card,
# 1 when the card is in that deck and 0 otherwise.
rows = [
    {card: int(card in deck) for card in all_cards}
    for deck in data.values()
]

df = pd.DataFrame(rows)
print(df.head())

   Archer Queen  Archers  Arrows  Baby Dragon  Balloon  Bandit  \
0             0        0       0            1        0       0   
1             1        0       1            0        0       1   
2             0        0       1            0        0       0   
3             0        0       1            0        0       0   
4             0        0       1            0        0       0   

   Barbarian Barrel  Barbarians  Bats  Battle Healer  ...  Tornado  Valkyrie  \
0                 1           0     0              0  ...        1         0   
1                 0           0     1              0  ...        0         0   
2                 0           0     0              0  ...        0         0   
3                 0           0     0              0  ...        0         0   
4                 0           0     0              0  ...        0         0   

   Vines  Void  Wall Breakers  Witch  Wizard  X-Bow  Zap  Zappies  
0      0     0              0      0       0      0   

In [4]:
# Project the one-hot deck matrix onto its first two principal components.
#
# The fit uses the card columns only. `df` is extended in place by this and
# later cells (pc1/pc2, cluster, sorted_deck), so fitting on the whole frame
# would feed those derived columns back into the PCA as soon as this cell is
# re-run. Selecting `all_cards` keeps the cell idempotent and matches the
# feature set the KMeans cell clusters on.
#
# random_state only matters when scikit-learn picks a randomized solver; it is
# pinned so the projection is repeatable in that case too.
pca = PCA(n_components=2, random_state=42)
X_pca = pca.fit_transform(df[all_cards])
df["pc1"] = X_pca[:, 0]
df["pc2"] = X_pca[:, 1]

In [None]:
# Number of clusters -- adjust k to try a different fit.
k = 2
kmeans = KMeans(n_clusters=k, random_state=42)

# Cluster on the one-hot card columns; after fit(), labels_ holds the cluster
# assigned to each training row (the same array fit_predict() returns).
kmeans.fit(df[all_cards])
df["cluster"] = kmeans.labels_

In [6]:
from collections import Counter

# cluster id -> list of (card, number of decks in the cluster containing it),
# ordered from most to least frequent. Cards that never occur in the cluster
# are not listed.
cluster_profiles = {}

for cl in sorted(df["cluster"].unique()):
    members = df.loc[df["cluster"] == cl, all_cards]
    counter = Counter()

    # Tally every card flagged with 1, deck by deck.
    for _, deck_row in members.iterrows():
        counter.update(card for card in all_cards if deck_row[card] == 1)

    # most_common() with no argument returns all entries sorted by count.
    cluster_profiles[cl] = counter.most_common()

In [7]:
def sort_deck_by_cluster_frequency(row, cluster_id):
    """Return the cards of one deck, most cluster-typical first.

    Parameters
    ----------
    row : mapping indexed by card name
        One-hot deck; a card belongs to the deck when ``row[card] == 1``.
    cluster_id : hashable
        Key into the module-level ``cluster_profiles``.

    Returns
    -------
    list of str
        The deck's cards ordered by descending frequency within the cluster.
        Cards missing from the cluster profile count as frequency 0, and ties
        keep the order of ``all_cards``.
    """
    frequency = dict(cluster_profiles[cluster_id])
    deck_cards = [card for card in all_cards if row[card] == 1]
    # list.sort is stable, so equally frequent cards stay in all_cards order.
    deck_cards.sort(key=lambda card: frequency.get(card, 0), reverse=True)
    return deck_cards

# For every deck, store its cards ordered by how common they are in the
# cluster that deck was assigned to.
df["sorted_deck"] = df.apply(
    lambda deck_row: sort_deck_by_cluster_frequency(deck_row, deck_row["cluster"]),
    axis="columns",
)

In [13]:
# df["sorted_deck_str"] = df["sorted_deck"].apply(lambda L: ", ".join(L))

# fig = px.scatter(
#     df,
#     x="pc1",
#     y="pc2",
#     color="cluster",
#     title="Deck clusters (PCA + KMeans)",
#     hover_data={
#         "cluster": True,
#         "sorted_deck_str": True,
#         "pc1": True,
#         "pc2": True,
#         **{c: False for c in all_cards}
#     }
# )

# fig.show()

In [9]:
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import plotly.graph_objects as go

# Training set: the 2-D PCA coordinates are the features and the KMeans
# cluster ids are the labels.
X = df[["pc1", "pc2"]].to_numpy()
y = df["cluster"].to_numpy()

# 5-nearest-neighbour classifier fitted on the projected decks; the next cells
# use it to label every point of a grid over the PCA plane.
clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(X, y)

In [10]:
# Bounding box of the projected decks, padded by 0.5 on every side.
pad = 0.5
x_min, x_max = X[:, 0].min() - pad, X[:, 0].max() + pad
y_min, y_max = X[:, 1].min() - pad, X[:, 1].max() + pad

# 300 x 300 lattice covering the bounding box.
grid_x = np.linspace(x_min, x_max, 300)
grid_y = np.linspace(y_min, y_max, 300)
xx, yy = np.meshgrid(grid_x, grid_y)

# Classify every lattice point, then fold the flat predictions back into the
# lattice shape so they can be drawn as a contour map.
grid_points = np.column_stack([xx.ravel(), yy.ravel()])
Z = clf.predict(grid_points).reshape(xx.shape)

In [11]:
# Hover label for each deck: its cards joined into one comma-separated string.
# The column is built here because the scatter trace below reads it and no
# other active cell creates it; without this line a fresh
# "Restart & Run All" fails with a KeyError on "sorted_deck_str".
df["sorted_deck_str"] = df["sorted_deck"].apply(", ".join)

fig = go.Figure()

# Decision zones (cluster regions): the KNN prediction for every grid point,
# drawn as a translucent filled contour without contour lines.
fig.add_trace(
    go.Contour(
        x=np.linspace(x_min, x_max, 300),
        y=np.linspace(y_min, y_max, 300),
        z=Z,
        showscale=False,
        colorscale="Viridis",
        opacity=0.35,
        contours=dict(showlines=False)
    )
)

# Deck scatter: one marker per deck at its PCA coordinates, coloured by
# cluster with the same colorscale as the zones underneath.
fig.add_trace(
    go.Scatter(
        x=df["pc1"],
        y=df["pc2"],
        mode="markers",
        marker=dict(
            size=12,
            color=df["cluster"],
            colorscale="Viridis",
            line=dict(width=1, color="white")
        ),
        text=df["sorted_deck_str"],
        hovertemplate="<b>Cluster %{marker.color}</b><br>%{text}<extra></extra>"
    )
)

# Dark theme with a centred title.
fig.update_layout(
    title="Deck Classification Zones (PCA + KNN)",
    xaxis_title="PC1",
    yaxis_title="PC2",
    plot_bgcolor="#0a0f2c",
    paper_bgcolor="#0a0f2c",
    font=dict(color="white"),
    title_x=0.5
)

fig.show()


In [12]:
import json
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import umap.umap_ as umap
import plotly.express as px

# 1. Load decks
# UTF-8 is pinned so the file reads the same way on every platform.
with open("opponent_decks.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# One-hot encode the decks: one row per deck, one column per card.
# NOTE: this rebuilds `df` and `all_cards` from scratch, so the pc1/pc2/cluster
# columns added by the earlier cells are not present from here on.
all_cards = sorted({card for deck in data.values() for card in deck})
rows = []

for battle_time, deck in data.items():
    row = {card: 0 for card in all_cards}
    for c in deck:
        row[c] = 1
    rows.append(row)

df = pd.DataFrame(rows)

# 2. Deck–deck similarity
sim = cosine_similarity(df)

# Turn similarity into a distance matrix. Floating-point round-off can leave
# values a hair below 0 or a diagonal that is not exactly 0, so clamp to the
# valid cosine-distance range and zero the self-distances explicitly.
dist = np.clip(1.0 - sim, 0.0, 2.0)
np.fill_diagonal(dist, 0.0)

# 3. UMAP embedding
# UMAP is stochastic; random_state is fixed so the map is the same on every
# run (same seed as the KMeans cell).
embedding = umap.UMAP(
    n_neighbors=10,
    min_dist=0.1,
    metric="precomputed",
    random_state=42,
).fit_transform(dist)

df["dim1"] = embedding[:, 0]
df["dim2"] = embedding[:, 1]

# 4. Plot
fig = px.scatter(
    df,
    x="dim1",
    y="dim2",
    title="Deck similarity map (UMAP + cosine)",
)

# Same dark theme as the classification-zone figure.
fig.update_layout(
    plot_bgcolor="#0a0f2c",
    paper_bgcolor="#0a0f2c",
    font=dict(color="white"),
    title_x=0.5,
)

fig.show()



IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html


using precomputed metric; inverse_transform will be unavailable

