In [None]:
# ==========================================================
# FINAL K-MEANS CLUSTERING VISUALIZATION
# ==========================================================
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# ==========================================================
# PROVINCE DATA (38 INDONESIAN PROVINCES)
# ==========================================================
# Each row is (province name, IKU, IKA, IKTL). Rows are grouped by their
# assigned cluster; the cluster id is appended when the groups are flattened
# into `data` below.

# Cluster 2 - "Buruk" (16 provinces)
_cluster_2_rows = [
    ("DKI JAKARTA", 68.46, 40.76, 27.20),
    ("BANTEN", 66.67, 58.93, 43.10),
    ("JAWA BARAT", 81.39, 46.87, 42.00),
    ("JAWA TENGAH", 86.35, 51.23, 46.30),
    ("JAWA TIMUR", 84.73, 55.86, 49.70),
    ("BALI", 88.99, 55.17, 45.20),
    ("LAMPUNG", 88.04, 55.36, 37.50),
    ("KEP. BANGKA BELITUNG", 90.15, 57.43, 39.20),
    ("SUMATERA SELATAN", 87.76, 58.16, 44.10),
    ("SUMATERA UTARA", 90.90, 60.27, 49.80),
    ("RIAU", 90.89, 50.84, 51.30),
    ("JAMBI", 90.57, 46.06, 51.00),
    ("DI YOGYAKARTA", 89.01, 40.28, 48.70),
    ("KALIMANTAN SELATAN", 92.85, 55.64, 52.10),
    ("KEP. RIAU", 90.13, 54.93, 66.00),
    ("BENGKULU", 92.49, 48.98, 55.70),
]

# Cluster 1 - "Sedang" (13 provinces)
_cluster_1_rows = [
    ("ACEH", 90.94, 61.30, 76.50),
    ("SUMATERA BARAT", 90.53, 57.05, 67.00),
    ("NUSA TENGGARA BARAT", 90.21, 47.78, 66.60),
    ("NUSA TENGGARA TIMUR", 92.03, 54.65, 59.30),
    ("KALIMANTAN BARAT", 92.03, 52.97, 60.40),
    ("KALIMANTAN TENGAH", 91.47, 53.90, 75.50),
    ("SULAWESI UTARA", 93.52, 52.12, 62.30),
    ("SULAWESI SELATAN", 90.58, 57.95, 56.20),
    ("SULAWESI TENGGARA", 92.83, 61.28, 74.90),
    ("SULAWESI BARAT", 93.33, 58.82, 73.00),
    ("KALIMANTAN TIMUR", 89.64, 52.64, 83.60),
    ("GORONTALO", 94.43, 58.70, 80.20),
    ("SULAWESI TENGAH", 91.88, 63.63, 82.60),
]

# Cluster 0 - "Baik" (9 provinces)
_cluster_0_rows = [
    ("KALIMANTAN UTARA", 93.91, 55.93, 100.00),
    ("MALUKU", 92.47, 55.87, 90.70),
    ("MALUKU UTARA", 93.19, 61.05, 86.60),
    ("PAPUA BARAT", 96.22, 59.52, 100.00),
    ("PAPUA BARAT DAYA", 96.28, 55.61, 100.00),
    ("PAPUA", 95.87, 54.73, 100.00),
    ("PAPUA SELATAN", 95.44, 60.73, 93.20),
    ("PAPUA TENGAH", 95.33, 64.67, 100.00),
    ("PAPUA PEGUNUNGAN", 97.68, 63.18, 99.40),
]

# Flatten in the order 2, 1, 0 so the row order of `data` (and therefore of
# `df`) is cluster 2 first, then cluster 1, then cluster 0.
data = [
    (*indicators, cluster_id)
    for cluster_id, group_rows in (
        (2, _cluster_2_rows),
        (1, _cluster_1_rows),
        (0, _cluster_0_rows),
    )
    for indicators in group_rows
]

df = pd.DataFrame(data, columns=["Provinsi", "IKU", "IKA", "IKTL", "Cluster"])

# ==========================================================
# FINAL CENTROIDS (FROM THE EARLIER K-MEANS RUN)
# ==========================================================
# One row per cluster: numeric id, display label, and the centroid position
# on the three index axes.
# NOTE(review): these values are hard-coded and do not equal the per-cluster
# means of the province table above (e.g. cluster 2 has a mean IKTL of about
# 46.81 versus 41.42 here) - confirm that the centroids and the cluster
# assignments come from the same K-Means run.
centroid_columns = ["Cluster", "Label", "IKU", "IKA", "IKTL"]
centroids = pd.DataFrame(
    [
        (0, "Baik", 94.12, 59.16, 92.82),
        (1, "Sedang", 91.09, 54.93, 60.88),
        (2, "Buruk", 82.53, 52.27, 41.42),
    ],
    columns=centroid_columns,
)

# ==========================================================
# TEXT OUTPUT
# ==========================================================
# Console summary printed alongside the 3D figure. Cluster labels are read
# from `centroids` and member lists (and therefore sizes) from `df`, so the
# printed counts cannot drift from the data. An empty cluster prints a
# "0 provinsi" heading instead of raising.
CLUSTER_IDS = (0, 1, 2)

# Emoji swatch and colour word shown in the printed legend, keyed by cluster id.
legend_swatches = {0: ("🟢", "Hijau"), 1: ("🟠", "Oranye"), 2: ("🔴", "Merah")}

# Cluster id -> display label, taken from the centroid table.
cluster_labels = dict(
    zip(centroids["Cluster"].tolist(), centroids["Label"].tolist())
)

# Province names per cluster, filtered once and reused for counts and listing.
cluster_members = {
    cluster_id: df.loc[df["Cluster"] == cluster_id, "Provinsi"].tolist()
    for cluster_id in CLUSTER_IDS
}

print("=" * 60)
print("VISUALISASI 3D K-MEANS CLUSTERING")
print("=" * 60)

print("\n📊 Statistik Cluster:")
for cluster in CLUSTER_IDS:
    print(
        f"  Cluster {cluster} - {cluster_labels[cluster]}: "
        f"{len(cluster_members[cluster])} provinsi"
    )
print(f"\n  Total: {len(df)} provinsi")

print("\n📍 Centroid:")
print(centroids[["Cluster", "Label", "IKU", "IKA", "IKTL"]].to_string(index=False))

print("\n🎨 Legenda Warna:")
for cluster in CLUSTER_IDS:
    swatch, color_word = legend_swatches[cluster]
    print(
        f"  {swatch} Cluster {cluster} - {cluster_labels[cluster]} "
        f"({len(cluster_members[cluster])} provinsi) - {color_word}"
    )

print("\n📋 Daftar Provinsi per Cluster:")

for cluster in CLUSTER_IDS:
    provinces = cluster_members[cluster]
    print(
        f"\n  Cluster {cluster} - {cluster_labels[cluster]} "
        f"({len(provinces)} provinsi):"
    )
    # Numbering restarts at 1 inside every cluster.
    for i, prov in enumerate(provinces, 1):
        print(f"    {i}. {prov}")

# ==========================================================
# 3D VISUALIZATION
# ==========================================================
# Builds one interactive 3D scatter: province markers coloured by cluster,
# plus one larger diamond marker per centroid, then displays the figure.
colors = {0: "#10b981", 1: "#f59e0b", 2: "#ef4444"}  # green, orange, red
color_names = {0: "Baik (Hijau)", 1: "Sedang (Oranye)", 2: "Buruk (Merah)"}

# Hover card for province markers: bold name followed by the three indices.
# The empty <extra> tag suppresses the secondary hover box.
province_hover = (
    "<b>%{text}</b><br>IKU: %{x}<br>IKA: %{y}<br>IKTL: %{z}<extra></extra>"
)

fig = go.Figure()

# Province points: one trace per cluster so each gets its own colour and
# legend entry.
for cluster_id, hex_color in colors.items():
    members = df[df["Cluster"] == cluster_id]
    province_trace = go.Scatter3d(
        x=members["IKU"],
        y=members["IKA"],
        z=members["IKTL"],
        mode="markers",
        name=f"Cluster {cluster_id} - {color_names[cluster_id]}",
        marker={"size": 8, "color": hex_color, "opacity": 0.8},
        text=members["Provinsi"],
        hovertemplate=province_hover,
    )
    fig.add_trace(province_trace)

# Centroids: a single-point trace each, drawn as a black-outlined diamond in
# the colour of the cluster it belongs to.
for centroid in centroids.itertuples(index=False):
    centroid_marker = {
        "size": 14,
        "color": colors[centroid.Cluster],
        "symbol": "diamond",
        "line": {"color": "black", "width": 2},
    }
    fig.add_trace(
        go.Scatter3d(
            x=[centroid.IKU],
            y=[centroid.IKA],
            z=[centroid.IKTL],
            mode="markers",
            name=f"Centroid {int(centroid.Cluster)} - {centroid.Label}",
            marker=centroid_marker,
        )
    )

# Layout pieces are named separately so the update_layout call stays readable.
figure_title = {
    "text": "<b>Visualisasi 3D K-Means Clustering</b><br><sub>Analisis Provinsi Indonesia berdasarkan IKU, IKA, dan IKTL</sub>",
    "x": 0.5,  # centre the title horizontally
    "font": {"size": 20},
}
axis_titles = {
    "xaxis_title": "<b>IKU (Indeks Kualitas Udara)</b>",
    "yaxis_title": "<b>IKA (Indeks Kualitas Air)</b>",
    "zaxis_title": "<b>IKTL (Indeks Kualitas Tutupan Lahan)</b>",
}
legend_box = {
    "x": 0.02,
    "y": 0.98,
    "bgcolor": "rgba(255,255,255,0.8)",
    "bordercolor": "gray",
    "borderwidth": 1,
}

fig.update_layout(
    title=figure_title,
    scene=axis_titles,
    showlegend=True,
    legend=legend_box,
    width=1400,
    height=900,
    template="plotly_white",
)

fig.show()


VALIDASI CLUSTERING - JARAK KE CENTROID

⚠️  PROVINSI YANG MUNGKIN SALAH CLUSTER:

  SUMATERA UTARA:
    Cluster saat ini: 2
    Cluster terdekat: 1
    Jarak ke C0: 43.15
    Jarak ke C1: 12.30
    Jarak ke C2: 14.29

  RIAU:
    Cluster saat ini: 2
    Cluster terdekat: 1
    Jarak ke C0: 42.47
    Jarak ke C1: 10.42
    Jarak ke C2: 13.02

  JAMBI:
    Cluster saat ini: 2
    Cluster terdekat: 1
    Jarak ke C0: 43.97
    Jarak ke C1: 13.29
    Jarak ke C2: 13.96

  KALIMANTAN SELATAN:
    Cluster saat ini: 2
    Cluster terdekat: 1
    Jarak ke C0: 40.89
    Jarak ke C1: 8.98
    Jarak ke C2: 15.23

  KEP. RIAU:
    Cluster saat ini: 2
    Cluster terdekat: 1
    Jarak ke C0: 27.44
    Jarak ke C1: 5.21
    Jarak ke C2: 25.87

  BENGKULU:
    Cluster saat ini: 2
    Cluster terdekat: 1
    Jarak ke C0: 38.53
    Jarak ke C1: 8.01
    Jarak ke C2: 17.72

  ACEH:
    Cluster saat ini: 1
    Cluster terdekat: 0
    Jarak ke C0: 16.76
    Jarak ke C1: 16.87
    Jarak ke C2: 37.19

  KA


📁 Menyimpan visualisasi...
✅ Tersimpan: kmeans_3d_visualization.html
⚠️  Untuk export PNG, install: pip install kaleido

🎉 Selesai!
