In [5]:
import numpy as np
import pandas as pd
import plotly.express as px

# -----------------------------
# Create Dummy Data
# -----------------------------

# Fixed seed so the dummy dataset is identical on every run.
np.random.seed(42)

classes = ["B", "C", "M", "NF", "X"]
# 50 distinct region ids sampled without replacement from 4200..5199.
active_regions = np.random.choice(range(4200, 5200), size=50, replace=False)

# Half-open [low, high) bounds of the uniform integer draw for each class
# that gets a plain random count. "X" is handled separately below.
count_ranges = {
    "B": (0, 40),
    "C": (20, 180),
    "M": (0, 70),
    "NF": (120, 240),
}

# One [class, region, count] record per (region, class) pair.
data = []
for ar in active_regions:
    for cls in classes:
        bounds = count_ranges.get(cls)
        if bounds is not None:
            count = np.random.randint(*bounds)
        elif cls == "X":
            # Rare X events: nine of the ten candidates are 0, the tenth is a
            # random magnitude in 5..99, and one candidate is picked uniformly.
            magnitude = np.random.randint(5, 100)
            count = np.random.choice([0] * 9 + [magnitude])

        data.append([cls, ar, count])

# Long-format table: one row per (Class, ActiveRegion) with its Count.
df = pd.DataFrame(data, columns=["Class", "ActiveRegion", "Count"])

# -----------------------------
# Pivot for Heatmap
# -----------------------------

# Wide Class x ActiveRegion matrix for the heatmap.
# - aggfunc="sum": pivot_table defaults to the mean, which would silently
#   average duplicate (Class, ActiveRegion) rows and return floats; counts are
#   additive, so duplicates are summed instead.
# - fill_value on reindex: a class with no rows at all becomes a row of zeros
#   rather than a row of NaN (which would render as a blank stripe).
pivot = (
    df.pivot_table(
        index="Class",
        columns="ActiveRegion",
        values="Count",
        aggfunc="sum",
        fill_value=0,
    )
    .reindex(["B", "C", "M", "NF", "X"], fill_value=0)
)

# -----------------------------
# Plotly Heatmap
# -----------------------------

# Heatmap: one column per active region, one row per class, colour = count.
fig = px.imshow(
    pivot.values,
    x=pivot.columns.astype(str),
    y=pivot.index,
    color_continuous_scale="Viridis",
    aspect="auto",
    labels=dict(x="Active Region (Top 50)", y="Class", color="Count"),
    title="Class vs Active Region (Top 50 Most Active)",
)

fig.update_layout(
    # The region ids are numeric-looking strings, which Plotly would otherwise
    # auto-detect as numbers and place on a linear axis (cells positioned by id
    # value, uneven widths, generic ticks). Forcing a category axis gives one
    # equally wide, individually labelled column per region.
    xaxis=dict(type="category", tickangle=-90),
    # Extra bottom margin leaves room for the vertical tick labels.
    margin=dict(l=80, r=40, t=60, b=120),
)

fig.show()

In [7]:
import plotly.io as pio

# Export the figure currently bound to `fig` (i.e. whichever plotting cell ran
# last) to testfig.html. full_html=False requests an embeddable fragment
# instead of a complete standalone HTML document.
pio.write_html(fig, file="testfig.html", full_html=False)

In [4]:
import numpy as np
import pandas as pd
import plotly.express as px

from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Fixed seed so the synthetic clusters are reproducible.
np.random.seed(7)

# -----------------------------
# 1) Create clustered dummy data
# -----------------------------
classes = ["B", "C", "M", "NF", "X"]
class_order = ["B", "C", "M", "NF", "X"]

n_regions = 50
n_true_clusters = 5
regions_per_cluster = n_regions // n_true_clusters

# Distinct region ids from 4200..5199, shuffled so that cluster membership is
# unrelated to the numeric id.
base_ids = np.random.choice(np.arange(4200, 5200), size=n_regions, replace=False)
np.random.shuffle(base_ids)

# Expected per-class count for each ground-truth cluster.
cluster_profiles = {
    0: {"B": 10, "C": 40,  "M": 5,  "NF": 160, "X": 1},  # NF heavy
    1: {"B": 5,  "C": 120, "M": 25, "NF": 90,  "X": 2},  # C + some M
    2: {"B": 25, "C": 60,  "M": 40, "NF": 110, "X": 6},  # M/X spikier
    3: {"B": 40, "C": 30,  "M": 3,  "NF": 70,  "X": 0},  # quieter
    4: {"B": 2,  "C": 20,  "M": 10, "NF": 210, "X": 3},  # very NF heavy
}

rows = []
for k in range(n_true_clusters):
    # Consecutive slices of the shuffled ids form the true clusters.
    first = k * regions_per_cluster
    cluster_region_ids = base_ids[first:first + regions_per_cluster]
    prof = cluster_profiles[k]
    for ar in cluster_region_ids:
        for cls in classes:
            lam = prof[cls]
            # Poisson rate is floored at 1, so a profile value of 0 is still
            # drawn with rate 1.
            rate = max(lam, 1)
            # X is forced to 0 with probability 0.8; the uniform draw only
            # happens for X, every other class goes straight to Poisson.
            if cls == "X" and np.random.rand() < 0.8:
                count = 0
            else:
                count = np.random.poisson(rate)
            # Integer jitter in -3..3 is applied to every count (including a
            # zeroed X), then the result is clipped at 0.
            jitter = np.random.randint(-3, 4)
            count = int(max(0, count + jitter))
            rows.append([cls, ar, count])

# Long-format table: one row per (Class, ActiveRegion) with its Count.
df = pd.DataFrame(rows, columns=["Class", "ActiveRegion", "Count"])

# -----------------------------
# 2) Build "region vectors" and run K-means
#    Each region = [B, C, M, NF, X]
# -----------------------------
# One row per active region, one column per class: the feature vectors that
# K-means clusters.
# - aggfunc="sum": pivot_table defaults to the mean, which would silently
#   average duplicate (ActiveRegion, Class) rows; counts are additive.
# - fill_value on reindex: a class missing from df becomes a column of zeros
#   instead of NaN, which would otherwise propagate into scaling/clustering.
region_matrix = (
    df.pivot_table(
        index="ActiveRegion",
        columns="Class",
        values="Count",
        aggfunc="sum",
        fill_value=0,
    )
    .reindex(columns=class_order, fill_value=0)
)

# Scale features so NF doesn't dominate purely by magnitude
X = StandardScaler().fit_transform(region_matrix.values)

k = 5  # choose the number of clusters you want KMeans to find
# random_state pins the initialisation so the assignment is reproducible.
kmeans = KMeans(n_clusters=k, n_init=20, random_state=7)
labels = kmeans.fit_predict(X)

# `labels` is aligned with the rows of region_matrix, so pairing it with the
# matrix index gives the region -> cluster mapping. Sorting groups regions of
# the same cluster together (ascending id within each cluster); the original
# positional index is kept.
region_clusters = pd.DataFrame({
    "ActiveRegion": region_matrix.index.astype(int),
    "KMeansCluster": labels
}).sort_values(["KMeansCluster", "ActiveRegion"])

# Order regions by discovered clusters (and ID within cluster)
region_order = region_clusters["ActiveRegion"].tolist()

# -----------------------------
# 3) Heatmap reordered by K-means clusters
# -----------------------------
# Wide Class x ActiveRegion matrix with columns arranged in K-means order.
# - aggfunc="sum": pivot_table defaults to the mean, which would silently
#   average duplicate (Class, ActiveRegion) rows; counts are additive.
# - fill_value on the row reindex: a class with no rows becomes zeros rather
#   than a NaN row.
pivot = (
    df.pivot_table(
        index="Class",
        columns="ActiveRegion",
        values="Count",
        aggfunc="sum",
        fill_value=0,
    )
    .reindex(class_order, fill_value=0)
    .reindex(columns=region_order)
)

# Heatmap with columns in the order held by `pivot` (regions grouped by
# K-means cluster), rows = classes, colour = count.
fig = px.imshow(
    pivot.values,
    x=[str(c) for c in pivot.columns],
    y=pivot.index,
    color_continuous_scale="Viridis",
    aspect="auto",
    labels=dict(x="Active Region (ordered by K-means)", y="Class", color="Count"),
    title=f"Class vs Active Region — Ordered by K-means (k={k})",
)

fig.update_layout(
    # The region ids are numeric-looking strings, which Plotly would otherwise
    # auto-detect as numbers and lay out on a linear axis by id value, throwing
    # away the cluster ordering. A category axis keeps the columns exactly in
    # the order supplied and labels each one.
    xaxis=dict(type="category", tickangle=-90),
    # Extra bottom margin leaves room for the vertical tick labels.
    margin=dict(l=80, r=40, t=60, b=130),
)

fig.show()

# Optional: inspect cluster membership
print(region_clusters.head(15))

    ActiveRegion  KMeansCluster
3           4260              0
7           4325              0
9           4370              0
12          4471              0
15          4516              0
17          4539              0
19          4626              0
32          4915              0
36          4976              0
43          5086              0
44          5089              0
2           4225              1
13          4477              1
22          4687              1
24          4769              1
