In [None]:
!pip install plotly -q

import numpy as np
import requests
import plotly.graph_objects as go
from sklearn.datasets import make_blobs

def make_api_request(url, payload, error_message):
    """POST ``payload`` as JSON to ``url`` and return the decoded JSON reply.

    Args:
        url: Endpoint the POST request is sent to.
        payload: JSON-serialisable object sent as the request body.
        error_message: Prefix printed in front of the error text on failure.

    Returns:
        The decoded JSON response, or ``None`` when the request fails, the
        server answers with an HTTP error status, or the body is not valid
        JSON. Failures are reported with ``print`` rather than raised.
    """
    try:
        response = requests.post(url, json=payload, timeout=30)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # Older ``requests`` releases raise a plain ValueError subclass from
        # ``response.json()`` on a non-JSON body; RequestException alone would
        # let that escape instead of returning None.
        print(f"{error_message}: {e}")
        return None

def find_optimal_k(url, data, max_k=15, fallback_k=3):
    """Ask the clustering API at ``url`` which k it suggests for ``data``.

    The request body is ``{"Data": data.tolist(), "MaxK": max_k}``. A truthy
    reply is printed and returned unchanged; a falsy reply (including a failed
    request, which yields ``None``) makes the function return ``fallback_k``.
    """
    suggestion = make_api_request(
        url,
        {"Data": data.tolist(), "MaxK": max_k},
        "Error finding optimal k",
    )
    if not suggestion:
        return fallback_k
    print(f"API suggests optimal k = {suggestion}")
    return suggestion

def run_kmeans_clustering(url, data, k):
    """Run k-means through the API at ``url`` and return its result as arrays.

    The request body is ``{"Data": data.tolist(), "K": k}``.

    Returns:
        ``(centroids, assignments)`` as NumPy arrays built from the reply's
        ``'centroids'`` and ``'assignments'`` entries, or ``(None, None)`` when
        the reply is falsy (for example after a failed request).
    """
    reply = make_api_request(
        url,
        {"Data": data.tolist(), "K": k},
        "Error running k-means",
    )
    if not reply:
        return None, None
    centroids = np.array(reply['centroids'])
    assignments = np.array(reply['assignments'])
    return centroids, assignments

def add_data_points_trace(fig, data_points, assignments):
    """Add the data points to ``fig`` as a 3-D marker trace named 'Data Points'.

    Columns 0, 1 and 2 of ``data_points`` are used as x, y and z; the marker
    colour of each point is taken from ``assignments``.
    """
    xs = data_points[:, 0]
    ys = data_points[:, 1]
    zs = data_points[:, 2]
    marker_style = {
        'color': assignments,
        'size': 5,
        'colorscale': 'Viridis',
        'opacity': 0.8,
        'line': {'color': 'black', 'width': 1},
    }
    points_trace = go.Scatter3d(
        x=xs, y=ys, z=zs,
        mode='markers',
        name='Data Points',
        marker=marker_style,
    )
    fig.add_trace(points_trace)

def add_centroids_trace(fig, centroids):
    """Add the centroids to ``fig`` as a 3-D marker trace named 'Centroids'.

    Columns 0, 1 and 2 of ``centroids`` are used as x, y and z; markers are
    red 'x' symbols of size 10.
    """
    centroid_marker = {'color': 'red', 'size': 10, 'symbol': 'x'}
    centroid_trace = go.Scatter3d(
        x=centroids[:, 0],
        y=centroids[:, 1],
        z=centroids[:, 2],
        mode='markers',
        name='Centroids',
        marker=centroid_marker,
    )
    fig.add_trace(centroid_trace)

def update_figure_layout(fig, k, title):
    """Set the title, 3-D axis labels and margins on ``fig``.

    ``k`` is accepted for signature compatibility but is not used here.
    """
    axis_labels = {
        'xaxis_title': 'Feature 1',
        'yaxis_title': 'Feature 2',
        'zaxis_title': 'Feature 3',
    }
    # Only the top margin is non-zero, leaving room for the title.
    margins = {'l': 0, 'r': 0, 'b': 0, 't': 30}
    fig.update_layout(title=title, scene=axis_labels, margin=margins)

def plot_clustering_results(data_points, assignments, centroids, k, title):
    """Build a 3-D figure of the points and centroids, then display it.

    The data-point trace is added first and the centroid trace second; the
    layout is applied last, before ``show()`` is called.
    """
    figure = go.Figure()
    add_data_points_trace(fig=figure, data_points=data_points, assignments=assignments)
    add_centroids_trace(fig=figure, centroids=centroids)
    update_figure_layout(fig=figure, k=k, title=title)
    figure.show()

def generate_blob_data(samples=300, centers=10, features=3, random_state=42):
    """Generate blob data with ``make_blobs`` and return ``(X, Y_true)``.

    The arguments are forwarded to ``make_blobs`` as ``n_samples``,
    ``centers``, ``n_features`` and ``random_state`` respectively.
    """
    blob_options = {
        'n_samples': samples,
        'centers': centers,
        'n_features': features,
        'random_state': random_state,
    }
    points, labels = make_blobs(**blob_options)
    return points, labels

def initial_clustering(centers):
    """Generate blob data, plot it with its generated labels, and return the data.

    For every label in ``range(centers)`` the mean of the points carrying that
    label is computed; those means are plotted as the centroids.

    Returns:
        The generated data points (the labels are not returned).
    """
    points, true_labels = generate_blob_data(centers=centers)

    per_label_means = []
    for label in range(centers):
        members = points[true_labels == label]
        per_label_means.append(members.mean(axis=0))
    label_means = np.array(per_label_means)

    plot_title = f"Initial Clustering Results (k={centers})"
    plot_clustering_results(points, true_labels, label_means, centers, plot_title)
    return points

def run_clustering(centers=10, max_k=15,
                   api_base_url="https://YOUR_NGROK_STRING.ngrok-free.app/clustering"):
    """Generate blob data, query the clustering API, and plot its k-means result.

    Steps: plot the generated data with its generated labels, ask the
    ``find-optimal-k`` endpoint for k, run the ``run-kmeans`` endpoint with
    that k, and plot the returned centroids and assignments.

    Args:
        centers: Number of blob centres used to generate the data.
        max_k: Value sent as ``MaxK`` to the ``find-optimal-k`` endpoint.
        api_base_url: Base URL of the clustering API. The default is a
            placeholder; pass the real URL instead of editing this function.
            A trailing slash is tolerated.

    Returns:
        None. When the k-means call yields no result, the second plot is
        skipped.
    """
    # Normalise so that ".../clustering/" and ".../clustering" build the same URLs.
    base_url = api_base_url.rstrip("/")
    X_data = initial_clustering(centers)
    optimal_k = find_optimal_k(f"{base_url}/find-optimal-k", X_data, max_k)
    centroids, assignments = run_kmeans_clustering(f"{base_url}/run-kmeans", X_data, optimal_k)
    if centroids is None or assignments is None:
        return
    plot_clustering_results(X_data, assignments, centroids, optimal_k,
                            f"K-Means Clustering Results (k={optimal_k})")
def run_clustering_tests():
    """Run ``run_clustering`` once with defaults, then for a fixed list of cases.

    Each case is a ``(centers, max_k)`` pair, executed in the order listed.
    """
    run_clustering()
    scenarios = (
        (1, 15),
        (2, 15),
        (3, 15),
        (4, 15),
        (1, 1),
        (1, 2),
        (2, 2),
        (2, 3),
    )
    for centers, max_k in scenarios:
        run_clustering(centers, max_k)

In [None]:
# Default run (centers=10, max_k=15).
# NOTE: run-kmeans and find-optimal-k initialize their first centroid randomly,
# which may lead to unexpected results. If that happens, re-run the cell a few times.
run_clustering()

In [None]:
# Data generated with 1 blob centre; max_k=15 is sent to find-optimal-k.
run_clustering(1, 15)

In [None]:
# Data generated with 2 blob centres; max_k=15 is sent to find-optimal-k.
run_clustering(2, 15)

In [None]:
# Data generated with 3 blob centres; max_k=15 is sent to find-optimal-k.
run_clustering(3, 15)

In [7]:
# Data generated with 4 blob centres; max_k=15 is sent to find-optimal-k.
run_clustering(4, 15)

API suggests optimal k = 4


In [None]:
# Boundary case: 1 blob centre with max_k=1.
run_clustering(1, 1)

In [9]:
# 1 blob centre with max_k=2, i.e. max_k is larger than the generated centre count.
run_clustering(1, 2)

API suggests optimal k = 1


In [None]:
# 2 blob centres with max_k=2, i.e. max_k equals the generated centre count.
run_clustering(2, 2)

In [None]:
# 2 blob centres with max_k=3.
run_clustering(2, 3)