Unsupervised learning algorithms


K-means clustering

In [None]:
!pip install gradio


Collecting gradio
  Downloading gradio-5.25.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import gradio as gr
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_blobs

# Generate synthetic transaction data
def generate_data():
    """Return the standardized synthetic dataset used by the demo.

    Seeds NumPy's global RNG with 42, draws 300 points from three blobs
    (``cluster_std=1.0``, ``random_state=42``), discards the blob labels,
    and returns the points after fitting and applying a ``StandardScaler``.
    """
    np.random.seed(42)
    points, _blob_labels = make_blobs(
        n_samples=300,
        centers=3,
        cluster_std=1.0,
        random_state=42,
    )
    return StandardScaler().fit_transform(points)

def detect_anomalies(threshold_percentile):
    """Flag points far from their K-Means centroid and save a plot of them.

    Clusters the synthetic data into three groups, measures each point's
    Euclidean distance to its assigned centroid, and marks every point whose
    distance is strictly greater than the ``threshold_percentile``-th
    percentile of those distances as an anomaly.

    Args:
        threshold_percentile: Percentile of the distance distribution used as
            the cut-off, passed straight to ``np.percentile``. Roughly the
            top ``100 - threshold_percentile`` percent of points are flagged.

    Returns:
        The string ``"plot.png"``, the path the scatter plot was saved to.
    """
    data = generate_data()
    kmeans = KMeans(n_clusters=3, random_state=42)
    kmeans.fit(data)
    labels = kmeans.labels_
    centroids = kmeans.cluster_centers_

    # centroids[labels] lines every point up with its own cluster centre.
    distances = np.linalg.norm(data - centroids[labels], axis=1)
    threshold = np.percentile(distances, threshold_percentile)
    anomalies = data[distances > threshold]

    # Draw on an explicit figure and always close it: pyplot keeps every
    # figure it creates alive until closed, so each call would leak one.
    fig, ax = plt.subplots()
    try:
        ax.scatter(data[:, 0], data[:, 1], c=labels, cmap='viridis', alpha=0.5)
        ax.scatter(anomalies[:, 0], anomalies[:, 1], c='red', marker='x', label='Anomalies')
        ax.scatter(centroids[:, 0], centroids[:, 1], c='black', marker='o', s=200, label='Centroids')
        ax.legend()
        ax.set_title('K-Means Anomaly Detection')
        fig.savefig("plot.png")
    finally:
        plt.close(fig)
    return "plot.png"

# Wire detect_anomalies to a single percentile slider and serve the UI
demo = gr.Interface(
    title="K-Means Anomaly Detection",
    description="Adjust the threshold percentile to detect anomalies in synthetic transaction data.",
    fn=detect_anomalies,
    inputs=gr.Slider(
        minimum=90,
        maximum=99,
        step=1,
        label="Anomaly Threshold Percentile",
    ),
    outputs=gr.Image(),
)

demo.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://4bbf9f8b53d3db24e1.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Hierarchical clustering


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import gradio as gr
from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_blobs
from scipy.cluster.hierarchy import dendrogram, linkage

# Generate synthetic transaction data
def generate_data():
    """Produce the scaled three-blob dataset used by the clustering views.

    The global NumPy RNG is seeded with 42, then ``make_blobs`` generates
    300 samples around 3 centers (``cluster_std=1.0``, ``random_state=42``).
    The generated labels are dropped and the samples are returned after a
    ``StandardScaler`` has been fitted to them and applied.
    """
    np.random.seed(42)
    samples, _ignored_labels = make_blobs(
        n_samples=300,
        centers=3,
        cluster_std=1.0,
        random_state=42,
    )
    standardizer = StandardScaler()
    return standardizer.fit_transform(samples)

def hierarchical_clustering(n_clusters):
    """Cluster the synthetic data agglomeratively and save a scatter plot.

    Args:
        n_clusters: Number of clusters to form. Converted with ``int()``
            before use, so a whole-number float such as ``3.0`` is accepted.

    Returns:
        The string ``"plot.png"``, the path the scatter plot was saved to.
    """
    # Gradio documents the Slider input as a float, while scikit-learn
    # validates n_clusters as an integer; coerce once so both the model
    # and the plot title see an int.
    n_clusters = int(n_clusters)

    data = generate_data()
    model = AgglomerativeClustering(n_clusters=n_clusters)
    labels = model.fit_predict(data)

    # Draw on an explicit figure and always close it: pyplot keeps every
    # figure it creates alive until closed, so each call would leak one.
    fig, ax = plt.subplots()
    try:
        ax.scatter(data[:, 0], data[:, 1], c=labels, cmap='viridis', alpha=0.5)
        ax.set_title(f'Hierarchical Clustering (n_clusters={n_clusters})')
        fig.savefig("plot.png")
    finally:
        plt.close(fig)
    return "plot.png"

def plot_dendrogram():
    """Save a Ward-linkage dendrogram of the synthetic data.

    Returns:
        The string ``"dendrogram.png"``, the path the figure was saved to.
    """
    data = generate_data()
    linked = linkage(data, 'ward')

    # Draw on an explicit figure/axes and always close the figure: pyplot
    # keeps every figure it creates alive until closed, so each call would
    # otherwise leak one.
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        dendrogram(linked, ax=ax)
        ax.set_title("Dendrogram")
        ax.set_xlabel("Data Points")
        ax.set_ylabel("Distance")
        fig.savefig("dendrogram.png")
    finally:
        plt.close(fig)
    return "dendrogram.png"

# One interface per view: an interactive cluster plot and a static dendrogram
demo = gr.Interface(
    title="Hierarchical Clustering",
    description="Adjust the number of clusters to visualize hierarchical clustering.",
    fn=hierarchical_clustering,
    inputs=gr.Slider(
        minimum=2,
        maximum=10,
        step=1,
        label="Number of Clusters",
    ),
    outputs=gr.Image(),
)

demo2 = gr.Interface(
    title="Dendrogram",
    description="View the hierarchical clustering dendrogram.",
    fn=plot_dendrogram,
    inputs=[],
    outputs=gr.Image(),
)

# Present both interfaces as tabs of a single app and start serving it
gr.TabbedInterface(
    interface_list=[demo, demo2],
    tab_names=["Clustering", "Dendrogram"],
).launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://61aeabc7efad57f4b8.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


