In [None]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.31.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.1 (from gradio)
  Downloading gradio_client-1.10.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.

In [None]:
import pandas as pd
import numpy as np
import gradio as gr
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.decomposition import PCA
from sklearn.datasets import make_blobs  # We'll use synthetic data if download fails

# Load data with fallback to synthetic data
def load_data():
    """Load the credit card dataset, falling back to synthetic data.

    First tries to read ``creditcard.csv`` from the public TensorFlow
    download bucket. If that raises for any reason, a synthetic frame is
    built instead: 1000 rows of 10 ``make_blobs`` features
    (``feature_0`` .. ``feature_9``) plus a random binary ``Class`` column.

    Returns:
        pandas.DataFrame: the real or synthetic data with every row that
        contains a missing value dropped.
    """
    try:
        # Try loading real credit card data
        url = "https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv"
        df = pd.read_csv(url)
        print("Successfully loaded real credit card data")
    except Exception as e:
        # Deliberately broad: any download/parse failure should degrade to
        # synthetic data rather than stop the notebook.
        print(f"Error loading real data: {e}\nUsing synthetic data instead")
        # Generate synthetic data if real data fails
        X, _ = make_blobs(n_samples=1000, centers=3, n_features=10, random_state=42)
        df = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(10)])
        # Seeded generator so the synthetic target is as reproducible as the
        # seeded blobs above (the global np.random state is left untouched).
        df['Class'] = np.random.default_rng(42).integers(0, 2, size=len(df))

    df = df.dropna()
    return df

# Load the dataset once at module level; analyze() draws its sample from this shared frame on every call.
df = load_data()

def analyze(n_clusters=3, method="kmeans", max_samples=1000, random_state=42):
    """Cluster a sample of the module-level ``df`` and return a two-panel figure.

    The sample is standardized, clustered on the scaled features, and
    projected to two PCA components for plotting.

    Args:
        n_clusters: Number of clusters to form. Coerced to ``int``.
        method: ``"kmeans"`` for K-Means; any other value selects
            hierarchical (Ward) clustering.
        max_samples: Upper bound on the number of rows sampled from ``df``.
            Coerced to ``int``.
        random_state: Seed for the row sample so repeated calls with the same
            arguments give the same figure. Pass ``None`` to draw a fresh
            random sample on every call.

    Returns:
        matplotlib.figure.Figure: for K-Means, a PCA scatter plus a text
        summary panel; otherwise a dendrogram plus a PCA scatter colored by
        agglomerative cluster labels.
    """
    # UI sliders may hand numeric values over as floats; DataFrame.sample and
    # the scikit-learn estimators require true integers.
    n_clusters = int(n_clusters)
    max_samples = int(max_samples)

    # Sample data for faster dendrogram rendering
    sample_df = df.sample(min(max_samples, len(df)), random_state=random_state)

    # Use all features except target (if exists)
    if 'Class' in sample_df.columns:
        X = sample_df.drop(columns=['Class'])
    else:
        X = sample_df

    # Scale data
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Reduce dimensions for visualization
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X_scaled)

    # Create figure
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

    if method == "kmeans":
        # K-Means clustering
        kmeans = KMeans(n_clusters=n_clusters, random_state=42)
        clusters = kmeans.fit_predict(X_scaled)

        # Scatter plot
        scatter = ax1.scatter(X_pca[:, 0], X_pca[:, 1], c=clusters, cmap='viridis')
        ax1.set_title(f'K-Means Clustering (K={n_clusters})')
        ax1.set_xlabel('PCA Component 1')
        ax1.set_ylabel('PCA Component 2')
        # fig.colorbar targets this figure explicitly instead of pyplot's
        # implicit "current figure", which another concurrent call could change.
        fig.colorbar(scatter, ax=ax1, label='Cluster')

        # Cluster info
        ax2.axis('off')
        ax2.text(0.5, 0.5,
                f"K-Means Results\n\nClusters: {n_clusters}\n"
                f"PCA Explained Variance: {pca.explained_variance_ratio_.sum():.2f}\n"
                f"Features used: {len(X.columns)}",
                ha='center', va='center')

    else:
        # Hierarchical clustering
        linked = linkage(X_scaled, 'ward')

        # Dendrogram
        dendrogram(linked, orientation='left', ax=ax1)
        ax1.set_title('Hierarchical Clustering Dendrogram')
        ax1.set_xlabel('Distance')
        ax1.set_ylabel('Samples')

        # PCA visualization with cluster colors
        clusters = AgglomerativeClustering(n_clusters=n_clusters).fit_predict(X_scaled)
        scatter = ax2.scatter(X_pca[:, 0], X_pca[:, 1], c=clusters, cmap='viridis')
        ax2.set_title(f'Agglomerative Clustering (K={n_clusters})')
        ax2.set_xlabel('PCA Component 1')
        ax2.set_ylabel('PCA Component 2')
        fig.colorbar(scatter, ax=ax2, label='Cluster')

    fig.tight_layout()
    # Deregister the figure from pyplot so repeated calls do not accumulate
    # open figures; the returned Figure object can still be rendered.
    plt.close(fig)
    return fig

# Gradio interface
with gr.Blocks(title="Credit Card Clustering") as demo:
    # Page heading and one-line description
    gr.Markdown("# Customer Segmentation Tool")
    gr.Markdown("Cluster users based on their financial behavior patterns")

    with gr.Row():
        # Left column: user controls
        with gr.Column():
            algorithm = gr.Radio(choices=["kmeans", "hierarchical"], value="kmeans", label="Clustering Algorithm")
            clusters = gr.Slider(minimum=2, maximum=10, step=1, value=3, label="Number of Clusters")
            samples = gr.Slider(minimum=100, maximum=2000, step=100, value=500, label="Max Samples (for performance)")
            submit_btn = gr.Button("Analyze")

        # Right column: rendered figure
        with gr.Column():
            plot = gr.Plot(label="Clustering Results")

    # Component order mirrors analyze(n_clusters, method, max_samples)
    analysis_inputs = [clusters, algorithm, samples]

    submit_btn.click(fn=analyze, inputs=analysis_inputs, outputs=plot)

    # Preset rows that fill the controls above when clicked
    gr.Examples(
        examples=[[3, "kmeans", 500], [4, "hierarchical", 800], [5, "kmeans", 1000]],
        inputs=analysis_inputs,
    )

# Start the app server for this notebook session
demo.launch()

Successfully loaded real credit card data
It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d0feef899634973194.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


