In [None]:
pip install gradio pandas numpy matplotlib scikit-learn

Collecting gradio
  Downloading gradio-5.23.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 

In [None]:
!pip install gradio pandas numpy matplotlib scikit-learn




In [None]:
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from io import StringIO

def kmeans_clustering(file, n_clusters):
    """Cluster an uploaded CSV with K-Means and plot the first two numeric columns.

    Args:
        file: Value delivered by the ``gr.File`` input: either a path string
            or an object that exposes the path as ``.name``.
        n_clusters: Requested number of clusters (the slider value).

    Returns:
        tuple: ``(data.head(), plot_path)`` where the frame carries an added
        ``"Cluster"`` column and ``plot_path`` is ``"kmeans_plot.png"``, the
        scatter plot saved in the current working directory.

    Raises:
        gr.Error: If no file was uploaded, the CSV has fewer than 2 numerical
            columns, the numerical columns contain missing values, or there
            are fewer rows than requested clusters.
    """
    if file is None:
        raise gr.Error("Please upload a CSV file.")

    # Accept both a plain path and a wrapper object carrying the path in `.name`.
    data = pd.read_csv(getattr(file, "name", file))

    # Use only numerical columns for clustering
    numerical_data = data.select_dtypes(include=[np.number])

    # Raise instead of returning a lone string: the interface declares two
    # outputs, so a single return value cannot be mapped onto them.
    if numerical_data.shape[1] < 2:
        raise gr.Error("Dataset must have at least 2 numerical columns.")

    if numerical_data.isna().any().any():
        raise gr.Error("Numerical columns contain missing values; please clean the data first.")

    # Normalise the slider value to int, then make sure every cluster can
    # receive at least one row.
    n_clusters = int(n_clusters)
    n_rows = len(numerical_data)
    if n_rows < n_clusters:
        raise gr.Error(
            f"Dataset has only {n_rows} rows; choose at most {n_rows} clusters."
        )

    # Apply K-Means clustering (fixed seed so repeated runs give the same labels)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    clusters = kmeans.fit_predict(numerical_data)

    # Add cluster labels to the dataset
    data["Cluster"] = clusters

    # Plot the clustered data (first two features) on an explicit figure so
    # that exactly this figure is closed, even if drawing or saving fails.
    plot_path = "kmeans_plot.png"
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        ax.scatter(
            numerical_data.iloc[:, 0],
            numerical_data.iloc[:, 1],
            c=clusters,
            cmap='viridis',
            alpha=0.6,
        )
        # Centroid coordinates follow the column order of numerical_data, so
        # columns 0 and 1 match the two plotted features.
        ax.scatter(
            kmeans.cluster_centers_[:, 0],
            kmeans.cluster_centers_[:, 1],
            c='red',
            marker='x',
            label="Centroids",
        )
        ax.set_xlabel(numerical_data.columns[0])
        ax.set_ylabel(numerical_data.columns[1])
        ax.legend()
        ax.set_title("K-Means Clustering")
        fig.savefig(plot_path)
    finally:
        plt.close(fig)

    return data.head(), plot_path

# Gradio UI for the K-Means demo.
# Inputs: a CSV upload and an integer-stepped slider (2..10, default 3).
# Outputs: a table for the labelled sample and an image for the scatter plot.
iface = gr.Interface(
    title="K-Means Clustering with AI",
    description="Upload a CSV file, select the number of clusters, and visualize the clustering results.",
    fn=kmeans_clustering,
    inputs=[
        gr.File(label="Upload CSV File"),
        gr.Slider(label="Number of Clusters", minimum=2, maximum=10, step=1, value=3),
    ],
    outputs=[
        gr.DataFrame(label="Clustered Data Sample"),
        gr.Image(label="Cluster Visualization"),
    ],
)

# Start serving the app.
iface.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://6e0cc93e3766f78371.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
!pip install gradio pandas numpy matplotlib scipy scikit-learn




In [None]:
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy as sch
from sklearn.cluster import AgglomerativeClustering

def hierarchical_clustering(file, n_clusters, linkage):
    """Run agglomerative clustering on an uploaded CSV and draw two plots.

    Args:
        file: Value delivered by the ``gr.File`` input: either a path string
            or an object that exposes the path as ``.name``.
        n_clusters: Requested number of clusters (the slider value).
        linkage: Linkage method name; it is passed both to
            ``AgglomerativeClustering`` and to ``scipy``'s ``linkage``.

    Returns:
        tuple: ``(data.head(), dendrogram_path, scatter_path)`` where the frame
        carries an added ``"Cluster"`` column and the two paths are
        ``"dendrogram.png"`` and ``"hierarchical_clusters.png"``, both saved
        in the current working directory.

    Raises:
        gr.Error: If no file was uploaded, the CSV has fewer than 2 numerical
            columns, the numerical columns contain missing values, or there
            are fewer rows than requested clusters.
    """
    if file is None:
        raise gr.Error("Please upload a CSV file.")

    # Accept both a plain path and a wrapper object carrying the path in `.name`.
    data = pd.read_csv(getattr(file, "name", file))

    # Select only numerical columns
    numerical_data = data.select_dtypes(include=[np.number])

    # Raise instead of returning a lone string: the interface declares three
    # outputs, so a single return value cannot be mapped onto them.
    if numerical_data.shape[1] < 2:
        raise gr.Error("Dataset must have at least 2 numerical columns.")

    if numerical_data.isna().any().any():
        raise gr.Error("Numerical columns contain missing values; please clean the data first.")

    # Normalise the slider value to int, then make sure there are enough rows.
    n_clusters = int(n_clusters)
    n_rows = len(numerical_data)
    if n_rows < n_clusters:
        raise gr.Error(
            f"Dataset has only {n_rows} rows; choose at most {n_rows} clusters."
        )

    # Perform Hierarchical Clustering.
    # The old `affinity` keyword was removed from scikit-learn (replaced by
    # `metric`); Euclidean distance is the default, so it is simply omitted,
    # which works on both old and new releases.
    hc = AgglomerativeClustering(n_clusters=n_clusters, linkage=linkage)
    clusters = hc.fit_predict(numerical_data)

    # Add cluster labels to the dataset
    data["Cluster"] = clusters

    # Plot Dendrogram on an explicit figure so exactly this figure is closed,
    # even if drawing or saving fails.
    dendrogram_path = "dendrogram.png"
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        merge_tree = sch.linkage(numerical_data.to_numpy(dtype=float), method=linkage)
        sch.dendrogram(merge_tree, ax=ax)
        ax.set_title("Dendrogram")
        ax.set_xlabel("Data Points")
        ax.set_ylabel("Distance")
        fig.savefig(dendrogram_path)
    finally:
        plt.close(fig)

    # Scatter Plot (using first two features)
    scatter_path = "hierarchical_clusters.png"
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        ax.scatter(
            numerical_data.iloc[:, 0],
            numerical_data.iloc[:, 1],
            c=clusters,
            cmap='rainbow',
            alpha=0.6,
        )
        ax.set_xlabel(numerical_data.columns[0])
        ax.set_ylabel(numerical_data.columns[1])
        ax.set_title("Hierarchical Clustering")
        fig.savefig(scatter_path)
    finally:
        plt.close(fig)

    return data.head(), dendrogram_path, scatter_path

# Gradio UI for the hierarchical clustering demo.
# Inputs: a CSV upload, an integer-stepped slider (2..10, default 3) and a
# radio selector for the linkage method (default "ward").
# Outputs: a table for the labelled sample plus two images (dendrogram, scatter).
iface = gr.Interface(
    title="Hierarchical Clustering AI",
    description="Upload a CSV file, choose the number of clusters and linkage method, and visualize hierarchical clustering results.",
    fn=hierarchical_clustering,
    inputs=[
        gr.File(label="Upload CSV File"),
        gr.Slider(label="Number of Clusters", minimum=2, maximum=10, step=1, value=3),
        gr.Radio(label="Linkage Method", choices=["ward", "complete", "average", "single"], value="ward"),
    ],
    outputs=[
        gr.DataFrame(label="Clustered Data Sample"),
        gr.Image(label="Dendrogram"),
        gr.Image(label="Cluster Visualization"),
    ],
)

# Start serving the app.
iface.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f7819f8297d8411616.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
import pandas as pd

# Build a tiny two-feature table (five rows) to use as demo input.
data = dict(
    Feature1=[1.1, 3.3, 5.5, 7.7, 9.9],
    Feature2=[2.2, 4.4, 6.6, 8.8, 10.1],
)
df = pd.DataFrame.from_dict(data)

# Write it to the working directory without the index column.
df.to_csv("data.csv", index=False)

print("CSV file 'data.csv' created successfully!")


CSV file 'data.csv' created successfully!
