<a href="https://colab.research.google.com/github/Naresh-0018-glitch/2023502009-generative-ai/blob/main/unsupervised_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required packages
!pip install -q gradio scikit-learn matplotlib pandas

import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import numpy as np

# Load synthetic Mall Customers dataset
def load_data(n_customers=200, seed=None):
    """Build a synthetic stand-in for the Mall Customers dataset.

    The table has one row per customer with a sequential ``CustomerID``,
    an alternating ``Gender`` (encoded Male=0, Female=1), and uniformly
    random integer ``Age`` (18-69), ``Annual Income (k$)`` (15-136) and
    ``Spending Score (1-100)`` (1-100) columns.

    Args:
        n_customers: Number of rows to generate. Defaults to 200.
        seed: Optional seed. When given, the random columns are drawn from
            a private generator so repeated calls return identical data.
            When ``None``, values come from NumPy's global random stream.

    Returns:
        A ``pandas.DataFrame`` with ``n_customers`` rows and five columns.

    Raises:
        ValueError: If ``n_customers`` is negative.
    """
    if n_customers < 0:
        raise ValueError("n_customers must be non-negative")

    # A private RandomState makes seeded runs reproducible without touching
    # the global stream; without a seed we keep drawing from the global
    # stream so the default call behaves as it always has.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Alternate Male/Female, starting with Male, for any row count.
    genders = ['Male', 'Female'] * (n_customers // 2) + ['Male'] * (n_customers % 2)

    data = {
        'CustomerID': range(1, n_customers + 1),
        'Gender': genders,
        'Age': rng.randint(18, 70, size=n_customers),
        'Annual Income (k$)': rng.randint(15, 137, size=n_customers),
        'Spending Score (1-100)': rng.randint(1, 101, size=n_customers),
    }
    customers = pd.DataFrame(data)
    # Encode gender numerically so it can be used as a clustering feature.
    customers['Gender'] = customers['Gender'].map({'Male': 0, 'Female': 1})
    return customers

# Generate the customer table once at module level; kmeans_cluster reads this
# shared frame, so every call clusters the same rows.
df = load_data()

def kmeans_cluster(n_clusters):
    """Cluster the module-level customer table with K-Means and summarise it.

    The four feature columns (gender, age, income, spending score) are
    standardised, clustered with K-Means, and projected onto two principal
    components for a scatter plot coloured by cluster label.

    Args:
        n_clusters: Number of clusters to fit. Coerced to ``int`` because a
            UI slider may hand the value over as a float (e.g. ``3.0``),
            which scikit-learn's parameter validation can reject.

    Returns:
        A ``(cluster_summary, fig)`` tuple: ``cluster_summary`` is a
        DataFrame with one row per cluster (``Cluster``, ``Age Mean``,
        ``Count``, ``Income Mean``, ``Spending Score Mean``; means rounded
        to one decimal) and ``fig`` is the matplotlib figure of the PCA
        scatter plot.
    """
    n_clusters = int(n_clusters)

    features = df[['Gender', 'Age', 'Annual Income (k$)', 'Spending Score (1-100)']]

    # Standardise so no single feature dominates the Euclidean distances.
    X_scaled = StandardScaler().fit_transform(features)

    # n_init is pinned explicitly: its default differs between scikit-learn
    # releases, and leaving it unset triggers a FutureWarning on some of them.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
    clusters = kmeans.fit_predict(X_scaled)

    # PCA for visualization
    components = PCA(n_components=2).fit_transform(X_scaled)

    fig, ax = plt.subplots(figsize=(7, 5))
    scatter = ax.scatter(components[:, 0], components[:, 1], c=clusters, cmap='tab10')
    # ax.legend() already attaches the legend to the axes; no add_artist needed
    # since there is only one legend.
    ax.legend(*scatter.legend_elements(), title="Clusters")
    ax.set_xlabel("PCA Component 1")
    ax.set_ylabel("PCA Component 2")
    ax.set_title(f"K-Means Clustering with {n_clusters} Clusters")
    # Lay out this specific figure rather than pyplot's implicit "current" one.
    fig.tight_layout()
    # Deregister the figure from pyplot so repeated calls do not pile up open
    # figures; the Figure object itself stays valid and can still be rendered.
    plt.close(fig)

    # Named aggregation ties each output column to its source explicitly
    # instead of relying on positional renaming.
    cluster_summary = (
        df.assign(Cluster=clusters)
        .groupby('Cluster')
        .agg(**{
            'Age Mean': ('Age', 'mean'),
            'Count': ('Age', 'count'),
            'Income Mean': ('Annual Income (k$)', 'mean'),
            'Spending Score Mean': ('Spending Score (1-100)', 'mean'),
        })
        .round(1)
        .reset_index()
    )

    return cluster_summary, fig

# Input widget: integer steps from 2 to 10 clusters.
cluster_count_slider = gr.Slider(2, 10, step=1, label="Number of Clusters")

# Output widgets, in the same order kmeans_cluster returns its values.
summary_table = gr.Dataframe(label="Cluster Summary")
cluster_plot = gr.Plot(label="Cluster Visualization")

iface = gr.Interface(
    fn=kmeans_cluster,
    inputs=cluster_count_slider,
    outputs=[summary_table, cluster_plot],
    title="Customer Segmentation with K-Means Clustering",
    description="Choose the number of clusters to segment customers based on demographics and spending.",
)

# share=True requests a public gradio.live URL; debug=True keeps the cell
# running so errors and logs stay visible in the notebook.
iface.launch(share=True, debug=True)

   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 54.2/54.2 MB 6.7 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 323.1/323.1 kB 8.4 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 95.2/95.2 kB 3.6 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 11.5/11.5 MB 33.5 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 72.0/72.0 kB 3.4 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 62.5/62.5 kB 4.5 MB/s eta 0:00:00
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://c54035da4f5f7f5ab0.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, ru