# Análisis de Clustering: K-means y Jerárquico

Este notebook implementa dos modelos de clustering (agrupamiento) sobre el dataset *Mall Customer Segmentation*: **K-means** y **clustering jerárquico**. Se incluyen análisis exploratorio, entrenamiento, evaluación y visualización de resultados.

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster


In [None]:

# Read the Mall Customers CSV (path is relative to the notebook's working
# directory) and preview the first five rows.
df = pd.read_csv(filepath_or_buffer="Mall_Customers.csv")
df.head(n=5)


In [None]:

# Basic overview of the loaded frame: column names, dtypes, non-null counts
# and memory usage. Useful to confirm there are no missing values before
# scaling and clustering.
df.info()


In [None]:

# Build the feature matrix used by both clustering models and standardize it.
#
# 'CustomerID' and 'Gender' are treated as irrelevant for clustering here and
# must be present (a KeyError is raised otherwise, as before).
#
# Later cells append the label columns 'Cluster_KMeans' and 'Cluster_HC' to
# `df`. Without the second drop, re-running this cell after clustering would
# feed those labels into the scaler as if they were features. errors='ignore'
# is needed because the label columns do not exist yet on a fresh run.
df_clean = (
    df.drop(columns=['CustomerID', 'Gender'])
      .drop(columns=['Cluster_KMeans', 'Cluster_HC'], errors='ignore')
)

# Standardize every remaining column; `df_scaled` is a NumPy array with one
# row per customer.
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df_clean)


## Modelo 1: Clustering K-means

In [None]:

# Fit K-means with 5 clusters on the standardized features and store the
# resulting label of each customer in `df`.
#
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' in version 1.4 (and emits a FutureWarning in 1.2-1.3 when it is
# omitted), so leaving it implicit makes the result depend on the installed
# version. random_state keeps the centroid initialisation reproducible.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)
clusters_kmeans = kmeans.fit_predict(df_scaled)
df['Cluster_KMeans'] = clusters_kmeans


In [None]:

# Internal validation of the K-means partition, computed on the standardized
# features. Silhouette and Calinski-Harabasz: higher is better;
# Davies-Bouldin: lower is better.
kmeans_metrics = (
    ("Silhouette Score:", silhouette_score),
    ("Calinski-Harabasz Index:", calinski_harabasz_score),
    ("Davies-Bouldin Index:", davies_bouldin_score),
)
for metric_label, metric_fn in kmeans_metrics:
    print(metric_label, metric_fn(df_scaled, clusters_kmeans))


In [None]:

# Scatter plot of annual income vs. spending score (original, unscaled units),
# with points coloured by their K-means label.
fig_km, ax_km = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x='Annual Income (k$)',
    y='Spending Score (1-100)',
    hue='Cluster_KMeans',
    palette='viridis',
    ax=ax_km,
)
ax_km.set_title("K-means Clustering")
plt.show()


## Modelo 2: Clustering Jerárquico

In [None]:

# Agglomerative clustering with Ward linkage on the standardized features.
# `linked` is the linkage matrix; it is also what gets cut into flat clusters
# further down.
linked = linkage(df_scaled, method='ward')

# Draw the full merge tree on an explicit axes.
fig_dendro, ax_dendro = plt.subplots(figsize=(12, 6))
dendrogram(
    linked,
    orientation='top',
    distance_sort='descending',
    show_leaf_counts=False,
    ax=ax_dendro,
)
ax_dendro.set_title("Dendrograma")
ax_dendro.set_xlabel("Clientes")
ax_dendro.set_ylabel("Distancia Euclidiana")
plt.show()


In [None]:

# Cut the hierarchical tree into flat clusters: with criterion='maxclust',
# `t` is the maximum number of clusters to form (5 here). The resulting label
# of each customer is stored in `df`.
cluster_labels_hc = fcluster(linked, t=5, criterion='maxclust')
df['Cluster_HC'] = cluster_labels_hc


In [None]:

# Internal validation of the hierarchical partition, using the same three
# metrics and the same standardized features as for K-means so the two models
# can be compared directly.
hc_metrics = {
    "Silhouette Score (HC):": silhouette_score,
    "Calinski-Harabasz Index (HC):": calinski_harabasz_score,
    "Davies-Bouldin Index (HC):": davies_bouldin_score,
}
for metric_label, metric_fn in hc_metrics.items():
    print(metric_label, metric_fn(df_scaled, cluster_labels_hc))


In [None]:

# Scatter plot of annual income vs. spending score (original, unscaled units),
# with points coloured by their hierarchical-clustering label.
fig_hc, ax_hc = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x='Annual Income (k$)',
    y='Spending Score (1-100)',
    hue='Cluster_HC',
    palette='Set2',
    ax=ax_hc,
)
ax_hc.set_title("Clustering Jerárquico")
plt.show()
