In [6]:
# ============================
# IMPORTAR LIBRERÍAS NECESARIAS
# ============================

import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model

In [7]:
# ============================
# DEFINE DATASET PATH
# ============================

# Root directory of the dataset. Later cells list its entries with os.listdir
# and read the images found inside each of its sub-directories.
path = '/kaggle/input/weather-dataset/dataset/'


In [8]:
# ============================
# LIST DATASET FOLDERS
# ============================

# Keep only real sub-directories so the reported count matches the message
# ("carpetas" = folders) even if stray files sit next to them, and sort the
# result because os.listdir returns entries in arbitrary order.
# NOTE: despite its name, `image_names` holds folder names, not image files;
# the name is kept so any later reference keeps working.
image_names = sorted(
    entry
    for entry in os.listdir(path)
    if os.path.isdir(os.path.join(path, entry))
)
print(f"Total de carpetas encontradas: {len(image_names)}")

Total de carpetas encontradas: 11


In [9]:
# ============================
# LOAD AND PREPROCESS IMAGES
# ============================

# Preprocessed image batches (one per file, each with a leading batch axis)
# and, in the same order, the path every batch was loaded from.
images = []
image_paths = []

# Walk every sub-folder of `path`. Both listings are sorted because os.listdir
# returns entries in arbitrary order, and the row order of `images` determines
# which index maps to which file in every later cell.
for folder in sorted(os.listdir(path)):
    folder_path = os.path.join(path, folder)

    # Ignore stray files sitting next to the class folders.
    if not os.path.isdir(folder_path):
        continue

    for img_file in sorted(os.listdir(folder_path)):
        img_path = os.path.join(folder_path, img_file)

        # Load only regular files; a nested directory would make load_img fail.
        if not os.path.isfile(img_path):
            continue

        # Resize to the 224x224 input used by VGG16, add the batch axis and
        # apply the VGG16-specific preprocessing.
        img = image.load_img(img_path, target_size=(224, 224))
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)
        img_array = preprocess_input(img_array)

        images.append(img_array)
        image_paths.append(img_path)  # keep the path aligned with the image row

# Stack the per-image batches into a single array along the batch axis.
images = np.vstack(images)

print(f"Total im√°genes cargadas: {len(images)}")

Total im√°genes cargadas: 6862


In [1]:
# ============================
# LOAD VGG16 MODEL
# ============================

# VGG16 with its ImageNet-pretrained weights. With weights=None the network is
# randomly initialised, so the extracted features would not reflect any learned
# visual structure. Loading 'imagenet' downloads the weights on first use; in
# an offline environment pass the path to a local weights file instead.
base_model = VGG16(weights='imagenet')

# Feature extractor: same input as VGG16, output taken from the 'fc1' layer
# instead of the final classification layer.
model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output)

NameError: name 'VGG16' is not defined

In [None]:
# ============================
# EXTRACT FEATURE VECTORS
# ============================

# Run the preprocessed images through the truncated VGG16 model in batches of
# 32; `features` holds the 'fc1' activations returned for them.
features = model.predict(images, batch_size=32, verbose=1)
print(f"Forma de los vectores de caracter√≠sticas: {features.shape}")

In [None]:
# ============================
# DIMENSIONALITY REDUCTION
# ============================

# Reduce the feature vectors to 50 principal components.
# random_state is fixed because PCA may pick the randomized SVD solver for
# data of this size; without a seed the projection (and therefore the
# clustering built on it) could change from run to run.
pca = PCA(n_components=50, random_state=42)
features_pca = pca.fit_transform(features)

print(f"Forma despu√©s de PCA: {features_pca.shape}")

In [None]:
# ============================
# APPLY K-MEANS CLUSTERING
# ============================

# Number of clusters; 11 matches the number of dataset folders reported when
# listing `path`.
n_clusters = 11

# Fit K-Means on the PCA-reduced features and get one label per image.
# n_init is set explicitly: its default differs between scikit-learn versions
# (and triggers a FutureWarning on some), so pinning it keeps the number of
# restarts, and hence the result, the same everywhere.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
clusters = kmeans.fit_predict(features_pca)

print(f"Etiquetas de clustering asignadas: {np.unique(clusters)}")

In [None]:
# ============================
# EVALUATE CLUSTERING QUALITY
# ============================

# Mean silhouette coefficient of the K-Means labels, computed on the same
# PCA-reduced features the clustering was fitted on.
silhouette_avg = silhouette_score(features_pca, clusters)
print(f"Coeficiente de silueta (Silhouette Score): {silhouette_avg:.4f}")

In [None]:
# ============================
# VISUALIZE CLUSTER EXAMPLES
# ============================

def show_cluster_examples(cluster_label, n_examples=5):
    """
    Show example images that were assigned to one cluster.

    Reads the notebook-level `clusters` (label per image) and `image_paths`
    (path per image, in the same order) and reloads the first matching
    images from disk for display.

    Args:
    - cluster_label: Label of the cluster to visualise.
    - n_examples: Maximum number of images to show; must be at least 1.

    Raises:
    - ValueError: If n_examples is smaller than 1.
    """
    # Validate first: zero would draw nothing and a negative value would slice
    # from the wrong end before failing inside matplotlib.
    if n_examples < 1:
        raise ValueError(f"n_examples must be >= 1, got {n_examples}")

    # Positions of the images whose label equals the requested cluster.
    indices = np.where(clusters == cluster_label)[0]
    selected = indices[:n_examples]

    # An empty or unknown cluster would otherwise render a blank figure.
    if selected.size == 0:
        print(f"No hay ejemplos para el cluster {cluster_label}.")
        return

    # One row with n_examples slots; slots left without an image stay blank.
    fig, axes = plt.subplots(1, n_examples, figsize=(15, 3), squeeze=False)
    for ax in axes[0]:
        ax.axis('off')

    for ax, idx in zip(axes[0], selected):
        # Reload from disk: `images` holds preprocessed arrays, not viewable pixels.
        img = image.load_img(image_paths[idx], target_size=(224, 224))
        ax.imshow(img)

    fig.suptitle(f"Ejemplos del Cluster {cluster_label}", fontsize=16)
    plt.show()

In [None]:
# ============================
# SHOW CLUSTER EXAMPLES
# ============================

# Change cluster_label to inspect other clusters (0, 1, 2, ...)
show_cluster_examples(cluster_label=1, n_examples=5)