nnnn

In [None]:
# ============================================================
# Clasificación de Pingüinos Antárticos - KNN desde cero
# ============================================================

import numpy as np
import pandas as pd
from abc import ABC, abstractmethod
from collections import Counter
import seaborn as sns

# ============================================================
# 1. Clase abstracta KNNBase
# ============================================================

class KNNBase(ABC):
    """Abstract interface that every KNN implementation must provide.

    Subclasses have to override ``fit``, ``distance`` and ``predict``;
    the class cannot be instantiated until all three are implemented.
    """

    @abstractmethod
    def fit(self, X, y):
        """Store or learn from the training samples ``X`` and labels ``y``."""

    @abstractmethod
    def distance(self, p1, p2):
        """Return the distance between the two points ``p1`` and ``p2``."""

    @abstractmethod
    def predict(self, X_new, k=3):
        """Predict a label for each sample in ``X_new`` using ``k`` neighbours."""

# ============================================================
# 2. Clase concreta KNNClassifier
# ============================================================

class KNNClassifier(KNNBase):
    """k-nearest-neighbours classifier with Euclidean distance and majority vote.

    The training samples and labels are kept in name-mangled private
    attributes and exposed through explicit getter/setter methods.
    ``==`` compares the stored training data of two classifiers and ``+``
    builds a new classifier trained on the concatenation of both data sets.
    """

    # Defining __eq__ already makes instances unhashable; state it explicitly
    # so the mutable classifier is never used as a dict key or set member.
    __hash__ = None

    def __init__(self):
        """Create an unfitted classifier (no training data stored)."""
        self.__X_train = None
        self.__y_train = None

    # -------------------------
    # Encapsulation
    # -------------------------
    def get_X_train(self):
        """Return the stored training samples, or ``None`` if unfitted."""
        return self.__X_train

    def get_y_train(self):
        """Return the stored training labels, or ``None`` if unfitted."""
        return self.__y_train

    def set_X_train(self, X):
        """Replace the stored training samples with ``X`` (stored as given)."""
        self.__X_train = X

    def set_y_train(self, y):
        """Replace the stored training labels with ``y`` (stored as given)."""
        self.__y_train = y

    # -------------------------
    # Required methods
    # -------------------------
    def fit(self, X, y):
        """Store copies of the training samples and their labels.

        Args:
            X: Array-like of training samples, one sample per row.
            y: Array-like of labels, one per sample in ``X``.

        Raises:
            ValueError: If ``X`` and ``y`` do not contain the same number
                of entries.
        """
        X_arr = np.array(X)
        y_arr = np.array(y)
        # Validate before assigning so a failed fit leaves the model untouched.
        if X_arr.shape[:1] != y_arr.shape[:1]:
            raise ValueError(
                "X e y deben tener el mismo número de muestras "
                f"({X_arr.shape[:1]} != {y_arr.shape[:1]})."
            )
        self.__X_train = X_arr
        self.__y_train = y_arr

    def distance(self, p1, p2):
        """Return the Euclidean distance between points ``p1`` and ``p2``."""
        return np.sqrt(np.sum((p1 - p2) ** 2))

    def predict(self, X_new, k=3):
        """Predict the class of every sample in ``X_new``.

        For each sample the distance to every training point is computed
        with ``self.distance``, the ``k`` closest points are selected and
        the most frequent label among them is returned. When several labels
        are tied, the one whose first occurrence is closest wins. If ``k``
        exceeds the number of training points, all of them vote.

        Args:
            X_new: Array-like of samples to classify, one sample per row.
            k: Number of neighbours that vote; must be at least 1.

        Returns:
            A list with one predicted label per sample in ``X_new``.

        Raises:
            ValueError: If the model has no training data or ``k < 1``.
        """
        if self.__X_train is None or self.__y_train is None:
            raise ValueError(
                "El modelo no ha sido entrenado; llame a fit() antes de predict()."
            )
        if len(self.__X_train) == 0:
            raise ValueError("El conjunto de entrenamiento está vacío.")
        # A negative k would silently slice from the end ([:-1] keeps n-1
        # neighbours) and k == 0 would leave no voters at all.
        if k < 1:
            raise ValueError(f"k debe ser mayor o igual a 1 (se recibió {k}).")

        X_new = np.array(X_new)
        predictions = []

        for x in X_new:
            # Distance from this sample to every training point.
            distances = [self.distance(x, x_train) for x_train in self.__X_train]

            # A stable sort keeps equidistant neighbours in training order,
            # which makes the selected neighbours deterministic.
            k_idx = np.argsort(distances, kind="stable")[:k]

            # Labels of the selected neighbours, nearest first.
            k_labels = [self.__y_train[i] for i in k_idx]

            # Majority vote.
            predictions.append(Counter(k_labels).most_common(1)[0][0])

        return predictions

    # -------------------------
    # Operator overloading
    # -------------------------
    def __eq__(self, other):
        """Return True when both classifiers store identical training data.

        Returns ``NotImplemented`` for operands that are not
        ``KNNClassifier`` instances so Python can try the reflected
        comparison; ``clf == other_type`` still evaluates to ``False``.
        """
        if not isinstance(other, KNNClassifier):
            return NotImplemented
        return np.array_equal(self.__X_train, other.get_X_train()) and np.array_equal(
            self.__y_train, other.get_y_train()
        )

    def __add__(self, other):
        """Return a new classifier fitted on both training sets combined.

        Raises:
            TypeError: If ``other`` is not a ``KNNClassifier``.
            ValueError: If either classifier has no training data.
        """
        if not isinstance(other, KNNClassifier):
            raise TypeError("Solo se pueden combinar objetos KNNClassifier.")

        own = (self.__X_train, self.__y_train)
        theirs = (other.get_X_train(), other.get_y_train())
        # np.vstack/hstack would happily stack None into an object array and
        # produce a meaningless model, so reject unfitted operands up front.
        if any(part is None for part in own + theirs):
            raise ValueError(
                "Ambos modelos deben estar entrenados antes de combinarlos."
            )

        X_combined = np.vstack((own[0], theirs[0]))
        y_combined = np.hstack((own[1], theirs[1]))

        new_model = KNNClassifier()
        new_model.fit(X_combined, y_combined)
        return new_model

    def __repr__(self):
        """Return a short summary: fitted flag and number of stored samples."""
        fitted = self.__X_train is not None
        n_samples = len(self.__X_train) if fitted else 0
        return f"KNNClassifier(entrenado={fitted}, muestras={n_samples})"

# ============================================================
# 3. Cargar y preparar el dataset de pingüinos
# ============================================================

from sklearn.model_selection import train_test_split

# Load the penguins dataset and drop every row with a missing value.
penguins = sns.load_dataset("penguins").dropna()

# Numeric measurements used as features; the species is the target label.
# NOTE(review): the features are used unscaled, so body_mass_g (largest
# numeric range) dominates the Euclidean distance — consider standardising.
features = ["bill_length_mm", "bill_depth_mm", "flipper_length_mm", "body_mass_g"]
X = penguins[features].values
y = penguins["species"].values

# Hold out 20% of the rows for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ============================================================
# 4. Train the model and try several values of k
# ============================================================

knn = KNNClassifier()
knn.fit(X_train, y_train)

k_values = [1, 3, 5]

# Number of test samples to display. The example ids are derived from the
# slice actually taken, so a test set shorter than this still works.
N_EXAMPLES = 10
X_sample = X_test[:N_EXAMPLES]
example_ids = range(1, len(X_sample) + 1)

results = []
for k in k_values:
    preds = knn.predict(X_sample, k=k)
    results.append(pd.DataFrame({
        "Ejemplo": example_ids,
        f"Predicción (k={k})": preds,
    }))

# Join the per-k tables side by side on the example id so that "Ejemplo"
# appears only once instead of being repeated for every value of k.
tabla_resultados = pd.concat(
    [tabla.set_index("Ejemplo") for tabla in results], axis=1
).reset_index()
tabla_resultados


Unnamed: 0,Ejemplo,Predicción (k=1),Ejemplo.1,Predicción (k=3),Ejemplo.2,Predicción (k=5)
0,1,Chinstrap,1,Chinstrap,1,Chinstrap
1,2,Gentoo,2,Gentoo,2,Gentoo
2,3,Adelie,3,Adelie,3,Adelie
3,4,Chinstrap,4,Chinstrap,4,Chinstrap
4,5,Adelie,5,Chinstrap,5,Chinstrap
5,6,Gentoo,6,Gentoo,6,Gentoo
6,7,Gentoo,7,Gentoo,7,Gentoo
7,8,Chinstrap,8,Adelie,8,Adelie
8,9,Chinstrap,9,Chinstrap,9,Chinstrap
9,10,Chinstrap,10,Chinstrap,10,Chinstrap
