In [None]:
# ===============================
# 1. IMPORTACIONES NECESARIAS
# ===============================
import pandas as pd
import numpy as np
from abc import ABC, abstractmethod
from collections import Counter


In [None]:
# ===============================
# 2. CLASE ABSTRACTA BASE
# ===============================
class KNNBase(ABC):
    """Abstract interface for k-nearest-neighbour models.

    The class cannot be instantiated directly; a concrete subclass has to
    override ``fit``, ``distance`` and ``predict``. The base implementations
    do nothing and return ``None``.
    """

    @abstractmethod
    def fit(self, X, y):
        """Receive the training samples ``X`` and their labels ``y``."""

    @abstractmethod
    def distance(self, p1, p2):
        """Measure how far apart the points ``p1`` and ``p2`` are."""

    @abstractmethod
    def predict(self, X_new, k=3):
        """Produce predictions for ``X_new`` using ``k`` neighbours (default 3)."""


In [None]:
# =============================================
# 3. CLASE CONCRETA: KNNClassifier
# Encapsulamiento, Herencia, Polimorfismo,
# Sobrecarga de operadores __eq__ y __add__
# =============================================
class KNNClassifier(KNNBase):
    """k-nearest-neighbours classifier with Euclidean distance and majority vote.

    The training data lives in name-mangled private attributes and is exposed
    read-only through the ``X_train`` and ``y_train`` properties, so code
    defined outside the class can read it without touching private names.

    Operators:
        ``a == b``  True when both models hold equal training data.
        ``a + b``   New classifier trained on the concatenated training data.

    Because ``__eq__`` is defined without ``__hash__``, instances are
    unhashable (standard Python behaviour for mutable value-like objects).
    """

    def __init__(self):
        self.__X_train = None
        self.__y_train = None

    @property
    def X_train(self):
        """Training samples stored by ``fit`` (``None`` before fitting)."""
        return self.__X_train

    @property
    def y_train(self):
        """Training labels stored by ``fit`` (``None`` before fitting)."""
        return self.__y_train

    def fit(self, X, y):
        """Store the training samples and labels.

        Inputs are converted to NumPy arrays so that lists, DataFrames and
        Series (including ones with a non-default index) are always iterated
        and indexed by position.

        Parameters:
            X: array-like of samples, one row per sample.
            y: array-like of labels, one per row of ``X``.

        Raises:
            ValueError: if ``X`` and ``y`` do not have the same length.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim < 1 or y.ndim < 1 or X.shape[0] != y.shape[0]:
            raise ValueError("X and y must be sequences with the same number of samples.")
        self.__X_train = X
        self.__y_train = y

    def distance(self, p1, p2):
        """Return the Euclidean distance between ``p1`` and ``p2``."""
        # asarray lets plain Python sequences be subtracted element-wise.
        return np.linalg.norm(np.asarray(p1) - np.asarray(p2))

    def predict(self, X_new, k=3):
        """Predict a label for every sample in ``X_new``.

        Parameters:
            X_new: array-like of samples, one row per sample.
            k: number of nearest neighbours that vote (must be >= 1). If ``k``
               exceeds the number of training samples, all of them vote.

        Returns:
            list with one predicted label per sample of ``X_new``.

        Raises:
            ValueError: if the model has no training data or ``k < 1``.
        """
        if self.__X_train is None or len(self.__X_train) == 0:
            raise ValueError("The classifier must be fitted with data before calling predict().")
        if k < 1:
            raise ValueError("k must be at least 1.")

        predictions = []
        for x in np.asarray(X_new):
            # Go through self.distance so subclasses can swap the metric.
            distances = [self.distance(x, x_train) for x_train in self.__X_train]
            # Stable sort: equidistant neighbours keep their training order.
            k_indices = np.argsort(distances, kind="stable")[:k]
            k_labels = [self.__y_train[i] for i in k_indices]
            # On a vote tie, Counter keeps the label that was seen first,
            # i.e. the one belonging to the closest neighbour.
            most_common = Counter(k_labels).most_common(1)[0][0]
            predictions.append(most_common)
        return predictions

    def __repr__(self):
        return f"KNNClassifier(model trained: {self.__X_train is not None})"

    def __eq__(self, other):
        """Two classifiers are equal when their training data is equal."""
        if not isinstance(other, KNNClassifier):
            # Let Python try the reflected operation / fall back to identity.
            return NotImplemented
        return (np.array_equal(self.__X_train, other.__X_train)
                and np.array_equal(self.__y_train, other.__y_train))

    def __add__(self, other):
        """Return a new classifier trained on both operands' training data.

        Raises:
            ValueError: if either operand has not been fitted (stacking the
                ``None`` placeholders would silently create a bogus model).
        """
        if not isinstance(other, KNNClassifier):
            return NotImplemented
        if self.__X_train is None or other.__X_train is None:
            raise ValueError("Both classifiers must be fitted before they can be combined.")
        new_X = np.vstack((self.__X_train, other.__X_train))
        new_y = np.hstack((self.__y_train, other.__y_train))
        combined = KNNClassifier()
        combined.fit(new_X, new_y)
        return combined


In [None]:
# ===============================
# 4. CARGAR Y PREPARAR EL DATASET
# ===============================
# Columns the model actually uses; everything else in the CSV is ignored.
FEATURE_COLUMNS = ['Culmen Length (mm)', 'Culmen Depth (mm)', 'Flipper Length (mm)', 'Body Mass (g)']
TARGET_COLUMN = 'Species'

df = pd.read_csv("pinguins.csv")

# Reduce the species text to its short name first: values that do not match
# the pattern become NaN and are removed by the dropna below instead of
# ending up as NaN labels in y.
df[TARGET_COLUMN] = df[TARGET_COLUMN].str.extract(r'(Adelie|Chinstrap|Gentoo)', expand=False)

# Drop only rows that lack a value we need. A bare dropna() would also discard
# every row with a NaN in an unrelated column (e.g. free-text/comment columns,
# if the file has any), which can throw away most of the dataset.
df = df.dropna(subset=FEATURE_COLUMNS + [TARGET_COLUMN])
assert len(df) > 0, "no usable rows left after cleaning"

# NOTE(review): features are used unscaled although the columns are in
# different units (mm vs g); consider standardising before computing distances.
X = df[FEATURE_COLUMNS].values
y = df[TARGET_COLUMN].values


In [None]:
# ===============================
# 5. ENTRENAR Y PROBAR EL MODELO
# ===============================
# Evaluate on samples the model has NOT seen. Predicting rows that are also in
# the training set makes k=1 trivially perfect, because every query point is
# its own nearest neighbour at distance 0.
SEED = 42      # fixed seed so Restart & Run All reproduces the same split
N_TEST = 10    # number of held-out samples shown in the results table

assert len(X) > N_TEST, "not enough samples to hold out a test set"

rng = np.random.default_rng(SEED)
shuffled_idx = rng.permutation(len(X))
test_idx, train_idx = shuffled_idx[:N_TEST], shuffled_idx[N_TEST:]

X_train, y_train = X[train_idx], y[train_idx]
X_test, y_real = X[test_idx], y[test_idx]

knn = KNNClassifier()
knn.fit(X_train, y_train)

# One column of predictions per k, plus the true labels for comparison.
resultados = {f"k={k}": knn.predict(X_test, k=k) for k in (1, 3, 5)}
resultados["Real"] = y_real

# Show results
import pandas as pd
pd.DataFrame(resultados)


In [None]:

from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import numpy as np

def _resolve_training_data(model):
    """Return ``(X_train, y_train)`` of ``model`` as NumPy arrays.

    Looks for a public ``X_train`` / ``y_train`` attribute first and falls back
    to the name-mangled private form (``_ClassName__X_train``) that results
    from assigning ``self.__X_train`` inside a class body.

    Raises:
        ValueError: if no training data can be found (model not fitted).
    """
    def lookup(name):
        value = getattr(model, name, None)
        if value is None:
            for klass in type(model).__mro__:
                # Python strips leading underscores of the class name when mangling.
                mangled = f"_{klass.__name__.lstrip('_')}__{name}"
                value = getattr(model, mangled, None)
                if value is not None:
                    break
        if value is None:
            raise ValueError(f"The model has no training data ({name}); fit it first.")
        return value

    return np.asarray(lookup("X_train"), dtype=float), np.asarray(lookup("y_train"))


def plot_neighbors(self, x_new, k):
    '''
    Plot the k nearest training neighbours of a new sample after projecting
    the data to 2D with PCA.

    Written with a ``self`` parameter so it can be called with a fitted model
    as first argument (or attached to the classifier class as a method).

    Parameters:
    - self: fitted model holding the training data
    - x_new: new sample (1D vector with one value per training feature)
    - k: number of nearest neighbours to highlight (must be >= 1)

    Raises:
    - ValueError: if the model is not fitted, x_new does not match the
      training features, or k < 1.
    '''
    X_train, y_train = _resolve_training_data(self)
    x_new = np.asarray(x_new, dtype=float).ravel()
    if X_train.ndim != 2 or X_train.shape[1] != x_new.shape[0]:
        raise ValueError("x_new must have one value per training feature.")
    if k < 1:
        raise ValueError("k must be at least 1.")

    # Euclidean distance from the new point to every training point.
    distances = np.linalg.norm(X_train - x_new, axis=1)
    neighbors_idx = np.argsort(distances, kind="stable")[:k]

    # Project training data and the new point together; the new point is last.
    all_data = np.vstack([X_train, x_new])
    pca = PCA(n_components=2)
    all_data_2d = pca.fit_transform(all_data)
    X_train_2d = all_data_2d[:-1]
    x_new_2d = all_data_2d[-1]

    # Classes in order of first appearance: unlike set(), this gives the same
    # class -> colour mapping on every run.
    species = y_train.ravel()
    unique_species = list(dict.fromkeys(species.tolist()))
    # Red is left out because it marks the neighbour rings; the palette is
    # cycled so any number of classes can be drawn.
    colors = ['blue', 'green', 'orange', 'purple', 'brown',
              'pink', 'gray', 'olive', 'cyan', 'magenta']
    color_map = {sp: colors[i % len(colors)] for i, sp in enumerate(unique_species)}

    plt.figure(figsize=(8, 6))
    for sp in unique_species:
        mask = species == sp
        plt.scatter(X_train_2d[mask, 0], X_train_2d[mask, 1],
                    label=f"Entrenamiento: {sp}",
                    alpha=0.6,
                    color=color_map[sp])

    # Highlight the nearest neighbours with hollow red rings (one legend entry).
    plt.scatter(X_train_2d[neighbors_idx, 0], X_train_2d[neighbors_idx, 1],
                edgecolors='red',
                facecolors='none',
                s=200,
                linewidths=2,
                label="Vecino más cercano")

    # Draw the new sample.
    plt.scatter(x_new_2d[0], x_new_2d[1],
                c='black',
                s=100,
                marker='X',
                label='Nuevo ejemplar')

    plt.title(f"{k} Vecinos más cercanos (PCA 2D)")
    plt.xlabel("PCA 1")
    plt.ylabel("PCA 2")
    plt.legend()
    plt.grid(True)
    plt.show()
