<a href="https://colab.research.google.com/github/HectorDelgadoJ/Laboratorio-7/blob/main/Laboratorio7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Laboratorio 7

En esta práctica, implementaremos y validaremos el clasificador K-Nearest Neighbors (KNN), un algoritmo de clasificación basado en la proximidad de puntos en el espacio de características. Se elegirá el valor óptimo de K mediante la partición de Hold-Out (70/30 estratificado) y se evaluará el desempeño del clasificador utilizando tres métodos de validación: Hold-Out estratificado, 10-Fold Cross-Validation estratificado y Leave-One-Out. Al final, mediremos la precisión (accuracy) y analizaremos la matriz de confusión para evaluar la calidad del modelo en tres datasets distintos.

## * Paso 1: Importar bibliotecas y cargar los datasets



In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris, load_wine, load_breast_cancer
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score, LeaveOneOut
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix


## * Paso 2: Cargar los datos y explorar

In [None]:
# Load the three benchmark datasets, keyed by their display name.
datasets = {
    title: loader()
    for title, loader in (
        ('Iris', load_iris),
        ('Wine', load_wine),
        ('Breast Cancer', load_breast_cancer),
    )
}

# Quick overview of every dataset: data shape, feature names and class names.
for label, bunch in datasets.items():
    print(
        f"{label} dataset shape: {bunch.data.shape}",
        f"Features: {bunch.feature_names}",
        f"Classes: {bunch.target_names}\n",
        sep="\n",
    )


## * Paso 3: Implementar el modelo KNN y seleccionar el valor de K

In [None]:
def choose_best_k(X, y, max_k=20):
    """Select the number of neighbours that maximises hold-out accuracy.

    The data is split once into a stratified 70/30 train/test partition
    (fixed ``random_state=42``), and a KNN classifier is fitted for every
    K from 1 up to ``max_k``. The best K and its accuracy are printed.

    Args:
        X: Feature matrix accepted by ``train_test_split``.
        y: Class labels, one per row of ``X``.
        max_k: Largest K to try (inclusive). Values above the number of
            training samples are capped to that number.

    Returns:
        The K with the highest hold-out accuracy; ties go to the smallest K.

    Raises:
        ValueError: If ``max_k`` is smaller than 1.

    Note:
        The printed accuracy is measured on the same split that was used to
        pick K, so it is an optimistic estimate.
    """
    if max_k < 1:
        raise ValueError("max_k must be at least 1")

    # The split is deterministic and independent of k, so compute it once
    # instead of repeating it on every iteration.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, stratify=y, random_state=42
    )

    # scikit-learn refuses to query more neighbours than fitted samples,
    # so never search beyond the size of the training partition.
    largest_k = min(max_k, len(y_train))

    accuracies = []
    for k in range(1, largest_k + 1):
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train, y_train)
        accuracies.append(accuracy_score(y_test, knn.predict(X_test)))

    best_accuracy = max(accuracies)
    # list.index returns the first match, i.e. the smallest K among ties.
    best_k = accuracies.index(best_accuracy) + 1
    print(f"Best K: {best_k} with Accuracy: {best_accuracy:.2f}")
    return best_k


## * Paso 4: Validar con los métodos de validación

In [None]:
def validate_knn(X, y, k):
    """Report KNN accuracy for a given K under three validation schemes.

    Prints, in order: the accuracy and confusion matrix of a stratified
    70/30 hold-out split (``random_state=42``), the mean accuracy of a
    10-fold stratified cross-validation, and the mean accuracy of
    leave-one-out validation.

    Args:
        X: Feature matrix.
        y: Class labels, one per row of ``X``.
        k: Number of neighbours used by the classifier.

    Returns:
        None. All results are written to standard output.
    """
    model = KNeighborsClassifier(n_neighbors=k)

    # Stratified 70/30 hold-out evaluation.
    train_X, test_X, train_y, test_y = train_test_split(
        X, y, test_size=0.3, stratify=y, random_state=42
    )
    predictions = model.fit(train_X, train_y).predict(test_X)
    print("Hold-Out Accuracy:", accuracy_score(test_y, predictions))
    print("Confusion Matrix:\n", confusion_matrix(test_y, predictions))

    # Both resampling schemes are scored the same way on the full data,
    # so pair each output label with its splitter and loop over them.
    resampling_schemes = (
        ("10-Fold Cross-Validation Accuracy:", StratifiedKFold(n_splits=10)),
        ("Leave-One-Out Accuracy:", LeaveOneOut()),
    )
    for label, splitter in resampling_schemes:
        fold_scores = cross_val_score(model, X, y, cv=splitter)
        print(label, np.mean(fold_scores))


## * Paso 5: Ejecutar el código completo

In [None]:
# For every loaded dataset: tune K on the hold-out split, then run the
# three validation schemes with that K.
for title, bunch in datasets.items():
    print(f"\n--- Validating on {title} dataset ---")
    features = bunch.data
    labels = bunch.target
    validate_knn(features, labels, choose_best_k(features, labels))


El código completo quedaría:

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris, load_wine, load_breast_cancer
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score, LeaveOneOut
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Benchmark datasets to evaluate, keyed by display name.
datasets = {
    title: loader()
    for title, loader in (
        ('Iris', load_iris),
        ('Wine', load_wine),
        ('Breast Cancer', load_breast_cancer),
    )
}

# Hyper-parameter search: pick K by hold-out accuracy.
def choose_best_k(X, y, max_k=20):
    """Select the number of neighbours that maximises hold-out accuracy.

    The data is split once into a stratified 70/30 train/test partition
    (fixed ``random_state=42``), and a KNN classifier is fitted for every
    K from 1 up to ``max_k``. The best K and its accuracy are printed.

    Args:
        X: Feature matrix accepted by ``train_test_split``.
        y: Class labels, one per row of ``X``.
        max_k: Largest K to try (inclusive). Values above the number of
            training samples are capped to that number.

    Returns:
        The K with the highest hold-out accuracy; ties go to the smallest K.

    Raises:
        ValueError: If ``max_k`` is smaller than 1.

    Note:
        The printed accuracy is measured on the same split that was used to
        pick K, so it is an optimistic estimate.
    """
    if max_k < 1:
        raise ValueError("max_k must be at least 1")

    # The split is deterministic and independent of k, so compute it once
    # instead of repeating it on every iteration.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, stratify=y, random_state=42
    )

    # scikit-learn refuses to query more neighbours than fitted samples,
    # so never search beyond the size of the training partition.
    largest_k = min(max_k, len(y_train))

    accuracies = []
    for k in range(1, largest_k + 1):
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train, y_train)
        accuracies.append(accuracy_score(y_test, knn.predict(X_test)))

    best_accuracy = max(accuracies)
    # list.index returns the first match, i.e. the smallest K among ties.
    best_k = accuracies.index(best_accuracy) + 1
    print(f"Best K: {best_k} with Accuracy: {best_accuracy:.2f}")
    return best_k

# Evaluation of a fixed-K classifier under three validation schemes.
def validate_knn(X, y, k):
    """Report KNN accuracy for a given K under three validation schemes.

    Prints, in order: the accuracy and confusion matrix of a stratified
    70/30 hold-out split (``random_state=42``), the mean accuracy of a
    10-fold stratified cross-validation, and the mean accuracy of
    leave-one-out validation.

    Args:
        X: Feature matrix.
        y: Class labels, one per row of ``X``.
        k: Number of neighbours used by the classifier.

    Returns:
        None. All results are written to standard output.
    """
    model = KNeighborsClassifier(n_neighbors=k)

    # Stratified 70/30 hold-out evaluation.
    train_X, test_X, train_y, test_y = train_test_split(
        X, y, test_size=0.3, stratify=y, random_state=42
    )
    predictions = model.fit(train_X, train_y).predict(test_X)
    print("Hold-Out Accuracy:", accuracy_score(test_y, predictions))
    print("Confusion Matrix:\n", confusion_matrix(test_y, predictions))

    # Both resampling schemes are scored the same way on the full data,
    # so pair each output label with its splitter and loop over them.
    resampling_schemes = (
        ("10-Fold Cross-Validation Accuracy:", StratifiedKFold(n_splits=10)),
        ("Leave-One-Out Accuracy:", LeaveOneOut()),
    )
    for label, splitter in resampling_schemes:
        fold_scores = cross_val_score(model, X, y, cv=splitter)
        print(label, np.mean(fold_scores))

# Driver: for every dataset, tune K on the hold-out split and then run the
# three validation schemes with that K.
for title, bunch in datasets.items():
    print(f"\n--- Validating on {title} dataset ---")
    features = bunch.data
    labels = bunch.target
    validate_knn(features, labels, choose_best_k(features, labels))
