In [None]:
import numpy as np
from sklearn.datasets import load_iris, load_breast_cancer, load_wine
from sklearn.model_selection import train_test_split, KFold, LeaveOneOut
from sklearn.metrics import accuracy_score, confusion_matrix
import pandas as pd
from collections import Counter

class EuclideanClassifier:
    """Classifier that labels a point with the class of its closest training sample.

    Closeness is measured with the Euclidean distance; when several training
    samples are equally close, the one that appears first in the training
    set wins (``np.argmin`` returns the first minimum).

    NOTE(review): as implemented this is a 1-nearest-neighbour rule. If a
    minimum-distance-to-class-centroid classifier was intended, confirm and
    adjust.
    """

    def __init__(self):
        # Both stay None until fit() is called.
        self.X_train = None
        self.y_train = None

    def fit(self, X_train, y_train):
        """Memorise the training samples and their labels.

        Args:
            X_train: Array-like of training samples, one sample per entry
                along the first axis.
            y_train: Array-like of labels, aligned with ``X_train``.
        """
        # Convert once so plain Python lists work in predict() as well.
        self.X_train = np.asarray(X_train)
        self.y_train = np.asarray(y_train)

    def predict(self, X_test):
        """Return the predicted label for every sample in ``X_test``.

        Args:
            X_test: Array-like of query samples with the same per-sample
                shape as the training samples.

        Returns:
            ``np.ndarray`` with one label per query sample.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("fit() must be called before predict()")

        # Reduce over every axis except the first (the sample axis), so
        # samples of any dimensionality are supported.
        sample_axes = tuple(range(1, self.X_train.ndim))

        predictions = []
        for test_point in np.asarray(X_test):
            # Distance from this query to all training samples at once.
            distances = np.sqrt(
                np.sum((self.X_train - test_point) ** 2, axis=sample_axes)
            )
            predictions.append(self.y_train[np.argmin(distances)])
        return np.array(predictions)

class KNNClassifier:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Equidistant neighbours are ranked in training-set order (stable sort),
    and when several labels receive the same number of votes the label met
    first among the ranked neighbours wins.
    """

    def __init__(self, k=1):
        """Create the classifier.

        Args:
            k: Number of neighbours that vote; must be at least 1.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError(f"k must be >= 1, got {k}")
        self.k = k
        # Both stay None until fit() is called.
        self.X_train = None
        self.y_train = None

    def fit(self, X_train, y_train):
        """Memorise the training samples and their labels.

        Args:
            X_train: Array-like of training samples, one sample per entry
                along the first axis.
            y_train: Array-like of labels, aligned with ``X_train``.
        """
        # Convert once so plain Python lists support the array indexing
        # and arithmetic used in predict().
        self.X_train = np.asarray(X_train)
        self.y_train = np.asarray(y_train)

    def predict(self, X_test):
        """Return the majority label of the ``k`` nearest training samples.

        Args:
            X_test: Array-like of query samples with the same per-sample
                shape as the training samples.

        Returns:
            ``np.ndarray`` with one label per query sample.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("fit() must be called before predict()")

        # Reduce over every axis except the first (the sample axis).
        sample_axes = tuple(range(1, self.X_train.ndim))

        predictions = []
        for test_point in np.asarray(X_test):
            distances = np.sqrt(
                np.sum((self.X_train - test_point) ** 2, axis=sample_axes)
            )
            # A stable sort keeps equidistant samples in training order, so
            # results are deterministic and k=1 agrees with an argmin rule.
            k_nearest = np.argsort(distances, kind="stable")[:self.k]
            k_nearest_labels = self.y_train[k_nearest]
            most_common = Counter(k_nearest_labels).most_common(1)[0][0]
            predictions.append(most_common)
        return np.array(predictions)

def evaluate_classifier(classifier, X, y, validation_method='holdout', test_size=0.3, n_folds=10, random_state=42):
    """Evaluate ``classifier`` on ``(X, y)`` with the chosen validation scheme.

    Args:
        classifier: Object exposing ``fit(X_train, y_train)`` and
            ``predict(X_test)``; it is refitted for every split.
        X: Array-like of samples, one per entry along the first axis.
        y: Array-like of labels aligned with ``X``.
        validation_method: ``'holdout'``, ``'k-fold'`` or ``'leave-one-out'``.
        test_size: Test fraction passed to ``train_test_split`` (hold-out only).
        n_folds: Number of folds for ``'k-fold'``.
        random_state: Seed for the hold-out split and the k-fold shuffle.

    Returns:
        dict with two keys:
            ``'accuracy'``: list of accuracies, one per split (a single
            entry for hold-out, 0/1 per sample for leave-one-out).
            ``'confusion_matrix'``: confusion matrix over all test
            predictions, with rows/columns ordered by ``np.unique(y)``.

    Raises:
        ValueError: If ``validation_method`` is not one of the supported names.
    """
    # Arrays are required for the index-array selection done per split.
    X = np.asarray(X)
    y = np.asarray(y)
    # Fix the class order up front: without it, a split whose test part lacks
    # a class yields a smaller matrix that cannot be summed with the others.
    labels = np.unique(y)
    results = {}

    if validation_method == 'holdout':
        # Single train/test split.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )
        classifier.fit(X_train, y_train)
        y_pred = classifier.predict(X_test)

        results['accuracy'] = [accuracy_score(y_test, y_pred)]
        results['confusion_matrix'] = confusion_matrix(y_test, y_pred, labels=labels)

    elif validation_method == 'k-fold':
        # Shuffled k-fold cross-validation.
        kf = KFold(n_splits=n_folds, shuffle=True, random_state=random_state)
        accuracies = []
        total_matrix = np.zeros((len(labels), len(labels)), dtype=np.int64)

        for train_index, test_index in kf.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            classifier.fit(X_train, y_train)
            y_pred = classifier.predict(X_test)

            accuracies.append(accuracy_score(y_test, y_pred))
            total_matrix += confusion_matrix(y_test, y_pred, labels=labels)

        results['accuracy'] = accuracies
        results['confusion_matrix'] = total_matrix

    elif validation_method == 'leave-one-out':
        # Each sample is the test set exactly once.
        accuracies = []
        all_predictions = []
        all_true = []

        for train_index, test_index in LeaveOneOut().split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            classifier.fit(X_train, y_train)
            y_pred = classifier.predict(X_test)

            all_predictions.extend(y_pred)
            all_true.extend(y_test)
            # The test split holds exactly one sample: score it as 0 or 1.
            accuracies.append(int(y_pred[0] == y_test[0]))

        results['accuracy'] = accuracies
        results['confusion_matrix'] = confusion_matrix(all_true, all_predictions, labels=labels)

    else:
        # Fail loudly instead of returning an empty dict that breaks callers later.
        raise ValueError(
            f"Unknown validation_method {validation_method!r}; "
            "expected 'holdout', 'k-fold' or 'leave-one-out'"
        )

    return results

def evaluate_datasets():
    """Run every classifier under every validation scheme on each dataset.

    Loads the Iris, Breast Cancer and Wine datasets, evaluates the
    Euclidean and 1-NN classifiers with hold-out, k-fold and leave-one-out
    validation, and prints accuracy figures and the confusion matrix for
    each combination.
    """
    # Load the datasets
    loaded = {
        'Iris': load_iris(),
        'Breast Cancer': load_breast_cancer(),
        'Wine': load_wine()
    }

    models = {
        'Euclidean': EuclideanClassifier(),
        '1NN': KNNClassifier(k=1)
    }

    schemes = ('holdout', 'k-fold', 'leave-one-out')
    separator = "-" * 50

    for ds_label, bunch in loaded.items():
        print(f"\nEvaluando dataset: {ds_label}")
        features = bunch.data
        targets = bunch.target

        for model_label, model in models.items():
            print(f"\nClasificador: {model_label}")

            for scheme in schemes:
                print(f"\nMétodo de validación: {scheme}")
                outcome = evaluate_classifier(model, features, targets, scheme)
                scores = outcome['accuracy']

                # Report results: multi-split schemes get mean and spread,
                # hold-out has a single score.
                if scheme != 'holdout':
                    print(f"Accuracy promedio: {np.mean(scores):.4f}")
                    print(f"Desviación estándar: {np.std(scores):.4f}")
                else:
                    print(f"Accuracy: {scores[0]:.4f}")

                print("\nMatriz de confusión:")
                print(outcome['confusion_matrix'])
                print(separator)

# Run the evaluation
if __name__ == "__main__":
    evaluate_datasets()


Evaluando dataset: Iris

Clasificador: Euclidean

Método de validación: holdout
Accuracy: 1.0000

Matriz de confusión:
[[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]
--------------------------------------------------

Método de validación: k-fold
Accuracy promedio: 0.9600
Desviación estándar: 0.0611

Matriz de confusión:
[[50  0  0]
 [ 0 47  3]
 [ 0  3 47]]
--------------------------------------------------

Método de validación: leave-one-out
Accuracy promedio: 0.9600
Desviación estándar: 0.1960

Matriz de confusión:
[[50  0  0]
 [ 0 47  3]
 [ 0  3 47]]
--------------------------------------------------

Clasificador: 1NN

Método de validación: holdout
Accuracy: 1.0000

Matriz de confusión:
[[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]
--------------------------------------------------

Método de validación: k-fold
Accuracy promedio: 0.9600
Desviación estándar: 0.0611

Matriz de confusión:
[[50  0  0]
 [ 0 47  3]
 [ 0  3 47]]
--------------------------------------------------

Método de validación: lea