In [1]:
# Parte I - Implementación de SMOTE y Clasificadores

import numpy as np
import random
from sklearn.datasets import load_iris, fetch_openml

# Función para cargar datasets automáticamente
def load_datasets():
    datasets = {}

    # Cargar Iris (clases Setosa y Virginica únicamente)
    iris = load_iris()
    iris_X = iris['data']
    iris_y = iris['target']
    
    # Filtrar solo Setosa y Virginica
    filter_indices = np.where((iris_y == 0) | (iris_y == 1))
    datasets['Iris'] = (iris_X[filter_indices], iris_y[filter_indices])

    # Cargar Glass desde OpenML
    glass = fetch_openml(name="glass", version=1, as_frame=False)
    glass_X = glass.data
    glass_y = glass.target

    # Convertir etiquetas a valores numéricos únicos
    unique_labels = np.unique(glass_y)
    label_to_numeric = {label: idx for idx, label in enumerate(unique_labels)}
    numeric_glass_y = np.array([label_to_numeric[label] for label in glass_y])

    datasets['Glass'] = (glass_X, numeric_glass_y)

    return datasets

# SMOTE
def smote(X, y, minority_class, k=5):
    X_minority = X[y == minority_class]
    n_samples, n_features = X_minority.shape
    new_samples = []

    for _ in range(len(X_minority)):
        i = random.randint(0, n_samples - 1)
        neighbors = [random.randint(0, n_samples - 1) for _ in range(k)]
        neighbor = X_minority[random.choice(neighbors)]
        
        diff = neighbor - X_minority[i]
        new_sample = X_minority[i] + random.random() * diff
        new_samples.append(new_sample)
    
    new_samples = np.array(new_samples)
    X_augmented = np.vstack((X, new_samples))
    y_augmented = np.hstack((y, np.full(len(new_samples), minority_class)))
    
    return X_augmented, y_augmented

# Clasificador Euclidiano
def euclidean_classifier(X_train, y_train, X_test):
    predictions = []
    for x in X_test:
        distances = np.linalg.norm(X_train - x, axis=1)
        nearest_index = np.argmin(distances)
        predictions.append(y_train[nearest_index])
    return np.array(predictions)

# Validación Hold-Out
def hold_out_validation(X, y, test_ratio=0.3):
    n_samples = X.shape[0]
    indices = np.arange(n_samples)
    np.random.shuffle(indices)
    split_point = int(n_samples * (1 - test_ratio))
    train_indices, test_indices = indices[:split_point], indices[split_point:]
    return X[train_indices], y[train_indices], X[test_indices], y[test_indices]

# Validación Cruzada
def k_fold_cross_validation(X, y, k=10):
    n_samples = len(y)
    fold_size = n_samples // k
    indices = np.arange(n_samples)
    np.random.shuffle(indices)
    folds = [indices[i * fold_size:(i + 1) * fold_size] for i in range(k)]
    
    scores = []
    for i in range(k):
        test_indices = folds[i]
        train_indices = np.hstack([folds[j] for j in range(k) if j != i])
        
        X_train, y_train = X[train_indices], y[train_indices]
        X_test, y_test = X[test_indices], y[test_indices]
        
        predictions = euclidean_classifier(X_train, y_train, X_test)
        accuracy = np.mean(predictions == y_test)
        scores.append(accuracy)
    
    return np.mean(scores)

# Cargar los datasets
datasets = load_datasets()

# Trabajar con el dataset Glass
X, y = datasets['Glass']

# Identificar clase minoritaria
unique_classes, class_counts = np.unique(y, return_counts=True)
minority_class = unique_classes[np.argmin(class_counts)]

# Validación Hold-Out
X_train, y_train, X_test, y_test = hold_out_validation(X, y)
print("Hold-Out Accuracy (Original Data):", np.mean(euclidean_classifier(X_train, y_train, X_test) == y_test))

# Aplicar SMOTE
X_smote, y_smote = smote(X, y, minority_class)

X_train_smote, y_train_smote, X_test_smote, y_test_smote = hold_out_validation(X_smote, y_smote)
print("Hold-Out Accuracy (After SMOTE):", np.mean(euclidean_classifier(X_train_smote, y_train_smote, X_test_smote) == y_test_smote))

# Validación Cruzada
print("10-Fold CV Accuracy (Original Data):", k_fold_cross_validation(X, y))
print("10-Fold CV Accuracy (After SMOTE):", k_fold_cross_validation(X_smote, y_smote))

Hold-Out Accuracy (Original Data): 0.7076923076923077
Hold-Out Accuracy (After SMOTE): 0.6716417910447762
10-Fold CV Accuracy (Original Data): 0.7285714285714285
10-Fold CV Accuracy (After SMOTE): 0.740909090909091


In [2]:
# Parte II - Implementación del Perceptrón Simple

# Cargar dataset Iris (Setosa y Virginica)
def load_iris_binary():
    # Datos manuales para Setosa y Virginica
    from sklearn.datasets import load_iris
    iris = load_iris()
    X, y = iris.data, iris.target

    # Filtrar solo Setosa (0) y Virginica (1)
    filter_indices = (y == 0) | (y == 1)
    return X[filter_indices], y[filter_indices]

# Perceptrón Simple
def perceptron(X, y, epochs=1000, lr=0.01):
    n_samples, n_features = X.shape
    weights = np.zeros(n_features)  # Inicializar pesos
    bias = 0                       # Inicializar sesgo

    for epoch in range(epochs):
        for i in range(n_samples):
            # Salida lineal
            linear_output = np.dot(X[i], weights) + bias
            # Predicción
            y_pred = 1 if linear_output > 0 else 0
            # Actualización de pesos y sesgo
            update = lr * (y[i] - y_pred)
            weights += update * X[i]
            bias += update
    return weights, bias

# Predicción con el Perceptrón
def perceptron_predict(X, weights, bias):
    linear_output = np.dot(X, weights) + bias
    return (linear_output > 0).astype(int)

# Validación Hold-Out 70/30
def hold_out_validation(X, y, test_ratio=0.3):
    n_samples = X.shape[0]
    indices = np.arange(n_samples)
    np.random.shuffle(indices)
    split_point = int(n_samples * (1 - test_ratio))
    train_indices, test_indices = indices[:split_point], indices[split_point:]
    return X[train_indices], y[train_indices], X[test_indices], y[test_indices]

# Programa principal
# Cargar datos
X, y = load_iris_binary()

# Normalización de características (opcional pero recomendado)
X = (X - X.mean(axis=0)) / X.std(axis=0)

# Dividir datos en entrenamiento y prueba (70/30)
X_train, y_train, X_test, y_test = hold_out_validation(X, y)

# Entrenar el perceptrón
weights, bias = perceptron(X_train, y_train)

# Evaluar el modelo en los datos de prueba
predictions = perceptron_predict(X_test, weights, bias)

# Calcular precisión
accuracy = np.mean(predictions == y_test)
print(f"Hold-Out Accuracy (Perceptrón): {accuracy:.2f}")

Hold-Out Accuracy (Perceptrón): 1.00
