In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import LeaveOneOut, train_test_split, StratifiedKFold
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder

# Cargar el dataset
data = pd.read_csv("C:\\Users\\Polar\\Documents\\ESCUELA\\5TO_SEMESTRE\\MAKINITAS\\practica6\\iris\\iris.csv")

# Codificar la columna 'class' como valores numéricos
label_encoder = LabelEncoder()
data['class'] = label_encoder.fit_transform(data['class'])

# Separar características y etiquetas
X = data.drop(columns=['class']).values
y = data['class'].values

# Función para calcular la distancia Euclidiana
def euclidean_distance(x, y):
    return np.sqrt(np.sum((x - y) ** 2))

# Función para clasificar un nuevo punto usando el clasificador 1-NN
def classify_1nn(new_point, X_train, y_train):
    min_distance = float('inf')
    closest_class = None
    
    for i in range(len(X_train)):
        distance = euclidean_distance(new_point, X_train[i])
        if distance < min_distance:
            min_distance = distance
            closest_class = y_train[i]
    
    return closest_class

# --- Leave-One-Out Cross-Validation ---
loo = LeaveOneOut()
correct_predictions_loo = 0
y_pred_loo = []

for train_index, test_index in loo.split(X):
    # Dividir datos en entrenamiento y prueba
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Clasificar el punto de prueba usando 1-NN
    predicted_class = classify_1nn(X_test[0], X_train, y_train)
    y_pred_loo.append(predicted_class)
    
    # Comparar con la clase real
    if predicted_class == y_test[0]:
        correct_predictions_loo += 1

# Calcular y mostrar la precisión de Leave-One-Out
accuracy_loo = correct_predictions_loo / len(X)
print(f"Precisión de Leave-One-Out Cross-Validation: {accuracy_loo:.2f}")

# Imprimir la matriz de confusión para Leave-One-Out
print("Matriz de Confusión Leave-One-Out:")
print(confusion_matrix(y, y_pred_loo))

# --- Hold-Out Validation (70-30 split) ---
# Dividir en entrenamiento y prueba con un 70-30
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Validar en el conjunto de prueba
correct_predictions_holdout = 0
y_pred_holdout = []

for i, test_point in enumerate(X_test):
    predicted_class = classify_1nn(test_point, X_train, y_train)
    y_pred_holdout.append(predicted_class)
    if predicted_class == y_test[i]:
        correct_predictions_holdout += 1

# Calcular y mostrar la precisión de Hold-Out
accuracy_holdout = correct_predictions_holdout / len(X_test)
print(f"Precisión de Hold-Out Validation (70-30): {accuracy_holdout:.2f}")

# Imprimir la matriz de confusión para Hold-Out
print("Matriz de Confusión Hold-Out:")
print(confusion_matrix(y_test, y_pred_holdout))

# --- 10-Fold Cross-Validation Estratificado ---
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
correct_predictions_kfold = 0
total_test_samples = 0
y_pred_kfold = []

for train_index, test_index in skf.split(X, y):
    # Dividir datos en entrenamiento y prueba para el fold actual
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Validar en el conjunto de prueba
    for i, test_point in enumerate(X_test):
        predicted_class = classify_1nn(test_point, X_train, y_train)
        y_pred_kfold.append(predicted_class)
        if predicted_class == y_test[i]:
            correct_predictions_kfold += 1
    
    # Actualizar el conteo de muestras de prueba
    total_test_samples += len(X_test)

# Calcular y mostrar la precisión de 10-Fold Cross-Validation
accuracy_kfold = correct_predictions_kfold / total_test_samples
print(f"Precisión de 10-Fold Cross-Validation Estratificado: {accuracy_kfold:.2f}")

# Imprimir la matriz de confusión para 10-Fold Cross-Validation
print("Matriz de Confusión 10-Fold Cross-Validation:")
print(confusion_matrix(y, y_pred_kfold))


Precisión de Leave-One-Out Cross-Validation: 0.96
Matriz de Confusión Leave-One-Out:
[[50  0  0]
 [ 0 47  3]
 [ 0  3 47]]
Precisión de Hold-Out Validation (70-30): 1.00
Matriz de Confusión Hold-Out:
[[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]
Precisión de 10-Fold Cross-Validation Estratificado: 0.96
Matriz de Confusión 10-Fold Cross-Validation:
[[20 15 15]
 [15 19 16]
 [15 16 19]]


In [2]:
# This is a subset of the original data available at kaggle.
data = pd.read_csv("C:\\Users\\Polar\\Documents\\ESCUELA\\5TO_SEMESTRE\\MAKINITAS\\practica6\\winequality\\winequality-red.csv", delimiter=";")

data.head()
print(data.columns)

Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')


In [3]:
# Codificar la columna 'class' como valores numéricos
label_encoder = LabelEncoder()
data['quality'] = label_encoder.fit_transform(data['quality'])

# Separar características y etiquetas
X = data.drop(columns=['quality']).values
y = data['quality'].values

# Función para calcular la distancia Euclidiana
def euclidean_distance(x, y):
    return np.sqrt(np.sum((x - y) ** 2))

# Función para clasificar un nuevo punto usando el clasificador 1-NN
def classify_1nn(new_point, X_train, y_train):
    min_distance = float('inf')
    closest_class = None
    
    for i in range(len(X_train)):
        distance = euclidean_distance(new_point, X_train[i])
        if distance < min_distance:
            min_distance = distance
            closest_class = y_train[i]
    
    return closest_class

# --- Leave-One-Out Cross-Validation ---
loo = LeaveOneOut()
correct_predictions_loo = 0
y_pred_loo = []

for train_index, test_index in loo.split(X):
    # Dividir datos en entrenamiento y prueba
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Clasificar el punto de prueba usando 1-NN
    predicted_class = classify_1nn(X_test[0], X_train, y_train)
    y_pred_loo.append(predicted_class)
    
    # Comparar con la clase real
    if predicted_class == y_test[0]:
        correct_predictions_loo += 1

# Calcular y mostrar la precisión de Leave-One-Out
accuracy_loo = correct_predictions_loo / len(X)
print(f"Precisión de Leave-One-Out Cross-Validation: {accuracy_loo:.2f}")

# Imprimir la matriz de confusión para Leave-One-Out
print("Matriz de Confusión Leave-One-Out:")
print(confusion_matrix(y, y_pred_loo))

# --- Hold-Out Validation (70-30 split) ---
# Dividir en entrenamiento y prueba con un 70-30
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Validar en el conjunto de prueba
correct_predictions_holdout = 0
y_pred_holdout = []

for i, test_point in enumerate(X_test):
    predicted_class = classify_1nn(test_point, X_train, y_train)
    y_pred_holdout.append(predicted_class)
    if predicted_class == y_test[i]:
        correct_predictions_holdout += 1

# Calcular y mostrar la precisión de Hold-Out
accuracy_holdout = correct_predictions_holdout / len(X_test)
print(f"Precisión de Hold-Out Validation (70-30): {accuracy_holdout:.2f}")

# Imprimir la matriz de confusión para Hold-Out
print("Matriz de Confusión Hold-Out:")
print(confusion_matrix(y_test, y_pred_holdout))

# --- 10-Fold Cross-Validation Estratificado ---
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
correct_predictions_kfold = 0
total_test_samples = 0
y_pred_kfold = []

for train_index, test_index in skf.split(X, y):
    # Dividir datos en entrenamiento y prueba para el fold actual
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Validar en el conjunto de prueba
    for i, test_point in enumerate(X_test):
        predicted_class = classify_1nn(test_point, X_train, y_train)
        y_pred_kfold.append(predicted_class)
        if predicted_class == y_test[i]:
            correct_predictions_kfold += 1
    
    # Actualizar el conteo de muestras de prueba
    total_test_samples += len(X_test)

# Calcular y mostrar la precisión de 10-Fold Cross-Validation
accuracy_kfold = correct_predictions_kfold / total_test_samples
print(f"Precisión de 10-Fold Cross-Validation Estratificado: {accuracy_kfold:.2f}")

# Imprimir la matriz de confusión para 10-Fold Cross-Validation
print("Matriz de Confusión 10-Fold Cross-Validation:")
print(confusion_matrix(y, y_pred_kfold))

Precisión de Leave-One-Out Cross-Validation: 0.62
Matriz de Confusión Leave-One-Out:
[[  1   4   3   1   1   0]
 [  4   2  19  23   3   2]
 [  3  13 475 165  24   1]
 [  1  18 163 394  52  10]
 [  0   2  23  60 110   4]
 [  0   0   2  11   3   2]]
Precisión de Hold-Out Validation (70-30): 0.53
Matriz de Confusión Hold-Out:
[[  0   1   0   0   0   0]
 [  0   1   3  12   0   1]
 [  1   6 114  62  11   1]
 [  0   4  68 108  18   2]
 [  0   1   3  23  28   6]
 [  0   0   1   3   1   1]]
Precisión de 10-Fold Cross-Validation Estratificado: 0.61
Matriz de Confusión 10-Fold Cross-Validation:
[[  1   0   4   4   1   0]
 [  0   1  23  18   9   2]
 [  1  13 299 279  80   9]
 [  4  18 262 263  84   7]
 [  1   4  87  78  28   1]
 [  0   0   9   8   1   0]]


In [4]:
# This is a subset of the original data available at kaggle.
data = pd.read_csv("C:\\Users\\Polar\\Documents\\ESCUELA\\5TO_SEMESTRE\\MAKINITAS\\practica6\\winequality\\winequality-white.csv", delimiter=";")

data.head()
print(data.columns)

Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')


In [5]:
# Codificar la columna 'class' como valores numéricos
label_encoder = LabelEncoder()
data['quality'] = label_encoder.fit_transform(data['quality'])

# Separar características y etiquetas
X = data.drop(columns=['quality']).values
y = data['quality'].values

# Función para calcular la distancia Euclidiana
def euclidean_distance(x, y):
    return np.sqrt(np.sum((x - y) ** 2))

# Función para clasificar un nuevo punto usando el clasificador 1-NN
def classify_1nn(new_point, X_train, y_train):
    min_distance = float('inf')
    closest_class = None
    
    for i in range(len(X_train)):
        distance = euclidean_distance(new_point, X_train[i])
        if distance < min_distance:
            min_distance = distance
            closest_class = y_train[i]
    
    return closest_class

# --- Leave-One-Out Cross-Validation ---
loo = LeaveOneOut()
correct_predictions_loo = 0
y_pred_loo = []

for train_index, test_index in loo.split(X):
    # Dividir datos en entrenamiento y prueba
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Clasificar el punto de prueba usando 1-NN
    predicted_class = classify_1nn(X_test[0], X_train, y_train)
    y_pred_loo.append(predicted_class)
    
    # Comparar con la clase real
    if predicted_class == y_test[0]:
        correct_predictions_loo += 1

# Calcular y mostrar la precisión de Leave-One-Out
accuracy_loo = correct_predictions_loo / len(X)
print(f"Precisión de Leave-One-Out Cross-Validation: {accuracy_loo:.2f}")

# Imprimir la matriz de confusión para Leave-One-Out
print("Matriz de Confusión Leave-One-Out:")
print(confusion_matrix(y, y_pred_loo))

# --- Hold-Out Validation (70-30 split) ---
# Dividir en entrenamiento y prueba con un 70-30
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Validar en el conjunto de prueba
correct_predictions_holdout = 0
y_pred_holdout = []

for i, test_point in enumerate(X_test):
    predicted_class = classify_1nn(test_point, X_train, y_train)
    y_pred_holdout.append(predicted_class)
    if predicted_class == y_test[i]:
        correct_predictions_holdout += 1

# Calcular y mostrar la precisión de Hold-Out
accuracy_holdout = correct_predictions_holdout / len(X_test)
print(f"Precisión de Hold-Out Validation (70-30): {accuracy_holdout:.2f}")

# Imprimir la matriz de confusión para Hold-Out
print("Matriz de Confusión Hold-Out:")
print(confusion_matrix(y_test, y_pred_holdout))

# --- 10-Fold Cross-Validation Estratificado ---
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
correct_predictions_kfold = 0
total_test_samples = 0
y_pred_kfold = []

for train_index, test_index in skf.split(X, y):
    # Dividir datos en entrenamiento y prueba para el fold actual
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Validar en el conjunto de prueba
    for i, test_point in enumerate(X_test):
        predicted_class = classify_1nn(test_point, X_train, y_train)
        y_pred_kfold.append(predicted_class)
        if predicted_class == y_test[i]:
            correct_predictions_kfold += 1
    
    # Actualizar el conteo de muestras de prueba
    total_test_samples += len(X_test)

# Calcular y mostrar la precisión de 10-Fold Cross-Validation
accuracy_kfold = correct_predictions_kfold / total_test_samples
print(f"Precisión de 10-Fold Cross-Validation Estratificado: {accuracy_kfold:.2f}")

# Imprimir la matriz de confusión para 10-Fold Cross-Validation
print("Matriz de Confusión 10-Fold Cross-Validation:")
print(confusion_matrix(y, y_pred_kfold))

Precisión de Leave-One-Out Cross-Validation: 0.62
Matriz de Confusión Leave-One-Out:
[[   2    4    9    4    1    0    0]
 [   6   35   50   49   21    2    0]
 [   7   37  921  380  100   11    1]
 [   1   42  358 1474  273   49    1]
 [   2    9   76  264  504   23    2]
 [   0    1   13   52   27   82    0]
 [   0    0    2    2    1    0    0]]
Precisión de Hold-Out Validation (70-30): 0.56
Matriz de Confusión Hold-Out:
[[  1   1   3   1   1   0   0]
 [  3   8  16  10   3   0   0]
 [  3  12 245 134  29   3   0]
 [  0  10 145 412  85  15   1]
 [  0   5  32  94 136  13   0]
 [  0   0   2  17  10  20   0]
 [  0   0   0   0   0   0   0]]




Precisión de 10-Fold Cross-Validation Estratificado: 0.59
Matriz de Confusión 10-Fold Cross-Validation:
[[  0   0   7   5   7   1   0]
 [  0   7  50  70  30   5   1]
 [  6  54 419 630 301  46   1]
 [  9  56 656 995 404  77   1]
 [  5  13 259 416 157  29   1]
 [  0   3  53  72  40   7   0]
 [  0   1   0   1   2   1   0]]
