In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import LeaveOneOut, train_test_split, StratifiedKFold
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder
# This is a subset of the original data available at kaggle.
data = pd.read_csv("C:\\Users\\Polar\\Documents\\ESCUELA\\5TO_SEMESTRE\\MAKINITAS\\practica6\\winequality\\winequality-white.csv", delimiter=";")

data.head()
print(data.columns)

Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')


In [3]:
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import LeaveOneOut, train_test_split, StratifiedKFold
from sklearn.metrics import confusion_matrix
from collections import Counter

# Codificar la columna 'class' como valores numéricos
label_encoder = LabelEncoder()
data['quality'] = label_encoder.fit_transform(data['quality'])

# Separar características y etiquetas
X = data.drop(columns=['quality']).values
y = data['quality'].values

# Función para calcular la distancia Euclidiana
def euclidean_distance(x, y):
    return np.sqrt(np.sum((x - y) ** 2))

# Función para clasificar un nuevo punto usando el clasificador K-NN
def classify_knn(new_point, X_train, y_train, k=1):
    distances = []
    
    # Calcular la distancia entre el nuevo punto y todos los puntos de entrenamiento
    for i in range(len(X_train)):
        distance = euclidean_distance(new_point, X_train[i])
        distances.append((distance, y_train[i]))
    
    # Ordenar las distancias y seleccionar las k más cercanas
    distances.sort(key=lambda x: x[0])
    k_nearest_classes = [distances[i][1] for i in range(k)]
    
    # Retornar la clase más común entre los k vecinos
    most_common_class = Counter(k_nearest_classes).most_common(1)[0][0]
    return most_common_class

# --- Leave-One-Out Cross-Validation ---
loo = LeaveOneOut()
correct_predictions_loo = 0
y_pred_loo = []
k = int(input("Seleccione el valor de k para Leave-One-Out Cross-Validation: "))

for train_index, test_index in loo.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    predicted_class = classify_knn(X_test[0], X_train, y_train, k=k)
    y_pred_loo.append(predicted_class)
    
    if predicted_class == y_test[0]:
        correct_predictions_loo += 1

accuracy_loo = correct_predictions_loo / len(X)
print(f"Precisión de Leave-One-Out Cross-Validation: {accuracy_loo:.2f}")
print("Matriz de Confusión Leave-One-Out:")
print(confusion_matrix(y, y_pred_loo))

# --- Hold-Out Validation (70-30 split) ---
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
k = int(input("Seleccione el valor de k para Hold-Out Validation: "))

correct_predictions_holdout = 0
y_pred_holdout = []

for i, test_point in enumerate(X_test):
    predicted_class = classify_knn(test_point, X_train, y_train, k=k)
    y_pred_holdout.append(predicted_class)
    if predicted_class == y_test[i]:
        correct_predictions_holdout += 1

accuracy_holdout = correct_predictions_holdout / len(X_test)
print(f"Precisión de Hold-Out Validation (70-30): {accuracy_holdout:.2f}")
print("Matriz de Confusión Hold-Out:")
print(confusion_matrix(y_test, y_pred_holdout))

# --- 10-Fold Cross-Validation Estratificado ---
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
correct_predictions_kfold = 0
total_test_samples = 0
y_pred_kfold = []
k = int(input("Seleccione el valor de k para 10-Fold Cross-Validation: "))

for train_index, test_index in skf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    for i, test_point in enumerate(X_test):
        predicted_class = classify_knn(test_point, X_train, y_train, k=k)
        y_pred_kfold.append(predicted_class)
        if predicted_class == y_test[i]:
            correct_predictions_kfold += 1
    
    total_test_samples += len(X_test)

accuracy_kfold = correct_predictions_kfold / total_test_samples
print(f"Precisión de 10-Fold Cross-Validation Estratificado: {accuracy_kfold:.2f}")
print("Matriz de Confusión 10-Fold Cross-Validation:")
print(confusion_matrix(y, y_pred_kfold))


Precisión de Leave-One-Out Cross-Validation: 0.51
Matriz de Confusión Leave-One-Out:
[[   0    2    8    9    1    0    0]
 [   1   11   66   71   13    1    0]
 [   1   21  717  598  113    7    0]
 [   0   18  485 1362  290   43    0]
 [   0    1   98  370  396   15    0]
 [   0    0   16   73   51   35    0]
 [   0    0    0    5    0    0    0]]
Precisión de Hold-Out Validation (70-30): 0.48
Matriz de Confusión Hold-Out:
[[  0   0   3   3   1   0]
 [  1   5  20  11   3   0]
 [  1   6 203 181  33   2]
 [  0   9 166 389  98   6]
 [  0   2  37 128 104   9]
 [  0   0   2  26  11  10]]




Precisión de 10-Fold Cross-Validation Estratificado: 0.50
Matriz de Confusión 10-Fold Cross-Validation:
[[   0    0    7    8    5    0    0]
 [   0    2   46   90   22    3    0]
 [   0   23  425  724  256   29    0]
 [   2   16  623 1127  384   46    0]
 [   1    7  248  441  167   16    0]
 [   0    0   49   85   36    5    0]
 [   0    1    0    3    1    0    0]]


In [4]:
# This is a subset of the original data available at kaggle.
data = pd.read_csv("C:\\Users\\Polar\\Documents\\ESCUELA\\5TO_SEMESTRE\\MAKINITAS\\practica6\\winequality\\winequality-red.csv", delimiter=";")

data.head()
print(data.columns)

Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')


In [5]:
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import LeaveOneOut, train_test_split, StratifiedKFold
from sklearn.metrics import confusion_matrix
from collections import Counter

# Codificar la columna 'class' como valores numéricos
label_encoder = LabelEncoder()
data['quality'] = label_encoder.fit_transform(data['quality'])

# Separar características y etiquetas
X = data.drop(columns=['quality']).values
y = data['quality'].values

# Función para calcular la distancia Euclidiana
def euclidean_distance(x, y):
    return np.sqrt(np.sum((x - y) ** 2))

# Función para clasificar un nuevo punto usando el clasificador K-NN
def classify_knn(new_point, X_train, y_train, k=1):
    distances = []
    
    # Calcular la distancia entre el nuevo punto y todos los puntos de entrenamiento
    for i in range(len(X_train)):
        distance = euclidean_distance(new_point, X_train[i])
        distances.append((distance, y_train[i]))
    
    # Ordenar las distancias y seleccionar las k más cercanas
    distances.sort(key=lambda x: x[0])
    k_nearest_classes = [distances[i][1] for i in range(k)]
    
    # Retornar la clase más común entre los k vecinos
    most_common_class = Counter(k_nearest_classes).most_common(1)[0][0]
    return most_common_class

# --- Leave-One-Out Cross-Validation ---
loo = LeaveOneOut()
correct_predictions_loo = 0
y_pred_loo = []
k = int(input("Seleccione el valor de k para Leave-One-Out Cross-Validation: "))

for train_index, test_index in loo.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    predicted_class = classify_knn(X_test[0], X_train, y_train, k=k)
    y_pred_loo.append(predicted_class)
    
    if predicted_class == y_test[0]:
        correct_predictions_loo += 1

accuracy_loo = correct_predictions_loo / len(X)
print(f"Precisión de Leave-One-Out Cross-Validation: {accuracy_loo:.2f}")
print("Matriz de Confusión Leave-One-Out:")
print(confusion_matrix(y, y_pred_loo))

# --- Hold-Out Validation (70-30 split) ---
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
k = int(input("Seleccione el valor de k para Hold-Out Validation: "))

correct_predictions_holdout = 0
y_pred_holdout = []

for i, test_point in enumerate(X_test):
    predicted_class = classify_knn(test_point, X_train, y_train, k=k)
    y_pred_holdout.append(predicted_class)
    if predicted_class == y_test[i]:
        correct_predictions_holdout += 1

accuracy_holdout = correct_predictions_holdout / len(X_test)
print(f"Precisión de Hold-Out Validation (70-30): {accuracy_holdout:.2f}")
print("Matriz de Confusión Hold-Out:")
print(confusion_matrix(y_test, y_pred_holdout))

# --- 10-Fold Cross-Validation Estratificado ---
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
correct_predictions_kfold = 0
total_test_samples = 0
y_pred_kfold = []
k = int(input("Seleccione el valor de k para 10-Fold Cross-Validation: "))

for train_index, test_index in skf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    for i, test_point in enumerate(X_test):
        predicted_class = classify_knn(test_point, X_train, y_train, k=k)
        y_pred_kfold.append(predicted_class)
        if predicted_class == y_test[i]:
            correct_predictions_kfold += 1
    
    total_test_samples += len(X_test)

accuracy_kfold = correct_predictions_kfold / total_test_samples
print(f"Precisión de 10-Fold Cross-Validation Estratificado: {accuracy_kfold:.2f}")
print("Matriz de Confusión 10-Fold Cross-Validation:")
print(confusion_matrix(y, y_pred_kfold))


Precisión de Leave-One-Out Cross-Validation: 0.52
Matriz de Confusión Leave-One-Out:
[[  0   0   6   2   2   0]
 [  0   0  32  17   4   0]
 [  0   1 445 212  23   0]
 [  0   3 257 313  62   3]
 [  0   2  32  93  71   1]
 [  0   0   3   9   6   0]]
Precisión de Hold-Out Validation (70-30): 0.49
Matriz de Confusión Hold-Out:
[[  0   0   1   0   0   0]
 [  0   0   6  10   1   0]
 [  0   1 114  70  10   0]
 [  0   1  77 105  17   0]
 [  0   0  10  35  16   0]
 [  0   0   1   4   1   0]]
Precisión de 10-Fold Cross-Validation Estratificado: 0.52
Matriz de Confusión 10-Fold Cross-Validation:
[[  0   0   3   5   2   0]
 [  0   0  23  24   5   1]
 [  0   2 323 291  64   1]
 [  0   3 314 259  59   3]
 [  0   0 105  77  17   0]
 [  0   0   9   8   1   0]]


In [6]:
# Cargar el dataset
data = pd.read_csv("C:\\Users\\Polar\\Documents\\ESCUELA\\5TO_SEMESTRE\\MAKINITAS\\practica6\\iris\\iris.csv")

In [7]:
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import LeaveOneOut, train_test_split, StratifiedKFold
from sklearn.metrics import confusion_matrix
from collections import Counter

# Codificar la columna 'class' como valores numéricos
label_encoder = LabelEncoder()
data['class'] = label_encoder.fit_transform(data['class'])

# Separar características y etiquetas
X = data.drop(columns=['class']).values
y = data['class'].values

# Función para calcular la distancia Euclidiana
def euclidean_distance(x, y):
    return np.sqrt(np.sum((x - y) ** 2))

# Función para clasificar un nuevo punto usando el clasificador K-NN
def classify_knn(new_point, X_train, y_train, k=1):
    distances = []
    
    # Calcular la distancia entre el nuevo punto y todos los puntos de entrenamiento
    for i in range(len(X_train)):
        distance = euclidean_distance(new_point, X_train[i])
        distances.append((distance, y_train[i]))
    
    # Ordenar las distancias y seleccionar las k más cercanas
    distances.sort(key=lambda x: x[0])
    k_nearest_classes = [distances[i][1] for i in range(k)]
    
    # Retornar la clase más común entre los k vecinos
    most_common_class = Counter(k_nearest_classes).most_common(1)[0][0]
    return most_common_class

# --- Leave-One-Out Cross-Validation ---
loo = LeaveOneOut()
correct_predictions_loo = 0
y_pred_loo = []
k = int(input("Seleccione el valor de k para Leave-One-Out Cross-Validation: "))

for train_index, test_index in loo.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    predicted_class = classify_knn(X_test[0], X_train, y_train, k=k)
    y_pred_loo.append(predicted_class)
    
    if predicted_class == y_test[0]:
        correct_predictions_loo += 1

accuracy_loo = correct_predictions_loo / len(X)
print(f"Precisión de Leave-One-Out Cross-Validation: {accuracy_loo:.2f}")
print("Matriz de Confusión Leave-One-Out:")
print(confusion_matrix(y, y_pred_loo))

# --- Hold-Out Validation (70-30 split) ---
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
k = int(input("Seleccione el valor de k para Hold-Out Validation: "))

correct_predictions_holdout = 0
y_pred_holdout = []

for i, test_point in enumerate(X_test):
    predicted_class = classify_knn(test_point, X_train, y_train, k=k)
    y_pred_holdout.append(predicted_class)
    if predicted_class == y_test[i]:
        correct_predictions_holdout += 1

accuracy_holdout = correct_predictions_holdout / len(X_test)
print(f"Precisión de Hold-Out Validation (70-30): {accuracy_holdout:.2f}")
print("Matriz de Confusión Hold-Out:")
print(confusion_matrix(y_test, y_pred_holdout))

# --- 10-Fold Cross-Validation Estratificado ---
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
correct_predictions_kfold = 0
total_test_samples = 0
y_pred_kfold = []
k = int(input("Seleccione el valor de k para 10-Fold Cross-Validation: "))

for train_index, test_index in skf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    for i, test_point in enumerate(X_test):
        predicted_class = classify_knn(test_point, X_train, y_train, k=k)
        y_pred_kfold.append(predicted_class)
        if predicted_class == y_test[i]:
            correct_predictions_kfold += 1
    
    total_test_samples += len(X_test)

accuracy_kfold = correct_predictions_kfold / total_test_samples
print(f"Precisión de 10-Fold Cross-Validation Estratificado: {accuracy_kfold:.2f}")
print("Matriz de Confusión 10-Fold Cross-Validation:")
print(confusion_matrix(y, y_pred_kfold))


Precisión de Leave-One-Out Cross-Validation: 0.97
Matriz de Confusión Leave-One-Out:
[[50  0  0]
 [ 0 48  2]
 [ 0  2 48]]
Precisión de Hold-Out Validation (70-30): 1.00
Matriz de Confusión Hold-Out:
[[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]
Precisión de 10-Fold Cross-Validation Estratificado: 0.96
Matriz de Confusión 10-Fold Cross-Validation:
[[20 15 15]
 [15 20 15]
 [15 15 20]]
