Importacion de modulos y carga de los conjuntos de datos.

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, KFold, LeaveOneOut
from sklearn.datasets import load_iris, load_wine
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Cargar conjuntos de datos del UCI ML Repository
iris = load_iris()
wine = load_wine()

# Convertir a DataFrame para una manipulación más fácil
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df['target'] = iris.target

wine_df = pd.DataFrame(data=wine.data, columns=wine.feature_names)
wine_df['target'] = wine.target

Creacion de las funciones para los metodos de validacion.

In [3]:
# Función para realizar validación Hold-out asegurando conjuntos disjuntos
def hold_out_validation(df, test_size):
    X = df.drop(columns=['target'])
    y = df['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
    model = DecisionTreeClassifier()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return accuracy_score(y_test, y_pred)

In [4]:
# Función para realizar K-Fold Cross Validation asegurando conjuntos disjuntos
def k_fold_cross_validation(df, k):
    X = df.drop(columns=['target'])
    y = df['target']
    kf = KFold(n_splits=k)
    accuracies = []
    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        model = DecisionTreeClassifier()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracies.append(accuracy_score(y_test, y_pred))
    return sum(accuracies) / len(accuracies)

In [5]:
# Función para realizar Leave-One-Out Cross Validation asegurando conjuntos disjuntos
def leave_one_out_validation(df):
    X = df.drop(columns=['target'])
    y = df['target']
    loo = LeaveOneOut()
    accuracies = []
    for train_index, test_index in loo.split(X):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        model = DecisionTreeClassifier()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracies.append(accuracy_score(y_test, y_pred))
    return sum(accuracies) / len(accuracies)

Prueba de los metodos de validacion.

In [6]:
# Iris
hold_out_acc_iris = hold_out_validation(iris_df, 0.3)
k_fold_acc_iris = k_fold_cross_validation(iris_df, 5)
loo_acc_iris = leave_one_out_validation(iris_df)

print("Hold-out Validation (Iris):", hold_out_acc_iris)
print("K-Fold Cross Validation (Iris, k=5):", k_fold_acc_iris)
print("Leave-One-Out Validation (Iris):", loo_acc_iris)

Hold-out Validation (Iris): 1.0
K-Fold Cross Validation (Iris, k=5): 0.9133333333333333
Leave-One-Out Validation (Iris): 0.9533333333333334


In [7]:
# Wine
hold_out_acc_wine = hold_out_validation(wine_df, 0.3)
k_fold_acc_wine = k_fold_cross_validation(wine_df, 5)
loo_acc_wine = leave_one_out_validation(wine_df)

print("Hold-out Validation (Wine):", hold_out_acc_wine)
print("K-Fold Cross Validation (Wine, k=5):", k_fold_acc_wine)
print("Leave-One-Out Validation (Wine):", loo_acc_wine)

Hold-out Validation (Wine): 0.9629629629629629
K-Fold Cross Validation (Wine, k=5): 0.8425396825396826
Leave-One-Out Validation (Wine): 0.8820224719101124
