In [179]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, log_loss, matthews_corrcoef, roc_auc_score

# Modelos de clasificación
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
import kagglehub


# SVM(Support Vector Machine)

## Explicación del Algoritmo

## Aplicación del algoritmo

In [180]:
RANDOM_STATE = 6

### Vino

In [181]:
# Cargar dataset
wine_quality_dataset_path = kagglehub.dataset_download('yasserh/wine-quality-dataset')
df_vino = pd.read_csv(wine_quality_dataset_path+'/WineQT.csv')

In [182]:
df_vino.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,Id
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,0
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,1
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,2
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,3
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,4


In [183]:
# Convertir calidad en binario (1 = buena calidad, 0 = mala calidad)
df_vino['quality'] = df_vino['quality'].apply(lambda x: 1 if x > 5 else 0)

# Separar variables
X_vino = df_vino.drop('quality', axis=1)
y_vino = df_vino['quality']

# Dividir en entrenamiento y prueba
X_train_vino, X_test_vino, y_train_vino, y_test_vino = train_test_split(X_vino, y_vino, test_size=0.2, random_state=6)

In [184]:
# 1. SVM
svm_vino= SVC(random_state=RANDOM_STATE)

In [185]:
def evaluar_modelo(modelo, X_train, X_test, y_train, y_test):
    modelo.fit(X_train, y_train)
    y_pred = modelo.predict(X_test)
    y_probs = modelo.decision_function(X_test)
    
    # Métricas
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    logloss = log_loss(y_test, 1 / (1 + np.exp(-y_probs)))
    mcc_value = matthews_corrcoef(y_test, y_pred)
    
    print(f"Accuracy: {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall: {rec:.4f}")
    print(f"F1-Score: {f1:.4f}")
    print(f"logloss: {logloss:.4f}")
    print(f"MCC: {mcc_value:.4f}")

In [186]:
# 1. SVM
print("SVM - Calidad del Vino")
evaluar_modelo(svm_vino, X_train_vino, X_test_vino, y_train_vino, y_test_vino)

SVM - Calidad del Vino
Accuracy: 0.5721
Precision: 0.5421
Recall: 0.9035
F1-Score: 0.6776
logloss: 0.7151
MCC: 0.1955


In [187]:
def hiperparametros(mdoelo,hp,hps,X_train, X_test, y_train, y_test):
    for i in hps:
        print(f"\nSVM con {hp} = {i}")
        svm_model = SVC(**{hp: i})
        evaluar_modelo(svm_model, X_train, X_test, y_train, y_test)


In [188]:
hiperparametros(svm_vino,'C',[0.1, 10], X_train_vino, X_test_vino, y_train_vino, y_test_vino)


SVM con C = 0.1
Accuracy: 0.4978
Precision: 0.4978
Recall: 1.0000
F1-Score: 0.6647
logloss: 0.7554
MCC: 0.0000

SVM con C = 10
Accuracy: 0.6201
Precision: 0.5722
Recall: 0.9386
F1-Score: 0.7110
logloss: 0.6760
MCC: 0.3139


In [189]:
hiperparametros(svm_vino,'kernel',['linear', 'poly'], X_train_vino, X_test_vino, y_train_vino, y_test_vino)


SVM con kernel = linear
Accuracy: 0.7424
Precision: 0.7570
Recall: 0.7105
F1-Score: 0.7330
logloss: 0.5160
MCC: 0.4855

SVM con kernel = poly
Accuracy: 0.4978
Precision: 0.4978
Recall: 1.0000
F1-Score: 0.6647
logloss: 0.8034
MCC: 0.0000


In [190]:
hiperparametros(svm_vino,'tol',[1e-1, 1e-5], X_train_vino, X_test_vino, y_train_vino, y_test_vino)


SVM con tol = 0.1
Accuracy: 0.5721
Precision: 0.5421
Recall: 0.9035
F1-Score: 0.6776
logloss: 0.7168
MCC: 0.1955

SVM con tol = 1e-05
Accuracy: 0.5721
Precision: 0.5421
Recall: 0.9035
F1-Score: 0.6776
logloss: 0.7150
MCC: 0.1955


### Breast cancer

In [191]:
breast_cancer_wisconsin_diagnostic_dataset_path = kagglehub.dataset_download('utkarshx27/breast-cancer-wisconsin-diagnostic-dataset')
df_cancer = pd.read_csv(breast_cancer_wisconsin_diagnostic_dataset_path+'/brca.csv')

In [192]:
df_cancer.head()

Unnamed: 0.1,Unnamed: 0,x.radius_mean,x.texture_mean,x.perimeter_mean,x.area_mean,x.smoothness_mean,x.compactness_mean,x.concavity_mean,x.concave_pts_mean,x.symmetry_mean,...,x.texture_worst,x.perimeter_worst,x.area_worst,x.smoothness_worst,x.compactness_worst,x.concavity_worst,x.concave_pts_worst,x.symmetry_worst,x.fractal_dim_worst,y
0,1,13.54,14.36,87.46,566.3,0.09779,0.08129,0.06664,0.04781,0.1885,...,19.26,99.7,711.2,0.144,0.1773,0.239,0.1288,0.2977,0.07259,B
1,2,13.08,15.71,85.63,520.0,0.1075,0.127,0.04568,0.0311,0.1967,...,20.49,96.09,630.5,0.1312,0.2776,0.189,0.07283,0.3184,0.08183,B
2,3,9.504,12.44,60.34,273.9,0.1024,0.06492,0.02956,0.02076,0.1815,...,15.66,65.13,314.9,0.1324,0.1148,0.08867,0.06227,0.245,0.07773,B
3,4,13.03,18.42,82.61,523.8,0.08983,0.03766,0.02562,0.02923,0.1467,...,22.81,84.46,545.9,0.09701,0.04619,0.04833,0.05013,0.1987,0.06169,B
4,5,8.196,16.84,51.71,201.9,0.086,0.05943,0.01588,0.005917,0.1769,...,21.96,57.26,242.2,0.1297,0.1357,0.0688,0.02564,0.3105,0.07409,B


In [193]:
df_cancer['y'] = df_cancer['y'].map({'M': 0, 'B': 1})

X_cancer = df_cancer.drop(columns=['y'])
y_cancer = df_cancer['y']


X_train_cancer, X_test_cancer, y_train_cancer, y_test_cancer = train_test_split(X_cancer, y_cancer, test_size=0.2, random_state=6)

In [194]:
svm_cancer= SVC(random_state=RANDOM_STATE)

In [195]:
evaluar_modelo(svm_cancer, X_train_cancer, X_test_cancer, y_train_cancer, y_test_cancer)

Accuracy: 0.9561
Precision: 0.9420
Recall: 0.9848
F1-Score: 0.9630
logloss: 0.2311
MCC: 0.9106


In [196]:
hiperparametros(svm_cancer,'C',[0.1, 10], X_train_cancer, X_test_cancer, y_train_cancer, y_test_cancer)


SVM con C = 0.1
Accuracy: 0.9123
Precision: 0.8784
Recall: 0.9848
F1-Score: 0.9286
logloss: 0.3586
MCC: 0.8249

SVM con C = 10
Accuracy: 0.9825
Precision: 0.9848
Recall: 0.9848
F1-Score: 0.9848
logloss: 0.1354
MCC: 0.9640


In [197]:
hiperparametros(svm_cancer,'kernel',['linear', 'poly'], X_train_cancer, X_test_cancer, y_train_cancer, y_test_cancer)


SVM con kernel = linear
Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
logloss: 0.0168
MCC: 1.0000

SVM con kernel = poly
Accuracy: 0.9474
Precision: 0.9286
Recall: 0.9848
F1-Score: 0.9559
logloss: 0.2153
MCC: 0.8932


In [198]:
hiperparametros(svm_cancer,'tol',[1e-1, 1e-5], X_train_cancer, X_test_cancer, y_train_cancer, y_test_cancer)


SVM con tol = 0.1
Accuracy: 0.9561
Precision: 0.9420
Recall: 0.9848
F1-Score: 0.9630
logloss: 0.2348
MCC: 0.9106

SVM con tol = 1e-05
Accuracy: 0.9561
Precision: 0.9420
Recall: 0.9848
F1-Score: 0.9630
logloss: 0.2311
MCC: 0.9106


### Phishing

In [199]:
phishing_website_detector_path = kagglehub.dataset_download('eswarchandt/phishing-website-detector')
df_phishing = pd.read_csv(phishing_website_detector_path+'/phishing.csv')

In [200]:
X_phishing = df_phishing.drop(columns=['class']) 
y_phishing = df_phishing['class']
X_train_phishing, X_test_phishing, y_train_phishing, y_test_phishing = train_test_split(X_phishing, y_phishing, test_size=0.2, random_state=6)

In [201]:
svm_phising= SVC(random_state=RANDOM_STATE)

In [202]:
evaluar_modelo(svm_phising, X_train_phishing, X_test_phishing, y_train_phishing, y_test_phishing)

Accuracy: 0.5577
Precision: 0.5577
Recall: 1.0000
F1-Score: 0.7160
logloss: 0.7554
MCC: 0.0000


In [203]:
hiperparametros(svm_phising,'C',[0.1, 10], X_train_phishing, X_test_phishing, y_train_phishing, y_test_phishing)


SVM con C = 0.1
Accuracy: 0.5577
Precision: 0.5577
Recall: 1.0000
F1-Score: 0.7160
logloss: 0.7556
MCC: 0.0000

SVM con C = 10
Accuracy: 0.5577
Precision: 0.5577
Recall: 1.0000
F1-Score: 0.7160
logloss: 0.7535
MCC: 0.0000


In [204]:
#hiperparametros(svm_phising,'kernel',['linear', 'poly'], X_train_phishing, X_test_phishing, y_train_phishing, y_test_phishing)

In [205]:
hiperparametros(svm_phising,'tol',[1e-1, 1e-5], X_train_phishing, X_test_phishing, y_train_phishing, y_test_phishing)


SVM con tol = 0.1
Accuracy: 0.5577
Precision: 0.5577
Recall: 1.0000
F1-Score: 0.7160
logloss: 0.7558
MCC: 0.0000

SVM con tol = 1e-05
Accuracy: 0.5577
Precision: 0.5577
Recall: 1.0000
F1-Score: 0.7160
logloss: 0.7554
MCC: 0.0000


In [206]:
breast_cancer_wisconsin_diagnostic_dataset_path = kagglehub.dataset_download('utkarshx27/breast-cancer-wisconsin-diagnostic-dataset')
wine_quality_dataset_path = kagglehub.dataset_download('yasserh/wine-quality-dataset')
accidentes_de_trfico_de_madrid_espaa_2019_a_2023_path = kagglehub.dataset_download('jairoordezpacheco/accidentes-de-trfico-de-madrid-espaa-2019-a-2023')
phishing_website_detector_path = kagglehub.dataset_download('eswarchandt/phishing-website-detector')

brca = pd.read_csv(breast_cancer_wisconsin_diagnostic_dataset_path+'/brca.csv')
wine = pd.read_csv(wine_quality_dataset_path+'/WineQT.csv')
phishing= pd.read_csv(phishing_website_detector_path+'/phishing.csv')
accidents = pd.read_csv(accidentes_de_trfico_de_madrid_espaa_2019_a_2023_path+'/datos_madrid.csv',encoding='MacRoman')

In [207]:
def preprocess_brca(df):
    X = df.drop(columns=['y'])
    y = df['y'].apply(lambda x: 1 if x == 'M' else 0)
    return X, y

In [208]:
def preprocess_wine(df):
    df['quality'] = df['quality'].apply(lambda x: 1 if x > 5 else 0)
    X = df.drop(columns=['quality', 'Id'])
    y = df['quality']
    return X, y

In [209]:
def preprocess_phishing(df):
    X = df.drop(columns=['class'])
    y = df['class']
    return X, y

In [210]:
def preprocess_accidents(df):
    df = df.drop(columns=['num_expediente', 'fecha', 'hora', 'localizacion', 'numero', 'coordenada_x_utm', 'coordenada_y_utm'])
    df = df.dropna()
    le = LabelEncoder()
    for col in df.columns:
        if df[col].dtype == 'object':
            df[col] = le.fit_transform(df[col])
    X = df.drop(columns=['lesividad'])
    y = df['lesividad'].apply(lambda x: 1 if x > 0 else 0)
    return X, y

In [211]:
X_brca, y_brca = preprocess_brca(brca)
X_wine, y_wine = preprocess_wine(wine)
X_phishing, y_phishing = preprocess_phishing(phishing)
X_accidents, y_accidents = preprocess_accidents(accidents)

In [212]:
X_train_brca, X_test_brca, y_train_brca, y_test_brca = train_test_split(X_brca, y_brca, test_size=0.2, random_state=6)
X_train_wine, X_test_wine, y_train_wine, y_test_wine = train_test_split(X_wine, y_wine, test_size=0.2, random_state=6)
X_train_accidents, X_test_accidents, y_train_accidents, y_test_accidents = train_test_split(X_accidents, y_accidents, test_size=0.2, random_state=6)

In [213]:
def evaluate_model(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    return accuracy, precision, recall, f1, roc_auc, cm

In [214]:
svm = SVC(random_state=RANDOM_STATE)
log_reg = LogisticRegression(max_iter=1000,random_state=RANDOM_STATE)
decision_tree = DecisionTreeClassifier(random_state=RANDOM_STATE)
random_forest = RandomForestClassifier(random_state=RANDOM_STATE)
knn = KNeighborsClassifier()

In [215]:
datasets = {
    'BRCA': (X_train_brca, X_test_brca, y_train_brca, y_test_brca),
    'Wine': (X_train_wine, X_test_wine, y_train_wine, y_test_wine),
    'Accidents': (X_train_accidents, X_test_accidents, y_train_accidents, y_test_accidents)
}

In [216]:
models = {
    'SVM': svm,
    'Logistic Regression': log_reg,
    'Decision Tree': decision_tree,
    'Random Forest': random_forest,
    'kNN': knn
}

In [217]:
results = {}

for dataset_name, (X_train, X_test, y_train, y_test) in datasets.items():
    results[dataset_name] = {}
    for model_name, model in models.items():
        accuracy, precision, recall, f1, roc_auc, cm = evaluate_model(model, X_train, X_test, y_train, y_test)
        results[dataset_name][model_name] = {
            'Accuracy': accuracy,
            'Precision': precision,
            'Recall': recall,
            'F1 Score': f1,
            'ROC AUC': roc_auc,
            'Confusion Matrix': cm
        }

In [218]:
for dataset_name, dataset_results in results.items():
    print(f"Results for {dataset_name} dataset:")
    for model_name, metrics in dataset_results.items():
        print(f"  {model_name}:")
        for metric_name, value in metrics.items():
            print(f"    {metric_name}: {value}")
        print()

Results for BRCA dataset:
  SVM:
    Accuracy: 0.956140350877193
    Precision: 0.9777777777777777
    Recall: 0.9166666666666666
    F1 Score: 0.946236559139785
    ROC AUC: 0.9507575757575757
    Confusion Matrix: [[65  1]
 [ 4 44]]

  Logistic Regression:
    Accuracy: 1.0
    Precision: 1.0
    Recall: 1.0
    F1 Score: 1.0
    ROC AUC: 1.0
    Confusion Matrix: [[66  0]
 [ 0 48]]

  Decision Tree:
    Accuracy: 1.0
    Precision: 1.0
    Recall: 1.0
    F1 Score: 1.0
    ROC AUC: 1.0
    Confusion Matrix: [[66  0]
 [ 0 48]]

  Random Forest:
    Accuracy: 1.0
    Precision: 1.0
    Recall: 1.0
    F1 Score: 1.0
    ROC AUC: 1.0
    Confusion Matrix: [[66  0]
 [ 0 48]]

  kNN:
    Accuracy: 0.9824561403508771
    Precision: 0.9791666666666666
    Recall: 0.9791666666666666
    F1 Score: 0.9791666666666666
    ROC AUC: 0.9820075757575757
    Confusion Matrix: [[65  1]
 [ 1 47]]

Results for Wine dataset:
  SVM:
    Accuracy: 0.6419213973799127
    Precision: 0.5941176470588235
    R

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

# Establecer la semilla aleatoria
RANDOM_STATE = 42

# Cargar los datasets
brca = pd.read_csv('brca.csv')
wine = pd.read_csv('WineQT.csv')
accidents = pd.read_csv('datos_madrid.csv')
phishing = pd.read_excel('phishing.xls')

# Preprocesamiento de los datasets
def preprocess_brca(df):
    X = df.drop(columns=['y'])
    y = df['y'].apply(lambda x: 1 if x == 'M' else 0)
    return X, y

def preprocess_wine(df):
    df['quality'] = df['quality'].apply(lambda x: 1 if x > 5 else 0)
    X = df.drop(columns=['quality', 'Id'])
    y = df['quality']
    return X, y

def preprocess_accidents(df):
    df = df.drop(columns=['num_expediente', 'fecha', 'hora', 'localizacion', 'numero', 'coordenada_x_utm', 'coordenada_y_utm'])
    df = df.dropna()
    le = LabelEncoder()
    for col in df.columns:
        if df[col].dtype == 'object':
            df[col] = le.fit_transform(df[col])
    X = df.drop(columns=['lesividad'])
    y = df['lesividad'].apply(lambda x: 1 if x > 0 else 0)
    return X, y

def preprocess_phishing(df):
    X = df.drop(columns=['class'])
    y = df['class']
    return X, y

X_brca, y_brca = preprocess_brca(brca)
X_wine, y_wine = preprocess_wine(wine)
X_accidents, y_accidents = preprocess_accidents(accidents)
X_phishing, y_phishing = preprocess_phishing(phishing)

# Dividir los datasets en conjuntos de entrenamiento y prueba
X_train_brca, X_test_brca, y_train_brca, y_test_brca = train_test_split(X_brca, y_brca, test_size=0.2, random_state=RANDOM_STATE)
X_train_wine, X_test_wine, y_train_wine, y_test_wine = train_test_split(X_wine, y_wine, test_size=0.2, random_state=RANDOM_STATE)
X_train_accidents, X_test_accidents, y_train_accidents, y_test_accidents = train_test_split(X_accidents, y_accidents, test_size=0.2, random_state=RANDOM_STATE)
X_train_phishing, X_test_phishing, y_train_phishing, y_test_phishing = train_test_split(X_phishing, y_phishing, test_size=0.2, random_state=RANDOM_STATE)

# Escalar los datos
scaler = StandardScaler()
X_train_brca = scaler.fit_transform(X_train_brca)
X_test_brca = scaler.transform(X_test_brca)
X_train_wine = scaler.fit_transform(X_train_wine)
X_test_wine = scaler.transform(X_test_wine)
X_train_accidents = scaler.fit_transform(X_train_accidents)
X_test_accidents = scaler.transform(X_test_accidents)
X_train_phishing = scaler.fit_transform(X_train_phishing)
X_test_phishing = scaler.transform(X_test_phishing)

# Función para entrenar y evaluar modelos
def evaluate_model(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    return accuracy, precision, recall, f1, roc_auc, cm

# Inicializar los modelos
svm = SVC(random_state=RANDOM_STATE)
log_reg = LogisticRegression(max_iter=1000, random_state=RANDOM_STATE)  # Aumentar el número de iteraciones
decision_tree = DecisionTreeClassifier(random_state=RANDOM_STATE)
random_forest = RandomForestClassifier(random_state=RANDOM_STATE)
knn = KNeighborsClassifier()

# Evaluar los modelos en los datasets
datasets = {
    'BRCA': (X_train_brca, X_test_brca, y_train_brca, y_test_brca),
    'Wine': (X_train_wine, X_test_wine, y_train_wine, y_test_wine),
    'Accidents': (X_train_accidents, X_test_accidents, y_train_accidents, y_test_accidents),
    'Phishing': (X_train_phishing, X_test_phishing, y_train_phishing, y_test_phishing)
}

models = {
    'SVM': svm,
    'Logistic Regression': log_reg,
    'Decision Tree': decision_tree,
    'Random Forest': random_forest,
    'kNN': knn
}

results = {}

for dataset_name, (X_train, X_test, y_train, y_test) in datasets.items():
    results[dataset_name] = {}
    for model_name, model in models.items():
        accuracy, precision, recall, f1, roc_auc, cm = evaluate_model(model, X_train, X_test, y_train, y_test)
        results[dataset_name][model_name] = {
            'Accuracy': accuracy,
            'Precision': precision,
            'Recall': recall,
            'F1 Score': f1,
            'ROC AUC': roc_auc,
            'Confusion Matrix': cm
        }

# Mostrar los resultados
for dataset_name, dataset_results in results.items():
    print(f"Results for {dataset_name} dataset:")
    for model_name, metrics in dataset_results.items():
        print(f"  {model_name}:")
        for metric_name, value in metrics.items():
            print(f"    {metric_name}: {value}")
        print()

# Ajuste de Hiperparámetros para Regresión Logística
param_grid = {
    'C': [0.1, 1, 10],
    'solver': ['lbfgs', 'liblinear'],
    'max_iter': [100, 200, 500]
}

best_params = {}

for dataset_name, (X_train, X_test, y_train, y_test) in datasets.items():
    grid_search = GridSearchCV(LogisticRegression(random_state=RANDOM_STATE), param_grid, cv=5, scoring='accuracy')
    grid_search.fit(X_train, y_train)
    best_params[dataset_name] = grid_search.best_params_
    best_log_reg = grid_search.best_estimator_
    accuracy, precision, recall, f1, roc_auc, cm = evaluate_model(best_log_reg, X_train, X_test, y_train, y_test)
    results[dataset_name]['Logistic Regression (Best)'] = {
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1,
        'ROC AUC': roc_auc,
        'Confusion Matrix': cm
    }

# Mostrar los mejores hiperparámetros y resultados
for dataset_name, params in best_params.items():
    print(f"Best hyperparameters for Logistic Regression on {dataset_name} dataset:")
    print(params)
    print(f"Results for Logistic Regression with best hyperparameters on {dataset_name} dataset:")
    for metric_name, value in results[dataset_name]['Logistic Regression (Best)'].items():
        print(f"  {metric_name}: {value}")
    print()

# Crear datasets de ejemplo con 10 registros
example_datasets = {
    'BRCA': brca.sample(n=10, random_state=RANDOM_STATE),
    'Wine': wine.sample(n=10, random_state=RANDOM_STATE),
    'Accidents': accidents.sample(n=10, random_state=RANDOM_STATE),
    'Phishing': phishing.sample(n=10, random_state=RANDOM_STATE)
}

for dataset_name, example_df in example_datasets.items():
    if dataset_name == 'BRCA':
        X_example, y_example = preprocess_brca(example_df)
    elif dataset_name == 'Wine':
        X_example, y_example = preprocess_wine(example_df)
    elif dataset_name == 'Accidents':
        X_example, y_example = preprocess_accidents(example_df)
    elif dataset_name == 'Phishing':
        X_example, y_example = preprocess_phishing(example_df)
    
    X_example = scaler.transform(X_example)
    best_log_reg = LogisticRegression(**best_params[dataset_name], random_state=RANDOM_STATE)
    best_log_reg.fit(X_train, y_train)
    y_pred_example = best_log_reg.predict(X_example)
    accuracy_example = accuracy_score(y_example, y_pred_example)
    precision_example = precision_score(y_example, y_pred_example)
    recall_example = recall_score(y_example, y_pred_example)
    f1_example = f1_score(y_example, y_pred_example)
    roc_auc_example = roc_auc_score(y_example, y_pred_example)
    cm_example = confusion_matrix(y_example, y_pred_example)
    
    print(f"Results for Logistic Regression with best hyperparameters on {dataset_name} example dataset:")
    print(f"  Accuracy: {accuracy_example}")
    print(f"  Precision: {precision_example}")
    print(f"  Recall: {recall_example}")
    print(f"  F1 Score: {f1_example}")
    print(f"  ROC AUC: {roc_auc_example}")
    print(f"  Confusion Matrix: {cm_example}")
    print()