

# Modelos KNN Beñat



In [None]:
import pandas as pd
import numpy as np

In [None]:
# Read the pre-split training and test sets from the parent directory.
df_train, df_test = map(pd.read_csv, ("../data_train.csv", "../data_test.csv"))

In [None]:
# Separate the feature columns from the 'winner' target column in each split.
X_train, y_train = df_train.drop(columns=['winner']), df_train['winner']
X_test, y_test = df_test.drop(columns=['winner']), df_test['winner']

Configuramos el Grid Search para evaluar distintos modelos KNN con distintos hiperparámetros. Evaluamos cada modelo mediante las métricas de accuracy, precision, recall y f1-score. Para obtener más detalles, consultar la memoria del proyecto.

In [None]:
import time
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Standardise the features: the scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Hyperparameter grid: small neighbourhood sizes, both voting schemes and two
# distance metrics (4 * 2 * 2 = 16 combinations).
param_grid = {
    'n_neighbors': [3, 5, 7, 9],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan']
}

# Build the grid once. ParameterGrid supports len(), so the total number of
# combinations does not need to be re-materialised as a list on every iteration.
grid = ParameterGrid(param_grid)
n_combinations = len(grid)

# One dict per evaluated combination; turned into a DataFrame after the loop.
results = []

# NOTE(review): every combination is scored directly on the test split, so the
# best row of the summary is an optimistic estimate. Consider selecting the
# hyperparameters with cross-validation on the training data instead.
for i, params in enumerate(grid):
    print(f"\n🔍 Combinación {i+1}/{n_combinations}: {params}")

    model = KNeighborsClassifier(**params)

    # Fit; perf_counter() is a monotonic clock intended for measuring intervals.
    start_train = time.perf_counter()
    model.fit(X_train_scaled, y_train)
    end_train = time.perf_counter()
    train_time = end_train - start_train

    # Predict on the test split and time it separately from fitting.
    start_eval = time.perf_counter()
    y_pred = model.predict(X_test_scaled)
    end_eval = time.perf_counter()
    eval_time = end_eval - start_eval

    # Metrics. precision/recall/f1 use scikit-learn's default averaging
    # (presumably binary, i.e. 'winner' is a two-class label — TODO confirm).
    # zero_division=0 reports 0 instead of warning when a score is undefined.
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, zero_division=0)
    rec = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)

    # Store hyperparameters, scores and timings for this combination.
    results.append({
        'n_neighbors': params['n_neighbors'],
        'weights': params['weights'],
        'metric': params['metric'],
        'accuracy': acc,
        'precision': prec,
        'recall': rec,
        'f1_score': f1,
        'train_time': train_time,
        'eval_time': eval_time
    })

    # Per-combination progress report.
    print(f"Accuracy:  {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall:    {rec:.4f}")
    print(f"F1 Score:  {f1:.4f}")
    print(f"Tiempo entrenamiento: {train_time:.2f}s | Tiempo evaluación: {eval_time:.4f}s")

# Final summary, best F1 score first.
df_results = pd.DataFrame(results)
df_results = df_results.sort_values(by='f1_score', ascending=False)

print("\n=== 🏁 RESUMEN FINAL (ordenado por F1 Score) ===")
print(df_results[['n_neighbors', 'weights', 'metric', 'f1_score', 'accuracy', 'train_time']].to_string(index=False))


🔍 Combinación 1/16: {'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'uniform'}
Accuracy:  0.5996
Precision: 0.5988
Recall:    0.5990
F1 Score:  0.5989
Tiempo entrenamiento: 0.05s | Tiempo evaluación: 37.2424s

🔍 Combinación 2/16: {'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'distance'}
Accuracy:  0.5998
Precision: 0.5990
Recall:    0.5991
F1 Score:  0.5991
Tiempo entrenamiento: 0.04s | Tiempo evaluación: 37.9627s

🔍 Combinación 3/16: {'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'uniform'}
Accuracy:  0.6129
Precision: 0.6119
Recall:    0.6135
F1 Score:  0.6127
Tiempo entrenamiento: 0.04s | Tiempo evaluación: 37.7435s

🔍 Combinación 4/16: {'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'distance'}
Accuracy:  0.6131
Precision: 0.6121
Recall:    0.6137
F1 Score:  0.6129
Tiempo entrenamiento: 0.04s | Tiempo evaluación: 38.1126s

🔍 Combinación 5/16: {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'uniform'}
Accuracy:  0.6200
Precision: 0.6200
Recall:    0.6159

Se ha hecho una segunda búsqueda en rejilla con valores de `n_neighbors` mucho mayores (101, 301, 601 y 1001), manteniendo las mismas opciones de `weights` y `metric`.

In [None]:
import time
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Standardise the features: the scaler is fitted on the training split only
# and then applied unchanged to the test split. The scaled arrays are rebuilt
# here, so this cell only needs X_train / X_test to exist.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Second hyperparameter grid: much larger neighbourhood sizes, with the same
# voting schemes and distance metrics (4 * 2 * 2 = 16 combinations).
param_grid = {
    'n_neighbors': [101, 301, 601, 1001],
    'weights': ['distance', 'uniform'],
    'metric': ['euclidean', 'manhattan']
}

# Build the grid once. ParameterGrid supports len(), so the total number of
# combinations does not need to be re-materialised as a list on every iteration.
grid = ParameterGrid(param_grid)
n_combinations = len(grid)

# One dict per evaluated combination; turned into a DataFrame after the loop.
results = []

# NOTE(review): every combination is scored directly on the test split, so the
# best row of the summary is an optimistic estimate. Consider selecting the
# hyperparameters with cross-validation on the training data instead.
for i, params in enumerate(grid):
    print(f"\n🔍 Combinación {i+1}/{n_combinations}: {params}")

    model = KNeighborsClassifier(**params)

    # Fit; perf_counter() is a monotonic clock intended for measuring intervals.
    start_train = time.perf_counter()
    model.fit(X_train_scaled, y_train)
    end_train = time.perf_counter()
    train_time = end_train - start_train

    # Predict on the test split and time it separately from fitting.
    start_eval = time.perf_counter()
    y_pred = model.predict(X_test_scaled)
    end_eval = time.perf_counter()
    eval_time = end_eval - start_eval

    # Metrics. precision/recall/f1 use scikit-learn's default averaging
    # (presumably binary, i.e. 'winner' is a two-class label — TODO confirm).
    # zero_division=0 reports 0 instead of warning when a score is undefined.
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, zero_division=0)
    rec = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)

    # Store hyperparameters, scores and timings for this combination.
    results.append({
        'n_neighbors': params['n_neighbors'],
        'weights': params['weights'],
        'metric': params['metric'],
        'accuracy': acc,
        'precision': prec,
        'recall': rec,
        'f1_score': f1,
        'train_time': train_time,
        'eval_time': eval_time
    })

    # Per-combination progress report.
    print(f"Accuracy:  {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall:    {rec:.4f}")
    print(f"F1 Score:  {f1:.4f}")
    print(f"Tiempo entrenamiento: {train_time:.2f}s | Tiempo evaluación: {eval_time:.4f}s")

# Final summary, best F1 score first.
df_results = pd.DataFrame(results)
df_results = df_results.sort_values(by='f1_score', ascending=False)

print("\n=== 🏁 RESUMEN FINAL (ordenado por F1 Score) ===")
print(df_results[['n_neighbors', 'weights', 'metric', 'f1_score', 'accuracy', 'train_time']].to_string(index=False))



🔍 Combinación 1/16: {'metric': 'euclidean', 'n_neighbors': 101, 'weights': 'distance'}
Accuracy:  0.6543
Precision: 0.6533
Recall:    0.6547
F1 Score:  0.6540
Tiempo entrenamiento: 0.04s | Tiempo evaluación: 31.0746s

🔍 Combinación 2/16: {'metric': 'euclidean', 'n_neighbors': 101, 'weights': 'uniform'}
Accuracy:  0.6543
Precision: 0.6532
Recall:    0.6549
F1 Score:  0.6541
Tiempo entrenamiento: 0.04s | Tiempo evaluación: 31.5359s

🔍 Combinación 3/16: {'metric': 'euclidean', 'n_neighbors': 301, 'weights': 'distance'}
Accuracy:  0.6542
Precision: 0.6523
Recall:    0.6578
F1 Score:  0.6550
Tiempo entrenamiento: 0.04s | Tiempo evaluación: 32.5517s

🔍 Combinación 4/16: {'metric': 'euclidean', 'n_neighbors': 301, 'weights': 'uniform'}
Accuracy:  0.6544
Precision: 0.6526
Recall:    0.6575
F1 Score:  0.6551
Tiempo entrenamiento: 0.04s | Tiempo evaluación: 32.7438s

🔍 Combinación 5/16: {'metric': 'euclidean', 'n_neighbors': 601, 'weights': 'distance'}
Accuracy:  0.6532
Precision: 0.6517
Recall