In [3]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

In [1]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

In [4]:
# Cargar el dataset de penguins
df = sns.load_dataset("penguins").dropna()

# Seleccionar características y la variable objetivo
X = df[['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']]
y = df['species']

In [5]:
# Codificar la variable objetivo
y = LabelEncoder().fit_transform(y)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#DEFINIR HIPERPARAMETROS

In [10]:
param_grid = {
    'n_neighbors': np.arange(1, 20, 2),
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan', 'chebyshev', 'minkowski'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'leaf_size': [10, 20, 30, 40, 50],
    'p': [1, 2, 3]
}

In [12]:
knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn,param_grid,cv=5,scoring='accuracy',n_jobs=-1)
grid_search.fit(X_train,y_train)
print("Mejores parámetros:", grid_search.best_params_)
print("Mejor precisión:", grid_search.best_score_)

Mejores parámetros: {'algorithm': 'auto', 'leaf_size': 10, 'metric': 'chebyshev', 'n_neighbors': 3, 'p': 1, 'weights': 'uniform'}
Mejor precisión: 0.9887491264849755


# CREAMOS MODELO CON LOS MEJORES PARAMETROS

In [13]:
best_knn = grid_search.best_estimator_
best_knn.fit(X_train,y_train)
y_pred = best_knn.predict(X_test)

accuracy = accuracy_score(y_test,y_pred)
print(f'Accuracy : {accuracy:.2f}')

Accuracy : 1.00
