## [K Nearest Neighbors](https://medium.com/@RobuRishabh/knn-k-nearest-neighbour-5ae18ae8e274)

![KNN](./assets/k-nn.png)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:

# Published Google Sheets CSV export that holds the source dataset.
URL: str = r"https://docs.google.com/spreadsheets/d/e/2PACX-1vTVjHDHSf0d-qpQZrqCIlynVf0KkqZ8MEloq1OF0SOsAXquEBfGXAfKFWv1917fwAbjcxZn4wv8XX7K/pub?gid=333071130&single=true&output=csv"

# Read the sheet and lowercase every header in one chained expression so the
# column names used below do not depend on the capitalisation in the sheet.
poblacion_india_df = pd.read_csv(URL, encoding="utf8", sep=",").rename(columns=str.lower)

# Columns kept for modelling: three predictors plus the "outcome" label.
COLUMNAS_MODELO = ["glucose", "bmi", "age", "outcome"]
df = poblacion_india_df.loc[:, COLUMNAS_MODELO]

In [None]:
# Name of the label column; every other column of `df` is used as a feature.
target = 'outcome'

y = df[target]
X = df.drop(columns=[target])

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error


In [None]:
# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# test rows influence the mean/std used for preprocessing (data leakage) and
# make the test metrics optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling parameters from the training partition only, then apply
# that same transform to the test partition.
scaler = StandardScaler()
X_train = pd.DataFrame(
    scaler.fit_transform(X_train_raw),
    columns=X.columns,
    index=X_train_raw.index,  # keep row labels aligned with y_train
)
X_test = pd.DataFrame(
    scaler.transform(X_test_raw),
    columns=X.columns,
    index=X_test_raw.index,  # keep row labels aligned with y_test
)

# Full feature matrix on the train-fitted scale, kept under its original name
# for inspection (e.g. X_scaled.head()); it is not used for training.
X_scaled = pd.DataFrame(scaler.transform(X), columns=X.columns, index=X.index)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    precision_score, recall_score, f1_score
)

In [None]:
# k-NN classifier configuration:
#   n_neighbors=10     -> vote among the 10 closest training samples (tunable)
#   weights="uniform"  -> every neighbour counts equally
#                         ("distance" would weight neighbours by proximity)
#   metric="minkowski" -> with scikit-learn's default p=2 this is the
#                         Euclidean distance
knn = KNeighborsClassifier(n_neighbors=10, weights="uniform", metric="minkowski")
knn.fit(X_train, y_train)

In [None]:
# Predict a class label for every row of the held-out test split.
predicciones_knn = knn.predict(X_test)


*Métricas*

In [None]:
# Overall share of correct predictions. Note that the printed label
# "Precisión general" refers to accuracy, not to the precision metric below.
precision_global = accuracy_score(y_test, predicciones_knn)
print(f"Precisión general: {precision_global*100:.1f}%")

# Remaining scores share one output format, so compute and print them in a loop.
for etiqueta, metrica in (
    ("Precisión (Precision)", precision_score),
    ("Sensibilidad (Recall)", recall_score),
    ("F1-Score", f1_score),
):
    print(f"{etiqueta}: {metrica(y_test, predicciones_knn)*100:.1f}%")

In [None]:
# Confusion matrix, printed as raw counts and then drawn as a heatmap.
print("\nMatriz de Confusión:")
matriz_knn = confusion_matrix(y_test, predicciones_knn)
print(matriz_knn)

# Tick labels assume class 0 = no diabetes and class 1 = diabetes -- TODO confirm
# against the dataset's encoding of "outcome".
etiquetas_clase = ['No diabetes', 'Diabetes']

fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    matriz_knn,
    annot=True,
    fmt="d",
    cmap="Oranges",
    xticklabels=etiquetas_clase,
    yticklabels=etiquetas_clase,
    ax=ax,
)
ax.set_xlabel("Predicción")
ax.set_ylabel("Valor real")
ax.set_title("Matriz de Confusión - KNN")
plt.show()

In [None]:
# Per-class precision / recall / F1 summary, printed under a heading.
print(
    "\nReporte de Clasificación:",
    classification_report(y_test, predicciones_knn),
    sep="\n",
)

In [None]:
# Row-by-row comparison of true labels and predictions for the test split.
# The frame keeps y_test's index; predictions are attached positionally.
resultados_knn = pd.DataFrame({'Valor_Real': y_test}).assign(
    Prediccion=predicciones_knn,
    Correcto=lambda tabla: tabla['Valor_Real'] == tabla['Prediccion'],
)

print("\nComparación Predicciones vs Real:")
print(resultados_knn.head(10))

# Hits are the True entries of 'Correcto'; misses are the remaining rows.
aciertos = resultados_knn['Correcto'].sum()
errores = len(resultados_knn) - aciertos

print("\nResumen:")
print(f"Aciertos: {aciertos}")
print(f"Errores: {errores}")


In [None]:
# Accuracy on the test split for k = 1..20 neighbours (all other
# hyperparameters left at their defaults).
# NOTE(review): this curve is measured on the test split, so picking k from it
# would give an optimistic estimate; use it for illustration only.
vecinos = range(1, 21)
precisiones = [
    accuracy_score(
        y_test,
        KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train).predict(X_test),
    )
    for k in vecinos
]

fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(vecinos, precisiones, marker='o')
ax.set_title("Precisión vs Número de Vecinos (k)")
ax.set_xlabel("Número de vecinos (k)")
ax.set_ylabel("Precisión")
ax.grid(True)
plt.show()

### Extra: visualización 3D para este tipo de modelos

In [None]:
import plotly.express as px
import pandas as pd

# Feature columns used as the three plot axes.
variables_3d = ["glucose", "bmi", "age"]

# Test-split features plus the predicted class; the class is stored as a
# string so plotly treats it as a discrete (categorical) colour.
df_3d = X_test[variables_3d].assign(Prediccion=predicciones_knn.astype(str))


In [None]:
import plotly.express as px
import pandas as pd

# Feature columns placed on the x, y and z axes, and the titles shown for them.
variables_3d = ["glucose", "bmi", "age"]
TITULOS_EJES_3D = dict(xaxis_title='Glucose', yaxis_title='BMI', zaxis_title='Age')


def _scatter_3d_por_clase(features, clases, columna_clase, etiqueta_leyenda, titulo):
    """Build an interactive 3D scatter of `features`, coloured by class.

    Both the "real values" and the "predictions" figures are produced by this
    helper, so they are guaranteed to share axes, colours and symbols.

    Parameters
    ----------
    features : pandas.DataFrame
        Frame containing the columns listed in `variables_3d`.
    clases : pandas.Series or numpy.ndarray
        One class label per row of `features`. A Series is aligned on the
        index, an array is assigned positionally. Labels are converted to
        strings so plotly uses a discrete colour/symbol mapping.
    columna_clase : str
        Name of the column that will hold the class labels.
    etiqueta_leyenda : str
        Legend title shown in place of `columna_clase`.
    titulo : str
        Figure title.

    Returns
    -------
    tuple
        `(datos, figura)`: the DataFrame that was plotted and the plotly figure.
    """
    datos = features[variables_3d].copy()
    datos[columna_clase] = clases.astype(str)

    eje_x, eje_y, eje_z = variables_3d
    figura = px.scatter_3d(
        datos,
        x=eje_x,
        y=eje_y,
        z=eje_z,
        color=columna_clase,
        symbol=columna_clase,
        size_max=8,
        opacity=0.8,
        labels={columna_clase: etiqueta_leyenda},
        color_discrete_map={'0': 'blue', '1': 'red'},
        symbol_map={'0': 'circle', '1': 'diamond'},
    )
    figura.update_layout(title=titulo, scene=dict(**TITULOS_EJES_3D))
    return datos, figura


# NOTE: X_test holds the standardized features, so the axes are in scaled
# units rather than the raw glucose / BMI / age values.

# Interactive 3D plot of the true classes.
df_real, fig_real = _scatter_3d_por_clase(
    X_test, y_test, 'ClaseReal', 'Valor Real', "Valores Reales"
)
fig_real.show()

# Interactive 3D plot of the classes predicted by the KNN model.
df_pred, fig_pred = _scatter_3d_por_clase(
    X_test, predicciones_knn, 'Prediccion', 'Clase Predicha', "Predicciones KNN"
)
fig_pred.show()
