<a href="https://colab.research.google.com/github/OswaldGutierrez/Modelos-IA/blob/main/KNN_NATICUSdroid.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# =====================================================
# 1. Importaciones
# =====================================================
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report

# =====================================================
# 2. Cargar dataset
# =====================================================
!wget https://raw.githubusercontent.com/xiomara-udea/NATICUSdroid/main/data.csv
import pandas as pd
df = pd.read_csv("data.csv")

# Separar características (X) y etiqueta (y)
X = df.drop('Result', axis=1)
y = df['Result']

print(f"  - Características: {X.shape[1]}")
print(f"  - Muestras: {len(y)}")

# =====================================================
# 3. Dividir dataset Train–Val–Test (70–15–15)
# =====================================================
# Stage 1: keep 70% of the samples for training and set the other 30% aside.
# Stratifying on y keeps the class proportions the same in both parts.
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.30,
    stratify=y,
    random_state=42,
)

# Stage 2: cut the held-out 30% in half, which yields validation and test
# sets of 15% each, again stratified on the labels.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.50,
    stratify=y_temp,
    random_state=42,
)

# Show the resulting shapes as a quick sanity check of the split.
print(f"Train: {X_train.shape} Val: {X_val.shape} Test: {X_test.shape}")

# =====================================================
# 4. Hiperparámetros para GridSearch (versión rápida)
# =====================================================
# Search space for the grid search. The "clf__" prefix routes each entry to
# the pipeline step named "clf".
param_grid_knn = dict(
    clf__n_neighbors=[3, 5, 7, 9],
    clf__weights=["uniform", "distance"],
)

# Print the grid as a small table: each hyperparameter name (without its
# "clf__" step prefix) next to its list of candidate values.
print("\nHiperparámetros KNN:")
print(
    pd.DataFrame(
        {
            "Hiperparámetro": [key.split("__", 1)[1] for key in param_grid_knn],
            "Valores": list(param_grid_knn.values()),
        }
    )
)

# =====================================================
# 5. Crear pipeline KNN
# =====================================================
# Two-step pipeline: standardize the features, then classify with k-nearest
# neighbours. The step names ("scaler", "clf") are the prefixes that
# hyperparameter keys such as "clf__n_neighbors" refer to.
pipe_knn = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("clf", KNeighborsClassifier()),
    ]
)

# =====================================================
# 6. Validación cruzada 5-Fold
# =====================================================
# Stratified 5-fold splitter; shuffling with a fixed seed keeps the folds
# identical from run to run.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

print("\nEntrenando GridSearch KNN...")

# Exhaustive search over the KNN grid, scored by F1 on each fold and run on
# all available cores (n_jobs=-1).
gs_knn = GridSearchCV(
    estimator=pipe_knn,
    param_grid=param_grid_knn,
    scoring="f1",
    cv=cv,
    n_jobs=-1,
    verbose=1,
)
gs_knn.fit(X_train, y_train)

# Keep the winning pipeline for the evaluation steps that follow.
best_knn = gs_knn.best_estimator_

print("\nMejores parámetros:", gs_knn.best_params_)
print("Mejor F1 en CV:", gs_knn.best_score_)

# =====================================================
# 7. Función de evaluación (corregida)
# =====================================================
def eval_model(model, X, y):
    """Score a fitted model on one data split.

    Args:
        model: Fitted estimator exposing ``predict``.
        X: Feature matrix to predict on.
        y: True labels for ``X``.

    Returns:
        dict with the keys ``"acc"``, ``"prec"``, ``"rec"`` and ``"f1"``
        (accuracy, precision, recall and F1, each computed with the metric
        function's default settings) plus ``"y_pred"``, the predictions as a
        NumPy array.
    """
    predictions = model.predict(X)
    scorers = {
        "acc": accuracy_score,
        "prec": precision_score,
        "rec": recall_score,
        "f1": f1_score,
    }
    results = {key: scorer(y, predictions) for key, scorer in scorers.items()}
    # Store the predictions as a NumPy array so callers can index them
    # positionally (the bootstrap resampling relies on this).
    results["y_pred"] = np.array(predictions)
    return results

# =====================================================
# 8. Bootstrap para intervalos de confianza
# =====================================================
def bootstrap_ci(y_true, y_pred, metric_fn, B=400, confidence=95.0,
                 random_state=None):
    """Estimate a percentile-bootstrap confidence interval for a metric.

    The (label, prediction) pairs are resampled with replacement ``B`` times;
    the metric is recomputed on every resample and the interval is read off
    the percentiles of those values.

    Args:
        y_true: True labels (any array-like).
        y_pred: Predicted labels, same length as ``y_true``.
        metric_fn: Callable ``metric_fn(y_true, y_pred)`` returning a number.
        B: Number of bootstrap resamples (at least 1).
        confidence: Confidence level in percent, strictly between 0 and 100.
            The default of 95 gives the 2.5th and 97.5th percentiles.
        random_state: ``None`` to draw from NumPy's global random state (the
            previous behaviour), or an int seed / ``numpy.random.RandomState``
            compatible seed for a reproducible interval.

    Returns:
        numpy.ndarray of two floats: the lower and upper interval bounds.

    Raises:
        ValueError: If the inputs are empty or differ in length, if ``B`` is
            smaller than 1, or if ``confidence`` is outside (0, 100).
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    n = len(y_true)
    if n == 0:
        raise ValueError("bootstrap_ci needs at least one sample")
    if len(y_pred) != n:
        raise ValueError(
            f"y_true and y_pred differ in length: {n} != {len(y_pred)}"
        )
    if B < 1:
        raise ValueError(f"B must be at least 1, got {B}")
    if not 0 < confidence < 100:
        raise ValueError(
            f"confidence must be between 0 and 100, got {confidence}"
        )

    # A private RandomState makes the interval reproducible when a seed is
    # given; with no seed the global state is used, exactly as before.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    values = []
    for _ in range(B):
        # Draw n positions with replacement and score that resample.
        idx = rng.randint(0, n, n)
        values.append(metric_fn(y_true[idx], y_pred[idx]))

    # Split the excluded probability mass evenly between both tails.
    tail = (100.0 - confidence) / 2.0
    return np.percentile(values, [tail, 100.0 - tail])

# =====================================================
# 9. Evaluar en Validación y Test
# =====================================================
# Score the selected pipeline on the validation split first, then on the
# test split.
val, test = (
    eval_model(best_knn, features, labels)
    for features, labels in ((X_val, y_val), (X_test, y_test))
)

# Bootstrap confidence intervals for accuracy and F1 on each split. The calls
# run in the order val-accuracy, val-F1, test-accuracy, test-F1.
val_acc_ci, val_f1_ci = (
    bootstrap_ci(y_val, val["y_pred"], metric)
    for metric in (accuracy_score, f1_score)
)
test_acc_ci, test_f1_ci = (
    bootstrap_ci(y_test, test["y_pred"], metric)
    for metric in (accuracy_score, f1_score)
)

# =====================================================
# 10. Mostrar resultados
# =====================================================
# Validation metrics: accuracy and F1 are shown with their bootstrap
# confidence intervals, precision and recall as point estimates only.
print(
    "\n=== Resultados VALIDACIÓN ===",
    f"Accuracy: {val['acc']:.4f}  CI95: {val_acc_ci}",
    f"Precision: {val['prec']:.4f}",
    f"Recall: {val['rec']:.4f}",
    f"F1: {val['f1']:.4f} CI95: {val_f1_ci}",
    sep="\n",
)

# The same summary for the test split.
print(
    "\n=== Resultados TEST ===",
    f"Accuracy: {test['acc']:.4f}  CI95: {test_acc_ci}",
    f"Precision: {test['prec']:.4f}",
    f"Recall: {test['rec']:.4f}",
    f"F1: {test['f1']:.4f} CI95: {test_f1_ci}",
    sep="\n",
)

# Per-class breakdown of the test-set predictions.
print(
    "\n=== Classification Report (TEST) ===",
    classification_report(y_test, test["y_pred"]),
    sep="\n",
)


--2025-11-24 13:09:27--  https://raw.githubusercontent.com/xiomara-udea/NATICUSdroid/main/data.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5136567 (4.9M) [text/plain]
Saving to: ‘data.csv’


2025-11-24 13:09:27 (58.2 MB/s) - ‘data.csv’ saved [5136567/5136567]

  - Características: 86
  - Muestras: 29332
Train: (20532, 86) Val: (4400, 86) Test: (4400, 86)

Hiperparámetros KNN:
  Hiperparámetro              Valores
0    n_neighbors         [3, 5, 7, 9]
1        weights  [uniform, distance]

Entrenando GridSearch KNN...
Fitting 5 folds for each of 8 candidates, totalling 40 fits

Mejores parámetros: {'clf__n_neighbors': 3, 'clf__weights': 'distance'}
Mejor F1 en CV: 0.9600251159415445

=== Resultados VALIDACIÓN ===
Accuracy: 0.9580  CI95: [0.9518125  0.