In [1]:
# =======================
# 1. Importar librerías
# =======================
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report

# =======================
# 2. Load dataset
# =======================
df = pd.read_csv("Dataset_10min_con_calidad3.csv")

print("Columnas disponibles:", df.columns.tolist())
print("Tamaño del dataset:", df.shape)

# =======================
# 3. Define X (features) and y (target)
# =======================
# Everything except the timestamp and the label column is used as a feature.
X = df.drop(columns=["timestamp", "calidad_senal"])
y = df["calidad_senal"]

# =======================
# 4. Train/test split
# =======================
# Stratified 80/20 split so both partitions keep the label proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# =======================
# 5. Hyperparameter grid and GridSearch
# =======================
param_grid = {
    'criterion': ['gini', 'entropy', 'log_loss'],
    'max_features': ['sqrt', 'log2', None],
    'n_estimators': np.arange(50, 201, 50)
}

rf = RandomForestClassifier(random_state=42)
# refit=True (the default, spelled out here) retrains the best configuration
# on the full training split once the 5-fold search finishes.
grid = GridSearchCV(rf, param_grid, cv=5, n_jobs=-1, refit=True)
grid.fit(X_train, y_train)

print("Mejores parámetros encontrados:", grid.best_params_)

# =======================
# 6. Final model
# =======================
# Reuse the estimator GridSearchCV already refitted on (X_train, y_train)
# instead of rebuilding and training an identical forest a second time.
# It carries the same best parameters and random_state=42 as `rf`.
rf_final = grid.best_estimator_

# =======================
# 7. Evaluation
# =======================
# NOTE(review): the recorded run of this cell reports 1.0 for every metric
# below. Confirm that `calidad_senal` is not derived directly from one of the
# feature columns (label leakage) before trusting these numbers.
y_pred = rf_final.predict(X_test)
y_proba = rf_final.predict_proba(X_test)

print("\nResultados en test:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Recall (macro):", recall_score(y_test, y_pred, average="macro"))
print("Recall (weighted):", recall_score(y_test, y_pred, average="weighted"))
print("F1 (macro):", f1_score(y_test, y_pred, average="macro"))
print("F1 (weighted):", f1_score(y_test, y_pred, average="weighted"))
print("ROC-AUC (ovr):", roc_auc_score(y_test, y_proba, multi_class="ovr", average="macro"))
print("\nMatriz de confusión:\n", confusion_matrix(y_test, y_pred))
print("\nReporte de clasificación:\n", classification_report(y_test, y_pred))


Columnas disponibles: ['timestamp', 'ifInOctets_sum', 'ifInOctets_mean', 'ifInOctets_max', 'ifInOctets_std', 'ifOutOctets_sum', 'ifOutOctets_mean', 'ifOutOctets_max', 'ifOutOctets_std', 'ifInErrors_sum', 'ifInErrors_mean', 'ifInErrors_max', 'ifOutErrors_sum', 'ifOutErrors_mean', 'ifOutErrors_max', 'latencia_mean', 'latencia_median', 'latencia_max', 'latencia_std', 'latencia_kurtosis', 'perdida_sum', 'perdida_mean', 'perdida_max', 'RSRP_mean', 'RSRP_min', 'RSRP_std', 'SINR_mean', 'SINR_max', 'SINR_std', 'BW_in_Mbps_mean', 'BW_in_Mbps_max', 'BW_in_Mbps_std', 'BW_out_Mbps_mean', 'BW_out_Mbps_max', 'BW_out_Mbps_std', 'TasaErrorIn_mean', 'TasaErrorIn_max', 'TasaErrorOut_mean', 'TasaErrorOut_max', 'calidad_senal']
Tamaño del dataset: (501, 40)
Mejores parámetros encontrados: {'criterion': 'gini', 'max_features': None, 'n_estimators': np.int64(50)}

Resultados en test:
Accuracy: 1.0
Recall (macro): 1.0
Recall (weighted): 1.0
F1 (macro): 1.0
F1 (weighted): 1.0
ROC-AUC (ovr): 1.0

Matriz de con