In [None]:
# Importacion de libreria 
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
import numpy as np
import pandas as pd

In [2]:
csv = "Dataset_tratado.csv"

datos = pd.read_csv(csv, parse_dates=["timestamp"])
datos = datos.sort_values(["timestamp"]).reset_index(drop=True)

datos

Unnamed: 0,timestamp,latencia_mean,latencia_median,latencia_max,latencia_std,latencia_kurtosis,perdida_mean,RSRP_mean,RSRP_min,RSRP_std,SINR_mean,SINR_max,SINR_std,CalidadSenal
0,2023-08-23 02:10:00,15.820,9.580,37.56,12.707769,-0.196208,0.0,-92.8,-102.0,7.981228,13.440,22.49,7.654806,0
1,2023-08-23 02:20:00,25.859,28.185,49.37,15.333421,-1.264487,0.1,-93.7,-103.0,5.457920,14.635,23.84,7.459311,0
2,2023-08-23 02:30:00,27.759,32.490,46.04,15.167649,-1.551258,0.1,-93.3,-109.0,8.124722,16.270,25.62,6.133304,1
3,2023-08-23 02:40:00,52.184,48.920,95.97,31.838426,-1.415968,0.2,-93.2,-105.0,7.146095,11.961,25.27,6.806783,0
4,2023-08-23 02:50:00,39.118,34.255,88.43,29.438563,-1.169311,0.1,-97.2,-111.0,11.292082,17.064,26.26,6.495571,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
496,2023-08-26 12:50:00,43.669,36.140,94.77,27.119710,-0.351730,0.1,-94.7,-113.0,11.747813,11.726,25.51,6.916162,0
497,2023-08-26 13:00:00,44.809,44.325,73.59,25.167173,-1.337035,0.2,-102.5,-115.0,8.644202,14.169,22.45,7.389579,0
498,2023-08-26 13:10:00,48.599,50.030,91.51,22.996692,-0.554924,0.0,-97.0,-110.0,10.509255,12.308,29.76,9.322825,0
499,2023-08-26 13:20:00,35.695,31.855,96.57,26.798702,0.696146,0.0,-99.3,-115.0,12.728359,19.348,29.85,8.621580,1


In [3]:
X = datos.drop(columns=["timestamp", "CalidadSenal"])

y = datos["CalidadSenal"]

X.shape, y.shape

((501, 12), (501,))

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.4,stratify=y)

In [5]:
indices = np.arange(len(y_train))
ind_train, ind_val = train_test_split(indices,test_size=0.33,stratify=y_train)
cv = zip([ind_train],[ind_val])
params = {
    'criterion': ('gini', 'entropy', 'log_loss'),
    'max_features': ('sqrt', 'log2', None),
    'n_estimators': np.arange(10,101,10)
}
rf = RandomForestClassifier()
clf = GridSearchCV(rf, params, cv=cv,n_jobs=-1)
clf.fit(X_train, y_train)

0,1,2
,estimator,RandomForestClassifier()
,param_grid,"{'criterion': ('gini', ...), 'max_features': ('sqrt', ...), 'n_estimators': array([ 10, ...80, 90, 100])}"
,scoring,
,n_jobs,-1
,refit,True
,cv,<zip object a...001EC0D5047C0>
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,n_estimators,np.int64(20)
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [6]:
print(f'Los mejores hyperparámetros son: {clf.best_params_}, con exactitud de {clf.best_score_}')

Los mejores hyperparámetros son: {'criterion': 'gini', 'max_features': 'sqrt', 'n_estimators': np.int64(20)}, con exactitud de 1.0


In [9]:
rf_final = RandomForestClassifier(criterion=clf.best_params_['criterion'],
                                  max_features=clf.best_params_['max_features'],
                                  n_estimators=clf.best_params_['n_estimators'])
rf_final.fit(X_train, y_train)

0,1,2
,n_estimators,np.int64(20)
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [10]:
from sklearn.metrics import accuracy_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# Predicciones
y_pred = rf_final.predict(X_test)
y_proba = rf_final.predict_proba(X_test)  # matriz [n_muestras, n_clases]

# Métricas
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Recall (macro):", recall_score(y_test, y_pred, average="macro"))
print("Recall (weighted):", recall_score(y_test, y_pred, average="weighted"))
print("F1 (macro):", f1_score(y_test, y_pred, average="macro"))
print("F1 (weighted):", f1_score(y_test, y_pred, average="weighted"))

# 🔑 ROC-AUC corregido (solo para clase positiva en binario)
print("ROC-AUC:", roc_auc_score(y_test, y_proba[:, 1]))

print("\nMatriz de confusión:\n", confusion_matrix(y_test, y_pred))


Accuracy: 0.9751243781094527
Recall (macro): 0.9342105263157895
Recall (weighted): 0.9751243781094527
F1 (macro): 0.9572358623037318
F1 (weighted): 0.9744363582707712
ROC-AUC: 0.998062641265741

Matriz de confusión:
 [[163   0]
 [  5  33]]


In [11]:
X_test

Unnamed: 0,latencia_mean,latencia_median,latencia_max,latencia_std,latencia_kurtosis,perdida_mean,RSRP_mean,RSRP_min,RSRP_std,SINR_mean,SINR_max,SINR_std
175,47.909,52.970,90.53,26.049549,-1.133449,0.2,-94.7,-114.0,9.626930,11.777,25.79,7.723372
431,60.130,63.835,99.43,33.167038,-1.319252,0.1,-97.4,-106.0,8.126773,13.415,25.64,9.123516
481,56.994,52.550,93.07,21.664050,-0.995408,0.0,-94.8,-114.0,9.727395,13.740,28.21,8.134317
21,30.849,22.265,76.89,22.031245,0.012090,0.1,-94.9,-112.0,10.918384,15.706,28.70,11.150461
338,44.448,36.055,95.57,30.741256,-1.243605,0.0,-98.5,-113.0,10.405661,15.403,29.77,8.880818
...,...,...,...,...,...,...,...,...,...,...,...,...
13,42.310,43.025,82.45,25.781182,-1.167154,0.1,-97.2,-107.0,9.211104,8.798,17.43,6.311149
310,44.981,41.465,86.30,26.075925,-1.310865,0.0,-101.2,-111.0,10.962563,18.858,28.58,9.246247
500,67.260,58.960,91.72,18.383601,-1.545772,0.0,-89.4,-98.0,6.693280,14.732,26.68,8.869897
438,62.637,65.690,99.62,27.994275,-1.607588,0.2,-96.9,-110.0,8.875059,15.126,28.18,8.581619


In [12]:
#Segunda forma con Cross validator
rfCV = RandomForestClassifier()
clfCV = GridSearchCV(rfCV, params, n_jobs=-1)
clfCV.fit(X_train, y_train)

0,1,2
,estimator,RandomForestClassifier()
,param_grid,"{'criterion': ('gini', ...), 'max_features': ('sqrt', ...), 'n_estimators': array([ 10, ...80, 90, 100])}"
,scoring,
,n_jobs,-1
,refit,True
,cv,
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,n_estimators,np.int64(10)
,criterion,'entropy'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [13]:
print(f'Los mejores hyperparámetros son: {clfCV.best_params_}, con exactitud de {clfCV.best_score_}')

Los mejores hyperparámetros son: {'criterion': 'entropy', 'max_features': None, 'n_estimators': np.int64(10)}, con exactitud de 0.9933333333333334


In [14]:
rf_final_cv = RandomForestClassifier(criterion=clfCV.best_params_['criterion'],
                                  max_features=clfCV.best_params_['max_features'],
                                  n_estimators=clfCV.best_params_['n_estimators'])
rf_final_cv.fit(X_train, y_train)

0,1,2
,n_estimators,np.int64(10)
,criterion,'entropy'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [15]:
from sklearn.metrics import accuracy_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# Predicciones
y_pred = rf_final.predict(X_test)
y_proba = rf_final.predict_proba(X_test)  # matriz [n_muestras, n_clases]

# Métricas
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Recall (macro):", recall_score(y_test, y_pred, average="macro"))
print("Recall (weighted):", recall_score(y_test, y_pred, average="weighted"))
print("F1 (macro):", f1_score(y_test, y_pred, average="macro"))
print("F1 (weighted):", f1_score(y_test, y_pred, average="weighted"))

# 🔑 ROC-AUC corregido (solo para clase positiva en binario)
print("ROC-AUC:", roc_auc_score(y_test, y_proba[:, 1]))

print("\nMatriz de confusión:\n", confusion_matrix(y_test, y_pred))


Accuracy: 0.9751243781094527
Recall (macro): 0.9342105263157895
Recall (weighted): 0.9751243781094527
F1 (macro): 0.9572358623037318
F1 (weighted): 0.9744363582707712
ROC-AUC: 0.998062641265741

Matriz de confusión:
 [[163   0]
 [  5  33]]


In [16]:
X

Unnamed: 0,latencia_mean,latencia_median,latencia_max,latencia_std,latencia_kurtosis,perdida_mean,RSRP_mean,RSRP_min,RSRP_std,SINR_mean,SINR_max,SINR_std
0,15.820,9.580,37.56,12.707769,-0.196208,0.0,-92.8,-102.0,7.981228,13.440,22.49,7.654806
1,25.859,28.185,49.37,15.333421,-1.264487,0.1,-93.7,-103.0,5.457920,14.635,23.84,7.459311
2,27.759,32.490,46.04,15.167649,-1.551258,0.1,-93.3,-109.0,8.124722,16.270,25.62,6.133304
3,52.184,48.920,95.97,31.838426,-1.415968,0.2,-93.2,-105.0,7.146095,11.961,25.27,6.806783
4,39.118,34.255,88.43,29.438563,-1.169311,0.1,-97.2,-111.0,11.292082,17.064,26.26,6.495571
...,...,...,...,...,...,...,...,...,...,...,...,...
496,43.669,36.140,94.77,27.119710,-0.351730,0.1,-94.7,-113.0,11.747813,11.726,25.51,6.916162
497,44.809,44.325,73.59,25.167173,-1.337035,0.2,-102.5,-115.0,8.644202,14.169,22.45,7.389579
498,48.599,50.030,91.51,22.996692,-0.554924,0.0,-97.0,-110.0,10.509255,12.308,29.76,9.322825
499,35.695,31.855,96.57,26.798702,0.696146,0.0,-99.3,-115.0,12.728359,19.348,29.85,8.621580


In [17]:
y

0      0
1      0
2      1
3      0
4      1
      ..
496    0
497    0
498    0
499    1
500    0
Name: CalidadSenal, Length: 501, dtype: int64