In [1]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
import numpy as np
import pandas as pd

In [None]:
csv = "Dataset_transmision.csv"

datos = pd.read_csv(csv, parse_dates=["timestamp"])
datos = datos.sort_values(["timestamp"]).reset_index(drop=True)
mapa_trafico = {
    "Mala": 0,
    "Buena": 1,
    "Regular": 2
}

# Reemplazar los valores de la columna CaidadSenal por su código numérico
if "CalidadSenal" in datos.columns:
    datos["CalidadSenal"] = datos["CalidadSenal"].map(mapa_trafico).fillna(6).astype(int)


In [3]:
X = datos.drop(columns=["timestamp", "CalidadSenal"])

y = datos["CalidadSenal"]

X.shape, y.shape

((1000, 12), (1000,))

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.4,stratify=y)

In [5]:
indices = np.arange(len(y_train))
ind_train, ind_val = train_test_split(indices,test_size=0.33,stratify=y_train)
cv = zip([ind_train],[ind_val])
params = {
    'criterion': ('gini', 'entropy', 'log_loss'),
    'max_features': ('sqrt', 'log2', None),
    'n_estimators': np.arange(10,101,10)
}
rf = RandomForestClassifier()
clf = GridSearchCV(rf, params, cv=cv,n_jobs=-1)
clf.fit(X_train, y_train)

0,1,2
,estimator,RandomForestClassifier()
,param_grid,"{'criterion': ('gini', ...), 'max_features': ('sqrt', ...), 'n_estimators': array([ 10, ...80, 90, 100])}"
,scoring,
,n_jobs,-1
,refit,True
,cv,<zip object a...0026CD0CB1C00>
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,n_estimators,np.int64(50)
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [6]:
print(f'Los mejores hyperparámetros son: {clf.best_params_}, con exactitud de {clf.best_score_}')

Los mejores hyperparámetros son: {'criterion': 'gini', 'max_features': 'sqrt', 'n_estimators': np.int64(50)}, con exactitud de 1.0


In [7]:
rf_final = RandomForestClassifier(criterion=clf.best_params_['criterion'],
                                  max_features=clf.best_params_['max_features'],
                                  n_estimators=clf.best_params_['n_estimators'])
rf_final.fit(X_train, y_train)

0,1,2
,n_estimators,np.int64(50)
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [8]:

# Predicciones
y_pred = rf_final.predict(X_test)
y_proba = rf_final.predict_proba(X_test)  # matriz [n_muestras, n_clases]

# Métricas
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Recall (macro):", recall_score(y_test, y_pred, average="macro"))
print("Recall (weighted):", recall_score(y_test, y_pred, average="weighted"))
print("F1 (macro):", f1_score(y_test, y_pred, average="macro"))
print("F1 (weighted):", f1_score(y_test, y_pred, average="weighted"))
print("ROC-AUC (ovr):", roc_auc_score(y_test, y_proba, multi_class="ovr", average="macro"))
print("Matriz de confusión:\n", confusion_matrix(y_test, y_pred))

Accuracy: 0.9975
Recall (macro): 0.9938271604938271
Recall (weighted): 0.9975
F1 (macro): 0.9961254337598194
F1 (weighted): 0.9974911651374194
ROC-AUC (ovr): 1.0
Matriz de confusión:
 [[127   0   0]
 [  0  53   1]
 [  0   0 219]]


In [9]:
X_test

Unnamed: 0,ifInOctets,ifOutOctets,ifInErrors,ifOutErrors,latencia_ms,perdida_paquetes,RSRP_dBm,SINR_dB,BW_in_Mbps,BW_out_Mbps,TasaErrorIn_%,TasaErrorOut_%
340,1118834,2402897,9,1,37.78,0,-95,27.23,0.03,0.06,0.0008,0.0000
890,9888923,4542879,15,7,44.17,0,-108,24.17,0.26,0.12,0.0002,0.0002
985,1611213,2819124,18,0,25.55,0,-101,27.15,0.04,0.08,0.0011,0.0000
124,7510425,928988,15,3,51.41,0,-111,18.03,0.20,0.02,0.0002,0.0003
590,4314692,1797076,3,7,14.52,0,-89,24.20,0.12,0.05,0.0001,0.0004
...,...,...,...,...,...,...,...,...,...,...,...,...
209,2659066,897710,16,8,44.77,0,-109,4.22,0.07,0.02,0.0006,0.0009
132,1874371,2992118,7,4,38.72,0,-112,16.86,0.05,0.08,0.0004,0.0001
23,3234489,4316446,12,6,22.83,0,-93,9.47,0.09,0.12,0.0004,0.0001
193,6500306,2872280,11,2,51.03,1,-81,28.43,0.17,0.08,0.0002,0.0001


In [10]:
#Segunda forma con Cross validator
rfCV = RandomForestClassifier()
clfCV = GridSearchCV(rfCV, params, n_jobs=-1)
clfCV.fit(X_train, y_train)

0,1,2
,estimator,RandomForestClassifier()
,param_grid,"{'criterion': ('gini', ...), 'max_features': ('sqrt', ...), 'n_estimators': array([ 10, ...80, 90, 100])}"
,scoring,
,n_jobs,-1
,refit,True
,cv,
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,n_estimators,np.int64(60)
,criterion,'entropy'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'log2'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [11]:
print(f'Los mejores hyperparámetros son: {clfCV.best_params_}, con exactitud de {clfCV.best_score_}')

Los mejores hyperparámetros son: {'criterion': 'entropy', 'max_features': 'log2', 'n_estimators': np.int64(60)}, con exactitud de 1.0


In [12]:
rf_final_cv = RandomForestClassifier(criterion=clfCV.best_params_['criterion'],
                                  max_features=clfCV.best_params_['max_features'],
                                  n_estimators=clfCV.best_params_['n_estimators'])
rf_final_cv.fit(X_train, y_train)

0,1,2
,n_estimators,np.int64(60)
,criterion,'entropy'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'log2'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [13]:

# Predicciones
y_pred = rf_final.predict(X_test)
y_proba = rf_final.predict_proba(X_test)  # matriz [n_muestras, n_clases]

# Métricas
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Recall (macro):", recall_score(y_test, y_pred, average="macro"))
print("Recall (weighted):", recall_score(y_test, y_pred, average="weighted"))
print("F1 (macro):", f1_score(y_test, y_pred, average="macro"))
print("F1 (weighted):", f1_score(y_test, y_pred, average="weighted"))
print("ROC-AUC (ovr):", roc_auc_score(y_test, y_proba, multi_class="ovr", average="macro"))
print("Matriz de confusión:\n", confusion_matrix(y_test, y_pred))

Accuracy: 0.9975
Recall (macro): 0.9938271604938271
Recall (weighted): 0.9975
F1 (macro): 0.9961254337598194
F1 (weighted): 0.9974911651374194
ROC-AUC (ovr): 1.0
Matriz de confusión:
 [[127   0   0]
 [  0  53   1]
 [  0   0 219]]


In [14]:
X

Unnamed: 0,ifInOctets,ifOutOctets,ifInErrors,ifOutErrors,latencia_ms,perdida_paquetes,RSRP_dBm,SINR_dB,BW_in_Mbps,BW_out_Mbps,TasaErrorIn_%,TasaErrorOut_%
0,3120156,827726,1,0,8.18,0,-89,12.23,0.08,0.02,0.0000,0.0000
1,4291575,1494630,1,1,9.58,0,-83,22.49,0.11,0.04,0.0000,0.0001
2,9931946,1994334,7,3,37.56,0,-102,20.12,0.26,0.05,0.0001,0.0002
3,9066853,2500175,6,3,7.14,0,-100,5.48,0.24,0.07,0.0001,0.0001
4,4344369,1376057,8,3,16.64,0,-90,6.88,0.12,0.04,0.0002,0.0002
...,...,...,...,...,...,...,...,...,...,...,...,...
995,8507324,1465188,17,6,64.99,0,-95,9.74,0.23,0.04,0.0002,0.0004
996,7202258,2661088,13,5,79.52,0,-82,29.44,0.19,0.07,0.0002,0.0002
997,5339235,2820484,19,9,61.86,0,-105,4.04,0.14,0.08,0.0004,0.0003
998,4526737,4605223,14,6,44.34,0,-94,9.44,0.12,0.12,0.0003,0.0001


In [15]:
y

0      2
1      1
2      2
3      0
4      0
      ..
995    0
996    1
997    0
998    0
999    2
Name: CalidadSenal, Length: 1000, dtype: int64