In [1]:
# TRATAMIENTO DE DATOS PARA EL DATASET

# IMPORTACIÓN DE LIBRERÍAS
import pandas as pd
from scipy.stats import kurtosis

In [2]:
# 1. Load the original dataset, parse the "timestamp" column into datetimes
#    and use it as the index so the frame can be resampled by time.
df = (
    pd.read_csv("Dataset_transmision_act.csv")
    .assign(timestamp=lambda raw: pd.to_datetime(raw["timestamp"]))
    .set_index("timestamp")
)

In [3]:
# 2. Per-window summary statistics (applied to each 10-minute resample group)
def calculate_stats(group):
    """Summarise one group of samples into a flat Series of statistics.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of a single window. Must expose the columns ``latencia_ms``,
        ``perdida_paquetes``, ``RSRP_dBm`` and ``SINR_dB``.

    Returns
    -------
    pandas.Series
        One entry per statistic, in this order: latency mean/median/max/std/
        kurtosis, packet-loss mean, RSRP mean/min/std, SINR mean/max/std.
    """
    # Pull each column once; latency is read first, as in the original order.
    latency = group["latencia_ms"]
    packet_loss = group["perdida_paquetes"]
    rsrp = group["RSRP_dBm"]
    sinr = group["SINR_dB"]

    return pd.Series(
        {
            "latencia_mean": latency.mean(),
            "latencia_median": latency.median(),
            "latencia_max": latency.max(),
            "latencia_std": latency.std(),
            # Fisher (excess) kurtosis; NaN samples are ignored by scipy.
            "latencia_kurtosis": kurtosis(latency, fisher=True, nan_policy="omit"),
            "perdida_mean": packet_loss.mean(),
            "RSRP_mean": rsrp.mean(),
            "RSRP_min": rsrp.min(),
            "RSRP_std": rsrp.std(),
            "SINR_mean": sinr.mean(),
            "SINR_max": sinr.max(),
            "SINR_std": sinr.std(),
        }
    )


In [4]:
# 3. Build the aggregated dataset: one row of statistics per 10-minute window.
# The frequency is spelled "10min" because the "T" minute alias is deprecated
# in recent pandas releases and emits a FutureWarning; "10min" is the
# equivalent, supported spelling.
# NOTE(review): windows that contain no samples still produce a row (with NaN
# statistics) — confirm whether those rows should be dropped downstream.
df_t = df.resample("10min").apply(calculate_stats).reset_index()

  df_t = df.resample("10T").apply(calculate_stats).reset_index()


In [5]:
# 4. Binary classification of signal quality
def clasificar_senal(sinr, latencia, sinr_min=15, latencia_max=50):
    """Classify a signal as good (1) or bad (0).

    A signal is good only when ``sinr >= sinr_min`` AND
    ``latencia < latencia_max``. Every other combination is labelled bad;
    there is no intermediate class.

    Parameters
    ----------
    sinr : float
        SINR value to evaluate.
    latencia : float
        Latency value to evaluate.
    sinr_min : float, optional
        Inclusive lower bound on ``sinr`` for a good signal. Defaults to 15,
        the threshold that was previously hard-coded.
    latencia_max : float, optional
        Exclusive upper bound on ``latencia`` for a good signal. Defaults to
        50, the threshold that was previously hard-coded.

    Returns
    -------
    int
        1 for a good signal, 0 otherwise. NaN inputs fail both comparisons
        and are therefore labelled 0.
    """
    es_buena = sinr >= sinr_min and latencia < latencia_max
    return 1 if es_buena else 0

# Label every aggregated window from its mean SINR and mean latency.
# Only the two columns the classifier reads are handed to the row-wise apply.
df_t["CalidadSenal"] = df_t[["SINR_mean", "latencia_mean"]].apply(
    lambda fila: clasificar_senal(fila["SINR_mean"], fila["latencia_mean"]),
    axis=1,
)


In [6]:
# 5. Show how many windows fall into each class
conteo_clases = df_t["CalidadSenal"].value_counts()
# One print call; the newline separator keeps the header on its own line.
print("Distribución de la calidad de señal (0=Mala, 1=Buena):", conteo_clases, sep="\n")

Distribución de la calidad de señal (0=Mala, 1=Buena):
CalidadSenal
0    407
1     94
Name: count, dtype: int64


In [7]:
# 6. Persist the processed dataset (without the positional index) and show it
ruta_salida = "Dataset_tratado.csv"
df_t.to_csv(ruta_salida, index=False)
print("Dataset tratado guardado")
df_t


Dataset tratado guardado


Unnamed: 0,timestamp,latencia_mean,latencia_median,latencia_max,latencia_std,latencia_kurtosis,perdida_mean,RSRP_mean,RSRP_min,RSRP_std,SINR_mean,SINR_max,SINR_std,CalidadSenal
0,2023-08-23 02:10:00,15.820,9.580,37.56,12.707769,-0.196208,0.0,-92.8,-102.0,7.981228,13.440,22.49,7.654806,0
1,2023-08-23 02:20:00,25.859,28.185,49.37,15.333421,-1.264487,0.1,-93.7,-103.0,5.457920,14.635,23.84,7.459311,0
2,2023-08-23 02:30:00,27.759,32.490,46.04,15.167649,-1.551258,0.1,-93.3,-109.0,8.124722,16.270,25.62,6.133304,1
3,2023-08-23 02:40:00,52.184,48.920,95.97,31.838426,-1.415968,0.2,-93.2,-105.0,7.146095,11.961,25.27,6.806783,0
4,2023-08-23 02:50:00,39.118,34.255,88.43,29.438563,-1.169311,0.1,-97.2,-111.0,11.292082,17.064,26.26,6.495571,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
496,2023-08-26 12:50:00,43.669,36.140,94.77,27.119710,-0.351730,0.1,-94.7,-113.0,11.747813,11.726,25.51,6.916162,0
497,2023-08-26 13:00:00,44.809,44.325,73.59,25.167173,-1.337035,0.2,-102.5,-115.0,8.644202,14.169,22.45,7.389579,0
498,2023-08-26 13:10:00,48.599,50.030,91.51,22.996692,-0.554924,0.0,-97.0,-110.0,10.509255,12.308,29.76,9.322825,0
499,2023-08-26 13:20:00,35.695,31.855,96.57,26.798702,0.696146,0.0,-99.3,-115.0,12.728359,19.348,29.85,8.621580,1
