In [46]:
# Library imports
import pandas as pd
from scipy.stats import kurtosis

In [47]:
# 1. Load the raw dataset (semicolon-separated CSV) and display it
raw_dataset_path = 'Dataset_transmision.csv'
df = pd.read_csv(raw_dataset_path, sep=";")
df

Unnamed: 0,timestamp,ifInOctets,ifOutOctets,ifInErrors,ifOutErrors,latencia_ms,perdida_paquetes,RSRP_dBm,SINR_dB,BW_in_Mbps,BW_out_Mbps,TasaErrorIn_%,TasaErrorOut_%,CalidadSenal
0,23/8/2025 2:15,3120156,827726,1,0,8.18,0,-89,12.23,0.08,0.02,0.0000,0.0000,Regular
1,23/8/2025 2:16,4291575,1494630,1,1,9.58,0,-83,22.49,0.11,0.04,0.0000,0.0001,Buena
2,23/8/2025 2:17,9931946,1994334,7,3,37.56,0,-102,20.12,0.26,0.05,0.0001,0.0002,Regular
3,23/8/2025 2:18,9066853,2500175,6,3,7.14,0,-100,5.48,0.24,0.07,0.0001,0.0001,Mala
4,23/8/2025 2:19,4344369,1376057,8,3,16.64,0,-90,6.88,0.12,0.04,0.0002,0.0002,Mala
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,23/8/2025 18:50,8507324,1465188,17,6,64.99,0,-95,9.74,0.23,0.04,0.0002,0.0004,Mala
996,23/8/2025 18:51,7202258,2661088,13,5,79.52,0,-82,29.44,0.19,0.07,0.0002,0.0002,Buena
997,23/8/2025 18:52,5339235,2820484,19,9,61.86,0,-105,4.04,0.14,0.08,0.0004,0.0003,Mala
998,23/8/2025 18:53,4526737,4605223,14,6,44.34,0,-94,9.44,0.12,0.12,0.0003,0.0001,Mala


In [48]:
# 2. Parse the timestamp strings (day/month/year hour:minute) into datetimes
timestamp_format = "%d/%m/%Y %H:%M"
df['timestamp'] = pd.to_datetime(df['timestamp'], format=timestamp_format)

In [49]:
# 3. Per-window aggregation: only the statistics relevant to each kind of metric
def calculate_stats(group):
    """Summarise one window of measurements into a flat Series of statistics.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows belonging to a single window. Must contain the columns
        ``ifInOctets``, ``ifOutOctets``, ``ifInErrors``, ``ifOutErrors``,
        ``latencia_ms``, ``perdida_paquetes``, ``RSRP_dBm``, ``SINR_dB``,
        ``BW_in_Mbps``, ``BW_out_Mbps``, ``TasaErrorIn_%``, ``TasaErrorOut_%``
        and ``CalidadSenal``. The frame may be empty.

    Returns
    -------
    pandas.Series
        One entry per statistic. The set and order of labels is the same for
        every input (including empty windows), so results from different
        windows can be stacked into a rectangular DataFrame. Statistics that
        cannot be computed are NaN.

    Raises
    ------
    KeyError
        If any of the required columns is absent from ``group``.
    """
    missing = float("nan")
    stats = {}

    # Traffic: inbound / outbound octet counters
    for col in ['ifInOctets', 'ifOutOctets']:
        stats[f'{col}_sum'] = group[col].sum()
        stats[f'{col}_mean'] = group[col].mean()
        stats[f'{col}_max'] = group[col].max()
        stats[f'{col}_std'] = group[col].std()

    # Errors: inbound / outbound error counters
    for col in ['ifInErrors', 'ifOutErrors']:
        stats[f'{col}_sum'] = group[col].sum()
        stats[f'{col}_mean'] = group[col].mean()
        stats[f'{col}_max'] = group[col].max()

    # Latency: transmission delay
    stats['latencia_mean'] = group['latencia_ms'].mean()
    stats['latencia_median'] = group['latencia_ms'].median()
    stats['latencia_max'] = group['latencia_ms'].max()
    stats['latencia_std'] = group['latencia_ms'].std()
    # Drop NaNs ourselves (equivalent to nan_policy='omit') so SciPy is never
    # called on empty input and always hands back a plain float, not a masked array.
    latency = group['latencia_ms'].dropna()
    if latency.empty:
        stats['latencia_kurtosis'] = missing
    else:
        stats['latencia_kurtosis'] = float(kurtosis(latency.to_numpy(), fisher=True))

    # Packet loss during transmission
    stats['perdida_sum'] = group['perdida_paquetes'].sum()
    stats['perdida_mean'] = group['perdida_paquetes'].mean()
    stats['perdida_max'] = group['perdida_paquetes'].max()

    # Signal: received signal power (RSRP) and SINR
    stats['RSRP_mean'] = group['RSRP_dBm'].mean()
    stats['RSRP_min'] = group['RSRP_dBm'].min()
    stats['RSRP_std'] = group['RSRP_dBm'].std()

    stats['SINR_mean'] = group['SINR_dB'].mean()
    stats['SINR_max'] = group['SINR_dB'].max()
    stats['SINR_std'] = group['SINR_dB'].std()

    # Inbound and outbound bandwidth
    for col in ['BW_in_Mbps', 'BW_out_Mbps']:
        stats[f'{col}_mean'] = group[col].mean()
        stats[f'{col}_max'] = group[col].max()
        stats[f'{col}_std'] = group[col].std()

    # Error rates: percentage of errors in received and sent traffic
    stats['TasaErrorIn_mean'] = group['TasaErrorIn_%'].mean()
    stats['TasaErrorIn_max'] = group['TasaErrorIn_%'].max()
    stats['TasaErrorOut_mean'] = group['TasaErrorOut_%'].mean()
    stats['TasaErrorOut_max'] = group['TasaErrorOut_%'].max()

    # Label: most frequent qualitative signal-quality category in the window.
    # mode() ignores missing values and returns an empty Series when the window
    # is empty or every label is missing; the key is still emitted (as NaN) so
    # all windows share the same columns.
    modes = group['CalidadSenal'].mode()
    stats['CalidadSenal_dominante'] = modes.iloc[0] if not modes.empty else missing

    return pd.Series(stats)

In [50]:
# 4. Aggregate into 5-minute windows
time_indexed = df.set_index('timestamp')                  # use the timestamp as the time index
five_minute_windows = time_indexed.resample('5min')       # bucket rows into 5-minute intervals
window_stats = five_minute_windows.apply(calculate_stats) # one row of statistics per window
df_grouped = window_stats.reset_index()                   # bring timestamp back as a column


In [51]:
# 5. Write the aggregated dataset to CSV (without the index) and display it
output_dataset_path = "Dataset_trabajo.csv"
df_grouped.to_csv(output_dataset_path, index=False)
print("✅ Dataset enriquecido guardado como Dataset_trabajo.csv")
df_grouped

✅ Dataset enriquecido guardado como Dataset_trabajo.csv


Unnamed: 0,timestamp,ifInOctets_sum,ifInOctets_mean,ifInOctets_max,ifInOctets_std,ifOutOctets_sum,ifOutOctets_mean,ifOutOctets_max,ifOutOctets_std,ifInErrors_sum,...,BW_in_Mbps_max,BW_in_Mbps_std,BW_out_Mbps_mean,BW_out_Mbps_max,BW_out_Mbps_std,TasaErrorIn_mean,TasaErrorIn_max,TasaErrorOut_mean,TasaErrorOut_max,CalidadSenal_dominante
0,2025-08-23 02:15:00,30754899,6150979.8,9931946,3.110673e+06,8192922,1638584.4,2500175,6.356160e+05,23,...,0.26,0.081976,0.044,0.07,0.018166,0.00008,0.0002,0.00012,0.0002,Mala
1,2025-08-23 02:20:00,36929421,7385884.2,9577392,2.407450e+06,17652004,3530400.8,4393211,1.319722e+06,20,...,0.26,0.064187,0.094,0.12,0.037815,0.00006,0.0001,0.00010,0.0002,Regular
2,2025-08-23 02:25:00,34422064,6884412.8,9817208,2.248032e+06,18547525,3709505.0,4532418,8.684125e+05,22,...,0.26,0.058566,0.100,0.12,0.024495,0.00010,0.0002,0.00006,0.0002,Regular
3,2025-08-23 02:30:00,32530320,6506064.0,9529870,3.559427e+06,11747308,2349461.6,3539367,1.123510e+06,23,...,0.25,0.095760,0.060,0.09,0.029155,0.00008,0.0002,0.00012,0.0003,Mala
4,2025-08-23 02:35:00,31717295,6343459.0,8204212,2.051274e+06,16245088,3249017.6,4955214,1.737325e+06,47,...,0.22,0.053852,0.088,0.13,0.044385,0.00016,0.0004,0.00012,0.0002,Regular
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,2025-08-23 18:30:00,21846190,4369238.0,8021855,2.363672e+06,13109979,2621995.8,4596392,1.401174e+06,34,...,0.21,0.061887,0.072,0.12,0.034928,0.00020,0.0004,0.00016,0.0005,Regular
196,2025-08-23 18:35:00,31797222,6359444.4,9997354,3.849000e+06,14265669,2853133.8,4914473,1.668292e+06,54,...,0.27,0.102956,0.076,0.13,0.045056,0.00026,0.0005,0.00028,0.0011,Mala
197,2025-08-23 18:40:00,23883057,4776611.4,8750575,3.085618e+06,17224488,3444897.6,4423227,8.920232e+05,55,...,0.23,0.080808,0.094,0.12,0.024083,0.00046,0.0011,0.00012,0.0003,Regular
198,2025-08-23 18:45:00,26336898,5267379.6,8229039,2.517048e+06,14737472,2947494.4,4064375,1.029465e+06,75,...,0.22,0.066332,0.080,0.11,0.029155,0.00034,0.0006,0.00018,0.0006,Regular
