In [44]:
import pandas as pd
from scipy.stats import kurtosis

In [46]:
# 1. Load the raw transmission dataset from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer="Dataset_transmision_act.csv")

In [47]:
# 2. Parse the 'timestamp' column as datetimes and promote it to the index
#    (the resampling step further down needs a datetime index).
df = (
    df
    .assign(timestamp=pd.to_datetime(df['timestamp']))
    .set_index('timestamp')
)

In [48]:
# 3. Compute the summary metrics for one 10-minute window
def calculate_stats(group):
    """Summarise one window of measurements into a flat set of statistics.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows belonging to a single window. It must provide every source
        column named in ``plan`` below.

    Returns
    -------
    pandas.Series
        One entry per ``<prefix>_<aggregation>`` label, in the order the
        plan lists them.
    """
    full = ('sum', 'mean', 'max', 'std')
    counters = ('sum', 'mean', 'max')
    spread = ('mean', 'max', 'std')
    peak = ('mean', 'max')

    # (source column, output label prefix, aggregations in output order)
    plan = (
        # Traffic
        ('ifInOctets', 'ifInOctets', full),
        ('ifOutOctets', 'ifOutOctets', full),
        # Errors
        ('ifInErrors', 'ifInErrors', counters),
        ('ifOutErrors', 'ifOutErrors', counters),
        # Latency
        ('latencia_ms', 'latencia', ('mean', 'median', 'max', 'std', 'kurtosis')),
        # Packet loss
        ('perdida_paquetes', 'perdida', counters),
        # Signal
        ('RSRP_dBm', 'RSRP', ('mean', 'min', 'std')),
        ('SINR_dB', 'SINR', spread),
        # Bandwidth
        ('BW_in_Mbps', 'BW_in_Mbps', spread),
        ('BW_out_Mbps', 'BW_out_Mbps', spread),
        # Error rates
        ('TasaErrorIn_%', 'TasaErrorIn', peak),
        ('TasaErrorOut_%', 'TasaErrorOut', peak),
    )

    result = {}
    for source, prefix, aggregations in plan:
        series = group[source]
        for agg in aggregations:
            if agg == 'kurtosis':
                # Deliberately the scipy function (Fisher definition, NaNs
                # omitted), not the Series method of the same name.
                value = kurtosis(series, fisher=True, nan_policy='omit')
            else:
                value = getattr(series, agg)()
            result[f'{prefix}_{agg}'] = value

    return pd.Series(result)

In [49]:
# 4. Aggregate the measurements into 10-minute windows.
#    The frequency is spelled '10min' because the 'T' alias is deprecated in
#    pandas and emits a FutureWarning; 'min' selects the same minute frequency.
df_t = df.resample('10min').apply(calculate_stats)

  df_t = df.resample('10T').apply(calculate_stats)


In [50]:
# 5. Classify signal quality from the mean SINR of a window
def clasificar_senal(sinr, umbral_regular=10, umbral_buena=20):
    """Map a SINR value to a signal-quality class.

    Parameters
    ----------
    sinr : float
        SINR value to classify (the notebook passes the per-window mean).
    umbral_regular : float, default 10
        Values strictly below this threshold are class 0.
    umbral_buena : float, default 20
        Values at or above this threshold are class 2.

    Returns
    -------
    int or float
        0 (bad), 1 (regular) or 2 (good). A missing ``sinr`` (NaN/None)
        yields NaN: every comparison against NaN is False, so without this
        guard a missing value would fall through to the last branch and be
        labelled as good signal.
    """
    if pd.isna(sinr):
        return float('nan')
    if sinr < umbral_regular:
        return 0  # Bad
    if sinr < umbral_buena:
        return 1  # Regular
    return 2  # Good

# Label every window element-wise from its mean SINR
df_t['calidad_senal'] = df_t['SINR_mean'].map(clasificar_senal)

In [51]:
# 6. Move the timestamp index back into a regular column (the index is kept, not dropped)
df_t = df_t.reset_index(drop=False)

In [53]:
# 7. Sanity-check the size (rows, columns) of the aggregated dataset
n_rows, n_cols = df_t.shape
print("Tamaño final del dataset:", (n_rows, n_cols))

Tamaño final del dataset: (501, 40)


In [54]:
# 8. Persist the aggregated dataset (without the positional index) and display it
df_t.to_csv(path_or_buf="Dataset_tratado.csv", index=False)
print("Dataset tratado guardado como 'Dataset_tratado.csv'")
df_t

Dataset tratado guardado como 'Dataset_tratado.csv'


Unnamed: 0,timestamp,ifInOctets_sum,ifInOctets_mean,ifInOctets_max,ifInOctets_std,ifOutOctets_sum,ifOutOctets_mean,ifOutOctets_max,ifOutOctets_std,ifInErrors_sum,...,BW_in_Mbps_max,BW_in_Mbps_std,BW_out_Mbps_mean,BW_out_Mbps_max,BW_out_Mbps_std,TasaErrorIn_mean,TasaErrorIn_max,TasaErrorOut_mean,TasaErrorOut_max,calidad_senal
0,2023-08-23 02:10:00,30754899.0,6150979.8,9931946.0,3.110673e+06,8192922.0,1638584.4,2500175.0,6.356160e+05,23.0,...,0.26,0.081976,0.044,0.07,0.018166,0.00008,0.0002,0.00012,0.0002,1
1,2023-08-23 02:20:00,71351485.0,7135148.5,9817208.0,2.211750e+06,36199529.0,3619952.9,4532418.0,1.057430e+06,42.0,...,0.26,0.058271,0.097,0.12,0.030203,0.00008,0.0002,0.00008,0.0002,1
2,2023-08-23 02:30:00,64247615.0,6424761.5,9529870.0,2.740136e+06,27173516.0,2717351.6,4367239.0,1.152618e+06,80.0,...,0.25,0.073250,0.071,0.12,0.032813,0.00014,0.0004,0.00014,0.0004,1
3,2023-08-23 02:40:00,48401514.0,4840151.4,8404852.0,2.242085e+06,27088230.0,2708823.0,4432623.0,1.051585e+06,127.0,...,0.22,0.060000,0.072,0.12,0.028206,0.00037,0.0013,0.00019,0.0004,1
4,2023-08-23 02:50:00,53376350.0,5337635.0,9242680.0,2.391761e+06,28003248.0,2800324.8,4208201.0,1.304548e+06,112.0,...,0.25,0.064429,0.073,0.11,0.035606,0.00035,0.0018,0.00023,0.0009,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
496,2023-08-26 12:50:00,67187404.0,6718740.4,9973326.0,2.266365e+06,28486632.0,2848663.2,4904309.0,1.412130e+06,78.0,...,0.27,0.061065,0.076,0.13,0.037771,0.00014,0.0003,0.00025,0.0006,1
497,2023-08-26 13:00:00,46702587.0,4670258.7,8568538.0,2.827584e+06,26965041.0,2696504.1,4740843.0,1.509222e+06,119.0,...,0.23,0.075902,0.074,0.13,0.041150,0.00038,0.0008,0.00033,0.0010,1
498,2023-08-26 13:10:00,58606840.0,5860684.0,8587973.0,2.234014e+06,32661895.0,3266189.5,4234341.0,9.683134e+05,110.0,...,0.23,0.060406,0.088,0.11,0.024855,0.00027,0.0011,0.00011,0.0004,1
499,2023-08-26 13:20:00,51516486.0,5151648.6,9774821.0,2.899329e+06,31397520.0,3139752.0,4861909.0,1.543966e+06,114.0,...,0.26,0.078003,0.083,0.13,0.040565,0.00039,0.0013,0.00020,0.0005,1
