In [1]:
from cesnet_datazoo.datasets import CESNET_QUIC22, CESNET_TLS22
from cesnet_datazoo.config import DatasetConfig
from networkx.drawing import to_latex

# Load the "XS" size variant of CESNET-QUIC22 from a path relative to the notebook.
dataset = CESNET_QUIC22(data_root="data/CESNET_QUIC22/", size="XS", silent=True)

# Training-split configuration: 100k samples from the "W-2022-44" period.
# NOTE(review): return_other_fields=True is presumably what exposes the flow-level
# columns (PACKETS, BYTES, DURATION, ...) used below — confirm against the
# cesnet_datazoo DatasetConfig documentation.
dataset_config = DatasetConfig(
    dataset=dataset,
    train_period_name="W-2022-44",
    train_size=100_000,
    use_packet_histograms=True,
    return_other_fields=True,
)
dataset.set_dataset_config_and_initialize(dataset_config)

data_df = dataset.get_train_df()

# Lookup tables taken from the dataset object.
# NOTE(review): `_tables_app_enum` / `_tables_cat_enum` are private attributes of the
# dataset class and may change between cesnet_datazoo releases.
# Inverted category enum (category name -> id). Not used in this cell; kept because
# other cells of the notebook may rely on it.
categories_enum = {category: i for i, category in dataset._tables_cat_enum.items()}
# Plain copy of the application enum (the keys are the values found in the "APP" column).
app_enum = dict(dataset._tables_app_enum.items())

# Work on a copy so the mapping owned by `dataset.class_info` is not modified as a
# side effect of running this cell.
app_to_categories = dict(dataset.class_info.categories_mapping)
# Manually assigned categories layered on top of the packaged mapping.
# NOTE(review): presumably these applications are missing from
# `categories_mapping` — confirm.
app_to_categories.update({
    "bongacams": "Streaming media",
    "gothbb": "E-commerce",
    "vkontakte": "Social",
    "poe-ninja": "Games",
    "unpkg": "Streaming media",
    "easylist": "Other services and APIs",
    "default-background": "default",
    "uber": "Other services and APIs",
})

# Translate the value in the "APP" column to an application name via the application
# enum; values missing from the enum become "Unknown".
data_df["APP_NAME"] = data_df["APP"].apply(lambda app_id: app_enum.get(app_id, "Unknown"))

# Every application name must have a category. Fail with a message that lists the
# offending names instead of a bare KeyError raised from inside a lambda.
unmapped_apps = sorted(set(data_df["APP_NAME"].unique()) - set(app_to_categories))
if unmapped_apps:
    raise KeyError(f"No category mapping for applications: {unmapped_apps}")
data_df["CATEGORY"] = data_df["APP_NAME"].map(app_to_categories)

# Columns summarised by the describe() cells of this notebook.
columns = ["PACKETS", "PACKETS_REV", "BYTES", "BYTES_REV", "DURATION", "PPI_LEN", "PPI_ROUNDTRIPS", "PPI_DURATION"]

# Whole dataset

In [5]:
# Summary statistics (5th/50th/75th/95th percentiles) of the selected columns over
# the whole training sample, rounded to two decimals.
whole_dataset_stats = data_df[columns].describe(percentiles=[0.05, 0.5, 0.75, 0.95])
whole_dataset_stats.round(2)

Unnamed: 0,PACKETS,PACKETS_REV,BYTES,BYTES_REV,DURATION,PPI_LEN,PPI_ROUNDTRIPS,PPI_DURATION
count,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
mean,44.65,150.65,16399.26,169256.5,9.47,23.14,4.55,3.25
std,626.01,2088.69,688135.5,2636015.0,29.61,7.27,1.88,12.31
min,1.0,1.0,1228.0,59.0,0.0,2.0,0.0,0.0
5%,5.0,5.0,1614.0,2345.0,0.04,10.0,2.0,0.03
50%,12.0,13.0,4178.5,5257.0,0.22,25.0,4.0,0.16
75%,22.0,24.0,7168.0,10512.0,4.98,30.0,6.0,0.58
95%,113.05,257.0,35247.3,249776.0,47.37,30.0,8.0,19.47
max,162848.0,489305.0,206054900.0,624267000.0,356.12,30.0,15.0,291.12


# Easybrain

In [6]:
# Same summary, restricted to rows whose APP_NAME is "easybrain".
is_easybrain = data_df["APP_NAME"] == "easybrain"
data_df.loc[is_easybrain, columns].describe(percentiles=[0.05, 0.5, 0.75, 0.95]).round(2)

Unnamed: 0,PACKETS,PACKETS_REV,BYTES,BYTES_REV,DURATION,PPI_LEN,PPI_ROUNDTRIPS,PPI_DURATION
count,4924.0,4924.0,4924.0,4924.0,4924.0,4924.0,4924.0,4924.0
mean,19.5,43.18,3811.88,42155.79,4.38,20.0,3.72,2.55
std,145.71,498.89,10475.12,610825.56,12.27,6.08,1.51,7.72
min,1.0,2.0,1228.0,1910.0,0.02,4.0,1.0,0.02
5%,4.0,7.0,1886.15,2963.0,0.06,11.0,2.0,0.05
50%,8.0,9.0,2532.0,3206.0,0.09,17.0,3.0,0.09
75%,12.0,13.0,3389.25,4042.0,0.31,25.0,4.0,0.14
95%,58.0,101.7,7232.5,101176.55,27.99,30.0,7.0,20.04
max,9447.0,22663.0,617954.0,27725098.0,150.3,30.0,11.0,90.15


# Gamedock

In [7]:
# Same summary, restricted to rows whose APP_NAME is "gamedock".
is_gamedock = data_df["APP_NAME"] == "gamedock"
data_df.loc[is_gamedock, columns].describe(percentiles=[0.05, 0.5, 0.75, 0.95]).round(2)

Unnamed: 0,PACKETS,PACKETS_REV,BYTES,BYTES_REV,DURATION,PPI_LEN,PPI_ROUNDTRIPS,PPI_DURATION
count,283.0,283.0,283.0,283.0,283.0,283.0,283.0,283.0
mean,1525.14,276.75,1913596.0,37168.46,7.88,27.48,5.5,1.62
std,10089.01,1207.42,12782190.0,111053.34,15.95,4.9,1.44,4.67
min,1.0,3.0,1228.0,3369.0,0.02,4.0,1.0,0.02
5%,6.1,10.0,3844.4,3862.4,0.2,19.0,4.0,0.11
50%,19.0,19.0,6832.0,9935.0,1.42,30.0,6.0,0.49
75%,148.0,96.5,81683.0,22042.5,6.96,30.0,7.0,0.78
95%,4730.6,1087.8,5914807.0,133064.5,33.48,30.0,8.0,6.04
max,162848.0,16729.0,206054900.0,1001368.0,137.42,30.0,10.0,35.68


# Games category

In [8]:
# Same summary, restricted to rows whose CATEGORY is "Games".
is_games_category = data_df["CATEGORY"] == "Games"
data_df.loc[is_games_category, columns].describe(percentiles=[0.05, 0.5, 0.75, 0.95]).round(2)

Unnamed: 0,PACKETS,PACKETS_REV,BYTES,BYTES_REV,DURATION,PPI_LEN,PPI_ROUNDTRIPS,PPI_DURATION
count,10261.0,10261.0,10261.0,10261.0,10261.0,10261.0,10261.0,10261.0
mean,63.51,59.06,56833.23,55672.07,3.98,19.66,3.66,2.01
std,1696.87,527.43,2142102.0,611714.81,11.06,7.62,1.62,6.49
min,1.0,2.0,1228.0,1588.0,0.02,4.0,0.0,0.0
5%,4.0,4.0,1610.0,2353.0,0.04,9.0,2.0,0.04
50%,8.0,9.0,2591.0,3426.0,0.1,17.0,3.0,0.09
75%,15.0,17.0,3899.0,8058.0,0.7,30.0,4.0,0.22
95%,77.0,192.0,9321.0,203995.0,25.71,30.0,7.0,15.07
max,162848.0,22663.0,206054900.0,27725098.0,175.16,30.0,11.0,90.15
