# Corpora Generation

This notebook recreates all the necessary corpora for the experiments in the paper.

The flow reader, however, needs to be implemented to suit your flow record dataset.

In [None]:
import collections as cll
import dataclasses
import datetime as dt
from ipaddress import (
    IPv4Address,
    IPv4Network,
    IPv6Address,
    IPv6Network,
    ip_address,
    ip_network,
)
import multiprocessing as mp
import os
import pickle
import gc
import gzip
import typing as T

In [None]:
# Used for locking the `log` function so that concurrent callers do not
# interleave their output.
LOG_LOCK = mp.Lock()

# This is where all corpora are stored. Other notebooks will expect to find
# corpora here.
ARTEFACTS_PATH = "./Artefacts/"

# The month used for comparison between the various approaches. This is the
# exact month we used in the paper.
# Each entry is (start, end, name prefix used for the generated files).
COMPARISON_TRAINING_DATES = (
    (
        dt.datetime(2019, 11, 1, tzinfo=dt.timezone.utc),
        dt.datetime(2019, 12, 1, tzinfo=dt.timezone.utc),
        "month",
    ),
)

# Notebook options.
# Default for the `overwrite` parameter of `save_pickle`.
FORCE_OVERWRITE = True
PACKET_SIZE_INTERVAL = 100  # The default we used in the paper.


def log(message: str):
    """Print `message` prefixed with the current local time.

    The whole operation runs while holding `LOG_LOCK`, and the output is
    flushed immediately.
    """
    with LOG_LOCK:
        now = dt.datetime.now()
        prefix = f"[{now.strftime('%d-%m %I:%M:%S %p')}]:"
        print(prefix, message, flush=True)


# Your flow reader implementation must return this for each flow record
# observed. Not all fields are necessary, but the more you provide, the more
# you can experiment with flow corpus generation.
@dataclasses.dataclass(frozen=True, repr=True, order=True)
class Flow:
    """A single flow record, as produced by `flow_reader`.

    Instances are immutable (`frozen=True`) and ordered field by field in
    declaration order (`order=True`). The first eight fields are required;
    the remaining fields default to `None`.
    """

    timestamp: int  # Milliseconds.
    # Protocol number. The corpus generators in this notebook only keep flows
    # whose protocol is 17.
    protocol: int
    source_address: T.Union[IPv4Address, IPv6Address]
    source_port: int
    destination_address: T.Union[IPv4Address, IPv6Address]
    destination_port: int
    # `bytes / packets` is used by `Service` as the flow's packet size.
    bytes: int
    packets: int
    rule: T.Optional[str] = None  # This is specific to the IXP Scrubber rules.
    # Combined with `rule` as "<rule>:<rule_confidence>" to form a flow label.
    rule_confidence: T.Optional[float] = None
    source_ASN: T.Optional[int] = None
    source_prefix: T.Optional[T.Union[IPv4Network, IPv6Network]] = None
    source_country: T.Optional[str] = None
    source_region: T.Optional[str] = None
    destination_ASN: T.Optional[int] = None
    destination_prefix: T.Optional[T.Union[IPv4Network, IPv6Network]] = None
    destination_country: T.Optional[str] = None
    destination_region: T.Optional[str] = None


def datetime_intervals(
    start: dt.datetime, end: dt.datetime, interval: dt.timedelta
):
    """Yield consecutive `(interval_start, interval_end)` pairs over a range.

    The pairs cover `start` up to `end` in steps of `interval`. The final
    pair is clipped so that it never extends past `end`, which means it may
    be shorter than `interval`. Nothing is yielded when `start >= end`.

    Raises:
        ValueError: If `interval` is not a positive duration. As this is a
            generator, the error surfaces when iteration begins.
    """
    # A zero or negative step would never advance `interval_start`, making
    # the loop below run forever.
    if interval <= dt.timedelta(0):
        raise ValueError(
            f"interval must be a positive duration, got {interval!r}."
        )

    interval_start = start
    while interval_start < end:
        interval_end = min(interval_start + interval, end)
        yield (interval_start, interval_end)
        interval_start = interval_end


def load_pickle(path: str, compressed=True) -> T.Any:
    """Load and return the pickled object stored at `path`.

    The garbage collector is disabled while unpickling and afterwards put
    back into the state it was in before the call, so a caller that had
    already disabled it is not affected.

    NOTE: `pickle.load` can execute arbitrary code. Only load files that were
    produced by a trusted source (e.g. `save_pickle` in this notebook).

    Args:
        path: File to read. Must end in ".gz" when `compressed` is true.
        compressed: Whether the file is gzip-compressed.

    Returns:
        The unpickled object.

    Raises:
        AssertionError: If `compressed` is true and `path` lacks ".gz".
        Exception: Any error raised while opening or unpickling the file is
            propagated unchanged.
    """
    if compressed:
        assert path.endswith(".gz")
    opener = gzip.open if compressed else open

    gc_was_enabled = gc.isenabled()
    gc.disable()
    try:
        with opener(path, "rb") as file:
            return pickle.load(file)
    finally:
        # Only re-enable collection if this call was the one to disable it.
        if gc_was_enabled:
            gc.enable()


def save_pickle(
    object: T.Any, path: str, overwrite=FORCE_OVERWRITE, compress=True
):
    """Pickle `object` to `path` and report whether anything was written.

    When `overwrite` is false and `path` already is a file, nothing is
    written and `False` is returned. Otherwise the object is pickled with the
    highest available protocol (gzip level 6 when `compress` is true, in
    which case `path` must end in ".gz") and `True` is returned.
    """
    if not overwrite and os.path.isfile(path):
        return False

    if compress:
        assert path.endswith(".gz")
        stream = gzip.open(path, "wb", compresslevel=6)
    else:
        stream = open(path, "wb")

    with stream as file:
        pickle.dump(object, file, protocol=pickle.HIGHEST_PROTOCOL)
    return True

# Flow Reader

In [None]:
# NOTE: This is for you to implement.
def flow_reader(start: dt.datetime, end: dt.datetime) -> T.Iterable[Flow]:
    """Return the `Flow` records of your dataset for the given time range.

    This is a placeholder: it always raises `NotImplementedError` until you
    replace it with a reader for your own flow record dataset. Every
    generation function in this notebook obtains its flows by iterating over
    `flow_reader(start, end)`.

    NOTE(review): whether `end` should be treated as inclusive or exclusive
    is not specified here. Callers pass consecutive intervals that share
    their boundary timestamps, so confirm your implementation does not
    return the same flow for two neighbouring intervals.
    """
    raise NotImplementedError("You must implement this for your dataset.")

In [None]:
class Service:
    """A named port-based matching rule with a packet size binning scheme.

    `flow in service` tests whether the flow's source and destination ports
    match the service. `service[flow]` (or `service[size]`) returns the
    packet size bin label for the flow, `"-"` when interval binning is turned
    off with an interval of zero, or `None` when no bin applies and the
    interval is negative.
    """

    def __init__(
        self,
        name: str,
        packet_size_interval: int = 0,  # Negative values disable intervals.
        packet_size_bins: T.Iterable[
            T.Tuple[T.Optional[int], T.Optional[int]]
        ] = (),
        source_ports: T.Iterable[T.Union[int, T.Tuple[int, int]]] = (),
        destination_ports: T.Iterable[T.Union[int, T.Tuple[int, int]]] = (),
        generic=False,
    ):
        """Ranges (both ports and bins) are inclusive."""

        every_port = ((0, 65535),)

        self.name = name
        self.packet_size_interval = packet_size_interval
        self.packet_size_bins = tuple(packet_size_bins)
        # No ports given means that every port matches.
        self.source_ports = tuple(source_ports) or every_port
        self.destination_ports = tuple(destination_ports) or every_port
        self.generic = generic

    def __str__(self):
        return self.name

    def __hash__(self) -> int:
        return hash(str(self))

    @staticmethod
    def _port_check(port, defined_ports):
        """Return whether `port` equals or falls inside any defined entry."""
        for candidate in defined_ports:
            if isinstance(candidate, int):
                matched = port == candidate
            else:
                matched = candidate[0] <= port <= candidate[1]
            if matched:
                return True
        return False

    def __contains__(self, flow: Flow) -> bool:
        """Return whether both ports of `flow` match this service."""
        if not self._port_check(flow.source_port, self.source_ports):
            return False
        return self._port_check(flow.destination_port, self.destination_ports)

    def __getitem__(self, value: T.Union[int, Flow]) -> T.Optional[str]:
        """Return the packet size bin label for a flow or a plain size."""
        if isinstance(value, Flow):
            # Rounded mean packet size of the flow.
            size = round(value.bytes / value.packets)
        elif isinstance(value, int):
            size = value
        else:
            raise ValueError(
                "Invalid interval calculation call. Must be a value or a flow."
            )

        # Explicit bins take precedence; the first matching bin wins.
        for bin_range in self.packet_size_bins:
            lower, upper = bin_range[0], bin_range[1]
            if lower is None and upper is None:
                continue
            if lower is None:
                if size <= upper:
                    return f"<= {upper}"
            elif upper is None:
                if lower <= size:
                    return f">= {lower}"
            elif lower <= size <= upper:
                return f"{lower}<->{upper}"

        step = self.packet_size_interval
        if step < 0:
            return None
        if step == 0:
            return "-"

        # Fall back to the interval binning approach if available.
        index = size // step
        return f"{index * step}<->{(index + 1) * step - 1}"


# The rules below are based on our paper. You can add more if you want.
# Perfectly correct service identification is extremely unlikely and not
# necessary.
def UDP_DDoS_services(
    generic_interval: int = PACKET_SIZE_INTERVAL,
    domain_knowledge: bool = True,
) -> T.Tuple[Service, ...]:
    """Return the ordered tuple of services used to describe UDP flows.

    The generic services cover every combination of the three port classes
    (system, registered, ephemeral) for the source and destination port, and
    bin packet sizes using `generic_interval`. When `domain_knowledge` is
    true, the hand-written services are placed in front of the generic ones;
    otherwise only the generic services are returned.
    """
    dns_ports = (53, 853, 5353)

    domain_knowledge_services = (
        Service(
            "DNS DrDoS (UDP)",
            source_ports=dns_ports,
            packet_size_bins=((551, None),),
            packet_size_interval=-1,
        ),
        Service("DNS (UDP)", source_ports=dns_ports, packet_size_interval=100),
        Service(
            "Memcached DrDoS (UDP)",
            source_ports=(11211,),
            packet_size_bins=((256, None),),
        ),
        Service(
            "SNMP DrDoS (UDP)",
            source_ports=(161,),
            packet_size_bins=((151, None),),
            packet_size_interval=-1,
        ),
        Service(
            "NTP DrDoS (UDP)",
            source_ports=(123,),
            packet_size_bins=((100, None),),
            packet_size_interval=-1,
        ),
        Service("NTP (UDP)", source_ports=(123,)),
        Service(
            "CLDAP DrDoS (UDP)",
            source_ports=(389,),
            packet_size_bins=((None, 150), (151, None)),
        ),
        Service("MSSQL (UDP)", source_ports=(1434,), packet_size_interval=300),
        Service(
            "SSDP (UDP)",
            source_ports=(1900,),
            packet_size_bins=((300, None),),
            packet_size_interval=100,
        ),
        Service(
            "NetBIOS (UDP)",
            source_ports=(137, 138, 139),
            packet_size_interval=100,
        ),
        Service("CharGEN (UDP)", source_ports=(19,)),
        Service("TFTP (UDP)", source_ports=(69,)),
        Service(
            "Kerberos DrDoS (UDP)",
            destination_ports=(88,),
            packet_size_interval=32,
        ),
    )

    # (label, inclusive port range) for each port class.
    port_classes = (
        ("System", (0, 1023)),
        ("Registered", (1024, 49151)),
        ("Ephemeral", (49152, 65535)),
    )

    # One generic service per (source class, destination class) pair, with
    # the source class varying slowest.
    generic_services = tuple(
        Service(
            f"Generic ({source_label} Ports -> {destination_label} Ports)"
            " (UDP)",
            source_ports=(source_range,),
            destination_ports=(destination_range,),
            packet_size_interval=generic_interval,
            generic=True,
        )
        for source_label, source_range in port_classes
        for destination_label, destination_range in port_classes
    )

    if domain_knowledge:
        return domain_knowledge_services + generic_services
    return generic_services

# Statistics Generation
Only used for some plots. Likely not interesting for you.

In [None]:
# Statistics gathered by `statistics_collection` for one datetime interval.
Collection = T.Tuple[
    T.Tuple[dt.datetime, dt.datetime],  # The (start, end) interval covered.
    int,  # Bytes.
    int,  # Packets.
    T.Dict[int, int],  # Flows using a source port.
    T.Dict[int, int],  # Flows using a destination port.
    T.Dict[T.Tuple[int, int], int],  # Flows using a port pair.
]


def statistics_collection(
    datetime_interval: T.Tuple[dt.datetime, dt.datetime]
) -> Collection:
    """Summarise every flow that `flow_reader` returns for one interval.

    Returns the interval itself, the summed bytes and packets, and the number
    of flows seen per source port, per destination port and per
    (source port, destination port) pair.
    """
    total_bytes = 0
    total_packets = 0
    flows_by_source_port: T.Dict[int, int] = {}
    flows_by_destination_port: T.Dict[int, int] = {}
    flows_by_port_pair: T.Dict[T.Tuple[int, int], int] = {}

    for record in flow_reader(*datetime_interval):
        total_bytes += record.bytes
        total_packets += record.packets

        source, destination = record.source_port, record.destination_port
        flows_by_source_port[source] = flows_by_source_port.get(source, 0) + 1
        flows_by_destination_port[destination] = (
            flows_by_destination_port.get(destination, 0) + 1
        )
        pair = (source, destination)
        flows_by_port_pair[pair] = flows_by_port_pair.get(pair, 0) + 1

    return (
        datetime_interval,
        total_bytes,
        total_packets,
        flows_by_source_port,
        flows_by_destination_port,
        flows_by_port_pair,
    )


def flow_dataset_statistics(
    start: dt.datetime,
    end: dt.datetime,
    interval: dt.timedelta = dt.timedelta(weeks=1.0),
):
    """Collect statistics for each `interval`-sized slice of `start`..`end`.

    The slices are processed by a process pool sized to the CPU count (8 when
    that is unknown). Returns one `statistics_collection` result per slice,
    in slice order.
    """
    worker_count = os.cpu_count() or 8
    slices = datetime_intervals(start, end, interval)
    with mp.Pool(worker_count) as pool:
        return pool.map(statistics_collection, slices)

# Standard Flow Corpus Generation (LSA, Doc2Vec, anything that uses documents, etc.)

In [None]:
# Result of the standard corpus generation: (documents, labels).
Corpus = T.Tuple[  # `DefaultDict` has trouble pickling here.
    # Destination address -> list of words describing its flows.
    T.Dict[str, T.List[str]],
    # Destination address -> label (or `None`) -> summed flow bytes.
    T.Dict[str, T.Dict[T.Optional[str], int]],
]


def flow_corpus_generation(
    datetime_interval: T.Tuple[dt.datetime, dt.datetime],
    services: T.Tuple[Service, ...],
) -> Corpus:
    """Build per-destination documents and labels for one interval.

    Only flows with protocol 17 and an IPv4 source address are used. For each
    such flow, the first service that both matches the flow and yields a
    packet size bin contributes four words (service name, size bin, source
    port, destination port), repeated once per packet, to the document of the
    flow's destination address. Flows for which no service does so are
    logged. The flow's bytes are added to the destination's label counts
    regardless.
    """
    documents: T.Dict[str, T.List[str]] = {}
    labels: T.Dict[str, T.Dict[T.Optional[str], int]] = {}

    for flow in flow_reader(*datetime_interval):
        is_udp_over_ipv4 = (
            flow.protocol == 17 and flow.source_address.version == 4
        )
        if not is_udp_over_ipv4:
            continue
        assert flow.destination_address.version == 4

        tag = str(flow.destination_address)
        if tag not in documents:
            documents[tag] = []
        words = documents[tag]

        matched = False
        for service in services:
            interval = service[flow] if flow in service else None
            if interval:
                flow_words = (
                    str(service),
                    str(interval),
                    f"{flow.source_port}->",
                    f"->{flow.destination_port}",
                )
                # The four words are repeated once per packet of the flow.
                words.extend(flow_words * flow.packets)
                matched = True
                break
        if not matched:
            log(f"Flow matched no services: {flow}")

        label = None
        if flow.rule:
            label = f"{flow.rule}:{flow.rule_confidence}"
        tag_labels = labels.setdefault(tag, {})
        tag_labels[label] = tag_labels.get(label, 0) + flow.bytes

    return (documents, labels)


def flows_to_corpus(
    start: dt.datetime, end: dt.datetime, domain_knowledge: bool
) -> Corpus:
    """Generate the standard corpus for `start`..`end`.

    The range is cut into 30-minute splits that are processed by a process
    pool with `flow_corpus_generation`; the per-split results are then merged
    in split order into a single (documents, labels) pair.
    """
    half_hour = dt.timedelta(minutes=30)
    datetime_splits = tuple(datetime_intervals(start, end, half_hour))
    services = UDP_DDoS_services(domain_knowledge=domain_knowledge)

    with mp.Pool(os.cpu_count() or 8) as pool:
        # The pool result is handed straight to the deque so that no other
        # reference to the per-split corpora is kept.
        corpora: T.Deque[Corpus] = cll.deque(  # Already sorted.
            pool.starmap(
                flow_corpus_generation,
                ((split, services) for split in datetime_splits),
                chunksize=1,
            ),
            maxlen=len(datetime_splits),
        )

    final_documents: T.Dict[str, T.List[str]] = {}
    final_labels: T.Dict[str, T.Dict[T.Optional[str], int]] = {}

    log(f"Finished corpora collection. Merging {len(corpora)} corpora...")
    # Entries are popped from their source containers as they are merged
    # (presumably to release memory while merging).
    while corpora:
        documents, labels = corpora.popleft()

        while documents:
            tag, words = documents.popitem()
            merged_words = final_documents.get(tag)
            if merged_words is None:
                final_documents[tag] = words
            else:
                merged_words.extend(words)

        while labels:
            tag, label_counts = labels.popitem()
            tag_labels = final_labels.setdefault(tag, {})
            while label_counts:
                label, count = label_counts.popitem()
                tag_labels[label] = tag_labels.get(label, 0) + count

    return (final_documents, final_labels)

# Word2Vec Flow Corpus Generation

In [None]:
# Result of the Word2Vec corpus generation: (sentences, labels).
Word2VecCorpus = T.Tuple[  # `DefaultDict` has trouble pickling here.
    # Sentence key (port, service name or size bin) -> destination addresses.
    T.Dict[str, T.List[str]],
    # Destination address -> label (or `None`) -> summed flow bytes.
    T.Dict[str, T.Dict[T.Optional[str], int]],
]


def flow_corpus_generation_word2vec(
    datetime_interval: T.Tuple[dt.datetime, dt.datetime],
    services: T.Tuple[Service, ...],
) -> Word2VecCorpus:
    """Build Word2Vec sentences and labels for one interval.

    Only flows with protocol 17 and an IPv4 source address are used. The
    destination address of each such flow is the word; it is appended to the
    sentences of the flow's source port and destination port, and to the
    sentences of the name and size bin of the first service that both matches
    the flow and yields a bin. Flows for which no service does so are logged.
    The flow's bytes are added to the destination's label counts regardless.
    """
    sentences: T.Dict[str, T.List[str]] = {}
    labels: T.Dict[str, T.Dict[T.Optional[str], int]] = {}

    for flow in flow_reader(*datetime_interval):
        is_udp_over_ipv4 = (
            flow.protocol == 17 and flow.source_address.version == 4
        )
        if not is_udp_over_ipv4:
            continue
        assert flow.destination_address.version == 4

        word = str(flow.destination_address)
        sentences.setdefault(f"{flow.source_port}->", []).append(word)
        sentences.setdefault(f"->{flow.destination_port}", []).append(word)

        matched = False
        for service in services:
            interval = service[flow] if flow in service else None
            if interval:
                sentences.setdefault(str(service), []).append(word)
                sentences.setdefault(interval, []).append(word)
                matched = True
                break
        if not matched:
            log(f"Flow matched no services: {flow}")

        label = None
        if flow.rule:
            label = f"{flow.rule}:{flow.rule_confidence}"
        word_labels = labels.setdefault(word, {})
        word_labels[label] = word_labels.get(label, 0) + flow.bytes

    return (sentences, labels)


def flows_to_word2vec_corpus(
    start: dt.datetime, end: dt.datetime, domain_knowledge: bool
) -> Word2VecCorpus:
    """Generate the Word2Vec corpus for `start`..`end`.

    The range is cut into 30-minute splits that are processed by a process
    pool with `flow_corpus_generation_word2vec`; the per-split results are
    then merged in split order into a single (sentences, labels) pair.
    """
    half_hour = dt.timedelta(minutes=30)
    datetime_splits = tuple(datetime_intervals(start, end, half_hour))
    services = UDP_DDoS_services(domain_knowledge=domain_knowledge)

    with mp.Pool(os.cpu_count() or 8) as pool:
        # The pool result is handed straight to the deque so that no other
        # reference to the per-split corpora is kept.
        corpora: T.Deque[Word2VecCorpus] = cll.deque(  # Already sorted.
            pool.starmap(
                flow_corpus_generation_word2vec,
                ((split, services) for split in datetime_splits),
                chunksize=1,
            ),
            maxlen=len(datetime_splits),
        )

    final_documents: T.Dict[str, T.List[str]] = {}
    final_labels: T.Dict[str, T.Dict[T.Optional[str], int]] = {}

    log(f"Finished corpora collection. Merging {len(corpora)} corpora...")
    # Entries are popped from their source containers as they are merged
    # (presumably to release memory while merging).
    while corpora:
        documents, labels = corpora.popleft()

        while documents:
            tag, words = documents.popitem()
            merged_words = final_documents.get(tag)
            if merged_words is None:
                final_documents[tag] = words
            else:
                merged_words.extend(words)

        while labels:
            tag, label_counts = labels.popitem()
            tag_labels = final_labels.setdefault(tag, {})
            while label_counts:
                label, count = label_counts.popitem()
                tag_labels[label] = tag_labels.get(label, 0) + count

    return (final_documents, final_labels)

# Baseline Counts Generation

In [None]:
# Result of the baseline counts generation: (counts, labels).
Counts = T.Tuple[
    # Destination address -> port key ("<port>->" or "-><port>") -> totals.
    T.Dict[
        str,
        T.Dict[
            str,
            T.Tuple[
                int,  # Flow count.
                int,  # Byte count.
                int,  # Packet count.
            ],
        ],
    ],
    # Destination address -> label (or `None`) -> summed flow bytes.
    T.Dict[str, T.Dict[T.Optional[str], int]],
]


def flow_counts(
    datetime_interval: T.Tuple[dt.datetime, dt.datetime]
) -> Counts:
    """Count flows, bytes and packets per destination and port for one interval.

    Only flows with protocol 17 and an IPv4 source address are used. Each
    such flow is counted once under its source port key ("<port>->") and once
    under its destination port key ("-><port>") of its destination address.
    The flow's bytes are also added to the destination's label counts.
    """
    counts: T.Dict[str, T.Dict[str, T.Tuple[int, int, int]]] = {}
    labels: T.Dict[str, T.Dict[T.Optional[str], int]] = {}

    for flow in flow_reader(*datetime_interval):
        is_udp_over_ipv4 = (
            flow.protocol == 17 and flow.source_address.version == 4
        )
        if not is_udp_over_ipv4:
            continue
        assert flow.destination_address.version == 4

        tag = str(flow.destination_address)
        tag_counts = counts.setdefault(tag, {})

        port_keys = (f"{flow.source_port}->", f"->{flow.destination_port}")
        for port in port_keys:
            seen_flows, seen_bytes, seen_packets = tag_counts.get(
                port, (0, 0, 0)
            )
            tag_counts[port] = (
                seen_flows + 1,
                seen_bytes + flow.bytes,
                seen_packets + flow.packets,
            )

        label = None
        if flow.rule:
            label = f"{flow.rule}:{flow.rule_confidence}"
        tag_labels = labels.setdefault(tag, {})
        tag_labels[label] = tag_labels.get(label, 0) + flow.bytes

    return (counts, labels)


def flows_to_counts(start: dt.datetime, end: dt.datetime) -> Counts:
    """Generate the baseline counts for `start`..`end`.

    The range is cut into 30-minute splits that are processed by a process
    pool with `flow_counts`; the per-split results are then merged in split
    order into a single (counts, labels) pair.
    """
    half_hour = dt.timedelta(minutes=30)
    datetime_splits = tuple(datetime_intervals(start, end, half_hour))

    with mp.Pool(os.cpu_count() or 8) as pool:
        # The lazy `imap` result is fully consumed here, while the pool is
        # still open.
        split_counts: T.Deque[Counts] = cll.deque(  # Already sorted.
            pool.imap(flow_counts, datetime_splits),
            maxlen=len(datetime_splits),
        )

    final_counts: T.Dict[str, T.Dict[str, T.Tuple[int, int, int]]] = {}
    final_labels: T.Dict[str, T.Dict[T.Optional[str], int]] = {}

    log(f"Finished collection. Merging {len(split_counts)} split counts...")
    # Entries are popped from their source containers as they are merged
    # (presumably to release memory while merging).
    while split_counts:
        counts, labels = split_counts.popleft()

        while counts:
            tag, tag_counts = counts.popitem()
            final_tag_counts = final_counts.setdefault(tag, {})

            while tag_counts:
                service, new_counts = tag_counts.popitem()
                old_counts = final_tag_counts.get(service)
                if old_counts is None:
                    final_tag_counts[service] = new_counts
                    continue
                final_tag_counts[service] = (
                    old_counts[0] + new_counts[0],
                    old_counts[1] + new_counts[1],
                    old_counts[2] + new_counts[2],
                )

        while labels:
            tag, label_counts = labels.popitem()
            tag_labels = final_labels.setdefault(tag, {})
            while label_counts:
                label, count = label_counts.popitem()
                tag_labels[label] = tag_labels.get(label, 0) + count

    return (final_counts, final_labels)

# Flow Corpus Selection

In [None]:
def create_dataset(
    mode: T.Literal["Standard", "Word2Vec", "Baseline"],
    start: dt.datetime,
    end: dt.datetime,
    name: str,
    domain_knowledge: bool = True,
):
    """Generate one dataset and store it under `ARTEFACTS_PATH`.

    The data is written to "<name>.data.pickle.gz" and the labels to
    "<name>.labels.pickle.gz". Generation is skipped when both files already
    exist; if only one of them is present (e.g. after an interrupted run),
    the dataset is generated again so that the pair is complete.

    Args:
        mode: Which generator to use: "Standard" (`flows_to_corpus`),
            "Word2Vec" (`flows_to_word2vec_corpus`) or "Baseline"
            (`flows_to_counts`).
        start: Start of the time range passed to the generator.
        end: End of the time range passed to the generator.
        name: File name prefix of the stored dataset.
        domain_knowledge: Passed to the "Standard" and "Word2Vec" generators;
            unused for "Baseline".

    Raises:
        ValueError: If `mode` is not one of the supported modes.
    """
    # Reject unknown modes before doing any work; previously they fell through
    # every branch and failed later with an unbound `data` variable.
    if mode not in ("Standard", "Word2Vec", "Baseline"):
        raise ValueError(
            f"Unknown mode {mode!r}. "
            'Must be "Standard", "Word2Vec" or "Baseline".'
        )

    os.makedirs(ARTEFACTS_PATH, mode=0o770, exist_ok=True)
    data_path = os.path.join(ARTEFACTS_PATH, f"{name}.data.pickle.gz")
    labels_path = os.path.join(ARTEFACTS_PATH, f"{name}.labels.pickle.gz")

    log(f'Generating training data "{name}"...')
    if os.path.isfile(data_path) and os.path.isfile(labels_path):
        log(f'Training data "{name}" already exists.')
        return

    if mode == "Standard":
        data, labels = flows_to_corpus(
            start, end, domain_knowledge=domain_knowledge
        )
    elif mode == "Word2Vec":
        data, labels = flows_to_word2vec_corpus(
            start, end, domain_knowledge=domain_knowledge
        )
    else:  # "Baseline"; `mode` was validated above.
        data, labels = flows_to_counts(start, end)

    save_pickle(data, data_path)
    save_pickle(labels, labels_path)
    log(f'Generated training data "{name}".')

The below cell will create the datasets necessary for the baseline comparison.

In [None]:
# Each variant is (mode, file name suffix, whether to use domain knowledge).
# The "-ndk" suffix marks the No Domain Knowledge (NDK) versions.
comparison_variants = (
    ("Standard", "standard", True),
    ("Word2Vec", "word2vec", True),
    ("Baseline", "baseline", True),
    ("Standard", "standard-ndk", False),
    ("Word2Vec", "word2vec-ndk", False),
)

for start, end, prefix in COMPARISON_TRAINING_DATES:
    for mode, suffix, use_domain_knowledge in comparison_variants:
        create_dataset(
            mode,
            start,
            end,
            f"{prefix}.{suffix}",
            domain_knowledge=use_domain_knowledge,
        )

The below cell will create the datasets necessary for the longitudinal analysis of a DDoS2Vec model's performance.

**Note**: Our experiment covered all of 2019, but your dataset might not cover that range in monthly intervals. Modify accordingly.

In [None]:
# January to November: each dataset runs from the first day of the month to
# the first day of the following month.
for month_start in range(1, 12):
    month_end = month_start + 1
    create_dataset(
        "Standard",
        dt.datetime(2019, month_start, 1, tzinfo=dt.timezone.utc),
        dt.datetime(2019, month_end, 1, tzinfo=dt.timezone.utc),
        f"{month_start}.standard",
    )

# December is handled separately because it ends in the next year.
create_dataset(
    "Standard",
    dt.datetime(2019, 12, 1, tzinfo=dt.timezone.utc),
    dt.datetime(2020, 1, 1, tzinfo=dt.timezone.utc),
    "12.standard",
)

# Statistics Selection

Plotting code specific to our private dataset has been stripped, but we keep the code for generating the statistics.

**Note**: Again, you must modify the year.

In [None]:
# Load the cached statistics if they exist; otherwise collect and cache them.
statistics_file_path = os.path.join(ARTEFACTS_PATH, "statistics.pickle.gz")
statistics_are_cached = os.path.isfile(statistics_file_path)

if statistics_are_cached:
    log("Statistics already exist. Loading...")
    statistics: T.List[Collection] = load_pickle(statistics_file_path)
else:
    log("Collecting statistics (weekly)...")
    # The default interval of `flow_dataset_statistics` is one week.
    statistics = flow_dataset_statistics(
        dt.datetime(2019, 1, 1, tzinfo=dt.timezone.utc),
        dt.datetime(2020, 1, 1, tzinfo=dt.timezone.utc),
    )
    save_pickle(statistics, statistics_file_path)
    log("Collected statistics.")

In [None]:
# Number of flows per collection. Every flow is counted under exactly one
# source port, so summing the per-source-port counts gives the flow count.
weekly_flow_counts: T.List[int] = [
    sum(collection[3].values()) for collection in statistics
]

print(f"Total UDP flow samples in dataset: {sum(weekly_flow_counts):,}")

# Actual length of every collection interval, in days. The final interval is
# clipped to the end of the requested range and can therefore be shorter than
# a week, so dividing every collection by 7 would bias the daily mean low.
collection_days = [
    (collection[0][1] - collection[0][0]) / dt.timedelta(days=1)
    for collection in statistics
]

average_daily_flow_counts = [
    flow_count / days
    for flow_count, days in zip(weekly_flow_counts, collection_days)
    if days > 0
]

# Overall daily mean: total flows over total days covered. Falls back to 0
# when there are no statistics, instead of dividing by zero.
total_days = sum(collection_days)
average_daily_flow_count = (
    round(sum(weekly_flow_counts) / total_days) if total_days > 0 else 0
)

print(f"Daily mean UDP flow sample count: {average_daily_flow_count:,}")