# Visual representation of FlowPics
#### Author: Jakub Čoček (xcocek00)

Creates visual representations of FlowPics with and without augmentations.

In [1]:
# -- IMPORTS --

import os 
os.chdir('/workplace/xcocek00/common/')
from dataloader import create_flowpic_dataloader

os.chdir('/workplace/xcocek00/common/')
from augmentations import (
    augment_iat,
    augment_rtt,
    packet_loss,
)

# set working dir to correct folder
os.chdir('/workplace/xcocek00/flowpics/')

import torchvision.transforms as T
import torchvision.transforms.functional as F
import torch
from PIL import Image, ImageOps
import numpy as np

# sets csv limit
import csv
import sys
csv.field_size_limit(sys.maxsize)

################################################################################
The 'datapipes', 'dataloader2' modules are deprecated and will be removed in a
future torchdata release! Please see https://github.com/pytorch/data/issues/1196
to learn more and leave feedback.
################################################################################



131072

In [2]:
def flowpic_to_img(tensor, filename, index=0, upscale=16, border=4):
    '''
    Converts one FlowPic of a batch into an upscaled grayscale image and saves it.

    The rendering itself (log scaling, normalization, inversion, upscaling and
    border) is delegated to `process`, the same helper that
    `combine_flowpics_to_img` uses, so single and side-by-side images are
    always drawn the same way.

    Args:
        tensor: tensor of shape [batch_size, 1, H, W]
        filename: path where the output will be stored
        index: index of the FlowPic within the batch to save
        upscale: factor to upscale the original FlowPic resolution
        border: width of the black border drawn around the image
    '''

    # `process` is defined in the following cell; the name is looked up when
    # this function is called, so that cell has to be executed first.
    img = process(tensor[index], upscale, border=border)

    img.save(filename)


In [3]:
def process(fp, upscale, border=4):
    '''
    Renders a single FlowPic as a grayscale PIL image.

    Args:
        fp: flowpic tensor
        upscale: factor by which the image width and height are multiplied
        border: width of the black border added around the image

    Returns:
        the upscaled image including its border
    '''
    # tensor -> numpy, log scale to enhance contrast
    values = np.log1p(fp.squeeze().cpu().numpy())

    # scale into [0, 1]; an all-zero FlowPic is left untouched
    peak = values.max()
    if peak > 0:
        values = values / peak

    # invert: 0 -> white (255), maximum -> black (0)
    pixels = ((1 - values) * 255).astype(np.uint8)

    small = Image.fromarray(pixels, mode='L')
    scaled_size = (small.width * upscale, small.height * upscale)
    enlarged = small.resize(scaled_size, resample=Image.NEAREST)

    return ImageOps.expand(enlarged, border=border, fill='black')

def combine_flowpics_to_img(tensor_orig, tensor_aug, filename, index=0, index2=0, upscale=16, gap=10):
    '''
    Renders two FlowPics next to each other and saves them as one
    upscaled grayscale image.

    Args:
        tensor_orig: batch holding the FlowPic shown on the left
        tensor_aug: batch holding the FlowPic shown on the right
        filename: path where the output will be stored
        index: index of the left FlowPic within tensor_orig
        index2: index of the right FlowPic within tensor_aug
        upscale: factor to upscale the original FlowPic resolution
        gap: width of the white gap between the two images
    '''

    left = process(tensor_orig[index], upscale)
    right = process(tensor_aug[index2], upscale)

    # the right image starts after the left image plus the gap
    right_x = left.width + gap
    canvas_size = (right_x + right.width, max(left.height, right.height))

    canvas = Image.new('L', canvas_size, color='white')
    for picture, x_offset in ((left, 0), (right, right_x)):
        canvas.paste(picture, (x_offset, 0))

    canvas.save(filename)


---

FlowPics are created from the Ucdavis-icdm19 dataset, as it achieved the best performance according to my results.

In [9]:
# NO AUGMENTATION

# Settings of the un-augmented Ucdavis pretraining loader:
# 33 evenly spaced edges per axis, spanning 0-15 (time) and 0-1500 (length).
orig_loader_kwargs = dict(
    dir_path="/workplace/datasets/ucdavis/final-splits/pretraining.csv",
    batch_size=32,
    meta_key="app",
    time_bins=[i * (15 / 32) for i in range(33)],
    length_bins=[i * (1500 / 32) for i in range(33)],
    bidirectional=False,
)
dl_orig = create_flowpic_dataloader(**orig_loader_kwargs)

# Only the first batch is used. fp1_orig stays defined after the loop and
# serves as the reference batch for the augmentation cells further down.
for fp1_orig, _, _ in dl_orig:
    flowpic_to_img(fp1_orig, "flowpic_no_augmentation.png")
    break

In [10]:
# RTT augmentation

# Same loader settings as the un-augmented cell, plus augment_rtt on both flow transforms.
rtt_loader_kwargs = dict(
    dir_path="/workplace/datasets/ucdavis/final-splits/pretraining.csv",
    batch_size=32,
    meta_key="app",
    time_bins=[i * (15 / 32) for i in range(33)],
    length_bins=[i * (1500 / 32) for i in range(33)],
    bidirectional=False,
    flow_transform_1=augment_rtt,
    flow_transform_2=augment_rtt,
)
dl = create_flowpic_dataloader(**rtt_loader_kwargs)

# First batch only. Needs fp1_orig from the "NO AUGMENTATION" cell.
# NOTE(review): sample 0 of this batch is paired with sample 0 of fp1_orig; they show
# the same flow only if the dataloader yields flows in a fixed order - confirm.
for fp1, _, _ in dl:
    flowpic_to_img(fp1, "flowpic_rtt.png")
    combine_flowpics_to_img(fp1_orig, fp1, "rtt_and_orig.png")
    break

In [11]:
# IAT augmentation

# Same loader settings as the un-augmented cell, plus augment_iat on both flow transforms.
iat_loader_kwargs = dict(
    dir_path="/workplace/datasets/ucdavis/final-splits/pretraining.csv",
    batch_size=32,
    meta_key="app",
    time_bins=[i * (15 / 32) for i in range(33)],
    length_bins=[i * (1500 / 32) for i in range(33)],
    bidirectional=False,
    flow_transform_1=augment_iat,
    flow_transform_2=augment_iat,
)
dl = create_flowpic_dataloader(**iat_loader_kwargs)

# First batch only. Needs fp1_orig from the "NO AUGMENTATION" cell.
# NOTE(review): sample 0 of this batch is paired with sample 0 of fp1_orig; they show
# the same flow only if the dataloader yields flows in a fixed order - confirm.
for fp1, _, _ in dl:
    flowpic_to_img(fp1, "flowpic_iat.png")
    combine_flowpics_to_img(fp1_orig, fp1, "iat_and_orig.png")
    break

In [16]:
# packet loss augmentation

# Same loader settings as the un-augmented cell, plus packet_loss on both flow transforms.
pktl_loader_kwargs = dict(
    dir_path="/workplace/datasets/ucdavis/final-splits/pretraining.csv",
    batch_size=32,
    meta_key="app",
    time_bins=[i * (15 / 32) for i in range(33)],
    length_bins=[i * (1500 / 32) for i in range(33)],
    bidirectional=False,
    flow_transform_1=packet_loss,
    flow_transform_2=packet_loss,
)
dl = create_flowpic_dataloader(**pktl_loader_kwargs)

# First batch only. Needs fp1_orig from the "NO AUGMENTATION" cell.
# NOTE(review): sample 0 of this batch is paired with sample 0 of fp1_orig; they show
# the same flow only if the dataloader yields flows in a fixed order - confirm.
for fp1, _, _ in dl:
    flowpic_to_img(fp1, "flowpic_pkts_loss.png")
    combine_flowpics_to_img(fp1_orig, fp1, "pktl_and_orig.png")
    break

In [17]:
# FlowPic rotation

dl = create_flowpic_dataloader(
    dir_path="/workplace/datasets/ucdavis/final-splits/pretraining.csv",
    batch_size=32,
    meta_key="app",
    time_bins = [i * (15 / 32) for i in range(33)],
    length_bins = [i * (1500 / 32) for i in range(33)],
    bidirectional = False,
)

# The transform does not depend on the batch, so it is built once up front.
rotate_transform = T.RandomRotation(degrees=(-10,10))

# one FlowPic only
for fp1, _, _ in dl:
    if fp1.dim() == 4:
        # rotate each FlowPic of the batch separately, so every one gets its own random angle
        new_fp = torch.stack([rotate_transform(img) for img in fp1])
    else:
        # Without this branch new_fp would be undefined (or stale from an earlier
        # run of this cell) for any batch that is not 4-dimensional.
        # RandomRotation accepts tensors of shape [..., H, W]; the whole tensor
        # is rotated by a single angle here.
        new_fp = rotate_transform(fp1)

    flowpic_to_img(tensor=new_fp, filename="flowpic_rotation.png")
    # Compare against fp1, the batch the rotation was derived from, so both halves
    # are guaranteed to show the same flow and the cell does not rely on fp1_orig
    # left behind by another cell.
    combine_flowpics_to_img(tensor_orig=fp1, tensor_aug=new_fp, filename="rotation_and_orig.png")
    break



### FlowPics per dataset

---

#### Mirage19

In [8]:
# 2 FlowPics from Mirage19 dataset

def _mirage19_loader(**extra):
    '''Creates a dataloader over the Mirage19 train split; `extra` is forwarded unchanged.'''
    return create_flowpic_dataloader(
        dir_path="/workplace/datasets/mirage19/processed/splits/train.csv",
        batch_size=32,
        meta_key="BF_label",
        time_bins=[i * (15 / 32) for i in range(33)],
        length_bins=[i * (1500 / 32) for i in range(33)],
        bidirectional=False,
        **extra,
    )

# default loader and the one created with min_packets=30
dl_m19 = _mirage19_loader()
f_dl_m19 = _mirage19_loader(min_packets=30)

# From the first batch of each loader, save FlowPics 2 and 4 side by side.
for loader, out_name in ((dl_m19, "m19_flowpics.png"), (f_dl_m19, "f_m19_flowpics.png")):
    for fp1, _, _ in loader:
        combine_flowpics_to_img(fp1, fp1, out_name, index=2, index2=4)
        break



---

#### Mirage22

In [15]:
# 2 FlowPics from Mirage22 dataset

def _mirage22_loader(csv_path, min_packets):
    '''Creates a Mirage22 dataloader for the given split file and min_packets value.'''
    return create_flowpic_dataloader(
        dir_path=csv_path,
        batch_size=32,
        meta_key="BF_label",
        time_bins=[i * (15 / 32) for i in range(33)],
        length_bins=[i * (1500 / 32) for i in range(33)],
        bidirectional=False,
        min_packets=min_packets,
    )

# the "splits-10" and "splits-1000" variants, each with the matching min_packets
dl_m22 = _mirage22_loader("/workplace/datasets/mirage22/processed/splits-10/train.csv", 10)
f_dl_m22 = _mirage22_loader("/workplace/datasets/mirage22/processed/splits-1000/train.csv", 1000)

# From the first batch of each loader, save FlowPics 2 and 3 side by side.
for loader, out_name in ((dl_m22, "10_m22_flowpics.png"), (f_dl_m22, "1000_m22_flowpics.png")):
    for fp1, _, _ in loader:
        combine_flowpics_to_img(fp1, fp1, out_name, index=2, index2=3)
        break

---

#### Ucdavis

In [13]:
# FlowPics from Ucdavis dataset

train_dl_ucd = create_flowpic_dataloader(
    dir_path="/workplace/datasets/ucdavis/final-splits/pretraining.csv",
    batch_size=32,
    meta_key="app",    
    time_bins = [i * (15 / 32) for i in range(33)],
    length_bins = [i * (1500 / 32) for i in range(33)],
    bidirectional = False,
)

script_dl_ucd = create_flowpic_dataloader(
    dir_path="/workplace/datasets/ucdavis/final-splits/script.csv",
    batch_size=32,
    meta_key="app",    
    time_bins = [i * (15 / 32) for i in range(33)],
    length_bins = [i * (1500 / 32) for i in range(33)],
    bidirectional = False,
)

human_dl_ucd = create_flowpic_dataloader(
    dir_path="/workplace/datasets/ucdavis/final-splits/human.csv",
    batch_size=32,
    meta_key="app",    
    time_bins = [i * (15 / 32) for i in range(33)],
    length_bins = [i * (1500 / 32) for i in range(33)],
    bidirectional = False,
)

# FlowPics 2 and 4 of the first pretraining batch, side by side
for fp1, _, _ in train_dl_ucd:
    combine_flowpics_to_img(tensor_orig=fp1, tensor_aug=fp1, index=2, index2=4, filename="ucd_flowpics.png")
    break

# First FlowPic of the first script batch next to the first FlowPic of the first
# human batch. Both loops stop after one batch: without the breaks every
# (script batch, human batch) combination would be rendered and the same file
# overwritten each time.
for fp1, _, _ in script_dl_ucd:
    for fp2, _, _ in human_dl_ucd:
        combine_flowpics_to_img(tensor_orig=fp1, tensor_aug=fp2, filename="script_human_flowpics.png")
        break
    break

---

#### UTMobileNet21

In [14]:
# FlowPics from UTMobileNet dataset

def _utmobilenet_loader(**extra):
    '''Creates a dataloader over the UTMobileNet21 train split; `extra` is forwarded unchanged.'''
    return create_flowpic_dataloader(
        dir_path="/workplace/datasets/utmobilenet21/final-splits/train.csv",
        batch_size=32,
        meta_key="app",
        time_bins=[i * (15 / 32) for i in range(33)],
        length_bins=[i * (1500 / 32) for i in range(33)],
        bidirectional=False,
        **extra,
    )

# default loader and the one created with min_packets=30
dl_utm = _utmobilenet_loader()
f_dl_utm = _utmobilenet_loader(min_packets=30)

# From the first batch of each loader, save FlowPics 2 and 4 side by side.
for loader, out_name in ((dl_utm, "utm_flowpics.png"), (f_dl_utm, "f_utm_flowpics.png")):
    for fp1, _, _ in loader:
        combine_flowpics_to_img(fp1, fp1, out_name, index=2, index2=4)
        break

---

#### CESNET

In [12]:
# FlowPics from CESNET dataset

dl_ces = create_flowpic_dataloader(
    dir_path="/workplace/data/long-export/tls-pstats100/tmp_balanced/train_group_c_timestamps.csv",
    batch_size=32,
    meta_key="label",
    time_bins=[i * (15 / 32) for i in range(33)],
    length_bins=[i * (1500 / 32) for i in range(33)],
    bidirectional=False,
)

# batch number -> (left index, right index, output file) for every image taken from that batch
cesnet_pairs = {
    0: (
        (20, 30, "cesnet_flowpics1.png"),
        (2, 14, "cesnet_flowpics2.png"),
        (10, 4, "cesnet_flowpics3.png"),
    ),
    1: (
        (2, 23, "cesnet_flowpics4.png"),
        (0, 7, "cesnet_flowpics5.png"),
        (1, 11, "cesnet_flowpics6.png"),
    ),
}

# Only the first two batches are rendered; iteration stops after the second one.
for batch_no, (fp1, _, _) in enumerate(dl_ces):
    for left_idx, right_idx, out_name in cesnet_pairs.get(batch_no, ()):
        combine_flowpics_to_img(tensor_orig=fp1, tensor_aug=fp1, index=left_idx, index2=right_idx, filename=out_name)
    if batch_no == 1:
        break

