# Introduction
The point of this notebook is simply to observe (without any decision making) the performance across a matrix of window skips and window lengths.

In [None]:
from sampleddetection.common_lingo import Action, State
from sampleddetection.environment.model import Environment
from sampleddetection.datastructures.flowsession import SampledFlowSession
import numpy as np
from typing import List
import os
from tqdm.notebook import tqdm
from pathlib import Path
from itertools import product
import random

# Make sure these are reloaded when cells are rerun
%load_ext autoreload
%autoreload 2

In [None]:
# Experiment configuration
# Window sizes range from the microsecond scale up to the dekasecond scale
window_skips = 2 * np.logspace(-6, 1, 3, dtype=float)
window_lengths = 2 * np.logspace(-5, 1, 3, dtype=float)
batch_size = 16

# Input CSV, plus the directory and filename template used for the
# per-(window skip, window length) dataset files
csv_path = './data/miniLabeledWednesday.csv'
dataset_dir = './data/precalc_windows/'
dataset_filename = 'ws_{}_wl_{}.csv'

# Columns requested when sessions are written out to CSV
desired_features = [
    # Debugging info
    "start_ts",
    "start_timestamp",
    "end_timestamp",
    "tot_fwd_pkts",
    "tot_bwd_pkts",
    # Non debugging
    "label",
    "fwd_pkt_len_max",
    "fwd_pkt_len_min",
    "fwd_pkt_len_mean",
    "bwd_pkt_len_max",
    "bwd_pkt_len_min",
    "bwd_pkt_len_mean",
    "flow_byts_s",
    "flow_pkts_s",
    "flow_iat_mean",
    "flow_iat_max",
    "flow_iat_min",
    "fwd_iat_mean",
    "fwd_iat_max",
    "fwd_iat_min",
    "bwd_iat_max",
    "bwd_iat_min",
    "bwd_iat_mean",
    "pkt_len_min",
    "pkt_len_max",
    "pkt_len_mean",
]

# Every (window skip, window length) pairing, with the skip varying slowest
options_matrix = [
    (skip, length)
    for skip in window_skips
    for length in window_lengths
]
print(f"Working with {len(options_matrix)} permutaitions")

In [None]:
# Create or Load dataset
from sampleddetection.samplers.window_sampler import DynamicWindowSampler
from sampleddetection.writers.convenience import save_to_csv
from sampleddetection.readers.readers import CSVReader

# Minimum number of flows to collect for each (window skip, window length) pair
min_necessary_flows = 20

# The sampler reads from the CSV at `csv_path`; the environment wraps the sampler
sampler = DynamicWindowSampler(Path(csv_path))
environment = Environment(sampler)



    
# Create it

    # Ensure that the dataset is balanced.

In [None]:
def generate_sessions(amount: int, ws: float, wl: float) -> List[SampledFlowSession]:
    """Collect flow sessions until they hold at least `amount` flows in total.

    The module-level `environment` is reset repeatedly with the given window
    skip and window length; the `flow_sesh` of each reset result is kept.

    Args:
        amount: Minimum combined number of flows across the returned sessions.
        ws: Window skip, forwarded to `environment.reset` as `winskip`.
        wl: Window length, forwarded to `environment.reset` as `winlen`.

    Returns:
        The collected sessions, in the order they were produced. The list is
        empty when `amount` is not positive.

    Note:
        The loop only ends once enough flows have been seen, so it keeps
        running for as long as resets produce sessions without flows.
    """
    cur_amnt = 0
    sessions = []
    # The context manager closes the bar on exit (including on error), so
    # finished inner bars do not accumulate in the notebook output.
    with tqdm(total=amount,desc=f'Generating ws:{ws}- wl{wl} flow',leave=False) as inner_bar:
        while cur_amnt < amount:
            flow_sesh = environment.reset(winskip=ws,winlen=wl).flow_sesh
            amnt_sesh_flows = len(flow_sesh.flows.keys())
            cur_amnt += amnt_sesh_flows
            sessions.append(flow_sesh)

            # Progress is counted in flows, not sessions.
            inner_bar.update(amnt_sesh_flows)
    return sessions

In [None]:

import csv

# Set random seeds before any sampling happens
np.random.seed(0)
random.seed(0)

# One entry per (window skip, window length) pair; each value is the list of
# CSV rows (as dicts) of that pair's dataset file.
flows = {}

for ws, wl in tqdm(options_matrix,desc='Creating datasets'):
    # NOTE(review): the second "ws:" in this key looks like a typo for "wl:".
    # It is kept unchanged because code outside this view may look entries up
    # by this exact key - confirm before renaming.
    flows_key = f"ws:{ws}-ws:{wl}"
    target_name = os.path.join(dataset_dir,dataset_filename.format(ws, wl))

    if os.path.exists(target_name):
        # Dataset already on disk: reuse it
        print(f"Loading {dataset_filename.format(ws, wl)} from {dataset_dir}")
    else:
        # Dataset missing: sample enough flows and write them out
        sessions = generate_sessions(min_necessary_flows,ws,wl)
        save_to_csv(sessions, target_name, desired_features=desired_features, overwrite=True)

    # Read the rows from disk in both cases so that a first run and a cached
    # rerun leave `flows` with the same contents. This assumes save_to_csv
    # writes to the path it is given, which the existence check above already
    # relies on.
    # `flows` is filled in place (not rebound) so entries from earlier
    # iterations are kept.
    with open(target_name, newline='') as f:
        flows[flows_key] = list(csv.DictReader(f))
    
