In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler, MinMaxScaler
import seaborn as sns
import matplotlib.pyplot as plt
import os
import time
from sklearn.utils.class_weight import compute_class_weight
import shap
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
import random

In [8]:
import pandas as pd

class CorrEncoder:
    """
    CorrEncoder: Correlation-filtered one hot encoder for high cardinality categorical columns.

    Each category of a column is turned into a binary indicator column, and the indicator is only
    kept when its correlation with the label column is strong enough. This keeps the number of one
    hot encoded columns small for columns with many distinct values.

    Initialisation:
        - data (pd.DataFrame): The dataset that contains the columns to encode and the label column.
          An 'attack_cat' column is dropped from the internal copy when present.
    """

    def __init__(self, data):
        # Work on a private copy so the caller's DataFrame is never modified.
        self.data = data.copy()
        # Drop the multi-class 'attack_cat' column; encode() only needs the label column.
        # errors='ignore' lets the encoder also accept data that has no 'attack_cat' column.
        self.data = self.data.drop(columns=['attack_cat'], errors='ignore')

    def encode(self, target_column, sparse_n, threshold, label_column='Label'):
        """
        encode: One hot encodes the categories of target_column that correlate with the label.

        Every category that occurs more than sparse_n times is converted to a binary indicator
        column (1 where the row has that category, 0 otherwise). The correlation between the
        indicator and the label column is computed with pd.Series.corr, and the indicator is kept
        when the absolute correlation is greater than threshold.

        The purpose of this function is to resolve the high cardinality problem in one hot encoding.
        The stored data is not modified, so encode can be called repeatedly in any order.

        Parameters:
            - target_column (string): The name of the column whose categories are encoded.
            - sparse_n (integer): A category must occur more than sparse_n times to be considered
              (deals with sparse categories).
            - threshold (float): An indicator is kept only when the absolute value of its
              correlation with the label is greater than this value.
            - label_column (string): The name of the label column to correlate against.
              Defaults to 'Label'.

        Returns:
            - ohe_df (pd.DataFrame): One column per kept category, named '<target_column>_<category>'
              and indexed like the input data. An empty DataFrame when no category passes.
        """

        # Compare categories as strings so values such as 80 and '80' count as the same category.
        # A local Series is used instead of overwriting self.data so numeric columns keep their dtype.
        category_values = self.data[target_column].astype(str)
        value_counts = category_values.value_counts()
        # Keep only the categories with more than sparse_n occurrences.
        categories = value_counts[value_counts > sparse_n].index.tolist()
        labels = self.data[label_column]
        ohe_list = []

        # Go through each sufficiently frequent category in the target column.
        for c in categories:
            # Binary indicator for the current category.
            indicator = (category_values == c).astype(int)
            correlation = indicator.corr(labels)

            # A NaN correlation compares False here, so such indicators are never kept.
            if abs(correlation) > threshold:
                indicator.name = f'{target_column}_{c}'
                ohe_list.append(indicator)
        print('Number of Encoded Features for', target_column)
        print(len(ohe_list))
        if ohe_list:
            return pd.concat(ohe_list, axis=1)

        # Return an empty frame instead of failing when the threshold filters everything out.
        print("No correlations exceed the threshold.")
        return pd.DataFrame()

In [9]:
def select_features(data, type_of, k, random_state=42):
    """
    select_features: Rank the features of a labelled dataset and return the names of the top k.

    Supported methods (type_of):
    - 'correlation' (alias 'corr'): Absolute correlation (pd.Series.corr) of each feature with 'Label'.
    - 'ft_importance': feature_importances_ of a RandomForestClassifier fitted on the data.
    - 'kbest' (alias 'k-best'): Select K-Best from scikit with the f_classif score function.

    Parameters:
        - data (pd.DataFrame): Dataset containing the feature columns plus 'attack_cat' and 'Label'.
          Both of those columns are excluded from the candidate features.
        - type_of (string): The ranking method, one of the names listed above.
        - k (integer): The number of features to return.
        - random_state (integer): Seed for the Random Forest used by 'ft_importance' so the ranking
          is reproducible. Ignored by the other methods.

    Returns:
        - top_k_features (list): The names of the selected features. For 'correlation' and
          'ft_importance' the list is ordered from strongest to weakest; for 'kbest' it follows
          the column order of the data.

    Raises:
        - ValueError: If type_of is not one of the supported methods.
    """

    X = data.drop(columns=['attack_cat', 'Label'])
    y = data['Label']

    if type_of in ('correlation', 'corr'):
        # dtype=float keeps nlargest usable even when there are no feature columns.
        corr_data = pd.Series({feature: X[feature].corr(y) for feature in X.columns}, dtype=float).abs()
        return corr_data.nlargest(k).index.tolist()

    if type_of == 'ft_importance':
        model = RandomForestClassifier(random_state=random_state)
        model.fit(X, y)
        feature_importances = pd.Series(model.feature_importances_, index=X.columns)
        return feature_importances.nlargest(k).index.tolist()

    if type_of in ('kbest', 'k-best'):
        # Never ask for more features than exist.
        selector = SelectKBest(score_func=f_classif, k=min(k, X.shape[1]))
        selector.fit(X, y)
        return X.columns[selector.get_support()].tolist()

    # Previously an unknown method fell through and returned None without any hint.
    raise ValueError(
        f"Unknown type_of {type_of!r}; expected 'correlation', 'ft_importance' or 'kbest'."
    )
                

In [10]:
def get_data(size, rs, threshold, downsample, split_method):
    """
    get_data: Load the four UNSW-NB15 CSV files, clean, downsample, encode and scale them, then
    split the result into a train and a validation set.

    Reads 'features2.csv' (column names, taken from its 'Name' column) and 'UNSW-NB15_1.csv' to
    'UNSW-NB15_4.csv' from the current working directory.

    Parameters:
        - size (float): The fraction of the data to use for the validation set (e.g. 0.2).
        - rs (int): The random seed used for downsampling, shuffling and choosing the slice.
        - threshold (float): The threshold for correlation when one hot encoding with CorrEncoder.
        - downsample (string, float, None): 'full' keeps as many Normal rows as there are threat
          rows in each file. A float is the fraction of eligible Normal rows to remove from each
          file (Normal rows directly before or after a Label == 1 row are never removed).
          None disables downsampling.
        - split_method (string): 'slice' takes one contiguous block of rows as the validation set,
          'shuffle' uses a random train_test_split.

    Returns:
        - train_data (pd.DataFrame): The train dataset with 'Label' and 'attack_cat'.
        - val_data (pd.DataFrame): The validation dataset with 'Label' and 'attack_cat'.

    Raises:
        - ValueError: If split_method is neither 'slice' nor 'shuffle'.
    """

    # Fail fast: previously an unknown split_method only surfaced as an UnboundLocalError after
    # the whole (slow) preprocessing pipeline had already run.
    if split_method not in ('slice', 'shuffle'):
        raise ValueError(f"Unknown split_method {split_method!r}; expected 'slice' or 'shuffle'.")

    feature_names = pd.read_csv('features2.csv')
    feature_names_list = feature_names['Name'].tolist()
    datasets = []

    # Create a list of datasets.
    for i in range(1, 5):
        df = pd.read_csv(f'UNSW-NB15_{i}.csv', header=None)
        df.columns = feature_names_list
        # Rows without an attack category are normal traffic.
        df.loc[df['attack_cat'].isnull(), 'attack_cat'] = 'Normal'
        datasets.append(df)

    # Process each dataset individually this can be increased to more datasets.
    for i in range(len(datasets)):
        df = datasets[i]

        # Clean the dataset
        length1 = len(df)
        df['attack_cat'] = df['attack_cat'].str.replace(r'\s+', '', regex=True)
        df['attack_cat'] = df['attack_cat'].str.replace('Backdoors', 'Backdoor')
        # Very sparse data.
        df = df.drop(columns=['ct_ftp_cmd', 'ct_flw_http_mthd', 'is_ftp_login'])

        # Remove rows whose source or destination port is hexadecimal ('0x...') or negative ('-...').
        bad_port = pd.Series(False, index=df.index)
        for port_column in ('sport', 'dsport'):
            port_text = df[port_column].astype(str)
            bad_port |= port_text.str.startswith('0x') | port_text.str.startswith('-')
        # .copy() gives an independent frame so the assignments below do not trigger
        # SettingWithCopyWarning.
        df = df[~bad_port].copy()
        for port_column in ('sport', 'dsport'):
            # Convert the whole column at once instead of calling pd.to_numeric per element.
            df[port_column] = pd.to_numeric(df[port_column])
        print(f"Filtered Rows (Cleaning): {length1 - len(df)}")

        # Full downsampling by matching Normal to Threat (Label).
        if downsample == 'full':
            threat_rows = df[df['attack_cat'] != 'Normal']
            num_threat_rows = len(threat_rows)
            print(df['attack_cat'].value_counts())
            normal_rows = df[df['attack_cat'] == 'Normal']
            # sample() raises when asked for more rows than exist, so cap at the available count.
            sampled_data = normal_rows.sample(n=min(num_threat_rows, len(normal_rows)), random_state=rs)
            df = pd.concat([threat_rows, sampled_data]).reset_index(drop=True)

        # Downsample by a given percentage.
        elif downsample is not None:
            # Only Normal rows that are not adjacent to a Label == 1 row may be removed.
            mask = (df['Label'].shift(-1) != 1) & (df['Label'].shift(1) != 1)
            normal_rows = df[(df['attack_cat'] == 'Normal') & mask]
            percentage_to_remove = int(len(normal_rows) * downsample)
            rows_to_remove = normal_rows.sample(n=percentage_to_remove, random_state=rs)
            df = df.drop(rows_to_remove.index)
            print(f"Downsampled Rows: {len(rows_to_remove)}")

        datasets[i] = df

    # Combine data.
    full_data = pd.concat(datasets).reset_index(drop=True)

    # Plain one hot encoding for the low cardinality categorical columns.
    categorical_columns = ['state', 'service']
    onehot_encoder = OneHotEncoder(sparse_output=False, dtype='float32')
    encoded_data = onehot_encoder.fit_transform(full_data[categorical_columns])
    encoded_df = pd.DataFrame(
        encoded_data,
        columns=onehot_encoder.get_feature_names_out(categorical_columns),
        index=full_data.index,
    )
    full_encoded = pd.concat([full_data.drop(columns=categorical_columns), encoded_df], axis=1)

    # Use onehot encoding on categorical values that share high correlation.
    # NOTE(review): the correlation filter and the scaler below are fitted on all rows before the
    # train/validation split, so validation rows influence the features - confirm this is intended.
    corr_encoder = CorrEncoder(full_encoded)
    ohe1 = corr_encoder.encode('dsport', 30, threshold)
    ohe2 = corr_encoder.encode('proto', 30, threshold)
    ohe3 = corr_encoder.encode('sport', 30, threshold)
    ohe4 = corr_encoder.encode('srcip', 30, threshold)
    ohe5 = corr_encoder.encode('dstip', 30, threshold)
    cols_to_drop = ['dsport', 'proto', 'sport', 'srcip', 'dstip']
    filtered_data = full_encoded.drop(columns=cols_to_drop)
    combined_data = pd.concat([filtered_data, ohe1, ohe2, ohe3, ohe4, ohe5], axis=1)

    # Scale every feature column; the two target columns are re-attached unscaled afterwards.
    df_features = combined_data.drop(columns=['attack_cat', 'Label'])
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(df_features)
    final_data = pd.DataFrame(scaled_data, columns=df_features.columns, index=combined_data.index)
    final_data['Label'] = combined_data['Label']
    final_data['attack_cat'] = combined_data['attack_cat']

    if split_method == 'slice':
        slice_size = int(size * len(final_data))
        # Use a generator seeded with rs so the chosen slice is reproducible; the module level
        # random.randrange used before ignored the seed.
        rng = random.Random(rs)
        val_start = rng.randrange(0, len(final_data) - 2 * slice_size)
        val_end = val_start + slice_size
        val_data = final_data.iloc[val_start:val_end]
        train_data = final_data.drop(val_data.index)
    else:
        # split_method == 'shuffle' (validated at the top of the function).
        train_data, val_data = train_test_split(final_data, test_size=size, random_state=rs)

    return train_data, val_data

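# Downsample by 20% of the Normal labels. - 80% of the Normal labels remain.
- NOTE: the `get_data` call in the next cell is currently set to `downsample=0.99`, which removes 99% of the eligible Normal rows; use `downsample=0.2` to reproduce the 20% setting described here.
- Lower accuracy on some labels.
- Takes a while to encode.
- I will run the full data tomorrow morning as well; it should take about 70 minutes.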

In [11]:
# Build the train/validation split: 20% validation, seed 42, correlation threshold 0.1,
# 99% of the eligible Normal rows removed, shuffled split.
split_config = {
    'size': 0.2,
    'rs': 42,
    'threshold': 0.1,
    'downsample': 0.99,
    'split_method': 'shuffle',
}
train_data, val_data = get_data(**split_config)

  df = pd.read_csv(f'UNSW-NB15_{i}.csv', header=None)
  df = pd.read_csv(f'UNSW-NB15_{i}.csv', header=None)


Filtered Rows (Cleaning): 67
Downsampled Rows: 663712
Filtered Rows (Cleaning): 61
Downsampled Rows: 627633
Filtered Rows (Cleaning): 105
Downsampled Rows: 511922
Filtered Rows (Cleaning): 75
Downsampled Rows: 331833
Number of Encoded Features for dsport
5
Number of Encoded Features for proto
3
Number of Encoded Features for sport
3
Number of Encoded Features for srcip
19
Number of Encoded Features for dstip
19


In [13]:
# Show the class balance of both splits: first per attack category, then per binary label.
for target_column in ('attack_cat', 'Label'):
    for split in (train_data, val_data):
        print(split[target_column].value_counts())

attack_cat
Generic           172709
Normal             66743
Exploits           35383
Fuzzers            19340
DoS                13000
Reconnaissance     11198
Analysis            2143
Backdoor            1857
Shellcode           1197
Worms                141
Name: count, dtype: int64
attack_cat
Generic           42772
Normal            16613
Exploits           9142
Fuzzers            4906
DoS                3353
Reconnaissance     2789
Analysis            534
Backdoor            472
Shellcode           314
Worms                33
Name: count, dtype: int64
Label
1    256968
0     66743
Name: count, dtype: int64
Label
1    64315
0    16613
Name: count, dtype: int64


In [178]:
# Separate the feature matrix from the binary label and the attack category for both splits.
target_columns = ['attack_cat', 'Label']
X_train, y_train, train_cat = train_data.drop(columns=target_columns), train_data['Label'], train_data['attack_cat']
X_val, y_val, val_cat = val_data.drop(columns=target_columns), val_data['Label'], val_data['attack_cat']

# Fit a seeded Random Forest on the binary label and predict the validation set.
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train, y_train)
y_pred = rf_classifier.predict(X_val)

# Report the overall binary metrics followed by the full classification report.
for metric_label, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(metric_label, metric_fn(y_val, y_pred))
print("\nClassification Report:\n", classification_report(y_val, y_pred))

Accuracy: 0.9963574447095205
Precision: 0.9856518640180979
Recall: 0.9905016972377068
F1 Score: 0.9880708294501398

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    357460
           1       0.99      0.99      0.99     64222

    accuracy                           1.00    421682
   macro avg       0.99      0.99      0.99    421682
weighted avg       1.00      1.00      1.00    421682



In [179]:
# Per attack category evaluation of the binary predictions.
# The predictions are attached to a separate frame instead of being written into val_data:
# mutating val_data would add 'predicted_label' and 'true_label' as columns, and re-running the
# training cell afterwards would then feed them (including the true label) to the model as features.
val_results = val_data.assign(predicted_label=y_pred, true_label=y_val)
attack_categories = val_results['attack_cat'].unique()
for category in attack_categories:
    print(f"Evaluation for attack category: {category}")
    category_data = val_results[val_results['attack_cat'] == category]
    # Only reachable for a missing (NaN) category, which never compares equal to itself.
    if len(category_data) == 0:
        print(f"No samples found for category: {category}")
        print("-" * 50)
        continue
    y_true_category = category_data['true_label']
    y_pred_category = category_data['predicted_label']
    correct_predictions = (y_true_category == y_pred_category).sum()
    total_samples = len(y_true_category)
    accuracy = accuracy_score(y_true_category, y_pred_category)
    # zero_division=0 reports 0 instead of warning when a metric's denominator is zero
    # (e.g. a category whose rows all have true label 0).
    precision = precision_score(y_true_category, y_pred_category, zero_division=0)
    recall = recall_score(y_true_category, y_pred_category, zero_division=0)
    f1 = f1_score(y_true_category, y_pred_category, zero_division=0)
    print(f"Total samples: {total_samples}")
    print(f"Correct predictions: {correct_predictions}")
    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-Score: {f1}")
    print("\nClassification Report:\n", classification_report(y_true_category, y_pred_category, zero_division=0))
    print("-" * 50)

Evaluation for attack category: Normal
Total samples: 357460
Correct predictions: 356534
Accuracy: 0.997409500363677
Precision: 0.0
Recall: 0.0
F1-Score: 0.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    357460
           1       0.00      0.00      0.00         0

    accuracy                           1.00    357460
   macro avg       0.50      0.50      0.50    357460
weighted avg       1.00      1.00      1.00    357460

--------------------------------------------------
Evaluation for attack category: Generic
Total samples: 43233
Correct predictions: 43225
Accuracy: 0.9998149561677422
Precision: 1.0
Recall: 0.9998149561677422
F1-Score: 0.9999074695227741

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      1.00      1.00     43233

    accuracy                           1.00     43233
   macro

# Downsample by 50% of the Normal labels. - 50% of the Normal labels remain.
- Lower accuracy on some labels.

In [180]:
# Rebuild the train/validation split with 50% of the eligible Normal rows removed
# (20% validation, seed 42, correlation threshold 0.1, shuffled split).
split_config = {
    'size': 0.2,
    'rs': 42,
    'threshold': 0.1,
    'downsample': 0.5,
    'split_method': 'shuffle',
}
train_data, val_data = get_data(**split_config)

  df = pd.read_csv(f'UNSW-NB15_{i}.csv', header=None)
  df = pd.read_csv(f'UNSW-NB15_{i}.csv', header=None)


Filtered Rows (Cleaning): 67
Downsampled Rows: 335208
Filtered Rows (Cleaning): 61
Downsampled Rows: 316986
Filtered Rows (Cleaning): 105
Downsampled Rows: 258546
Filtered Rows (Cleaning): 75
Downsampled Rows: 167592
Number of Encoded Features for dsport
5
Number of Encoded Features for proto
3
Number of Encoded Features for sport
3
Number of Encoded Features for srcip
14
Number of Encoded Features for dstip
20


In [181]:
# Number of training rows per attack category.
train_category_counts = train_data['attack_cat'].value_counts()
print(train_category_counts)

attack_cat
Normal            912105
Generic           172501
Exploits           35464
Fuzzers            19448
DoS                13047
Reconnaissance     11153
Analysis            2172
Backdoor            1860
Shellcode           1226
Worms                149
Name: count, dtype: int64


In [182]:
# Separate the feature matrix from the binary label and the attack category for both splits.
target_columns = ['attack_cat', 'Label']
X_train, y_train, train_cat = train_data.drop(columns=target_columns), train_data['Label'], train_data['attack_cat']
X_val, y_val, val_cat = val_data.drop(columns=target_columns), val_data['Label'], val_data['attack_cat']

# Fit a seeded Random Forest on the binary label and predict the validation set.
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train, y_train)
y_pred = rf_classifier.predict(X_val)

# Report the overall binary metrics followed by the full classification report.
for metric_label, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(metric_label, metric_fn(y_val, y_pred))
print("\nClassification Report:\n", classification_report(y_val, y_pred))

Accuracy: 0.9948405991473988
Precision: 0.9840936791273894
Recall: 0.9925773773399935
F1 Score: 0.9883173225906414

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    228019
           1       0.98      0.99      0.99     64263

    accuracy                           0.99    292282
   macro avg       0.99      0.99      0.99    292282
weighted avg       0.99      0.99      0.99    292282



In [183]:
# Per attack category evaluation of the binary predictions.
# The predictions are attached to a separate frame instead of being written into val_data:
# mutating val_data would add 'predicted_label' and 'true_label' as columns, and re-running the
# training cell afterwards would then feed them (including the true label) to the model as features.
val_results = val_data.assign(predicted_label=y_pred, true_label=y_val)
attack_categories = val_results['attack_cat'].unique()
for category in attack_categories:
    print(f"Evaluation for attack category: {category}")
    category_data = val_results[val_results['attack_cat'] == category]
    # Only reachable for a missing (NaN) category, which never compares equal to itself.
    if len(category_data) == 0:
        print(f"No samples found for category: {category}")
        print("-" * 50)
        continue
    y_true_category = category_data['true_label']
    y_pred_category = category_data['predicted_label']
    correct_predictions = (y_true_category == y_pred_category).sum()
    total_samples = len(y_true_category)
    accuracy = accuracy_score(y_true_category, y_pred_category)
    # zero_division=0 reports 0 instead of warning when a metric's denominator is zero
    # (e.g. a category whose rows all have true label 0).
    precision = precision_score(y_true_category, y_pred_category, zero_division=0)
    recall = recall_score(y_true_category, y_pred_category, zero_division=0)
    f1 = f1_score(y_true_category, y_pred_category, zero_division=0)
    print(f"Total samples: {total_samples}")
    print(f"Correct predictions: {correct_predictions}")
    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-Score: {f1}")
    print("\nClassification Report:\n", classification_report(y_true_category, y_pred_category, zero_division=0))
    print("-" * 50)

Evaluation for attack category: Normal
Total samples: 228019
Correct predictions: 226988
Accuracy: 0.9954784469715243
Precision: 0.0
Recall: 0.0
F1-Score: 0.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    228019
           1       0.00      0.00      0.00         0

    accuracy                           1.00    228019
   macro avg       0.50      0.50      0.50    228019
weighted avg       1.00      1.00      1.00    228019

--------------------------------------------------
Evaluation for attack category: Generic
Total samples: 42980
Correct predictions: 42978
Accuracy: 0.999953466728711
Precision: 1.0
Recall: 0.999953466728711
F1-Score: 0.9999767328230066

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      1.00      1.00     42980

    accuracy                           1.00     42980
   macro 

# Downsample by 90% of the Normal labels. - 10% of the Normal labels remain.
- Raises accuracy to above 99% for all labels except Normal. Could this be a problem too?
- Slightly decreases accuracy on the Normal labels.

In [184]:
# Rebuild the train/validation split with 90% of the eligible Normal rows removed
# (20% validation, seed 42, correlation threshold 0.1, shuffled split).
split_config = {
    'size': 0.2,
    'rs': 42,
    'threshold': 0.1,
    'downsample': 0.9,
    'split_method': 'shuffle',
}
train_data, val_data = get_data(**split_config)

  df = pd.read_csv(f'UNSW-NB15_{i}.csv', header=None)
  df = pd.read_csv(f'UNSW-NB15_{i}.csv', header=None)


Filtered Rows (Cleaning): 67
Downsampled Rows: 603375
Filtered Rows (Cleaning): 61
Downsampled Rows: 570575
Filtered Rows (Cleaning): 105
Downsampled Rows: 465383
Filtered Rows (Cleaning): 75
Downsampled Rows: 301666
Number of Encoded Features for dsport
5
Number of Encoded Features for proto
3
Number of Encoded Features for sport
3
Number of Encoded Features for srcip
17
Number of Encoded Features for dstip
21


In [185]:
# Number of validation rows per attack category (displayed as the cell result).
val_category_counts = val_data['attack_cat'].value_counts()
val_category_counts

attack_cat
Normal            55342
Generic           43334
Exploits           8882
Fuzzers            4711
DoS                3308
Reconnaissance     2794
Analysis            570
Backdoor            467
Shellcode           305
Worms                35
Name: count, dtype: int64

In [186]:
# Separate the feature matrix from the binary label and the attack category for both splits.
target_columns = ['attack_cat', 'Label']
X_train, y_train, train_cat = train_data.drop(columns=target_columns), train_data['Label'], train_data['attack_cat']
X_val, y_val, val_cat = val_data.drop(columns=target_columns), val_data['Label'], val_data['attack_cat']

# Fit a seeded Random Forest on the binary label and predict the validation set.
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train, y_train)
y_pred = rf_classifier.predict(X_val)

# Report the overall binary metrics followed by the full classification report.
for metric_label, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(metric_label, metric_fn(y_val, y_pred))
print("\nClassification Report:\n", classification_report(y_val, y_pred))

Accuracy: 0.9926929886094131
Precision: 0.9873728462494438
Recall: 0.9991926218054219
F1 Score: 0.9932475710548452

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.99      0.99     55342
           1       0.99      1.00      0.99     64406

    accuracy                           0.99    119748
   macro avg       0.99      0.99      0.99    119748
weighted avg       0.99      0.99      0.99    119748



In [187]:
# Per attack category evaluation of the binary predictions.
# The predictions are attached to a separate frame instead of being written into val_data:
# mutating val_data would add 'predicted_label' and 'true_label' as columns, and re-running the
# training cell afterwards would then feed them (including the true label) to the model as features.
val_results = val_data.assign(predicted_label=y_pred, true_label=y_val)
attack_categories = val_results['attack_cat'].unique()
for category in attack_categories:
    print(f"Evaluation for attack category: {category}")
    category_data = val_results[val_results['attack_cat'] == category]
    # Only reachable for a missing (NaN) category, which never compares equal to itself.
    if len(category_data) == 0:
        print(f"No samples found for category: {category}")
        print("-" * 50)
        continue
    y_true_category = category_data['true_label']
    y_pred_category = category_data['predicted_label']
    correct_predictions = (y_true_category == y_pred_category).sum()
    total_samples = len(y_true_category)
    accuracy = accuracy_score(y_true_category, y_pred_category)
    # zero_division=0 reports 0 instead of warning when a metric's denominator is zero
    # (e.g. a category whose rows all have true label 0).
    precision = precision_score(y_true_category, y_pred_category, zero_division=0)
    recall = recall_score(y_true_category, y_pred_category, zero_division=0)
    f1 = f1_score(y_true_category, y_pred_category, zero_division=0)
    print(f"Total samples: {total_samples}")
    print(f"Correct predictions: {correct_predictions}")
    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-Score: {f1}")
    print("\nClassification Report:\n", classification_report(y_true_category, y_pred_category, zero_division=0))
    print("-" * 50)

Evaluation for attack category: Generic
Total samples: 43334
Correct predictions: 43332
Accuracy: 0.9999538468638944
Precision: 1.0
Recall: 0.9999538468638944
F1-Score: 0.9999769228994069

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      1.00      1.00     43334

    accuracy                           1.00     43334
   macro avg       0.50      0.50      0.50     43334
weighted avg       1.00      1.00      1.00     43334

--------------------------------------------------
Evaluation for attack category: Normal
Total samples: 55342
Correct predictions: 54519
Accuracy: 0.9851288352426728
Precision: 0.0
Recall: 0.0
F1-Score: 0.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.99      0.99     55342
           1       0.00      0.00      0.00         0

    accuracy                           0.99     55342
   macro 

# Get Feature importances using different measures.

In [188]:
# Rank the training features with each selection method (top 30 each) and print the three lists
# in the order: correlation, Random Forest importance, Select K-Best.
features1, features2, features3 = (
    select_features(train_data, ranking_method, 30)
    for ranking_method in ('correlation', 'ft_importance', 'kbest')
)

for selected_features in (features1, features2, features3):
    print(selected_features)

['Label', 'sttl', 'ct_state_ttl', 'state_INT', 'ct_dst_src_ltm', 'ct_dst_sport_ltm', 'proto_tcp', 'swin', 'dwin', 'dmeansz', 'state_FIN', 'ct_src_dport_ltm', 'srcip_175.45.176.1', 'ct_srv_dst', 'ct_srv_src', 'srcip_175.45.176.3', 'dstip_149.171.126.18', 'Dload', 'Ltime', 'Stime', 'ct_src_ ltm', 'ct_dst_ltm', 'service_dns', 'state_CON', 'dsport_53', 'dtcpb', 'stcpb', 'sport_1043', 'proto_udp', 'srcip_175.45.176.0']
['Label', 'ct_state_ttl', 'sttl', 'sbytes', 'dbytes', 'dttl', 'dmeansz', 'dur', 'Dload', 'state_INT', 'Dpkts', 'tcprtt', 'smeansz', 'ackdat', 'Sload', 'ct_dst_sport_ltm', 'synack', 'ct_srv_dst', 'srcip_149.171.126.18', 'dloss', 'Djit', 'ct_src_dport_ltm', 'ct_srv_src', 'Sintpkt', 'dstip_175.45.176.3', 'Dintpkt', 'state_FIN', 'sport_0', 'Spkts', 'dstip_175.45.176.1']
['sttl', 'Dload', 'swin', 'dwin', 'stcpb', 'dtcpb', 'dmeansz', 'Stime', 'Ltime', 'ct_state_ttl', 'ct_srv_src', 'ct_srv_dst', 'ct_dst_ltm', 'ct_src_ ltm', 'ct_src_dport_ltm', 'ct_dst_sport_ltm', 'ct_dst_src_ltm', '

  f = msb / msw
