In [10]:
import pandas as pd
import numpy as np
import glob
import os
from sys import platform
import datetime

# THIS LABELLING SCRIPT IS USED TO LABEL THE CORRECTED VERSION OF CIC-IDS-2017. FOR DETAILS CONSULT OUR WEBSITE:
# https://intrusion-detection.distrinet-research.be/CNS2022/index.html

# Let pandas print up to 100 rows before truncating (affects the value_counts() summaries printed below).
pd.set_option('display.max_rows', 100)

# Enter the path that contains the CSV files that were generated by the CICFlowMeter tool. There should be five CSV
# files in total, one per day.
# NOTE: the per-day file names are appended with plain string concatenation (DATASET_PATH + "<file name>"), so a
# non-empty path must end with a path separator.
DATASET_PATH = ""

# Enter the output path for the fully labelled CSV files
# NOTE: the output file names are appended with plain string concatenation as well, so a non-empty path must end
# with a path separator.
OUTPUT_PATH = ""

# If set to true, a column is added at the front of the CSV with line numbers
# (label_rest_as_benign_and_write_csv then writes a 1-based index column named 'id').
print_index = True

In [11]:
# Basic preprocessing before getting started on labelling.
# Deletes rows with "Infinity" and NaNs, converts "Timestamp" to Pandas Datetime, and converts all necessary columns to
# numeric values
def format_csv_for_labelling(df):
    """Return a cleaned copy of a raw flow DataFrame, ready for labelling.

    Steps, in order:
      1. every cell equal to the string 'Infinity' is replaced by NaN;
      2. the 'Timestamp' column is parsed with ``pd.to_datetime``;
      3. every column except 'Flow ID', 'Timestamp', 'Src IP', 'Dst IP' and 'Label' is converted with
         ``pd.to_numeric(errors='coerce')``, so values that cannot be parsed become NaN;
      4. every row that contains at least one NaN/NaT is dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw flows. Must contain a 'Timestamp' column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the surviving rows; their original index labels are kept (the index is not reset).
        The frame passed in is not modified, because ``replace`` returns a new object that is rebound to ``df``.
    """
    # Columns that hold identifiers / text / timestamps and must not be coerced to numbers.
    non_numeric_columns = ('Flow ID', 'Timestamp', 'Src IP', 'Dst IP', 'Label')

    df = df.replace('Infinity', np.nan)
    df['Timestamp'] = pd.to_datetime(df['Timestamp'])

    for column in df.columns:
        if column not in non_numeric_columns:
            df[column] = pd.to_numeric(df[column], errors='coerce')

    # A single dropna() is enough: the previous stand-alone `df.dropna()` statement discarded its result and only
    # cost an extra pass over the data.
    return df.dropna()

def read_csvs_from_path_and_reformat(path):
    """Load one flow CSV, clean it and fix the integer column widths.

    Despite the plural in the name, exactly one file is read per call. The file is decoded as cp1252, passed
    through ``format_csv_for_labelling``, given an "Attempted Category" column initialised to -1, and the columns
    listed below are cast to fixed-width integer dtypes.

    Parameters
    ----------
    path : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame. A KeyError is raised if one of the listed columns is missing.
    """
    flows = format_csv_for_labelling(pd.read_csv(path, encoding='cp1252'))
    print("labels after pre-processing:", flows["Label"].value_counts())

    # -1 marks "not an attempted flow"; label_flows overwrites it where applicable.
    flows["Attempted Category"] = -1

    # Target dtype -> columns, processed in this order (int64 first, then int32, then int16).
    columns_by_dtype = (
        ('int64', (
            "Total TCP Flow Time",
        )),
        ('int32', (
            "Src Port", "Dst Port", "Flow Duration", "Total Fwd Packet", "Total Bwd packets",
            "Total Length of Fwd Packet", "Total Length of Bwd Packet", "Fwd Packet Length Max",
            "Fwd Packet Length Min", "Bwd Packet Length Max", "Bwd Packet Length Min", "Flow IAT Max",
            "Flow IAT Min", "Fwd IAT Total", "Fwd IAT Max", "Fwd IAT Min", "Bwd IAT Total",
            "Bwd IAT Max", "Bwd IAT Min", "Fwd PSH Flags", "Bwd PSH Flags", "Fwd URG Flags", "Bwd URG Flags",
            "Packet Length Min", "Packet Length Max", "FIN Flag Count", "SYN Flag Count", "RST Flag Count",
            "PSH Flag Count", "ACK Flag Count", "URG Flag Count", "CWR Flag Count", "ECE Flag Count",
            "Subflow Fwd Packets", "Subflow Fwd Bytes", "Subflow Bwd Packets", "Subflow Bwd Bytes",
            "FWD Init Win Bytes", "Bwd Init Win Bytes", "Fwd Act Data Pkts", "Fwd Seg Size Min", "Active Max",
            "Active Min", "Idle Max", "Idle Min",
        )),
        ('int16', (
            "Fwd Header Length", "Bwd Header Length", "ICMP Code", "ICMP Type",
        )),
    )

    for dtype_name, column_names in columns_by_dtype:
        for column_name in column_names:
            flows[column_name] = flows[column_name].astype(dtype_name)

    return flows

# Main labelling function. Only used for labelling Malicious and Malicious - Attempted flows.
# Timestamps are in NANOSECONDS (!) Unix time. Note that the CSV files are in the UTC timezone.
# df = dataframe with flows. Note that labelling happens inplace on the 'df' parameter, and so this function doesn't return anything
# label = the label that will be given to flows matching the criteria specified in the function
# additional_filters = add any additional constraints that cannot be covered by the already provided function arguments
# see examples in the actual labelling logic for correct syntax
# attempted_category = please consult our website (https://intrusion-detection.distrinet-research.be/CNS2022/Tools_Documentation.html)
# for details on how the "Attempted" categories are defined.
# payload_filter = When set to true, this will automatically add a constraint ["Total Length of Fwd Packet"] == 0. Note that
# the Attempted label and category still need to be specified manually
def label_flows(df, label, attack_start_time_nanoseconds, attack_end_time_nanoseconds, src_ip_list=None,
                dst_ip_list=None, src_port_list=None, dst_port_list=None, additional_filters=None,
                attempted_category=-1, payload_filter=False):
    """Assign `label` and `attempted_category` to every flow in `df` that matches all given criteria.

    The frame is modified in place and nothing is returned. Matching flows are overwritten regardless of the
    label they already carry, so when several calls match the same flow the last call wins.

    Parameters
    ----------
    df : pandas.DataFrame
        Flows to label. Needs the columns "Timestamp", "Label" and "Attempted Category", plus "Src IP", "Dst IP",
        "Src Port", "Dst Port" and "Total Length of Fwd Packet" when the corresponding criterion is used.
    label : str
        Value written to the "Label" column of matching flows.
    attack_start_time_nanoseconds, attack_end_time_nanoseconds : int
        Unix timestamps in nanoseconds. A flow matches when start <= Timestamp <= end (both bounds inclusive).
    src_ip_list, dst_ip_list, src_port_list, dst_port_list : list, optional
        When given, the respective column must contain one of the listed values. None disables the criterion.
    additional_filters : iterable of boolean pandas.Series, optional
        Extra boolean masks (indexed like `df`) that are AND-ed into the selection. None or empty means no
        extra constraint.
    attempted_category : int, default -1
        Value written to the "Attempted Category" column of matching flows.
    payload_filter : bool, default False
        When True, additionally require "Total Length of Fwd Packet" == 0. The label and attempted category
        still have to be passed explicitly.
    """
    attack_start_datetime = pd.to_datetime(attack_start_time_nanoseconds, unit='ns')
    attack_end_datetime = pd.to_datetime(attack_end_time_nanoseconds, unit='ns')

    # The time window is always applied, so it seeds the mask directly; the result is a boolean Series on
    # df.index for any number of rows.
    mask = (df["Timestamp"] >= attack_start_datetime) & (df["Timestamp"] <= attack_end_datetime)

    if src_ip_list is not None:
        mask &= df["Src IP"].isin(src_ip_list)
    if dst_ip_list is not None:
        mask &= df["Dst IP"].isin(dst_ip_list)

    if src_port_list is not None:
        mask &= df["Src Port"].isin(src_port_list)
    if dst_port_list is not None:
        mask &= df["Dst Port"].isin(dst_port_list)

    if payload_filter:
        mask &= (df["Total Length of Fwd Packet"] == 0)

    if additional_filters is not None:
        for extra_filter in additional_filters:
            mask &= extra_filter

    # Write through df.loc instead of df[col].mask(..., inplace=True): the latter updates a column object obtained
    # by chained indexing, which does not propagate to `df` when pandas Copy-on-Write is active.
    df.loc[mask, "Label"] = label
    df.loc[mask, "Attempted Category"] = attempted_category

# This function is called when all labelling of malicious flows is completed. Anything that has not yet received a label
# so far is labelled as Benign.
def label_rest_as_benign_and_write_csv(df, file_to_write):
    """Label all remaining flows as BENIGN, print label statistics and write the frame to CSV.

    `df` is modified in place: the "Label" column is updated and, when the module-level flag `print_index` is
    true, the index is replaced by a 1-based index named 'id'.

    Parameters
    ----------
    df : pandas.DataFrame
        Flows with at least the columns "Label", "Flow ID" and "Attempted Category".
    file_to_write : str
        Destination path of the CSV file.
    """
    # Write through df.loc instead of df["Label"].mask(..., inplace=True): the chained in-place form does not
    # update `df` when pandas Copy-on-Write is active.
    df.loc[df["Label"] == "NeedManualLabel", "Label"] = "BENIGN"

    # Relabel artefact flows with [Flow ID] = '8.0.6.4-8.6.0.1-0-0-0' as "BENIGN", whatever label they received
    # before.
    df.loc[df["Flow ID"] == '8.0.6.4-8.6.0.1-0-0-0', "Label"] = "BENIGN"

    print("label count after labelling:\r\n", df["Label"].value_counts())
    print("Attempted Category count after labelling:\r\n", df["Attempted Category"].value_counts())

    # Adds line numbers in the first column if print_index is set to true
    if print_index:
        # Rows were dropped during pre-processing, so renumber them consecutively starting at 1.
        df.reset_index(inplace=True, drop=True)
        df.index += 1
        df.index.name = 'id'
        df.to_csv(file_to_write)
    else:
        df.to_csv(file_to_write, index=False)


In [12]:
# Monday: no label_flows call is made for this day, so every flow still carrying the "NeedManualLabel" placeholder
# is written out as BENIGN.
monday_df = read_csvs_from_path_and_reformat(DATASET_PATH + "Monday-WorkingHours.pcap_Flow.csv")

label_rest_as_benign_and_write_csv(monday_df, OUTPUT_PATH + "monday.csv")

labels after pre-processing: NeedManualLabel    371624
Name: Label, dtype: int64
label count after labelling:
 BENIGN    371624
Name: Label, dtype: int64
Attempted Category count after labelling:
 -1    371624
Name: Attempted Category, dtype: int64


In [13]:
#--------------------+
# TUESDAY 04-07-2017 |
#--------------------+

# Labels the FTP-Patator and SSH-Patator flows of Tuesday and writes tuesday.csv.
# label_flows overwrites "Label" and "Attempted Category" on every flow it matches, so when a flow matches several
# calls the last call wins. Each attack is therefore labelled first and its "- Attempted" subsets are relabelled
# afterwards; do not reorder the calls.
# The additional_filters masks are computed from tuesday_df at the call site, before label_flows runs.

tuesday_df = read_csvs_from_path_and_reformat(DATASET_PATH + "Tuesday-WorkingHours.pcap_Flow.csv")

# FTP-PATATOR
# -----------

# All attacker -> victim flows to port 21 inside the attack window
label_flows(tuesday_df, "FTP-Patator", 1499170672838272000, 1499174416931403000, ["172.16.0.1"],
            ["192.168.10.50"], dst_port_list=[21])

# Default payload filter
# (same selection, restricted to flows with "Total Length of Fwd Packet" == 0)
label_flows(tuesday_df, "FTP-Patator - Attempted", 1499170672838272000, 1499174416931403000, ["172.16.0.1"],
            ["192.168.10.50"], dst_port_list=[21], payload_filter=True, attempted_category=0)

# Flows from source port 52108 are singled out as Attempted category 2
label_flows(tuesday_df, "FTP-Patator - Attempted", 1499170672838272000, 1499174416931403000, ["172.16.0.1"],
            ["192.168.10.50"], dst_port_list=[21], additional_filters=[(tuesday_df["Src Port"] == 52108)],
            attempted_category=2)

# SSH-Patator
# -----------

# All attacker -> victim flows to port 22 inside the attack window
label_flows(tuesday_df, "SSH-Patator", 1499188141049616000, 1499195059018486000, ["172.16.0.1"],
            ["192.168.10.50"], dst_port_list=[22])

# Payload filter: forward payload length == 0
label_flows(tuesday_df, "SSH-Patator - Attempted", 1499188141049616000, 1499195059018486000, ["172.16.0.1"],
            ["192.168.10.50"], dst_port_list=[22], payload_filter=True, attempted_category=0)

# At most 32 bytes of forward payload and no backward payload. This also matches the zero-payload flows of the
# previous call, whose category is thereby overwritten from 0 to 3.
label_flows(tuesday_df, "SSH-Patator - Attempted", 1499188141049616000, 1499195059018486000, ["172.16.0.1"],
            ["192.168.10.50"], dst_port_list=[22], additional_filters=
            [
                (tuesday_df["Total Length of Fwd Packet"] <= 32) & (tuesday_df["Total Length of Bwd Packet"] == 0)
            ], attempted_category=3)

label_rest_as_benign_and_write_csv(tuesday_df, OUTPUT_PATH + "tuesday.csv")

# Drop the reference to the frame (presumably so its memory can be reclaimed before the next day is loaded)
tuesday_df = None

labels after pre-processing: NeedManualLabel    322078
Name: Label, dtype: int64
label count after labelling:
 BENIGN                     315106
FTP-Patator                  3972
SSH-Patator                  2961
SSH-Patator - Attempted        27
FTP-Patator - Attempted        12
Name: Label, dtype: int64
Attempted Category count after labelling:
 -1    322039
 3        27
 0        10
 2         2
Name: Attempted Category, dtype: int64


In [17]:
#----------------------+
# WEDNESDAY 05-07-2017 |
#----------------------+

# Labels the DoS (Slowloris, Slowhttptest, Hulk, GoldenEye) and Heartbleed flows of Wednesday and writes
# wednesday.csv.
# label_flows overwrites "Label" and "Attempted Category" on every flow it matches, so when a flow matches several
# calls the last call wins; do not reorder the calls within an attack section.
# The additional_filters masks are computed from wednesday_df at the call site, before label_flows runs.

wednesday_df = read_csvs_from_path_and_reformat(DATASET_PATH + "Wednesday-WorkingHours.pcap_Flow.csv")

# DoS Slowloris
# -------------

# Attack flows: everything to port 80 except the source ports that are handled separately below
label_flows(wednesday_df, "DoS Slowloris", 1499258926211817000, 1499260236498684000, ["172.16.0.1"],
            ["192.168.10.50"], dst_port_list=[80], additional_filters=[
        ~(wednesday_df["Src Port"].isin([33358, 33360, 33362, 54114]))
    ])

# port 33358, 33360 and 33362 contain attack teardown flows
label_flows(wednesday_df, "DoS Slowloris - Attempted", 1499258926211817000, 1499260236498684000, ["172.16.0.1"],
            ["192.168.10.50"], src_port_list=[33358, 33360, 33362], dst_port_list=[80], attempted_category=2)

#Payload filter (order is important, this part needs to come before Attempted category 6)
label_flows(wednesday_df, "DoS Slowloris - Attempted", 1499258926211817000, 1499260236498684000, ["172.16.0.1"],
            ["192.168.10.50"], dst_port_list=[80], attempted_category=0, payload_filter=True, additional_filters=[
        ~(wednesday_df["Src Port"].isin([33358, 33360, 33362, 54114]))
    ])

#Target unresponsive because of DoS, no payloads in these flows
# Note the reversed direction in this call: source is 192.168.10.50 port 80, destination is 172.16.0.1.
label_flows(wednesday_df, "DoS Slowloris - Attempted", 1499258926211817000, 1499260236498684000,
            ["192.168.10.50"], ["172.16.0.1"], src_port_list=[80], attempted_category=6, additional_filters=[
        ~(wednesday_df["Dst Port"].isin([33358, 33360, 33362, 54114])) & (wednesday_df["Total Length of Bwd Packet"] == 0)
        & (wednesday_df["Flow Duration"] >= 199800)
    ])

# Artefact likely from authors checking the webserver
label_flows(wednesday_df, "DoS Slowloris - Attempted", 1499258926211817000, 1499260236498684000, ["172.16.0.1"],
            ["192.168.10.50"], src_port_list=[54114], dst_port_list=[80], attempted_category=4)

# DoS Slowhttptest
# ----------------

# Attack flows: everything to port 80 except source port 33372 (startup artefact, labelled below)
label_flows(wednesday_df, "DoS Slowhttptest", 1499260537936810000, 1499261869331517000, ["172.16.0.1"],
            ["192.168.10.50"], dst_port_list=[80], additional_filters=[
        ~(wednesday_df["Src Port"].isin([33372]))])


# Attack startup artefact
label_flows(wednesday_df, "DoS Slowhttptest - Attempted", 1499260537936810000, 1499261869331517000, ["172.16.0.1"],
            ["192.168.10.50"], src_port_list=[33372], dst_port_list=[80], attempted_category=2)

# Payload filter
label_flows(wednesday_df, "DoS Slowhttptest - Attempted", 1499260537936810000, 1499261869331517000, ["172.16.0.1"],
            ["192.168.10.50"], dst_port_list=[80], attempted_category=0, payload_filter=True, additional_filters=[
        ~(wednesday_df["Src Port"].isin([33372, 37670]))])

# Retransmissions because target web server is brought down
# (a subset of the payload-filter selection above: its category is overwritten from 0 to 6)
label_flows(wednesday_df, "DoS Slowhttptest - Attempted", 1499260537936810000, 1499261869331517000, ["172.16.0.1"],
            ["192.168.10.50"], dst_port_list=[80], attempted_category=6, additional_filters=[
        ~(wednesday_df["Src Port"].isin([33372, 37670])) & (wednesday_df["Total Length of Fwd Packet"] == 0) &
        (wednesday_df["Flow Duration"] >= 199984) & (wednesday_df["Total Bwd packets"] == 0)
    ]
    )

# Artefact from authors likely checking the webserver
label_flows(wednesday_df, "DoS Slowhttptest - Attempted", 1499260537936810000, 1499261869331517000, ["172.16.0.1"],
            ["192.168.10.50"], src_port_list=[37670], dst_port_list=[80], attempted_category=4)


# DoS Hulk
# --------

# Note that ports 48678 and 43664 have a benign flow launched by attacker IP while attack is already ongoing,
# containing benign HTTP request. This will be labelled as Attack artefact
# The DoS Hulk period is split into two contiguous windows (the first ends at ...299999999999 ns, the second starts
# at ...300000000000 ns); only the first window excludes the two artefact source ports.
label_flows(wednesday_df, "DoS Hulk", 1499262203194704000, 1499262299999999999, ["172.16.0.1"],
            ["192.168.10.50"], dst_port_list=[80], additional_filters=[
        ~(wednesday_df["Src Port"].isin([48678 , 43664]))
    ])

#Attack artefact - likely authors checking webserver mid-attack.
label_flows(wednesday_df, "DoS Hulk - Attempted", 1499262203194704000, 1499262299999999999, ["172.16.0.1"],
            ["192.168.10.50"], src_port_list=[48678 , 43664], dst_port_list=[80], attempted_category=4)

# Normal DoS Hulk
label_flows(wednesday_df, "DoS Hulk", 1499262300000000000, 1499263641326171000, ["172.16.0.1"],
            ["192.168.10.50"], dst_port_list=[80])

# Payload filter
# (spans both DoS Hulk windows; the two artefact source ports are excluded over the whole span)
label_flows(wednesday_df, "DoS Hulk - Attempted", 1499262203194704000, 1499263641326171000, ["172.16.0.1"],
            ["192.168.10.50"], dst_port_list=[80], payload_filter=True, attempted_category=0, additional_filters=[
        ~(wednesday_df["Src Port"].isin([48678 , 43664]))])

# Artefacts caused by either attack tool or non-empty TCP appendices. Reasoning is that 282 is minimum size of malicious payload
label_flows(wednesday_df, "DoS Hulk - Attempted", 1499262203194704000, 1499263641326171000, ["172.16.0.1"],
            ["192.168.10.50"], dst_port_list=[80], attempted_category=3, additional_filters=[
        ~(wednesday_df["Src Port"].isin([48678 , 43664])) & (wednesday_df["Total Length of Fwd Packet"] > 0)
        & (wednesday_df["Total Length of Fwd Packet"] < 282)
    ])

# DoS GoldenEye
# -------------

# All attacker -> victim flows to port 80 inside the attack window
label_flows(wednesday_df, "DoS GoldenEye", 1499263803231753000, 1499264408915718000, ["172.16.0.1"],
            ["192.168.10.50"], dst_port_list=[80])

# Payload filter
label_flows(wednesday_df, "DoS GoldenEye - Attempted", 1499263803231753000, 1499264408915718000, ["172.16.0.1"],
            ["192.168.10.50"], dst_port_list=[80], attempted_category=0, payload_filter=True)

# Heartbleed
# ----------

# Only flows from source port 45022 to 192.168.10.51 port 444 are considered
label_flows(wednesday_df, "Heartbleed", 1499278335650811000, 1499279563294455000, ["172.16.0.1"],
            ["192.168.10.51"], dst_port_list=[444], additional_filters=[
        (wednesday_df["Src Port"] ==  45022)
    ])

# Payload filter
label_flows(wednesday_df, "Heartbleed - Attempted", 1499278335650811000, 1499279563294455000, ["172.16.0.1"],
            ["192.168.10.51"], dst_port_list=[444], attempted_category=0, payload_filter=True, additional_filters=[
        (wednesday_df["Src Port"] ==  45022)])

label_rest_as_benign_and_write_csv(wednesday_df, OUTPUT_PATH + "wednesday.csv")

# Drop the reference to the frame (presumably so its memory can be reclaimed before the next day is loaded)
wednesday_df = None

labels after pre-processing: NeedManualLabel    496641
Name: Label, dtype: int64
label count after labelling:
 BENIGN                          319120
DoS Hulk                        158468
DoS GoldenEye                     7567
DoS Slowloris                     3997
DoS Slowhttptest - Attempted      3368
DoS Slowhttptest                  1740
DoS Slowloris - Attempted         1709
DoS Hulk - Attempted               581
DoS GoldenEye - Attempted           80
Heartbleed                          11
Name: Label, dtype: int64
Attempted Category count after labelling:
 -1    490903
 0      2927
 6      2804
 4         4
 2         3
Name: Attempted Category, dtype: int64
labels after pre-processing: NeedManualLabel    496641
Name: Label, dtype: int64
label count after labelling:
 BENIGN                          319120
DoS Hulk                        158468
DoS GoldenEye                     7567
DoS Slowloris                     3997
DoS Slowhttptest - Attempted      3368
DoS Slowhttptest 

In [18]:
#---------------------+
# THURSDAY 06-07-2017 |
#---------------------+

# Labels the Web Attack (Brute Force, XSS, SQL Injection) and Infiltration flows of Thursday and writes
# thursday.csv.
# label_flows overwrites "Label" and "Attempted Category" on every flow it matches, so when a flow matches several
# calls the last call wins; do not reorder the calls within an attack section.
# The additional_filters masks are computed from thursday_df at the call site, before label_flows runs.

thursday_df = read_csvs_from_path_and_reformat(DATASET_PATH + "Thursday-WorkingHours.pcap_Flow.csv")

# Web Attack - Brute Force
# ------------------------

# Earlier window that precedes the main attack window used by the following calls: everything is Attempted
# category 2
label_flows(thursday_df, "Web Attack - Brute Force - Attempted", 1499343354880049000, 1499343531179279000,
            ["172.16.0.1"], ["192.168.10.50"], dst_port_list=[80], attempted_category=2)

# Main window: flows with more than 20 forward packets, or coming from source port 44464
label_flows(thursday_df, "Web Attack - Brute Force", 1499343567660566000, 1499346011622209000,
            ["172.16.0.1"], ["192.168.10.50"], dst_port_list=[80], additional_filters=
            [
                (thursday_df["Total Fwd Packet"] > 20) | (thursday_df["Src Port"] == 44464)
            ])

# Payload filter on the complement of the selection above
label_flows(thursday_df, "Web Attack - Brute Force - Attempted", 1499343567660566000, 1499346011622209000,
            ["172.16.0.1"], ["192.168.10.50"], dst_port_list=[80], payload_filter=True, attempted_category=0,
            additional_filters=
            [~((thursday_df["Total Fwd Packet"] > 20) | (thursday_df["Src Port"] == 44464))])

# Flows with payload and exactly 5 forward / 5 backward packets, not from source port 44464
label_flows(thursday_df, "Web Attack - Brute Force - Attempted", 1499343567660566000, 1499346011622209000,
            ["172.16.0.1"], ["192.168.10.50"], dst_port_list=[80], attempted_category=4,
            additional_filters=
            [
                (thursday_df["Total Length of Fwd Packet"] > 0) & ~(thursday_df["Src Port"] == 44464) &
                (thursday_df["Total Fwd Packet"] == 5) & (thursday_df["Total Bwd packets"] == 5)
            ])

# Web Attack - XSS
# ----------------

# Flows with at least 150 forward packets, excluding the listed source ports
label_flows(thursday_df, "Web Attack - XSS", 1499346935283859000, 1499348121341704000, ["172.16.0.1"],
            ["192.168.10.50"], dst_port_list=[80], additional_filters=
            [
                ~(thursday_df["Src Port"].isin([36180, 36182, 36184, 36186, 36188, 36190])) &
                (thursday_df["Total Fwd Packet"] >= 150)
            ])

# Payload filter
label_flows(thursday_df, "Web Attack - XSS - Attempted", 1499346935283859000, 1499348121341704000, ["172.16.0.1"],
            ["192.168.10.50"], dst_port_list=[80], attempted_category=0, payload_filter=True, additional_filters=
            [~(thursday_df["Src Port"].isin([36180, 36182, 36184, 36186, 36188, 36190]))])

# Flows with payload but fewer than 150 forward packets
label_flows(thursday_df, "Web Attack - XSS - Attempted", 1499346935283859000, 1499348121341704000, ["172.16.0.1"],
            ["192.168.10.50"], dst_port_list=[80], attempted_category=2, additional_filters=
            [
                ~(thursday_df["Src Port"].isin([36180, 36182, 36184, 36186, 36188, 36190])) &
                (thursday_df["Total Length of Fwd Packet"] > 0) & (thursday_df["Total Fwd Packet"] < 150)
            ])

# Web Attack - SQL Injection
# --------------------------

# Short window before the attack: flows from the listed source ports are Attempted category 2
label_flows(thursday_df, "Web Attack - SQL Injection - Attempted", 1499348127852814000, 1499348145720612000,
            ["172.16.0.1"], ["192.168.10.50"], dst_port_list=[80], attempted_category=2,
            additional_filters=[
                thursday_df["Src Port"].isin([36180, 36182, 36184, 36186, 36188])
            ])

# Attack window: everything except the listed source ports
label_flows(thursday_df, "Web Attack - SQL Injection", 1499348145732950000, 1499348575320284000,
            ["172.16.0.1"], ["192.168.10.50"], dst_port_list=[80],
            additional_filters=[
                ~(thursday_df["Src Port"].isin([36180, 36182, 36184, 36186, 36188]))
            ])

# Payload filter
# NOTE(review): this call uses the same time window as the category 2 call above (1499348127852814000 -
# 1499348145720612000), not the window of the "Web Attack - SQL Injection" call - confirm this is intended.
label_flows(thursday_df, "Web Attack - SQL Injection - Attempted", 1499348127852814000, 1499348145720612000,
            ["172.16.0.1"], ["192.168.10.50"], dst_port_list=[80], attempted_category=0,
            payload_filter=True)


# Infiltration
# 5.1 Dropbox Download
# ------------


# No port restriction is applied in the Infiltration calls; flows are selected by time window and IP pair only
label_flows(thursday_df, "Infiltration", 1499361542547210000, 1499366769364731000, ["192.168.10.8"], ["205.174.165.73"])

# Payload filter
label_flows(thursday_df, "Infiltration - Attempted", 1499361542547210000, 1499366769364731000, ["192.168.10.8"],
            ["205.174.165.73"], attempted_category=0, payload_filter=True)

# Different source host (192.168.10.9) and an earlier, shorter window: Attempted category 2
label_flows(thursday_df, "Infiltration - Attempted", 1499361228830533000, 1499361301251276000 , ["192.168.10.9"],
            ["205.174.165.73"], attempted_category=2)

# 5.2 Cooldisk Mac

label_flows(thursday_df, "Infiltration", 1499363616453990000, 1499371339347892000, ["192.168.10.25"], ["205.174.165.73"])

# Payload filter
label_flows(thursday_df, "Infiltration - Attempted", 1499363616453990000, 1499371339347892000, ["192.168.10.25"],
            ["205.174.165.73"], attempted_category=0, payload_filter=True)

# 5.3 NMAP + Portscan

# Round 1

# Only flows from source ports 50122 and 50133
label_flows(thursday_df, "Infiltration - Portscan", 1499360431706755000, 1499360445728887000, ["172.16.0.1"],
            ["192.168.10.51"], additional_filters=[
        (thursday_df["Src Port"] == 50122) | (thursday_df["Src Port"] == 50133)
    ])

# Round 2

label_flows(thursday_df, "Infiltration - Portscan", 1499362410884008000, 1499362444285175000, ["192.168.10.8"],
            ["192.168.10.5"])

# Round 3

# Scan from 192.168.10.8 against the listed hosts. Flows towards 192.168.10.50 are excluded when their
# "Fwd Packet Length Max" is 408 or their "Total Length of Fwd Packet" is 176 or 20514.
label_flows(thursday_df, "Infiltration - Portscan", 1499364314425162000, 1499366764331875000, ["192.168.10.8"],
            ["192.168.10.5", "192.168.10.9", "192.168.10.12", "192.168.10.14", "192.168.10.15", "192.168.10.16",
            "192.168.10.17", "192.168.10.19", "192.168.10.25", "192.168.10.50", "192.168.10.51"], additional_filters= [
        ~((thursday_df["Fwd Packet Length Max"] == 408) & (thursday_df["Dst IP"] == "192.168.10.50")) &
        ~((thursday_df["Total Length of Fwd Packet"].isin([176, 20514])) & (thursday_df["Dst IP"] == "192.168.10.50"))
    ]
)

label_rest_as_benign_and_write_csv(thursday_df, OUTPUT_PATH + "thursday.csv")

# Drop the reference to the frame (presumably so its memory can be reclaimed before the next day is loaded)
thursday_df = None

labels after pre-processing: NeedManualLabel    362076
Name: Label, dtype: int64
label count after labelling:
 BENIGN                                    288172
Infiltration - Portscan                    71767
Web Attack - Brute Force - Attempted        1292
Web Attack - XSS - Attempted                 655
Web Attack - Brute Force                      73
Infiltration - Attempted                      45
Infiltration                                  36
Web Attack - XSS                              18
Web Attack - SQL Injection                    13
Web Attack - SQL Injection - Attempted         5
Name: Label, dtype: int64
Attempted Category count after labelling:
 -1    360079
 0      1908
 4        71
 2        18
Name: Attempted Category, dtype: int64


In [19]:
#-------------------+
# FRIDAY 07-07-2017 |
#-------------------+

# Labels the Portscan, Botnet and DDoS flows of Friday and writes friday.csv.
# label_flows overwrites "Label" and "Attempted Category" on every flow it matches, so when a flow matches several
# calls the last call wins; do not reorder the calls within an attack section.
# None of the calls below restricts ports: flows are selected by time window and IP pair only (plus the payload
# filter where requested).

friday_df = read_csvs_from_path_and_reformat(DATASET_PATH + "Friday-WorkingHours.pcap_Flow.csv")

# Portscan
# --------

#First round
label_flows(friday_df, "Portscan", 1499446532117090000, 1499447948582083000, ["172.16.0.1"], ["192.168.10.50"])


#Second round
label_flows(friday_df, "Portscan", 1499449905450532000, 1499451841699238000, ["172.16.0.1"], ["192.168.10.50"])

# Botnet
# ------

# Flows from the five listed internal hosts to 205.174.165.73 inside the attack window
label_flows(friday_df, "Botnet", 1499432653990571000, 1499436122903736000, ["192.168.10.15", "192.168.10.9",
            "192.168.10.14", "192.168.10.5", "192.168.10.8"], ["205.174.165.73"])

# Payload filter
label_flows(friday_df, "Botnet - Attempted", 1499432653990571000, 1499436122903736000, ["192.168.10.15", "192.168.10.9",
            "192.168.10.14", "192.168.10.5", "192.168.10.8"], ["205.174.165.73"], attempted_category=0,
            payload_filter=True)

# Same hosts, but in a later window that starts after the Botnet window above has ended: Attempted category 1
label_flows(friday_df, "Botnet - Attempted", 1499436180000000000, 1499457684606663000, ["192.168.10.15", "192.168.10.9",
            "192.168.10.14", "192.168.10.5", "192.168.10.8"], ["205.174.165.73"], attempted_category=1)


# DDoS
# ----

label_flows(friday_df, "DDoS", 1499453791796937000, 1499454972216560000, ["172.16.0.1"], ["192.168.10.50"])

# Payload filter
label_flows(friday_df, "DDoS - Attempted", 1499453791796937000, 1499454972216560000, ["172.16.0.1"], ["192.168.10.50"],
            attempted_category=0, payload_filter=True)

label_rest_as_benign_and_write_csv(friday_df, OUTPUT_PATH + "friday.csv")

# Drop the reference to the frame (presumably so its memory can be reclaimed)
friday_df = None

labels after pre-processing: NeedManualLabel    547557
Name: Label, dtype: int64
label count after labelling:
 BENIGN                288544
Portscan              159066
DDoS                   95144
Botnet - Attempted      4067
Botnet                   736
Name: Label, dtype: int64
Attempted Category count after labelling:
 -1    543490
 1      4067
Name: Attempted Category, dtype: int64
