# Time Window Feature Extraction

These features are extracted for each x-second time interval for each flow.  
(Same features as per flow feature extraction except for Duration)  

    0. Total Number of Packets
	1. Total Bytes
	2. Largest Packet Size
	3. Smallest Packet Size
	4. Number of ARP Packets
	5. Number of DNS Packets
	6. Number of TCP ACKs
	7. Minimum Advertised Receive Window
	8. Maximum Advertised Receive Window
	9. Direction (1 for outgoing, 0 for incoming)
	10. Std. Dev of packet size
	11. Average Packet Size
	12. Size of first 10 packets
	13. Number of TCP FIN
	14. Number of TCP SYN
	15. Number of TCP RST
	16. Number of TCP PUSH 
	17. Number of TCP URG
	18. Number of TCP CWR (Congestion Window Reduced)
	19. Number of TCP ECE (Explicit Congestion Notification Echo)
	20. Average Packet Inter-Arrival Time
    21. Max. Inter-arrival time
	22. Min. Inter-arrival time
	23. Avg. Packet Throughput (packets/second)
    24. Avg. Byte Throughput (bytes/second)  
    25. Standard Deviation of Packet Inter-arrival Time
    
These are extracted for different time intervals. 

In [1]:
import numpy as np
import netaddr
import csv
import math
import datetime

#
# Extracts a feature vector for each completed time window of each flow in the input file.
#
# A flow is identified by the directional tuple
# {IP Source Address, Source Port, IP Destination Address, Destination Port, Protocol},
# so the two directions of a conversation are tracked as two separate flows.
#
# The 26 features listed above (indices 0-25) are extracted for each win_len-second window
# of every flow in the given datafile.
# The src_ip parameter is used to determine the Direction feature.
#

# Value reported as "Minimum Advertised Receive Window" when a window saw no 'Win=' field
_NO_ARW_SEEN = 1000000


def _new_window_stats(win_start, win_len, direction, last_arrival=None):
    """Return a fresh statistics accumulator for one window of one flow."""
    return {'tot_bytes': 0, 'direction': direction, 'ARP': 0, 'DNS': 0,
            'ACK': 0, 'min_arw': _NO_ARW_SEEN, 'max_arw': 0, 'FIN': 0,
            'SYN': 0, 'RST': 0, 'PUSH': 0, 'URG': 0, 'CWR': 0,
            'ECE': 0, 'all_sizes': [], 'all_intervals': [],
            'last_arrival': last_arrival, 'win_start': win_start,
            'win_end': win_start + win_len}


def _advertised_window(info):
    """Return the integer that follows 'Win=' in info, or None when there is no 'Win='."""
    marker = info.find('Win=')
    if marker == -1:
        return None
    start = marker + len('Win=')
    end = info.find(' ', start)
    # 'Win=' may be the last token of the Info text, in which case there is no trailing space
    if end == -1:
        end = len(info)
    return int(info[start:end])


def _empty_window_features(direction):
    """Return the feature vector used for a window in which the flow sent no packets."""
    return np.array([0, 0, 0, 0, 0, 0, 0,
                     _NO_ARW_SEEN, 0, direction, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0, 0])


def _window_features(stats, win_len):
    """Turn one window's accumulated statistics into the 26-element feature vector."""
    sizes = np.array(stats['all_sizes'])
    intervals = np.array(stats['all_intervals'])

    if sizes.size == 1:
        # A single-packet window deliberately reports 0 for the size std/average,
        # the inter-arrival statistics and both throughputs (same values the
        # original single-packet branch spelled out).
        size_std = size_avg = 0
        first_sizes = np.sum(sizes)
        ia_avg = ia_max = ia_min = ia_std = 0
        pckt_rate = byte_rate = 0
    else:
        size_std = np.std(sizes)
        size_avg = np.average(sizes)
        # Slicing already copes with windows holding fewer than 10 packets
        first_sizes = np.sum(sizes[:10])
        if intervals.size:
            ia_avg = np.average(intervals)
            ia_max = np.max(intervals)
            ia_min = np.min(intervals)
            ia_std = np.std(intervals)
        else:
            ia_avg = ia_max = ia_min = ia_std = 0
        pckt_rate = sizes.size / win_len
        byte_rate = stats['tot_bytes'] / win_len

    return np.array([sizes.size, stats['tot_bytes'], np.max(sizes),
                     np.min(sizes), stats['ARP'], stats['DNS'], stats['ACK'],
                     stats['min_arw'], stats['max_arw'], stats['direction'],
                     size_std, size_avg, first_sizes,
                     stats['FIN'], stats['SYN'], stats['RST'], stats['PUSH'],
                     stats['URG'], stats['CWR'], stats['ECE'], ia_avg,
                     ia_max, ia_min,
                     pckt_rate, byte_rate, ia_std])


def extract_win_features(filename, src_ip, win_len):
    """Extract one feature vector per completed time window of every flow in a CSV capture.

    Each CSV row represents a packet and is read as
    [0: Time (arrival), 1: Source IP, 2: Dest IP, 3: Protocol, 4: Length, 5: Info,
     6: Source Port, 7: Dest Port]; rows whose first field is 'Time' (the header)
    and blank rows are skipped.

    Parameters:
        filename: path of the CSV file to read.
        src_ip:   packets whose Source IP equals this value belong to flows with
                  Direction 1, all other flows get Direction 0.
        win_len:  window length, in the same unit as the Time column; must be > 0.

    Returns:
        A list of 26-element numpy arrays in the order the windows were closed.
        A window is closed when a later packet of the same flow arrives after the
        window's end; windows skipped in between are emitted as "empty" vectors.
        The window that is still open for a flow when the file ends is NOT emitted.

    Raises:
        ValueError: if win_len is not positive, or a 'Win=' value is not an integer.
    """
    if win_len <= 0:
        raise ValueError('win_len must be positive')

    # Per-flow accumulator of the currently open window, keyed by
    # 'ip_source-source_port-ip_dest-dest_port-protocol'
    flows = {}

    # Returned feature values extracted from the given file
    feature_vecs = []

    # newline='' is what the csv module asks for when it is handed a file object
    with open(filename, 'r', newline='') as data_csv:
        for line in csv.reader(data_csv, delimiter=','):
            if not line or line[0] == 'Time':
                continue

            time = float(line[0])
            ip_src, ip_dest, protocol = line[1], line[2], line[3]
            length = float(line[4])
            info = line[5]
            src_port, dest_port = line[6], line[7]

            key = '-'.join((ip_src, src_port, ip_dest, dest_port, protocol))
            # The key contains ip_src, so the direction is the same for every packet of a flow
            direction = 1 if ip_src == src_ip else 0

            flow_stats = flows.get(key)
            if flow_stats is None:
                # First packet of a new flow opens its first window at its own arrival time
                flow_stats = flows[key] = _new_window_stats(time, win_len, direction)
            elif time > flow_stats['win_end']:
                # The packet falls past the open window: emit that window first ...
                feature_vecs.append(_window_features(flow_stats, win_len))
                new_win_start = flow_stats['win_end']
                # ... then one empty vector per window skipped before the packet's own window
                if time > new_win_start + win_len:
                    new_win_end = new_win_start + win_len
                    while new_win_end < time:
                        feature_vecs.append(_empty_window_features(direction))
                        new_win_end += win_len
                    new_win_start = new_win_end - win_len
                # The new window keeps the flow's direction; its first inter-arrival
                # interval is measured from the window start.
                flow_stats = flows[key] = _new_window_stats(
                    new_win_start, win_len, direction, last_arrival=new_win_start)

            # Update the open window with the current packet
            flow_stats['tot_bytes'] += length
            flow_stats['all_sizes'].append(length)

            if 'ARP' in protocol:
                flow_stats['ARP'] += 1
            if 'DNS' in protocol:
                flow_stats['DNS'] += 1

            if protocol == 'TCP':
                if 'ACK' in info:
                    flow_stats['ACK'] += 1
                if 'FIN' in info:
                    flow_stats['FIN'] += 1
                if 'SYN' in info:
                    flow_stats['SYN'] += 1
                if 'RST' in info:
                    flow_stats['RST'] += 1
                # NOTE(review): Wireshark-style Info text abbreviates this flag as 'PSH';
                # both spellings are accepted - confirm against the capture export.
                if 'PUSH' in info or 'PSH' in info:
                    flow_stats['PUSH'] += 1
                if 'URG' in info:
                    flow_stats['URG'] += 1
                if 'CWE' in info or 'CWR' in info:
                    flow_stats['CWR'] += 1
                if 'ECE' in info:
                    flow_stats['ECE'] += 1

                win_size = _advertised_window(info)
                if win_size is not None:
                    # Two independent checks: the first value seen in a window is
                    # both the minimum and the maximum so far.
                    if win_size < flow_stats['min_arw']:
                        flow_stats['min_arw'] = win_size
                    if win_size > flow_stats['max_arw']:
                        flow_stats['max_arw'] = win_size

            # Inter-arrival time is undefined for the very first packet of a flow
            if flow_stats['last_arrival'] is not None:
                flow_stats['all_intervals'].append(time - flow_stats['last_arrival'])
            flow_stats['last_arrival'] = time

    # One feature vector per closed (or skipped) window, across all flows
    return feature_vecs

In [2]:
# Directory prefix prepended to every capture CSV file name in the cells below
data_dir = '../DT-Data/'

# Addresses handed to extract_win_features as its src_ip argument
# (packets sent from the given address make their flow's Direction feature 1)
dtn1_ip = "204.99.128.105"
clustereddtn_ip = "204.99.128.81"
kchow_ip = "155.101.8.11"
airplane2_ip = "204.99.128.82"
gdrive_ip = "172.217.11.170"
gdrive_ip2 = "172.217.4.138"
gdrive_ip3 = "172.217.5.74"

In [29]:
# ALL 2 SECOND WINDOW FEATURES
#
# Runs extract_win_features with a 2.0-second window over every capture file,
# prints how many feature vectors each capture produced, and saves them all
# in one dictionary keyed by capture name.

# Globus Flows feature extractions
globus_dtn1_src1 = data_dir + 'globus-dtn1-src-iso.csv'
globus_dtn1_src1 = extract_win_features(globus_dtn1_src1, dtn1_ip, 2.0)
print(f'Number of flow feature vectors from globus-dtn1-src-iso: {len(globus_dtn1_src1)}')

globus_dtn1_dest1 = data_dir + 'globus-dtn1-dest-iso.csv'
globus_dtn1_dest1 = extract_win_features(globus_dtn1_dest1, kchow_ip, 2.0)
print(f'Number of flow feature vectors from globus-dtn1-dest-iso: {len(globus_dtn1_dest1)}')

globus_dtn1_src2 = data_dir + 'globus-dtn1-src-iso2.csv'
globus_dtn1_src2 = extract_win_features(globus_dtn1_src2, dtn1_ip, 2.0)
print(f'Number of flow feature vectors from globus-dtn1-src-iso2.csv: {len(globus_dtn1_src2)}')

globus_dtn1_dest2 = data_dir + 'globus-dtn1-dest-iso2.csv'
globus_dtn1_dest2 = extract_win_features(globus_dtn1_dest2, kchow_ip, 2.0)
print(f'Number of flow feature vectors from globus_dtn1_dest2: {len(globus_dtn1_dest2)}')

globus_clusterdtn_src = data_dir + 'globus-clusterdtn-src-iso.csv'
globus_clusterdtn_src = extract_win_features(globus_clusterdtn_src, clustereddtn_ip, 2.0)
print(f'Number of flow feature vectors from globus_clusterdtn_src.csv: {len(globus_clusterdtn_src)}')

globus_clusterdtn_dest = data_dir + 'globus-clusterdtn-dest-iso.csv'
globus_clusterdtn_dest = extract_win_features(globus_clusterdtn_dest, kchow_ip, 2.0)
print(f'Number of flow feature vectors from globus-clusterdtn-dest-iso.csv: {len(globus_clusterdtn_dest)}')

# FDT Flows feature extractions
fdt_a2_src = data_dir + 'fdt-airplane2-src-iso.csv'
fdt_a2_src = extract_win_features(fdt_a2_src, airplane2_ip, 2.0)
print(f'Number of flow feature vectors from fdt-airplane2-src-iso.csv: {len(fdt_a2_src)}')

fdt_a2_dest = data_dir + 'fdt-airplane2-dest-iso.csv'
fdt_a2_dest = extract_win_features(fdt_a2_dest, kchow_ip, 2.0)
print(f'Number of flow feature vectors from fdt-airplane2-dest-iso.csv: {len(fdt_a2_dest)}')

# 1 stream configured transfer
fdt_a2_dest_1str = data_dir + 'fdt-airplane2-dest-iso-1stream.csv'
fdt_a2_dest_1str = extract_win_features(fdt_a2_dest_1str, kchow_ip, 2.0)
print(f'Number of flow feature vectors from fdt-airplane2-dest-iso-1stream.csv: {len(fdt_a2_dest_1str)}')

# 2 stream configured transfer
fdt_a2_dest_2str = data_dir + 'fdt-airplane2-dest-iso-2stream.csv'
fdt_a2_dest_2str = extract_win_features(fdt_a2_dest_2str, kchow_ip, 2.0)
print(f'Number of flow feature vectors from fdt-airplane2-dest-iso-2stream: {len(fdt_a2_dest_2str)}')

fdt_dtn1_dest = data_dir + 'fdt-dtn1-dest-iso.csv'
fdt_dtn1_dest = extract_win_features(fdt_dtn1_dest, kchow_ip, 2.0)
print(f'Number of flow feature vectors from fdt-dtn1-dest-iso: {len(fdt_dtn1_dest)}')

fdt_dtn1_src = data_dir + 'fdt-dtn1-src-iso.csv'
fdt_dtn1_src = extract_win_features(fdt_dtn1_src, dtn1_ip, 2.0)
print(f'Number of flow feature vectors from fdt-dtn1-src-iso: {len(fdt_dtn1_src)}')

# RClone Flows feature extractions
rclone_src = data_dir + 'rclone-gdrive-src-iso.csv'
rclone_src = extract_win_features(rclone_src, gdrive_ip, 2.0)
print(f'Number of flow feature vectors from rclone-gdrive-src-iso: {len(rclone_src)}')

# NOTE(review): every other *-dest capture passes kchow_ip as src_ip; confirm gdrive_ip is intended here.
rclone_dest = data_dir + 'rclone-gdrive-dest-iso.csv'
rclone_dest = extract_win_features(rclone_dest, gdrive_ip, 2.0)
print(f'Number of flow feature vectors from rclone-gdrive-dest-iso: {len(rclone_dest)}')

rclone_src2 = data_dir + 'rclone-gdrive-src-iso2.csv'
rclone_src2 = extract_win_features(rclone_src2, gdrive_ip2, 2.0)
print(f'Number of flow feature vectors from rclone-gdrive-src-iso2: {len(rclone_src2)}')

rclone_dest2 = data_dir + 'rclone-gdrive-dest-iso2.csv'
rclone_dest2 = extract_win_features(rclone_dest2, kchow_ip, 2.0)
print(f'Number of flow feature vectors from rclone-gdrive-dest-iso2: {len(rclone_dest2)}')

rclone_src3 = data_dir + 'rclone-gdrive-src-iso3.csv'
rclone_src3 = extract_win_features(rclone_src3, gdrive_ip3, 2.0)
print(f'Number of flow feature vectors from rclone-gdrive-src-iso3: {len(rclone_src3)}')

rclone_dest3 = data_dir + 'rclone-gdrive-dest-iso3.csv'
rclone_dest3 = extract_win_features(rclone_dest3, kchow_ip, 2.0)
print(f'Number of flow feature vectors from rclone-gdrive-dest-iso3: {len(rclone_dest3)}')

# Creating dictionary of all 2 second window feature vectors mapping by capture file, saving it
two_sec_dict = {
    'globus_dtn1_src1': globus_dtn1_src1, 'globus_dtn1_dest1': globus_dtn1_dest1,
    'globus_dtn1_src2': globus_dtn1_src2, 'globus_dtn1_dest2': globus_dtn1_dest2,
    'globus_clusterdtn_src': globus_clusterdtn_src, 'globus_clusterdtn_dest': globus_clusterdtn_dest,
    # 'fdt_a2_dest' must hold the dest capture's vectors (it previously duplicated fdt_a2_src)
    'fdt_a2_src': fdt_a2_src, 'fdt_a2_dest': fdt_a2_dest, 'fdt_dtn1_dest': fdt_dtn1_dest,
    'fdt_dtn1_src': fdt_dtn1_src, 'fdt_a2_dest_1str': fdt_a2_dest_1str, 'fdt_a2_dest_2str': fdt_a2_dest_2str,
    'rclone_src': rclone_src, 'rclone_dest': rclone_dest, 'rclone_src2': rclone_src2,
    'rclone_dest2': rclone_dest2, 'rclone_src3': rclone_src3, 'rclone_dest3': rclone_dest3,
}

# A dict is stored by np.save as a pickled object array, so reading it back
# needs np.load(path, allow_pickle=True).item()
np.save('../Feature-Vectors/2_second_features.npy', two_sec_dict)

Number of flow feature vectors from globus-dtn1-src-iso: 285
Number of flow feature vectors from globus-dtn1-dest-iso: 168
Number of flow feature vectors from globus-dtn1-src-iso2.csv: 612
Number of flow feature vectors from globus_dtn1_dest2: 144
Number of flow feature vectors from globus_clusterdtn_src.csv: 75
Number of flow feature vectors from globus-clusterdtn-dest-iso.csv: 120
Number of flow feature vectors from fdt-airplane2-src-iso.csv: 104
Number of flow feature vectors from fdt-airplane2-dest-iso.csv: 88
Number of flow feature vectors from fdt-airplane2-dest-iso-1stream.csv: 26
Number of flow feature vectors from fdt-airplane2-dest-iso-2stream: 52
Number of flow feature vectors from fdt-dtn1-dest-iso: 136
Number of flow feature vectors from fdt-dtn1-src-iso: 104
Number of flow feature vectors from rclone-gdrive-src-iso: 133
Number of flow feature vectors from rclone-gdrive-src-iso: 207
Number of flow feature vectors from rclone-gdrive-src-iso2: 145
Number of flow feature vect

In [3]:
# ALL 4 SECOND WINDOW FEATURES
#
# Runs extract_win_features with a 4.0-second window over every capture file,
# prints how many feature vectors each capture produced, and saves them all
# in one dictionary keyed by capture name.

# Globus Flows feature extractions
globus_dtn1_src1 = data_dir + 'globus-dtn1-src-iso.csv'
globus_dtn1_src1 = extract_win_features(globus_dtn1_src1, dtn1_ip, 4.0)
print(f'Number of flow feature vectors from globus-dtn1-src-iso: {len(globus_dtn1_src1)}')

globus_dtn1_dest1 = data_dir + 'globus-dtn1-dest-iso.csv'
globus_dtn1_dest1 = extract_win_features(globus_dtn1_dest1, kchow_ip, 4.0)
print(f'Number of flow feature vectors from globus-dtn1-dest-iso: {len(globus_dtn1_dest1)}')

globus_dtn1_src2 = data_dir + 'globus-dtn1-src-iso2.csv'
globus_dtn1_src2 = extract_win_features(globus_dtn1_src2, dtn1_ip, 4.0)
print(f'Number of flow feature vectors from globus-dtn1-src-iso2.csv: {len(globus_dtn1_src2)}')

globus_dtn1_dest2 = data_dir + 'globus-dtn1-dest-iso2.csv'
globus_dtn1_dest2 = extract_win_features(globus_dtn1_dest2, kchow_ip, 4.0)
print(f'Number of flow feature vectors from globus-dtn1-dest-iso2.csv: {len(globus_dtn1_dest2)}')

globus_clusterdtn_src = data_dir + 'globus-clusterdtn-src-iso.csv'
globus_clusterdtn_src = extract_win_features(globus_clusterdtn_src, clustereddtn_ip, 4.0)
print(f'Number of flow feature vectors from globus-clusterdtn-src-iso.csv: {len(globus_clusterdtn_src)}')

globus_clusterdtn_dest = data_dir + 'globus-clusterdtn-dest-iso.csv'
globus_clusterdtn_dest = extract_win_features(globus_clusterdtn_dest, kchow_ip, 4.0)
print(f'Number of flow feature vectors from globus-clusterdtn-dest-iso.csv: {len(globus_clusterdtn_dest)}')

# FDT Flows feature extractions
fdt_a2_src = data_dir + 'fdt-airplane2-src-iso.csv'
fdt_a2_src = extract_win_features(fdt_a2_src, airplane2_ip, 4.0)
print(f'Number of flow feature vectors from fdt-airplane2-src-iso: {len(fdt_a2_src)}')

fdt_a2_dest = data_dir + 'fdt-airplane2-dest-iso.csv'
fdt_a2_dest = extract_win_features(fdt_a2_dest, kchow_ip, 4.0)
print(f'Number of flow feature vectors from fdt-airplane2-dest-iso: {len(fdt_a2_dest)}')

# 1 stream configured transfer
fdt_a2_dest_1str = data_dir + 'fdt-airplane2-dest-iso-1stream.csv'
fdt_a2_dest_1str = extract_win_features(fdt_a2_dest_1str, kchow_ip, 4.0)
print(f'Number of flow feature vectors from fdt-airplane2-dest-iso-1stream: {len(fdt_a2_dest_1str)}')

# 2 stream configured transfer
fdt_a2_dest_2str = data_dir + 'fdt-airplane2-dest-iso-2stream.csv'
fdt_a2_dest_2str = extract_win_features(fdt_a2_dest_2str, kchow_ip, 4.0)
print(f'Number of flow feature vectors from fdt-airplane2-dest-iso-2stream: {len(fdt_a2_dest_2str)}')

fdt_dtn1_dest = data_dir + 'fdt-dtn1-dest-iso.csv'
fdt_dtn1_dest = extract_win_features(fdt_dtn1_dest, kchow_ip, 4.0)
print(f'Number of flow feature vectors from fdt-dtn1-dest-iso: {len(fdt_dtn1_dest)}')

fdt_dtn1_src = data_dir + 'fdt-dtn1-src-iso.csv'
fdt_dtn1_src = extract_win_features(fdt_dtn1_src, dtn1_ip, 4.0)
print(f'Number of flow feature vectors from fdt-dtn1-src-iso: {len(fdt_dtn1_src)}')

# RClone Flows feature extractions
rclone_src = data_dir + 'rclone-gdrive-src-iso.csv'
rclone_src = extract_win_features(rclone_src, gdrive_ip, 4.0)
print(f'Number of flow feature vectors from rclone-gdrive-src-iso: {len(rclone_src)}')

# NOTE(review): every other *-dest capture passes kchow_ip as src_ip; confirm gdrive_ip is intended here.
rclone_dest = data_dir + 'rclone-gdrive-dest-iso.csv'
rclone_dest = extract_win_features(rclone_dest, gdrive_ip, 4.0)
print(f'Number of flow feature vectors from rclone-gdrive-dest-iso: {len(rclone_dest)}')

rclone_src2 = data_dir + 'rclone-gdrive-src-iso2.csv'
rclone_src2 = extract_win_features(rclone_src2, gdrive_ip2, 4.0)
print(f'Number of flow feature vectors from rclone-gdrive-src-iso2: {len(rclone_src2)}')

rclone_dest2 = data_dir + 'rclone-gdrive-dest-iso2.csv'
rclone_dest2 = extract_win_features(rclone_dest2, kchow_ip, 4.0)
print(f'Number of flow feature vectors from rclone-gdrive-dest-iso2: {len(rclone_dest2)}')

rclone_src3 = data_dir + 'rclone-gdrive-src-iso3.csv'
rclone_src3 = extract_win_features(rclone_src3, gdrive_ip3, 4.0)
print(f'Number of flow feature vectors from rclone-gdrive-src-iso3: {len(rclone_src3)}')

rclone_dest3 = data_dir + 'rclone-gdrive-dest-iso3.csv'
rclone_dest3 = extract_win_features(rclone_dest3, kchow_ip, 4.0)
print(f'Number of flow feature vectors from rclone-gdrive-dest-iso3: {len(rclone_dest3)}')

# Creating dictionary of all 4 second window feature vectors mapping by capture file, saving it
four_sec_dict = {
    'globus_dtn1_src1': globus_dtn1_src1, 'globus_dtn1_dest1': globus_dtn1_dest1,
    'globus_dtn1_src2': globus_dtn1_src2, 'globus_dtn1_dest2': globus_dtn1_dest2,
    'globus_clusterdtn_src': globus_clusterdtn_src, 'globus_clusterdtn_dest': globus_clusterdtn_dest,
    # 'fdt_a2_dest' must hold the dest capture's vectors (it previously duplicated fdt_a2_src)
    'fdt_a2_src': fdt_a2_src, 'fdt_a2_dest': fdt_a2_dest, 'fdt_dtn1_dest': fdt_dtn1_dest,
    'fdt_dtn1_src': fdt_dtn1_src, 'fdt_a2_dest_1str': fdt_a2_dest_1str, 'fdt_a2_dest_2str': fdt_a2_dest_2str,
    'rclone_src': rclone_src, 'rclone_dest': rclone_dest, 'rclone_src2': rclone_src2,
    'rclone_dest2': rclone_dest2, 'rclone_src3': rclone_src3, 'rclone_dest3': rclone_dest3,
}

# A dict is stored by np.save as a pickled object array, so reading it back
# needs np.load(path, allow_pickle=True).item()
np.save('../Feature-Vectors/4_second_features.npy', four_sec_dict)

Number of flow feature vectors from globus-dtn1-src-iso: 142
Number of flow feature vectors from globus-dtn1-dest-iso: 84
Number of flow feature vectors from globus-dtn1-src-iso2.csv: 306
Number of flow feature vectors from globus-dtn1-dest-iso2.csv: 72
Number of flow feature vectors from globus-clusterdtn-src-iso.csv: 37
Number of flow feature vectors from globus-clusterdtn-dest-iso.csv: 56
Number of flow feature vectors from fdt-airplane2-src-iso: 48
Number of flow feature vectors from fdt-airplane2-dest-iso: 40
Number of flow feature vectors from fdt-airplane2-dest-iso-1stream: 12
Number of flow feature vectors from fdt-airplane2-dest-iso-2stream: 24
Number of flow feature vectors from fdt-dtn1-dest-iso: 64
Number of flow feature vectors from fdt-dtn1-src-iso: 48
Number of flow feature vectors from rclone-gdrive-src-iso: 65
Number of flow feature vectors from rclone-gdrive-src-iso: 103
Number of flow feature vectors from rclone-gdrive-src-iso2: 71
Number of flow feature vectors from

In [26]:
# ALL 8 SECOND WINDOW FEATURES
#
# Runs extract_win_features with an 8.0-second window over every capture file,
# prints how many feature vectors each capture produced, and saves them all
# in one dictionary keyed by capture name.

# Globus Flows feature extractions
globus_dtn1_src1 = data_dir + 'globus-dtn1-src-iso.csv'
globus_dtn1_src1 = extract_win_features(globus_dtn1_src1, dtn1_ip, 8.0)
print(f'Number of flow feature vectors from globus-dtn1-src-iso: {len(globus_dtn1_src1)}')

globus_dtn1_dest1 = data_dir + 'globus-dtn1-dest-iso.csv'
globus_dtn1_dest1 = extract_win_features(globus_dtn1_dest1, kchow_ip, 8.0)
print(f'Number of flow feature vectors from globus-dtn1-dest-iso: {len(globus_dtn1_dest1)}')

globus_dtn1_src2 = data_dir + 'globus-dtn1-src-iso2.csv'
globus_dtn1_src2 = extract_win_features(globus_dtn1_src2, dtn1_ip, 8.0)
print(f'Number of flow feature vectors from globus-dtn1-src-iso2.csv: {len(globus_dtn1_src2)}')

globus_dtn1_dest2 = data_dir + 'globus-dtn1-dest-iso2.csv'
globus_dtn1_dest2 = extract_win_features(globus_dtn1_dest2, kchow_ip, 8.0)
print(f'Number of flow feature vectors from globus-dtn1-dest-iso2.csv: {len(globus_dtn1_dest2)}')

globus_clusterdtn_src = data_dir + 'globus-clusterdtn-src-iso.csv'
globus_clusterdtn_src = extract_win_features(globus_clusterdtn_src, clustereddtn_ip, 8.0)
print(f'Number of flow feature vectors from globus-clusterdtn-src-iso.csv: {len(globus_clusterdtn_src)}')

globus_clusterdtn_dest = data_dir + 'globus-clusterdtn-dest-iso.csv'
globus_clusterdtn_dest = extract_win_features(globus_clusterdtn_dest, kchow_ip, 8.0)
print(f'Number of flow feature vectors from globus-clusterdtn-dest-iso.csv: {len(globus_clusterdtn_dest)}')

# FDT Flows feature extractions
fdt_a2_src = data_dir + 'fdt-airplane2-src-iso.csv'
fdt_a2_src = extract_win_features(fdt_a2_src, airplane2_ip, 8.0)
print(f'Number of flow feature vectors from fdt-airplane2-src-iso: {len(fdt_a2_src)}')

fdt_a2_dest = data_dir + 'fdt-airplane2-dest-iso.csv'
fdt_a2_dest = extract_win_features(fdt_a2_dest, kchow_ip, 8.0)
print(f'Number of flow feature vectors from fdt-airplane2-dest-iso: {len(fdt_a2_dest)}')

# 1 stream configured transfer
fdt_a2_dest_1str = data_dir + 'fdt-airplane2-dest-iso-1stream.csv'
fdt_a2_dest_1str = extract_win_features(fdt_a2_dest_1str, kchow_ip, 8.0)
print(f'Number of flow feature vectors from fdt-airplane2-dest-iso-1stream: {len(fdt_a2_dest_1str)}')

# 2 stream configured transfer
fdt_a2_dest_2str = data_dir + 'fdt-airplane2-dest-iso-2stream.csv'
fdt_a2_dest_2str = extract_win_features(fdt_a2_dest_2str, kchow_ip, 8.0)
print(f'Number of flow feature vectors from fdt-airplane2-dest-iso-2stream: {len(fdt_a2_dest_2str)}')

fdt_dtn1_dest = data_dir + 'fdt-dtn1-dest-iso.csv'
fdt_dtn1_dest = extract_win_features(fdt_dtn1_dest, kchow_ip, 8.0)
print(f'Number of flow feature vectors from fdt-dtn1-dest-iso: {len(fdt_dtn1_dest)}')

fdt_dtn1_src = data_dir + 'fdt-dtn1-src-iso.csv'
fdt_dtn1_src = extract_win_features(fdt_dtn1_src, dtn1_ip, 8.0)
print(f'Number of flow feature vectors from fdt-dtn1-src-iso: {len(fdt_dtn1_src)}')

# RClone Flows feature extractions
rclone_src = data_dir + 'rclone-gdrive-src-iso.csv'
rclone_src = extract_win_features(rclone_src, gdrive_ip, 8.0)
print(f'Number of flow feature vectors from rclone-gdrive-src-iso: {len(rclone_src)}')

# NOTE(review): every other *-dest capture passes kchow_ip as src_ip; confirm gdrive_ip is intended here.
rclone_dest = data_dir + 'rclone-gdrive-dest-iso.csv'
rclone_dest = extract_win_features(rclone_dest, gdrive_ip, 8.0)
print(f'Number of flow feature vectors from rclone-gdrive-dest-iso: {len(rclone_dest)}')

rclone_src2 = data_dir + 'rclone-gdrive-src-iso2.csv'
rclone_src2 = extract_win_features(rclone_src2, gdrive_ip2, 8.0)
print(f'Number of flow feature vectors from rclone-gdrive-src-iso2: {len(rclone_src2)}')

rclone_dest2 = data_dir + 'rclone-gdrive-dest-iso2.csv'
rclone_dest2 = extract_win_features(rclone_dest2, kchow_ip, 8.0)
print(f'Number of flow feature vectors from rclone-gdrive-dest-iso2: {len(rclone_dest2)}')

rclone_src3 = data_dir + 'rclone-gdrive-src-iso3.csv'
rclone_src3 = extract_win_features(rclone_src3, gdrive_ip3, 8.0)
print(f'Number of flow feature vectors from rclone-gdrive-src-iso3: {len(rclone_src3)}')

rclone_dest3 = data_dir + 'rclone-gdrive-dest-iso3.csv'
rclone_dest3 = extract_win_features(rclone_dest3, kchow_ip, 8.0)
print(f'Number of flow feature vectors from rclone-gdrive-dest-iso3: {len(rclone_dest3)}')

# Creating dictionary of all 8 second window feature vectors mapping by capture file, saving it
eight_sec_dict = {
    'globus_dtn1_src1': globus_dtn1_src1, 'globus_dtn1_dest1': globus_dtn1_dest1,
    'globus_dtn1_src2': globus_dtn1_src2, 'globus_dtn1_dest2': globus_dtn1_dest2,
    'globus_clusterdtn_src': globus_clusterdtn_src, 'globus_clusterdtn_dest': globus_clusterdtn_dest,
    # 'fdt_a2_dest' must hold the dest capture's vectors (it previously duplicated fdt_a2_src)
    'fdt_a2_src': fdt_a2_src, 'fdt_a2_dest': fdt_a2_dest, 'fdt_dtn1_dest': fdt_dtn1_dest,
    'fdt_dtn1_src': fdt_dtn1_src, 'fdt_a2_dest_1str': fdt_a2_dest_1str, 'fdt_a2_dest_2str': fdt_a2_dest_2str,
    'rclone_src': rclone_src, 'rclone_dest': rclone_dest, 'rclone_src2': rclone_src2,
    'rclone_dest2': rclone_dest2, 'rclone_src3': rclone_src3, 'rclone_dest3': rclone_dest3,
}

# A dict is stored by np.save as a pickled object array, so reading it back
# needs np.load(path, allow_pickle=True).item()
np.save('../Feature-Vectors/8_second_features.npy', eight_sec_dict)

Number of flow feature vectors from globus-dtn1-src-iso: 69
Number of flow feature vectors from globus-dtn1-dest-iso: 40
Number of flow feature vectors from globus-dtn1-src-iso2.csv: 153
Number of flow feature vectors from globus-dtn1-dest-iso: 32
Number of flow feature vectors from globus-dtn1-src-iso2.csv: 17
Number of flow feature vectors from globus-dtn1-dest-iso: 24
Number of flow feature vectors from fdt-airplane2-src-iso: 24
Number of flow feature vectors from fdt-airplane2-dest-iso: 16
Number of flow feature vectors from fdt-airplane2-dest-iso-1stream: 6
Number of flow feature vectors from fdt-airplane2-dest-iso-2stream: 12
Number of flow feature vectors from fdt-dtn1-dest-iso: 32
Number of flow feature vectors from fdt-dtn1-src-iso: 24
Number of flow feature vectors from rclone-gdrive-src-iso: 32
Number of flow feature vectors from rclone-gdrive-src-iso: 51
Number of flow feature vectors from rclone-gdrive-src-iso2: 34
Number of flow feature vectors from rclone-gdrive-dest-iso