In [1]:
import numpy as np
import pandas as pd
import os
import glob
import csv
from pcapfile import savefile
from pcapfile.protocols.linklayer import ethernet
from pcapfile.protocols.network import ip
from pcapfile.protocols.transport import tcp
import binascii
from datetime import datetime
import pytz

In [2]:
pcap_data_path = '/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/'
csv_data_path = '/media/mo/HDD/intrusion_detection/dataset/ProcessedTrafficDataforMLAlgorithms/'
csv_dist_path = '/media/mo/HDD/intrusion_detection/dataset/AttacksRecords/'

In [3]:
#Processed Traffic Data
files = ['Wednesday-14-02-2018_TrafficForML_CICFlowMeter.csv',
        'Thursday-15-02-2018_TrafficForML_CICFlowMeter.csv',
        'Friday-16-02-2018_TrafficForML_CICFlowMeter.csv',
        'Thuesday-20-02-2018_TrafficForML_CICFlowMeter.csv',
        'Wednesday-21-02-2018_TrafficForML_CICFlowMeter.csv',
        'Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv',
        'Friday-23-02-2018_TrafficForML_CICFlowMeter.csv',
        'Wednesday-28-02-2018_TrafficForML_CICFlowMeter.csv',
        'Thursday-01-03-2018_TrafficForML_CICFlowMeter.csv',
        'Friday-02-03-2018_TrafficForML_CICFlowMeter.csv']

In [4]:
def read_file (file_):
    #input the name for the CSV file
    file_ = csv_data_path+file_
    with open(file_, 'r') as f:
        contents = [x.split(',') for x in f.readlines()]
        f.flush()
    return contents

def day_attack(file_):
    #input the content of csv file as np array
    return [item for item in np.unique(file_[:,-1]) if item!='Benign\n' and item!='Label\n']

def file_features(file_):
    #input the content of csv file as np array
    return file_[0]

def extract_flowdata(file_, attack_names):
    #input the content of csv file as np array
    dstPort=[]
    protocol=[]
    #attack_records = [file_[0].tolist()]
    attack_records = [file_[0]]
    for name in attack_names:
        #record=[item.tolist() for item in file_ if item[-1]==name]
        record=[item for item in file_ if item[-1]==name]
        attack_records.extend(record)
        dstPort.append(np.unique(np.array(record)[:,0]).tolist())
        protocol.append(np.unique(np.array(record)[:,1]).tolist())
    return np.array(attack_records), dstPort, protocol

def write_to_file(file_, name):
    #input attack records
    df = pd.DataFrame(data=file_[1:], columns=file_[0])
    df.to_csv(csv_dist_path+name, index=False)
    return df
    
    

In [5]:
def normalized_timestamp(ts):
    return(str(datetime.utcfromtimestamp(ts)))


def convert_datetime_timezone(dt, tz1, tz2):
    tz1 = pytz.timezone(tz1)
    tz2 = pytz.timezone(tz2)
    dt = datetime.strptime(dt,"%Y-%m-%d %H:%M:%S")
    dt = tz1.localize(dt)
    dt = dt.astimezone(tz2)
    dt = dt.strftime("%Y-%m-%d %H:%M:%S")
    return dt

def get_all_flows(capdata):
    #input capdata
    flows =[]
    timestamps=[]
    for pkt in capdata.packets:
        eth_frame = ethernet.Ethernet(pkt.raw())
        try:
            ip_packet = ip.IP(binascii.unhexlify(eth_frame.payload))
            tcp_packet = tcp.TCP(binascii.unhexlify(eth_frame.payload))
        except:
            continue
        flows.append([ip_packet.src.decode("utf-8"), ip_packet.dst.decode("utf-8"), str(tcp_packet.src_port), str(tcp_packet.dst_port), str(ip_packet.p)])
        timestamps.append(normalized_timestamp(pkt.timestamp))
    return np.unique(flows, axis=0), flows,timestamps


def get_attack_flows(flows, attacker_ip):
    #input unique_flows
    if attacker_ip in np.unique(flows[:,0]):
        attacker_fwd_flows=[item.tolist() for item in flows if item[0]==attacker_ip]
    else:
        attacker_fwd_flows=[]
        print('Attacker IP does not exist !!!')
        
    if attacker_ip in np.unique(flows[:,1]):
        attacker_bwd_flows=[item.tolist() for item in flows if item[1]==attacker_ip]
    else:
        attacker_bwd_flows=[]
        print('Attacker IP does not exist !!!')
    
    return attacker_fwd_flows, attacker_bwd_flows   


def write_labels(attacker_fwd_flows, attacker_bwd_flows, attack):
    uni_labeled_file = open(csv_dist_path+"uni_labels.txt", 'a')
    for item in attacker_fwd_flows:
        for initem in item:
            uni_labeled_file.write(initem +',')
        uni_labeled_file.write(attack)

    bi_labeled_file = open(csv_dist_path+"bi_labels.txt", 'a')
    for item in attacker_fwd_flows:
        for initem in item:
            bi_labeled_file.write(initem +',')
        bi_labeled_file.write(attack)
    for item in attacker_bwd_flows:
        for initem in item:
            bi_labeled_file.write(initem +',')
        bi_labeled_file.write(attack)
    uni_labeled_file.close()
    bi_labeled_file.close()
    
def attack_active_time(attacker_fwd_flows,flows,timestamps):
    for item in attacker_fwd_flows:
        i = []
        j = 0
        for idx, itemx in enumerate(flows):
            if item==itemx:
                i.append(idx)
                j+=1

        begin_time=convert_datetime_timezone(timestamps[i[0]], "UTC", "Canada/Atlantic")
        end_time=convert_datetime_timezone(timestamps[i[-1]], "UTC", "Canada/Atlantic")
        print("Flow: {}\n\nBegin: {}   End: {}\n\nNumber of packets: {}\n" .format(item, begin_time,end_time,j))

        
def get_all_flows_2(cap, attackers, write_to_file):
    #input file object and a file to write the flows in formation 
    flow_file = open (write_to_file,'a') 
    flows =[]
    timestamps=[]
    for pkt in savefile.load_savefile(cap, lazy=True).packets:
        eth_frame = ethernet.Ethernet(pkt.raw())
        try:
            ip_packet = ip.IP(binascii.unhexlify(eth_frame.payload))
            tcp_packet = tcp.TCP(binascii.unhexlify(eth_frame.payload))
        except:
            continue
        if ip_packet.src.decode("utf-8") in attackers:
            flow_file.write('{},{},{},{},{},{}\n'.format(ip_packet.src.decode("utf-8"), ip_packet.dst.decode("utf-8"), str(tcp_packet.src_port), str(tcp_packet.dst_port), str(ip_packet.p),normalized_timestamp(pkt.timestamp)))
        else:
            continue
    flow_file.close()
    
def read_unique_flows(file_):
    #file_ =csv_dist_path+file_
    with open (file_,'r') as f:
        contents = f.readlines()
        f.flush()
    return list(set(contents))



def get_attacks_labels(unique_flows, attack_time, attack_name,labels_file): 
    flows = [item.split(',') for item in unique_flows]
    #time = [item.split()[-1] for item in np.array(flows)[:,-1]]
    attack_flow = open(labels_file,'a')
    for item in flows:
        time = item[-1].split()[-1]
        if time > attack_time[0] and time < attack_time[1]:
            attack_flow.write('{},{},{},{},{},{}\n'.format(item[0],item[1],item[2],item[3],item[4],attack_name))
    attack_flow.close()   

# Day 1: FTP-BruteForce AND SSH-Bruteforce
### FlowID format (srcIP, dstIp, srcPort, dstPort, protocol)
### The FTP-BruteForce attack FlowID (172.31.70.4, 172.31.69.25, srcPort, dstPort, protocol)
### The SSH-BruteForce attack FlowID (172.31.70.6, 172.31.69.25, srcPort, dstPort, protocol)

In [5]:
#read processed data
#read processed data
day1 = read_file(files[0])
features = file_features(day1)

#fix only for day4 because the file is large
x_day1 = np.array([[item[0],item[-1]] for item in day1])
attacks = day_attack(x_day1)
attack_records,dstPort, protocol = extract_flowdata(day1, attacks)

In [6]:
print("Features:\n {}\n\nAttacks:{}\n\nSize of attack recods: {}".format(features,attacks,attack_records.shape))

Features:
 ['Dst Port' 'Protocol' 'Timestamp' 'Flow Duration' 'Tot Fwd Pkts'
 'Tot Bwd Pkts' 'TotLen Fwd Pkts' 'TotLen Bwd Pkts' 'Fwd Pkt Len Max'
 'Fwd Pkt Len Min' 'Fwd Pkt Len Mean' 'Fwd Pkt Len Std' 'Bwd Pkt Len Max'
 'Bwd Pkt Len Min' 'Bwd Pkt Len Mean' 'Bwd Pkt Len Std' 'Flow Byts/s'
 'Flow Pkts/s' 'Flow IAT Mean' 'Flow IAT Std' 'Flow IAT Max'
 'Flow IAT Min' 'Fwd IAT Tot' 'Fwd IAT Mean' 'Fwd IAT Std' 'Fwd IAT Max'
 'Fwd IAT Min' 'Bwd IAT Tot' 'Bwd IAT Mean' 'Bwd IAT Std' 'Bwd IAT Max'
 'Bwd IAT Min' 'Fwd PSH Flags' 'Bwd PSH Flags' 'Fwd URG Flags'
 'Bwd URG Flags' 'Fwd Header Len' 'Bwd Header Len' 'Fwd Pkts/s'
 'Bwd Pkts/s' 'Pkt Len Min' 'Pkt Len Max' 'Pkt Len Mean' 'Pkt Len Std'
 'Pkt Len Var' 'FIN Flag Cnt' 'SYN Flag Cnt' 'RST Flag Cnt' 'PSH Flag Cnt'
 'ACK Flag Cnt' 'URG Flag Cnt' 'CWE Flag Count' 'ECE Flag Cnt'
 'Down/Up Ratio' 'Pkt Size Avg' 'Fwd Seg Size Avg' 'Bwd Seg Size Avg'
 'Fwd Byts/b Avg' 'Fwd Pkts/b Avg' 'Fwd Blk Rate Avg' 'Bwd Byts/b Avg'
 'Bwd Pkts/b Avg' 'Bwd Blk

In [7]:
records = write_to_file(attack_records, files[0])
records.head()

Unnamed: 0,Dst Port,Protocol,Timestamp,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,...,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,21,6,14/02/2018 10:33:26,19,1,1,0,0,0,0,...,40,0,0,0,0,0,0,0,0,FTP-BruteForce\n
1,21,6,14/02/2018 10:33:26,3,1,1,0,0,0,0,...,40,0,0,0,0,0,0,0,0,FTP-BruteForce\n
2,21,6,14/02/2018 10:33:26,3,1,1,0,0,0,0,...,40,0,0,0,0,0,0,0,0,FTP-BruteForce\n
3,21,6,14/02/2018 10:33:26,2,1,1,0,0,0,0,...,40,0,0,0,0,0,0,0,0,FTP-BruteForce\n
4,21,6,14/02/2018 10:33:26,2,1,1,0,0,0,0,...,40,0,0,0,0,0,0,0,0,FTP-BruteForce\n


### Check the DstPort and Protocol

In [8]:
print ("Attacks:{}\n\ndstPort per attack: {}\n\n Protocol per attack: {}".format(attacks,dstPort, protocol))

Attacks:['FTP-BruteForce\n', 'SSH-Bruteforce\n']

dstPort per attack: [['21'], ['21', '22']]

 Protocol per attack: [['6'], ['6']]


### The FTP-BruteForce attack FlowID (172.31.70.4, 172.31.69.25, srcPort, 21, 6)
### The SSH-BruteForce attack FlowID (172.31.70.6, 172.31.69.25, srcPort, 21, 6)
### The SSH-BruteForce attack FlowID (172.31.70.6, 172.31.69.25, srcPort, 22, 6)

# Now, get the SrcPort fro pcap file
### Note:  FTP-BruteForce attacker ip is 172.31.70.4 (Valid IP:18.221.219.4)
### Note:  SSH-BruteForce attacker ip is 172.31.70.4 (Valid IP:13.58.98.64)

In [9]:
#read pcap file
pcap_file=pcap_data_path+'Wednesday-14-02-2018/pcap/UCAP172.31.69.25'
cap = open(pcap_file, 'rb')
capdata = savefile.load_savefile(cap, verbose=True)
cap.close()

[+] attempting to load /media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Wednesday-14-02-2018/pcap/UCAP172.31.69.25
[+] found valid header
[+] loaded 4718327 packets
[+] finished loading savefile.


In [11]:
uniqueflows, flows, timestamps = get_all_flows(capdata)
print('Total trafic: {},{}\n\nUnique flows: {}'.format(len(flows),len(timestamps),len(uniqueflows)))

Total trafic: 4715153,4715153

Unique flows: 1758


In [12]:
uniqueflows[0]

array(['1.30.162.58', '172.31.69.25', '17664', '78', '17'], dtype='<U15')

In [13]:
flows.index(uniqueflows[0].tolist())

4714903

# FTP-BruteForce

In [14]:
attacker_fwd_flows, attacker_bwd_flows  = get_attack_flows(uniqueflows, "18.221.219.4")
attacker_bwd_flows, attacker_fwd_flows

([['172.31.69.25', '18.221.219.4', '17664', '40', '6']],
 [['18.221.219.4', '172.31.69.25', '17664', '60', '6']])

In [16]:
write_labels(attacker_fwd_flows, attacker_bwd_flows, attacks[0])

In [117]:
attacker_fwd_flows[0] in flows

True

### Check the time of the active attack 


In [39]:
attack_active_time(attacker_fwd_flows,flows,timestamps)

Flow: ['18.221.219.4', '172.31.69.25', '17664', '60', '6']

Begin: 2018-02-14 10:33:26   End: 2018-02-14 12:10:31

Number of packets: 193360



In [40]:
attack_active_time(attacker_bwd_flows,flows,timestamps)

Flow: ['172.31.69.25', '18.221.219.4', '17664', '40', '6']

Begin: 2018-02-14 10:33:26   End: 2018-02-14 12:10:31

Number of packets: 193360



# SSH-BruteForce

In [41]:
attacker_fwd_flows, attacker_bwd_flows  = get_attack_flows(uniqueflows, "13.58.98.64")
attacker_bwd_flows , attacker_fwd_flows

([['172.31.69.25', '13.58.98.64', '17664', '1028', '6'],
  ['172.31.69.25', '13.58.98.64', '17664', '116', '6'],
  ['172.31.69.25', '13.58.98.64', '17664', '148', '6'],
  ['172.31.69.25', '13.58.98.64', '17664', '40', '6'],
  ['172.31.69.25', '13.58.98.64', '17664', '52', '6'],
  ['172.31.69.25', '13.58.98.64', '17664', '60', '6'],
  ['172.31.69.25', '13.58.98.64', '17664', '900', '6'],
  ['172.31.69.25', '13.58.98.64', '17664', '93', '6']],
 [['13.58.98.64', '172.31.69.25', '17664', '116', '6'],
  ['13.58.98.64', '172.31.69.25', '17664', '148', '6'],
  ['13.58.98.64', '172.31.69.25', '17664', '164', '6'],
  ['13.58.98.64', '172.31.69.25', '17664', '180', '6'],
  ['13.58.98.64', '172.31.69.25', '17664', '324', '6'],
  ['13.58.98.64', '172.31.69.25', '17664', '52', '6'],
  ['13.58.98.64', '172.31.69.25', '17664', '60', '6'],
  ['13.58.98.64', '172.31.69.25', '17664', '64', '6'],
  ['13.58.98.64', '172.31.69.25', '17664', '68', '6'],
  ['13.58.98.64', '172.31.69.25', '17664', '692', '6']

In [42]:
write_labels(attacker_fwd_flows, attacker_bwd_flows, attacks[1])

In [43]:
attack_active_time(attacker_fwd_flows,flows,timestamps)

Flow: ['13.58.98.64', '172.31.69.25', '17664', '116', '6']

Begin: 2018-02-14 14:01:50   End: 2018-02-14 15:32:30

Number of packets: 565192

Flow: ['13.58.98.64', '172.31.69.25', '17664', '148', '6']

Begin: 2018-02-14 14:01:50   End: 2018-02-14 15:32:30

Number of packets: 434275

Flow: ['13.58.98.64', '172.31.69.25', '17664', '164', '6']

Begin: 2018-02-14 14:01:50   End: 2018-02-14 15:32:30

Number of packets: 130893

Flow: ['13.58.98.64', '172.31.69.25', '17664', '180', '6']

Begin: 2018-02-14 14:02:00   End: 2018-02-14 15:30:12

Number of packets: 24

Flow: ['13.58.98.64', '172.31.69.25', '17664', '324', '6']

Begin: 2018-02-14 14:01:50   End: 2018-02-14 15:32:30

Number of packets: 94207

Flow: ['13.58.98.64', '172.31.69.25', '17664', '52', '6']

Begin: 2018-02-14 14:01:50   End: 2018-02-14 15:32:30

Number of packets: 606982

Flow: ['13.58.98.64', '172.31.69.25', '17664', '60', '6']

Begin: 2018-02-14 14:01:21   End: 2018-02-14 15:32:30

Number of packets: 94237

Flow: ['13.58.

# Day 2: DoS-GoldenEye AND DoS-Slowloris
### FlowID format (srcIP, dstIp, srcPort, dstPort, protocol)
### The DoS-GoldenEye attack FlowID (172.31.70.46, 172.31.69.25, srcPort, dstPort, protocol)
### The DoS-Slowloris attack FlowID (172.31.70.8, 172.31.69.25, srcPort, dstPort, protocol)

In [6]:
#read processed data
day2 = read_file(files[1])
features = file_features(day2)

#fix only for day4 because the file is large
x_day2 = np.array([[item[0],item[-1]] for item in day2])
attacks = day_attack(x_day2)
attack_records,dstPort, protocol = extract_flowdata(day2, attacks)
print("Features:\n {}\n\nAttacks:{}\n\nSize of attack recods: {}".format(features,attacks,attack_records.shape))

Features:
 ['Dst Port' 'Protocol' 'Timestamp' 'Flow Duration' 'Tot Fwd Pkts'
 'Tot Bwd Pkts' 'TotLen Fwd Pkts' 'TotLen Bwd Pkts' 'Fwd Pkt Len Max'
 'Fwd Pkt Len Min' 'Fwd Pkt Len Mean' 'Fwd Pkt Len Std' 'Bwd Pkt Len Max'
 'Bwd Pkt Len Min' 'Bwd Pkt Len Mean' 'Bwd Pkt Len Std' 'Flow Byts/s'
 'Flow Pkts/s' 'Flow IAT Mean' 'Flow IAT Std' 'Flow IAT Max'
 'Flow IAT Min' 'Fwd IAT Tot' 'Fwd IAT Mean' 'Fwd IAT Std' 'Fwd IAT Max'
 'Fwd IAT Min' 'Bwd IAT Tot' 'Bwd IAT Mean' 'Bwd IAT Std' 'Bwd IAT Max'
 'Bwd IAT Min' 'Fwd PSH Flags' 'Bwd PSH Flags' 'Fwd URG Flags'
 'Bwd URG Flags' 'Fwd Header Len' 'Bwd Header Len' 'Fwd Pkts/s'
 'Bwd Pkts/s' 'Pkt Len Min' 'Pkt Len Max' 'Pkt Len Mean' 'Pkt Len Std'
 'Pkt Len Var' 'FIN Flag Cnt' 'SYN Flag Cnt' 'RST Flag Cnt' 'PSH Flag Cnt'
 'ACK Flag Cnt' 'URG Flag Cnt' 'CWE Flag Count' 'ECE Flag Cnt'
 'Down/Up Ratio' 'Pkt Size Avg' 'Fwd Seg Size Avg' 'Bwd Seg Size Avg'
 'Fwd Byts/b Avg' 'Fwd Pkts/b Avg' 'Fwd Blk Rate Avg' 'Bwd Byts/b Avg'
 'Bwd Pkts/b Avg' 'Bwd Blk

In [7]:
records = write_to_file(attack_records, files[1])
records.head()

Unnamed: 0,Dst Port,Protocol,Timestamp,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,...,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,80,6,15/02/2018 09:27:46,6010454,4,4,285,972,285,0,...,32,0,0,0,0,0,0,0,0,DoS attacks-GoldenEye\n
1,80,6,15/02/2018 09:27:46,6005042,4,4,422,662,422,0,...,32,0,0,0,0,0,0,0,0,DoS attacks-GoldenEye\n
2,80,6,15/02/2018 09:27:46,6003639,4,4,548,972,548,0,...,32,0,0,0,0,0,0,0,0,DoS attacks-GoldenEye\n
3,80,6,15/02/2018 09:27:46,5998377,4,4,327,972,327,0,...,32,0,0,0,0,0,0,0,0,DoS attacks-GoldenEye\n
4,80,6,15/02/2018 09:27:46,5999806,4,4,378,972,378,0,...,32,0,0,0,0,0,0,0,0,DoS attacks-GoldenEye\n


In [8]:
#read pcap file
pcap_file=pcap_data_path+'Thursday-15-02-2018/pcap/UCAP172.31.69.25'
cap = open(pcap_file, 'rb')
capdata = savefile.load_savefile(cap, verbose=True)
cap.close()

[+] attempting to load /media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Thursday-15-02-2018/pcap/UCAP172.31.69.25
[+] found valid header
[+] loaded 424625 packets
[+] finished loading savefile.


In [9]:
uniqueflows, flows, timestamps = get_all_flows(capdata)
print('Total trafic: {},{}\n\nUnique flows: {}'.format(len(flows),len(timestamps),len(uniqueflows)))

Total trafic: 421521,421521

Unique flows: 2648


# DoS-GoldenEye

In [12]:
attacker_fwd_flows, attacker_bwd_flows  = get_attack_flows(uniqueflows, "18.219.211.138")
attacker_bwd_flows, attacker_fwd_flows

([['172.31.69.25', '18.219.211.138', '17664', '1024', '6'],
  ['172.31.69.25', '18.219.211.138', '17664', '40', '6'],
  ['172.31.69.25', '18.219.211.138', '17664', '52', '6'],
  ['172.31.69.25', '18.219.211.138', '17664', '60', '6'],
  ['172.31.69.25', '18.219.211.138', '17664', '714', '6']],
 [['18.219.211.138', '172.31.69.25', '17664', '261', '6'],
  ['18.219.211.138', '172.31.69.25', '17664', '267', '6'],
  ['18.219.211.138', '172.31.69.25', '17664', '269', '6'],
  ['18.219.211.138', '172.31.69.25', '17664', '270', '6'],
  ['18.219.211.138', '172.31.69.25', '17664', '271', '6'],
  ['18.219.211.138', '172.31.69.25', '17664', '272', '6'],
  ['18.219.211.138', '172.31.69.25', '17664', '273', '6'],
  ['18.219.211.138', '172.31.69.25', '17664', '274', '6'],
  ['18.219.211.138', '172.31.69.25', '17664', '275', '6'],
  ['18.219.211.138', '172.31.69.25', '17664', '276', '6'],
  ['18.219.211.138', '172.31.69.25', '17664', '277', '6'],
  ['18.219.211.138', '172.31.69.25', '17664', '278', '6']

In [13]:
write_labels(attacker_fwd_flows, attacker_bwd_flows, attacks[0])

In [14]:
attack_active_time(attacker_fwd_flows,flows,timestamps)

Flow: ['18.219.211.138', '172.31.69.25', '17664', '261', '6']

Begin: 2018-02-15 09:55:55   End: 2018-02-15 09:55:55

Number of packets: 1

Flow: ['18.219.211.138', '172.31.69.25', '17664', '267', '6']

Begin: 2018-02-15 09:29:33   End: 2018-02-15 09:57:03

Number of packets: 6

Flow: ['18.219.211.138', '172.31.69.25', '17664', '269', '6']

Begin: 2018-02-15 09:39:30   End: 2018-02-15 09:51:51

Number of packets: 2

Flow: ['18.219.211.138', '172.31.69.25', '17664', '270', '6']

Begin: 2018-02-15 09:30:58   End: 2018-02-15 09:52:34

Number of packets: 3

Flow: ['18.219.211.138', '172.31.69.25', '17664', '271', '6']

Begin: 2018-02-15 09:29:54   End: 2018-02-15 09:50:48

Number of packets: 3

Flow: ['18.219.211.138', '172.31.69.25', '17664', '272', '6']

Begin: 2018-02-15 09:29:49   End: 2018-02-15 09:56:39

Number of packets: 21

Flow: ['18.219.211.138', '172.31.69.25', '17664', '273', '6']

Begin: 2018-02-15 09:28:08   End: 2018-02-15 09:50:53

Number of packets: 2

Flow: ['18.219.211.

Flow: ['18.219.211.138', '172.31.69.25', '17664', '326', '6']

Begin: 2018-02-15 09:29:12   End: 2018-02-15 09:56:48

Number of packets: 37

Flow: ['18.219.211.138', '172.31.69.25', '17664', '327', '6']

Begin: 2018-02-15 09:28:08   End: 2018-02-15 09:57:20

Number of packets: 45

Flow: ['18.219.211.138', '172.31.69.25', '17664', '328', '6']

Begin: 2018-02-15 09:27:54   End: 2018-02-15 09:57:10

Number of packets: 44

Flow: ['18.219.211.138', '172.31.69.25', '17664', '329', '6']

Begin: 2018-02-15 09:27:42   End: 2018-02-15 09:55:48

Number of packets: 41

Flow: ['18.219.211.138', '172.31.69.25', '17664', '330', '6']

Begin: 2018-02-15 09:27:46   End: 2018-02-15 09:56:55

Number of packets: 45

Flow: ['18.219.211.138', '172.31.69.25', '17664', '331', '6']

Begin: 2018-02-15 09:27:54   End: 2018-02-15 09:57:09

Number of packets: 44

Flow: ['18.219.211.138', '172.31.69.25', '17664', '332', '6']

Begin: 2018-02-15 09:27:47   End: 2018-02-15 09:57:09

Number of packets: 55

Flow: ['18.21

Flow: ['18.219.211.138', '172.31.69.25', '17664', '387', '6']

Begin: 2018-02-15 09:27:42   End: 2018-02-15 09:57:10

Number of packets: 125

Flow: ['18.219.211.138', '172.31.69.25', '17664', '388', '6']

Begin: 2018-02-15 09:27:54   End: 2018-02-15 09:56:47

Number of packets: 138

Flow: ['18.219.211.138', '172.31.69.25', '17664', '389', '6']

Begin: 2018-02-15 09:27:42   End: 2018-02-15 09:57:20

Number of packets: 129

Flow: ['18.219.211.138', '172.31.69.25', '17664', '390', '6']

Begin: 2018-02-15 09:27:46   End: 2018-02-15 09:57:20

Number of packets: 143

Flow: ['18.219.211.138', '172.31.69.25', '17664', '391', '6']

Begin: 2018-02-15 09:27:49   End: 2018-02-15 09:56:47

Number of packets: 117

Flow: ['18.219.211.138', '172.31.69.25', '17664', '392', '6']

Begin: 2018-02-15 09:27:54   End: 2018-02-15 09:57:10

Number of packets: 135

Flow: ['18.219.211.138', '172.31.69.25', '17664', '393', '6']

Begin: 2018-02-15 09:27:42   End: 2018-02-15 09:57:10

Number of packets: 121

Flow: 

Flow: ['18.219.211.138', '172.31.69.25', '17664', '451', '6']

Begin: 2018-02-15 09:27:42   End: 2018-02-15 09:57:20

Number of packets: 158

Flow: ['18.219.211.138', '172.31.69.25', '17664', '452', '6']

Begin: 2018-02-15 09:27:42   End: 2018-02-15 09:57:10

Number of packets: 162

Flow: ['18.219.211.138', '172.31.69.25', '17664', '453', '6']

Begin: 2018-02-15 09:27:42   End: 2018-02-15 09:57:09

Number of packets: 131

Flow: ['18.219.211.138', '172.31.69.25', '17664', '454', '6']

Begin: 2018-02-15 09:27:42   End: 2018-02-15 09:57:09

Number of packets: 135

Flow: ['18.219.211.138', '172.31.69.25', '17664', '455', '6']

Begin: 2018-02-15 09:27:42   End: 2018-02-15 09:57:20

Number of packets: 158

Flow: ['18.219.211.138', '172.31.69.25', '17664', '456', '6']

Begin: 2018-02-15 09:27:46   End: 2018-02-15 09:57:09

Number of packets: 164

Flow: ['18.219.211.138', '172.31.69.25', '17664', '457', '6']

Begin: 2018-02-15 09:27:49   End: 2018-02-15 09:57:10

Number of packets: 173

Flow: 

Flow: ['18.219.211.138', '172.31.69.25', '17664', '512', '6']

Begin: 2018-02-15 09:27:54   End: 2018-02-15 09:57:20

Number of packets: 141

Flow: ['18.219.211.138', '172.31.69.25', '17664', '513', '6']

Begin: 2018-02-15 09:27:46   End: 2018-02-15 09:57:18

Number of packets: 129

Flow: ['18.219.211.138', '172.31.69.25', '17664', '514', '6']

Begin: 2018-02-15 09:27:42   End: 2018-02-15 09:56:48

Number of packets: 132

Flow: ['18.219.211.138', '172.31.69.25', '17664', '515', '6']

Begin: 2018-02-15 09:28:16   End: 2018-02-15 09:57:09

Number of packets: 100

Flow: ['18.219.211.138', '172.31.69.25', '17664', '516', '6']

Begin: 2018-02-15 09:27:47   End: 2018-02-15 09:57:10

Number of packets: 105

Flow: ['18.219.211.138', '172.31.69.25', '17664', '517', '6']

Begin: 2018-02-15 09:27:46   End: 2018-02-15 09:57:09

Number of packets: 102

Flow: ['18.219.211.138', '172.31.69.25', '17664', '518', '6']

Begin: 2018-02-15 09:27:42   End: 2018-02-15 09:57:20

Number of packets: 103

Flow: 

Flow: ['18.219.211.138', '172.31.69.25', '17664', '573', '6']

Begin: 2018-02-15 09:27:48   End: 2018-02-15 09:56:41

Number of packets: 76

Flow: ['18.219.211.138', '172.31.69.25', '17664', '574', '6']

Begin: 2018-02-15 09:28:01   End: 2018-02-15 09:56:09

Number of packets: 34

Flow: ['18.219.211.138', '172.31.69.25', '17664', '575', '6']

Begin: 2018-02-15 09:27:46   End: 2018-02-15 09:56:40

Number of packets: 90

Flow: ['18.219.211.138', '172.31.69.25', '17664', '576', '6']

Begin: 2018-02-15 09:27:47   End: 2018-02-15 09:56:10

Number of packets: 41

Flow: ['18.219.211.138', '172.31.69.25', '17664', '577', '6']

Begin: 2018-02-15 09:27:42   End: 2018-02-15 09:57:03

Number of packets: 58

Flow: ['18.219.211.138', '172.31.69.25', '17664', '578', '6']

Begin: 2018-02-15 09:28:16   End: 2018-02-15 09:57:09

Number of packets: 56

Flow: ['18.219.211.138', '172.31.69.25', '17664', '579', '6']

Begin: 2018-02-15 09:27:42   End: 2018-02-15 09:57:09

Number of packets: 55

Flow: ['18.21

Flow: ['18.219.211.138', '172.31.69.25', '17664', '637', '6']

Begin: 2018-02-15 09:27:46   End: 2018-02-15 09:57:03

Number of packets: 37

Flow: ['18.219.211.138', '172.31.69.25', '17664', '638', '6']

Begin: 2018-02-15 09:28:16   End: 2018-02-15 09:57:20

Number of packets: 35

Flow: ['18.219.211.138', '172.31.69.25', '17664', '639', '6']

Begin: 2018-02-15 09:27:46   End: 2018-02-15 09:56:55

Number of packets: 36

Flow: ['18.219.211.138', '172.31.69.25', '17664', '640', '6']

Begin: 2018-02-15 09:27:46   End: 2018-02-15 09:55:48

Number of packets: 13

Flow: ['18.219.211.138', '172.31.69.25', '17664', '641', '6']

Begin: 2018-02-15 09:28:44   End: 2018-02-15 09:56:40

Number of packets: 35

Flow: ['18.219.211.138', '172.31.69.25', '17664', '642', '6']

Begin: 2018-02-15 09:27:47   End: 2018-02-15 09:56:55

Number of packets: 44

Flow: ['18.219.211.138', '172.31.69.25', '17664', '643', '6']

Begin: 2018-02-15 09:27:42   End: 2018-02-15 09:54:59

Number of packets: 29

Flow: ['18.21

Flow: ['18.219.211.138', '172.31.69.25', '17664', '696', '6']

Begin: 2018-02-15 09:28:16   End: 2018-02-15 09:54:45

Number of packets: 13

Flow: ['18.219.211.138', '172.31.69.25', '17664', '697', '6']

Begin: 2018-02-15 09:28:08   End: 2018-02-15 09:52:55

Number of packets: 9

Flow: ['18.219.211.138', '172.31.69.25', '17664', '698', '6']

Begin: 2018-02-15 09:37:48   End: 2018-02-15 09:56:40

Number of packets: 7

Flow: ['18.219.211.138', '172.31.69.25', '17664', '699', '6']

Begin: 2018-02-15 09:28:08   End: 2018-02-15 09:56:48

Number of packets: 12

Flow: ['18.219.211.138', '172.31.69.25', '17664', '700', '6']

Begin: 2018-02-15 09:29:12   End: 2018-02-15 09:56:48

Number of packets: 11

Flow: ['18.219.211.138', '172.31.69.25', '17664', '701', '6']

Begin: 2018-02-15 09:30:43   End: 2018-02-15 09:54:59

Number of packets: 13

Flow: ['18.219.211.138', '172.31.69.25', '17664', '702', '6']

Begin: 2018-02-15 09:29:54   End: 2018-02-15 09:55:55

Number of packets: 28

Flow: ['18.219.

Flow: ['18.219.211.138', '172.31.69.25', '17664', '758', '6']

Begin: 2018-02-15 09:39:58   End: 2018-02-15 09:54:52

Number of packets: 4

Flow: ['18.219.211.138', '172.31.69.25', '17664', '761', '6']

Begin: 2018-02-15 09:28:44   End: 2018-02-15 09:56:55

Number of packets: 3

Flow: ['18.219.211.138', '172.31.69.25', '17664', '762', '6']

Begin: 2018-02-15 09:27:49   End: 2018-02-15 09:38:05

Number of packets: 2

Flow: ['18.219.211.138', '172.31.69.25', '17664', '763', '6']

Begin: 2018-02-15 09:28:09   End: 2018-02-15 09:56:40

Number of packets: 7

Flow: ['18.219.211.138', '172.31.69.25', '17664', '764', '6']

Begin: 2018-02-15 09:29:33   End: 2018-02-15 09:54:36

Number of packets: 12

Flow: ['18.219.211.138', '172.31.69.25', '17664', '765', '6']

Begin: 2018-02-15 09:51:30   End: 2018-02-15 09:56:02

Number of packets: 4

Flow: ['18.219.211.138', '172.31.69.25', '17664', '766', '6']

Begin: 2018-02-15 09:28:37   End: 2018-02-15 09:53:34

Number of packets: 6

Flow: ['18.219.211.

# DoS-Slowloris

In [15]:
attacker_fwd_flows, attacker_bwd_flows  = get_attack_flows(uniqueflows, "18.217.165.70")
attacker_bwd_flows, attacker_fwd_flows

([['172.31.69.25', '18.217.165.70', '17664', '40', '6'],
  ['172.31.69.25', '18.217.165.70', '17664', '52', '6'],
  ['172.31.69.25', '18.217.165.70', '17664', '568', '6'],
  ['172.31.69.25', '18.217.165.70', '17664', '60', '6']],
 [['18.217.165.70', '172.31.69.25', '17664', '282', '6'],
  ['18.217.165.70', '172.31.69.25', '17664', '290', '6'],
  ['18.217.165.70', '172.31.69.25', '17664', '40', '6'],
  ['18.217.165.70', '172.31.69.25', '17664', '52', '6'],
  ['18.217.165.70', '172.31.69.25', '17664', '60', '6']])

In [16]:
write_labels(attacker_fwd_flows, attacker_bwd_flows, attacks[1])

In [17]:
attack_active_time(attacker_fwd_flows,flows,timestamps)

Flow: ['18.217.165.70', '172.31.69.25', '17664', '282', '6']

Begin: 2018-02-15 11:00:12   End: 2018-02-15 11:41:47

Number of packets: 49282

Flow: ['18.217.165.70', '172.31.69.25', '17664', '290', '6']

Begin: 2018-02-15 11:01:34   End: 2018-02-15 11:42:01

Number of packets: 2559

Flow: ['18.217.165.70', '172.31.69.25', '17664', '40', '6']

Begin: 2018-02-15 11:41:34   End: 2018-02-15 11:42:01

Number of packets: 1021

Flow: ['18.217.165.70', '172.31.69.25', '17664', '52', '6']

Begin: 2018-02-15 11:00:12   End: 2018-02-15 11:41:38

Number of packets: 10016

Flow: ['18.217.165.70', '172.31.69.25', '17664', '60', '6']

Begin: 2018-02-15 11:00:12   End: 2018-02-15 11:41:34

Number of packets: 22152



# Day 3: DoS-SlowHTTPTest AND DoS-Hulk
### FlowID format (srcIP, dstIp, srcPort, dstPort, protocol)
### The DoS-GoldenEye attack FlowID (172.31.70.23, 172.31.69.25, srcPort, dstPort, protocol)
### The DoS-Slowloris attack FlowID (172.31.70.16, 172.31.69.25, srcPort, dstPort, protocol)

In [6]:
#read processed data
day3 = read_file(files[2])
features = file_features(day3)

#fix only for day4 because the file is large
x_day3 = np.array([[item[0],item[-1]] for item in day3])
attacks = day_attack(x_day3)
attack_records,dstPort, protocol = extract_flowdata(day3, attacks)
print("Features:\n {}\n\nAttacks:{}\n\nSize of attack recods: {}".format(features,attacks,attack_records.shape))

Features:
 ['Dst Port' 'Protocol' 'Timestamp' 'Flow Duration' 'Tot Fwd Pkts'
 'Tot Bwd Pkts' 'TotLen Fwd Pkts' 'TotLen Bwd Pkts' 'Fwd Pkt Len Max'
 'Fwd Pkt Len Min' 'Fwd Pkt Len Mean' 'Fwd Pkt Len Std' 'Bwd Pkt Len Max'
 'Bwd Pkt Len Min' 'Bwd Pkt Len Mean' 'Bwd Pkt Len Std' 'Flow Byts/s'
 'Flow Pkts/s' 'Flow IAT Mean' 'Flow IAT Std' 'Flow IAT Max'
 'Flow IAT Min' 'Fwd IAT Tot' 'Fwd IAT Mean' 'Fwd IAT Std' 'Fwd IAT Max'
 'Fwd IAT Min' 'Bwd IAT Tot' 'Bwd IAT Mean' 'Bwd IAT Std' 'Bwd IAT Max'
 'Bwd IAT Min' 'Fwd PSH Flags' 'Bwd PSH Flags' 'Fwd URG Flags'
 'Bwd URG Flags' 'Fwd Header Len' 'Bwd Header Len' 'Fwd Pkts/s'
 'Bwd Pkts/s' 'Pkt Len Min' 'Pkt Len Max' 'Pkt Len Mean' 'Pkt Len Std'
 'Pkt Len Var' 'FIN Flag Cnt' 'SYN Flag Cnt' 'RST Flag Cnt' 'PSH Flag Cnt'
 'ACK Flag Cnt' 'URG Flag Cnt' 'CWE Flag Count' 'ECE Flag Cnt'
 'Down/Up Ratio' 'Pkt Size Avg' 'Fwd Seg Size Avg' 'Bwd Seg Size Avg'
 'Fwd Byts/b Avg' 'Fwd Pkts/b Avg' 'Fwd Blk Rate Avg' 'Bwd Byts/b Avg'
 'Bwd Pkts/b Avg' 'Bwd Blk

In [7]:
records = write_to_file(attack_records, files[2])
records.head()

Unnamed: 0,Dst Port,Protocol,Timestamp,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,...,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,80,6,16/02/2018 01:45:27,1793,3,4,364,935,364,0,...,32,0,0,0,0,0,0,0,0,DoS attacks-Hulk\n
1,80,6,16/02/2018 01:45:27,1720,3,4,300,935,300,0,...,32,0,0,0,0,0,0,0,0,DoS attacks-Hulk\n
2,80,6,16/02/2018 01:45:27,191,2,0,0,0,0,0,...,32,0,0,0,0,0,0,0,0,DoS attacks-Hulk\n
3,80,6,16/02/2018 01:45:27,912,3,4,359,935,359,0,...,32,0,0,0,0,0,0,0,0,DoS attacks-Hulk\n
4,80,6,16/02/2018 01:45:27,1869,3,4,384,935,384,0,...,32,0,0,0,0,0,0,0,0,DoS attacks-Hulk\n


# DoS-SlowHTTPTest  AND  DoS-Hulk


# THIS FLIE DOESN'T WORK

In [None]:
#read pcap file
attackers =['13.59.126.31',
            '18.219.193.20']

flows_file= csv_dist_path+'friday_16'

pcap_file1='/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Friday-16-02-2018/pcap/UCAP172.31.69.25'
pcap_file2='/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Friday-16-02-2018/pcap/UCAP172.31.69.25-part2'

cap1 = open(pcap_file1, 'rb')
get_all_flows_2(cap1, attackers, flows_file)
cap1.close()

cap2 = open(pcap_file2, 'rb')
get_all_flows_2(cap2, attackers, flows_file)
cap2.close()

unique_flows = read_unique_flows(flows_file)

#Add two minutes for the time range, Just in case. Original attack time is > attack_time = ["10:12:00", "11:08:00"]
attack_name1 = 'DoS-SlowHTTPTest'
attack_time1 = ["10:14:00", "11:10:00"]


#Add two minutes for the time range, Just in case. Original attack time is > attack_time = ["13:45:00", "14:19:00"]
attack_name2 = 'DoS-Hulk'
attack_time2 = ["13:47:00", "14:21:00"]

labels_file = csv_dist_path+'labels.txt'
get_attacks_labels(unique_flows, attack_time1, attack_name1, labels_file)
get_attacks_labels(unique_flows, attack_time2, attack_name2, labels_file)


#  Day 4: DDoS attacks-LOIC-HTTP AND DDoS-LOIC-UDP
### FlowID format (srcIP, dstIp, srcPort, dstPort, protocol)
### The DoS-GoldenEye attack FlowID (x, 172.31.69.25, srcPort, dstPort, protocol)
### The DoS-Slowloris attack FlowID (x, 172.31.69.28, srcPort, dstPort, protocol)

In [10]:
#read processed data
day4 = read_file(files[3])
features = file_features(day4)

#fix only for day4 because the file is large
x_day4 = np.array([[item[0],item[-1]] for item in day4])
attacks = day_attack(x_day4)
attack_records,dstPort, protocol = extract_flowdata(day4, attacks)
print("Features:\n {}\n\nAttacks:{}\n\nSize of attack recods: {}".format(features,attacks,attack_records.shape))

Features:
 ['Flow ID', 'Src IP', 'Src Port', 'Dst IP', 'Dst Port', 'Protocol', 'Timestamp', 'Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max', 'Fwd Pkt Len Min', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std', 'Bwd Pkt Len Max', 'Bwd Pkt Len Min', 'Bwd Pkt Len Mean', 'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Tot', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkts/s', 'Bwd Pkts/s', 'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean', 'Pkt Len Std', 'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt', 'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt', 'URG Flag Cnt', 'CWE Flag Count', 'ECE Flag Cnt', 'Down/Up Ratio', 'Pkt Size Avg', 'Fwd Seg Size Avg', 'Bwd Seg Size Avg', 'Fwd Byts

In [11]:
records = write_to_file(attack_records, files[3])
records.head()

Unnamed: 0,Flow ID,Src IP,Src Port,Dst IP,Dst Port,Protocol,Timestamp,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,...,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,172.31.69.25-52.14.136.135-80-50819-6,52.14.136.135,50819,172.31.69.25,80,6,20/02/2018 10:13:54,3904,3,4,...,20,0,0,0,0,0,0,0,0,DDoS attacks-LOIC-HTTP\n
1,172.31.69.25-52.14.136.135-80-50820-6,52.14.136.135,50820,172.31.69.25,80,6,20/02/2018 10:13:54,762,3,4,...,20,0,0,0,0,0,0,0,0,DDoS attacks-LOIC-HTTP\n
2,172.31.69.25-52.14.136.135-80-50821-6,52.14.136.135,50821,172.31.69.25,80,6,20/02/2018 10:13:54,726,3,4,...,20,0,0,0,0,0,0,0,0,DDoS attacks-LOIC-HTTP\n
3,172.31.69.25-52.14.136.135-80-50822-6,52.14.136.135,50822,172.31.69.25,80,6,20/02/2018 10:13:54,556,3,4,...,20,0,0,0,0,0,0,0,0,DDoS attacks-LOIC-HTTP\n
4,172.31.69.25-52.14.136.135-80-50823-6,52.14.136.135,50823,172.31.69.25,80,6,20/02/2018 10:13:54,916,3,4,...,20,0,0,0,0,0,0,0,0,DDoS attacks-LOIC-HTTP\n


In [6]:
data =pd.read_csv('/media/mo/HDD/intrusion_detection/dataset/AttacksRecords/Thuesday-20-02-2018_TrafficForML_CICFlowMeter.csv')
data.head()

Unnamed: 0,Flow ID,Src IP,Src Port,Dst IP,Dst Port,Protocol,Timestamp,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,...,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,172.31.69.25-52.14.136.135-80-50819-6,52.14.136.135,50819,172.31.69.25,80,6,20/02/2018 10:13:54,3904,3,4,...,20,0,0,0,0,0.0,0,0.0,0.0,DDoS attacks-LOIC-HTTP\n
1,172.31.69.25-52.14.136.135-80-50820-6,52.14.136.135,50820,172.31.69.25,80,6,20/02/2018 10:13:54,762,3,4,...,20,0,0,0,0,0.0,0,0.0,0.0,DDoS attacks-LOIC-HTTP\n
2,172.31.69.25-52.14.136.135-80-50821-6,52.14.136.135,50821,172.31.69.25,80,6,20/02/2018 10:13:54,726,3,4,...,20,0,0,0,0,0.0,0,0.0,0.0,DDoS attacks-LOIC-HTTP\n
3,172.31.69.25-52.14.136.135-80-50822-6,52.14.136.135,50822,172.31.69.25,80,6,20/02/2018 10:13:54,556,3,4,...,20,0,0,0,0,0.0,0,0.0,0.0,DDoS attacks-LOIC-HTTP\n
4,172.31.69.25-52.14.136.135-80-50823-6,52.14.136.135,50823,172.31.69.25,80,6,20/02/2018 10:13:54,916,3,4,...,20,0,0,0,0,0.0,0,0.0,0.0,DDoS attacks-LOIC-HTTP\n


In [7]:
a = data['Flow ID']
a.shape

(576191,)

In [13]:
unique_flows = np.unique(a)
unique_flows = np.array([item.split('-') for item in unique_flows])

In [14]:
unique_flows [:10]

array([['172.31.69.25', '18.216.200.189', '80', '49152', '6'],
       ['172.31.69.25', '18.216.200.189', '80', '49153', '6'],
       ['172.31.69.25', '18.216.200.189', '80', '49154', '6'],
       ['172.31.69.25', '18.216.200.189', '80', '49155', '6'],
       ['172.31.69.25', '18.216.200.189', '80', '49156', '6'],
       ['172.31.69.25', '18.216.200.189', '80', '49157', '6'],
       ['172.31.69.25', '18.216.200.189', '80', '49158', '6'],
       ['172.31.69.25', '18.216.200.189', '80', '49159', '6'],
       ['172.31.69.25', '18.216.200.189', '80', '49160', '6'],
       ['172.31.69.25', '18.216.200.189', '80', '49161', '6']],
      dtype='<U14')

In [15]:
unique_flows[:,1]

array(['18.216.200.189', '18.216.200.189', '18.216.200.189', ...,
       '52.14.136.135', '52.14.136.135', '52.14.136.135'], dtype='<U14')

# Attack  attacks-LOIC-HTTP

In [18]:
attackers =['18.218.115.60',
            '18.219.9.1',
            '18.219.32.43',
            '18.218.55.126',
            '52.14.136.135',
            '18.219.5.43',
            '18.216.200.189',
            '18.218.229.235',
            '18.218.11.51',
            '18.216.24.42']

for item in attackers:
    print (item in unique_flows[:,1])

True
True
True
True
True
True
True
True
True
True


In [54]:
len(unique_flows)

163539

In [53]:
flowids = [[item[1],item[0],item[3],item[2],item[4], "Attack attacks-LOIC-HTTP\n"] for item in unique_flows if item[1]in attackers]
len(flowids)

163539

In [55]:
flowids[-1]

['52.14.136.135',
 '172.31.69.25',
 '65534',
 '80',
 '6',
 'Attack attacks-LOIC-HTTP\n']

In [44]:
i=0
for item in flowids:
    if item[1] == '172.31.69.25':
        i+=1
print (i)

163539


In [56]:
uni_labeled_file = open(csv_dist_path+"uni_labels.txt", 'a')
for item in flowids:
    for initem in (item):
        if initem == item[-1]:
            split =''
        else:
            split=',' 
        uni_labeled_file.write(initem +split)
    #uni_labeled_file.write(attack)


#  Day 5: DDOS attack-HOIC AND DDoS-LOIC-UDP
### FlowID format (srcIP, dstIp, srcPort, dstPort, protocol)

In [6]:
#read processed data
day5 = read_file(files[4])
features = file_features(day5)

#fix only for day4 because the file is large
x_day5 = np.array([[item[0],item[-1]] for item in day5])
attacks = day_attack(x_day5)
attack_records,dstPort, protocol = extract_flowdata(day5, attacks)
print("Features:\n {}\n\nAttacks:{}\n\nSize of attack recods: {}".format(features,attacks,attack_records.shape))

Features:
 ['Dst Port', 'Protocol', 'Timestamp', 'Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max', 'Fwd Pkt Len Min', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std', 'Bwd Pkt Len Max', 'Bwd Pkt Len Min', 'Bwd Pkt Len Mean', 'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Tot', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkts/s', 'Bwd Pkts/s', 'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean', 'Pkt Len Std', 'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt', 'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt', 'URG Flag Cnt', 'CWE Flag Count', 'ECE Flag Cnt', 'Down/Up Ratio', 'Pkt Size Avg', 'Fwd Seg Size Avg', 'Bwd Seg Size Avg', 'Fwd Byts/b Avg', 'Fwd Pkts/b Avg', 'Fwd Blk Rate Av

In [7]:
records = write_to_file(attack_records, files[4])
records.head()

Unnamed: 0,Dst Port,Protocol,Timestamp,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,...,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,80,6,21/02/2018 02:11:08,31105,3,4,336,935,336,0,...,20,0,0,0,0,0,0,0,0,DDOS attack-HOIC\n
1,80,6,21/02/2018 02:11:08,30092,3,4,336,935,336,0,...,20,0,0,0,0,0,0,0,0,DDOS attack-HOIC\n
2,80,6,21/02/2018 02:11:08,33993,3,4,336,935,336,0,...,20,0,0,0,0,0,0,0,0,DDOS attack-HOIC\n
3,80,6,21/02/2018 02:11:08,33099,3,4,336,935,336,0,...,20,0,0,0,0,0,0,0,0,DDOS attack-HOIC\n
4,80,6,21/02/2018 02:11:08,32149,3,4,336,935,336,0,...,20,0,0,0,0,0,0,0,0,DDOS attack-HOIC\n


In [11]:
records.columns

Index(['Dst Port', 'Protocol', 'Timestamp', 'Flow Duration', 'Tot Fwd Pkts',
       'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max',
       'Fwd Pkt Len Min', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std',
       'Bwd Pkt Len Max', 'Bwd Pkt Len Min', 'Bwd Pkt Len Mean',
       'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean',
       'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Tot',
       'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min',
       'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max',
       'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags',
       'Bwd URG Flags', 'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkts/s',
       'Bwd Pkts/s', 'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean',
       'Pkt Len Std', 'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt',
       'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt', 'URG Flag Cnt',
       'CWE Flag Count', 'ECE Flag Cnt', 'Down/Up Ratio', 'Pkt Size Avg',
      

In [10]:
records = pd.read_csv("/media/mo/HDD/intrusion_detection/dataset/AttacksRecords/Wednesday-21-02-2018_TrafficForML_CICFlowMeter.csv") 
protocols =records['Dst Port']
unique_protocols = np.unique(protocols)
unique_protocols

array([80])

# DDoS-LOIC-UDP AND DDOS attack-HOIC

In [6]:
attackers =['18.218.115.60',
            '18.219.9.1',
            '18.219.32.43',
            '18.218.55.126',
            '52.14.136.135',
            '18.219.5.43',
            '18.216.200.189',
            '18.218.229.235',
            '18.218.11.51',
            '18.216.24.42']


pcap_file1='/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Wednesday-21-02-2018/pcap/UCAP172.31.69.28 part 1'
pcap_file2='/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Wednesday-21-02-2018/pcap/UCAP172.31.69.28 part 2'
flow_file_= csv_dist_path+'day5'

cap1 = open(pcap_file1, 'rb')
get_all_flows_2(cap1, attackers, flow_file_)
cap1.close()

cap2 = open(pcap_file2, 'rb')
get_all_flows_2(cap2, attackers, flow_file_)
cap2.close()



In [10]:
unique_flows = read_unique_flows(csv_dist_path+'day5')
len(unique_flows)

141329

In [11]:
#Add two minutes for the time range, Just in case. Original attack time is > attack_time = ["10:09:00", "10:43:00"]
attack_name1 = 'DDOS-LOIC-UDP'
attack_time1 = ["14:11:00", "14:45:00"]


#Add two minutes for the time range, Just in case. Original attack time is > attack_time = ["14:05:00", "15:05:00"]
attack_name2 = 'DDOS-HOIC'
attack_time2 = ["18:07:00", "19:07:00"]

labels_file = csv_dist_path+'uni_labels.txt'
get_attacks_labels(unique_flows, attack_time1, attack_name1, labels_file)
get_attacks_labels(unique_flows, attack_time2, attack_name2, labels_file)


#  Day 6: Brute Force -Web AND Brute Force -XSS AND SQL Injection


# Brute Force -Web

In [7]:
#read processed data
day6 = read_file(files[5])
features = file_features(day6)

#fix only for day4 because the file is large
x_day6 = np.array([[item[0],item[-1]] for item in day6])
attacks = day_attack(x_day6)
attack_records,dstPort, protocol = extract_flowdata(day6, attacks)
print("Features:\n {}\n\nAttacks:{}\n\nSize of attack recods: {}".format(features,attacks,attack_records.shape))

Features:
 ['Dst Port', 'Protocol', 'Timestamp', 'Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max', 'Fwd Pkt Len Min', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std', 'Bwd Pkt Len Max', 'Bwd Pkt Len Min', 'Bwd Pkt Len Mean', 'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Tot', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkts/s', 'Bwd Pkts/s', 'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean', 'Pkt Len Std', 'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt', 'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt', 'URG Flag Cnt', 'CWE Flag Count', 'ECE Flag Cnt', 'Down/Up Ratio', 'Pkt Size Avg', 'Fwd Seg Size Avg', 'Bwd Seg Size Avg', 'Fwd Byts/b Avg', 'Fwd Pkts/b Avg', 'Fwd Blk Rate Av

In [9]:
records = write_to_file(attack_records, files[5])
records.head()

Unnamed: 0,Dst Port,Protocol,Timestamp,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,...,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,80,6,22/02/2018 10:13:44,6499677,13,10,2333,4425,431,0,...,20,0,0,0,0,0,0,0,0,Brute Force -Web\n
1,80,6,22/02/2018 10:13:51,46,2,0,0,0,0,0,...,20,0,0,0,0,0,0,0,0,Brute Force -Web\n
2,80,6,22/02/2018 10:13:45,6397036,4,7,372,9375,372,0,...,20,0,0,0,0,0,0,0,0,Brute Force -Web\n
3,80,6,22/02/2018 10:13:51,6,2,0,0,0,0,0,...,20,0,0,0,0,0,0,0,0,Brute Force -Web\n
4,80,6,22/02/2018 10:13:51,5022507,4,4,599,364,599,0,...,20,0,0,0,0,0,0,0,0,Brute Force -Web\n


### This file needs time adjustment +4hours

In [12]:
#read pcap file
attackers =['18.218.115.60']

pcap_file1='/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Thursday-22-02-2018/pcap/UCAP172.31.69.28'
flows_file_= csv_dist_path+'day6'

cap1 = open(pcap_file1, 'rb')
get_all_flows_2(cap1, attackers,flows_file_)
cap1.close()

unique_flows = read_unique_flows(flows_file_)


#Add two minutes for the time range, Just in case. Original attack time is > attack_time = ["10:17:00", "11:24:00"]
attack_name1 = 'Brute Force-Web'
attack_time1 = ["14:17:00", "15:26:00"]


#Add two minutes for the time range, Just in case. Original attack time is > attack_time = ["16:17", "16:29:00"]
attack_name2 = 'Brute Force-XSS'
attack_time2 = ["17:50:00", "18:31:00"]


attack_name3 = 'SQL Injection'
attack_time3 = ["20:15:00", "20:31:00"]



labels_file = csv_dist_path+'uni_labels.txt'
get_attacks_labels(unique_flows, attack_time1, attack_name1, labels_file)
get_attacks_labels(unique_flows, attack_time2, attack_name2, labels_file)
get_attacks_labels(unique_flows, attack_time3, attack_name3, labels_file)


#  Day 7: Brute Force -Web AND Brute Force -XSS AND SQL Injection


In [6]:
#read processed data
day7 = read_file(files[6])
features = file_features(day7)

#fix only for day4 because the file is large
x_day7 = np.array([[item[0],item[-1]] for item in day7])
attacks = day_attack(x_day7)
attack_records,dstPort, protocol = extract_flowdata(day7, attacks)
print("Features:\n {}\n\nAttacks:{}\n\nSize of attack recods: {}".format(features,attacks,attack_records.shape))

Features:
 ['Dst Port', 'Protocol', 'Timestamp', 'Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max', 'Fwd Pkt Len Min', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std', 'Bwd Pkt Len Max', 'Bwd Pkt Len Min', 'Bwd Pkt Len Mean', 'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Tot', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkts/s', 'Bwd Pkts/s', 'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean', 'Pkt Len Std', 'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt', 'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt', 'URG Flag Cnt', 'CWE Flag Count', 'ECE Flag Cnt', 'Down/Up Ratio', 'Pkt Size Avg', 'Fwd Seg Size Avg', 'Bwd Seg Size Avg', 'Fwd Byts/b Avg', 'Fwd Pkts/b Avg', 'Fwd Blk Rate Av

In [7]:
records = write_to_file(attack_records, files[6])
records.head()

Unnamed: 0,Dst Port,Protocol,Timestamp,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,...,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,80,6,23/02/2018 10:04:30,5821967,3,1,0,0,0,0,...,20,0,0,0,0,0.0,0.0,0,0,Brute Force -Web\n
1,80,6,23/02/2018 10:04:30,55332482,154,107,54442,80771,646,0,...,20,0,0,0,0,0.0,0.0,0,0,Brute Force -Web\n
2,80,6,23/02/2018 10:05:25,177,2,0,0,0,0,0,...,20,0,0,0,0,0.0,0.0,0,0,Brute Force -Web\n
3,0,0,23/02/2018 10:02:37,112640956,3,0,0,0,0,0,...,0,0,0,0,0,56320478.0,53.7401153702,56320516,56320440,Brute Force -Web\n
4,500,17,23/02/2018 10:02:50,89479476,6,0,3000,0,500,500,...,8,4000257,0,4000257,4000257,21369804.75,15281005.6764957,41990219,7200399,Brute Force -Web\n


In [13]:
#read pcap file
attackers =['18.218.115.60']

pcap_file1='/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Friday-23-02-2018/pcap/UCAP172.31.69.28'
flows_file_= csv_dist_path+'day7'

cap1 = open(pcap_file1, 'rb')
get_all_flows_2(cap1, attackers,flows_file_)
cap1.close()

unique_flows = read_unique_flows(flows_file_)


#Add two minutes for the time range, Just in case. 
attack_name1 = 'Brute Force-Web'
attack_time1 = ["14:03:00", "15:05:00"]


#Add two minutes for the time range, Just in case.
attack_name2 = 'Brute Force-XSS'
attack_time2 = ["17:00:00", "18:12:00"]


attack_name3 = 'SQL Injection'
attack_time3 = ["19:13:00", "19:20:00"]



labels_file = csv_dist_path+'uni_labels.txt'
get_attacks_labels(unique_flows, attack_time1, attack_name1, labels_file)
get_attacks_labels(unique_flows, attack_time2, attack_name2, labels_file)
get_attacks_labels(unique_flows, attack_time3, attack_name3, labels_file)


#  Day 8: Infiltration


In [6]:
#read processed data
day8 = read_file(files[7])
features = file_features(day8)

#fix only for day4 because the file is large
x_day8 = np.array([[item[0],item[-1]] for item in day8])
attacks = day_attack(x_day8)
attack_records,dstPort, protocol = extract_flowdata(day8, attacks)
print("Features:\n {}\n\nAttacks:{}\n\nSize of attack recods: {}".format(features,attacks,attack_records.shape))

Features:
 ['Dst Port', 'Protocol', 'Timestamp', 'Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max', 'Fwd Pkt Len Min', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std', 'Bwd Pkt Len Max', 'Bwd Pkt Len Min', 'Bwd Pkt Len Mean', 'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Tot', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkts/s', 'Bwd Pkts/s', 'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean', 'Pkt Len Std', 'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt', 'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt', 'URG Flag Cnt', 'CWE Flag Count', 'ECE Flag Cnt', 'Down/Up Ratio', 'Pkt Size Avg', 'Fwd Seg Size Avg', 'Bwd Seg Size Avg', 'Fwd Byts/b Avg', 'Fwd Pkts/b Avg', 'Fwd Blk Rate Av

In [7]:
records = write_to_file(attack_records, files[7])
records.head()

Unnamed: 0,Dst Port,Protocol,Timestamp,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,...,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,443,6,28/02/2018 10:50:03,198,3,0,77,0,46,0,...,20,0,0,0,0,0,0,0,0,Infilteration\n
1,52345,6,28/02/2018 10:50:03,0,2,0,0,0,0,0,...,20,0,0,0,0,0,0,0,0,Infilteration\n
2,443,6,28/02/2018 10:50:05,223,3,0,77,0,46,0,...,20,0,0,0,0,0,0,0,0,Infilteration\n
3,443,6,28/02/2018 10:50:05,587,3,0,77,0,46,0,...,20,0,0,0,0,0,0,0,0,Infilteration\n
4,443,6,28/02/2018 10:50:05,763,3,0,77,0,46,0,...,20,0,0,0,0,0,0,0,0,Infilteration\n


In [6]:
#read pcap file
attackers =['13.58.225.34']

pcap_file1='/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Wednesday-28-02-2018/pcap/capEC2AMAZ-O4EL3NG-172.31.69.24- part1'
pcap_file2='/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Wednesday-28-02-2018/pcap/capEC2AMAZ-O4EL3NG-172.31.69.24-part2'

flows_file_= csv_dist_path+'day8'

cap1 = open(pcap_file1, 'rb')
get_all_flows_2(cap1, attackers,flows_file_)
cap1.close()

cap2 = open(pcap_file2, 'rb')
get_all_flows_2(cap2, attackers,flows_file_)
cap1.close()

unique_flows = read_unique_flows(flows_file_)


#Add two minutes for the time range, Just in case. 
attack_name1 = 'Infiltration'
attack_time1 = ["14:50:00", "16:07:00"]


#Add two minutes for the time range, Just in case.
attack_name2 = 'Infiltration'
attack_time2 = ["17:42:00", "18:42:00"]


labels_file = csv_dist_path+'uni_labels.txt'
get_attacks_labels(unique_flows, attack_time1, attack_name1, labels_file)
get_attacks_labels(unique_flows, attack_time2, attack_name2, labels_file)


#  Day 9: Infiltration


In [6]:
#read processed data
day9 = read_file(files[8])
features = file_features(day9)

#fix only for day4 because the file is large
x_day9 = np.array([[item[0],item[-1]] for item in day9])
attacks = day_attack(x_day9)
attack_records,dstPort, protocol = extract_flowdata(day9, attacks)
print("Features:\n {}\n\nAttacks:{}\n\nSize of attack recods: {}".format(features,attacks,attack_records.shape))

Features:
 ['Dst Port', 'Protocol', 'Timestamp', 'Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max', 'Fwd Pkt Len Min', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std', 'Bwd Pkt Len Max', 'Bwd Pkt Len Min', 'Bwd Pkt Len Mean', 'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Tot', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkts/s', 'Bwd Pkts/s', 'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean', 'Pkt Len Std', 'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt', 'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt', 'URG Flag Cnt', 'CWE Flag Count', 'ECE Flag Cnt', 'Down/Up Ratio', 'Pkt Size Avg', 'Fwd Seg Size Avg', 'Bwd Seg Size Avg', 'Fwd Byts/b Avg', 'Fwd Pkts/b Avg', 'Fwd Blk Rate Av

In [7]:
records = write_to_file(attack_records, files[8])
records.head()

Unnamed: 0,Dst Port,Protocol,Timestamp,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,...,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,3389,6,01/03/2018 02:00:00,1669202,8,7,1128,1581,661,0,...,20,0,0,0,0,0,0.0,0,0,Infilteration\n
1,3389,6,01/03/2018 02:00:01,1665774,8,7,1128,1581,661,0,...,20,0,0,0,0,0,0.0,0,0,Infilteration\n
2,35076,6,01/03/2018 02:00:02,447631,1,1,0,0,0,0,...,32,0,0,0,0,0,0.0,0,0,Infilteration\n
3,0,0,01/03/2018 02:00:03,112634925,60,0,0,0,0,0,...,0,27300000,33700000,66100000,4952610,10200000,3990749.302,14100000,6109541,Infilteration\n
4,3389,6,01/03/2018 02:00:03,2523028,11,7,1132,1581,661,0,...,20,0,0,0,0,0,0.0,0,0,Infilteration\n


In [8]:
#read pcap file
attackers =['13.58.225.34']

pcap_file1='/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Thursday-01-03-2018/pcap/capEC2AMAZ-O4EL3NG-172.31.69.13 part1'
pcap_file2='/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Thursday-01-03-2018/pcap/capEC2AMAZ-O4EL3NG-172.31.69.13 part2'
pcap_file3='/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Thursday-01-03-2018/pcap/capEC2AMAZ-O4EL3NG-172.31.69.13 part3'

flows_file_= csv_dist_path+'day9'

cap1 = open(pcap_file1, 'rb')
get_all_flows_2(cap1, attackers,flows_file_)
cap1.close()

cap2 = open(pcap_file2, 'rb')
get_all_flows_2(cap2, attackers,flows_file_)
cap1.close()

cap3 = open(pcap_file3, 'rb')
get_all_flows_2(cap3, attackers,flows_file_)
cap1.close()

unique_flows = read_unique_flows(flows_file_)


#Add two minutes for the time range, Just in case. 
attack_name1 = 'Infiltration'
attack_time1 = ["13:57:00", "14:57:00"]


#Add two minutes for the time range, Just in case.
attack_name2 = 'Infiltration'
attack_time2 = ["18:00:00", "19:39:00"]



labels_file = csv_dist_path+'uni_labels.txt'
get_attacks_labels(unique_flows, attack_time1, attack_name1, labels_file)
get_attacks_labels(unique_flows, attack_time2, attack_name2, labels_file)


#  Day 10: Bot

In [6]:
#read processed data
day10 = read_file(files[9])
features = file_features(day10)

#fix only for day4 because the file is large
x_day10 = np.array([[item[0],item[-1]] for item in day10])
attacks = day_attack(x_day10)
attack_records,dstPort, protocol = extract_flowdata(day10, attacks)
print("Features:\n {}\n\nAttacks:{}\n\nSize of attack recods: {}".format(features,attacks,attack_records.shape))

Features:
 ['Dst Port', 'Protocol', 'Timestamp', 'Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max', 'Fwd Pkt Len Min', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std', 'Bwd Pkt Len Max', 'Bwd Pkt Len Min', 'Bwd Pkt Len Mean', 'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Tot', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkts/s', 'Bwd Pkts/s', 'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean', 'Pkt Len Std', 'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt', 'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt', 'URG Flag Cnt', 'CWE Flag Count', 'ECE Flag Cnt', 'Down/Up Ratio', 'Pkt Size Avg', 'Fwd Seg Size Avg', 'Bwd Seg Size Avg', 'Fwd Byts/b Avg', 'Fwd Pkts/b Avg', 'Fwd Blk Rate Av

In [7]:
records = write_to_file(attack_records, files[9])
records.head()

Unnamed: 0,Dst Port,Protocol,Timestamp,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,...,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,8080,6,02/03/2018 10:17:07,16079,3,4,326,129,326,0,...,20,0,0,0,0,0,0,0,0,Bot\n
1,8080,6,02/03/2018 10:17:07,577,2,0,0,0,0,0,...,20,0,0,0,0,0,0,0,0,Bot\n
2,8080,6,02/03/2018 10:17:09,10279,3,4,326,129,326,0,...,20,0,0,0,0,0,0,0,0,Bot\n
3,8080,6,02/03/2018 10:17:09,457,2,0,0,0,0,0,...,20,0,0,0,0,0,0,0,0,Bot\n
4,8080,6,02/03/2018 10:17:11,10271,3,4,326,129,326,0,...,20,0,0,0,0,0,0,0,0,Bot\n


In [8]:
#read pcap file
attackers =['18.219.211.138']

pcap_files=['/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Friday-02-03-2018/pcap/capEC2AMAZ-O4EL3NG-172.31.69.23',
            '/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Friday-02-03-2018/pcap/capEC2AMAZ-O4EL3NG-172.31.69.17',
            '/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Friday-02-03-2018/pcap/capEC2AMAZ-O4EL3NG-172.31.69.14',
            '/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Friday-02-03-2018/pcap/capEC2AMAZ-O4EL3NG-172.31.69.12',
            '/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Friday-02-03-2018/pcap/capEC2AMAZ-O4EL3NG-172.31.69.10',
            '/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Friday-02-03-2018/pcap/capEC2AMAZ-O4EL3NG-172.31.69.8',
            '/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Friday-02-03-2018/pcap/capEC2AMAZ-O4EL3NG-172.31.69.6',
            '/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Friday-02-03-2018/pcap/capEC2AMAZ-O4EL3NG-172.31.69.26',
            '/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Friday-02-03-2018/pcap/capEC2AMAZ-O4EL3NG-172.31.69.29',
            '/media/mo/HDD/intrusion_detection/dataset/OriginalNetwork TrafficandLogdata/Friday-02-03-2018/pcap/capEC2AMAZ-O4EL3NG-172.31.69.30']


flows_file_= csv_dist_path+'day10'

for pcap_file in pcap_files:
    cap = open(pcap_file, 'rb')
    get_all_flows_2(cap, attackers,flows_file_)
    cap.close()

unique_flows = read_unique_flows(flows_file_)


#Add two minutes for the time range, Just in case. 
attack_name = 'Bot'
attack_time1 = ["14:11:00", "15:36:00"]
attack_time2 = ["18:24:00", "19:57:00"]

labels_file = csv_dist_path+'uni_labels.txt'
get_attacks_labels(unique_flows, attack_time1, attack_name, labels_file)
get_attacks_labels(unique_flows, attack_time2, attack_name, labels_file)
