# DATASETS AND PCAP FILE SOURCES

## UNSW-NB15
### source: https://research.unsw.edu.au/projects/unsw-nb15-dataset

## KDD CUP99 (no pcap)
### source: http://kdd.ics.uci.edu/databases/kddcup99/kddcup99.html

## CIC-IDS
### source: https://www.unb.ca/cic/datasets/ids-2017.html



# CODE

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
pd.set_option("display.max_columns", 200)
import warnings
warnings.filterwarnings('ignore')
from socket import getservbyname as getServNum
import socket
from datetime import datetime

### Read datasets and labels

In [2]:
df = pd.read_csv("./csv/1/argus.csv")                                             # dataset Argus

zconn = pd.read_csv("./csv/1/conn.log", sep='\t', skiprows = [0, 1, 2, 3, 4, 5, 7]) # dataset Zeek Conn
zconn.columns = np.concatenate([zconn.columns[1:], ['drop']])                 # mark extra column for drop
zconn.drop('drop', axis = 1, inplace = True)                                  # drop marked column

zhttp = pd.read_csv("./csv/1/http.log", sep='\t', skiprows = [0, 1, 2, 3, 4, 5, 7]) # dataset Zeek http
zhttp.columns = np.concatenate([zhttp.columns[1:], ['drop']])                 # mark extra column for drop
zhttp.drop('drop', axis = 1, inplace = True)                                  # drop marked column
# trans_depth and response_body_len

zftp = pd.read_csv("./csv/1/ftp.log", sep='\t', skiprows = [0, 1, 2, 3, 4, 5, 7])   # dataset Zeek ftp
zftp.columns = np.concatenate([zftp.columns[1:], ['drop']])                   # mark extra column for drop
zftp.drop('drop', axis = 1, inplace = True)                                   # drop marked column

print(df.shape, zconn.shape, zhttp.shape, zftp.shape)

(46260, 36) (43411, 21) (7820, 30) (4215, 19)


# Features taken from Argus and Zeek log files

### As shown in original UNSW-NB15 CSV file available online 
dur, proto, service, state, spkts, dpkts, sbytes, dbytes, rate, sttl, dttl, sload, dload, sloss, dloss, sinpkt, dinpkt, sjit, djit, swin, stcpb, dtcpb, dwin, tcprtt, synack, ackdat, smean, dmean, trans_depth, response_body_len, ct_srv_src, ct_state_ttl, ct_dst_ltm, ct_src_dport_ltm, ct_dst_sport_ltm, ct_dst_src_ltm, is_ftp_login, ct_ftp_cmd, ct_flw_http_mthd, ct_src_ltm, ct_srv_dst, is_sm_ips_ports

> ### Argus
> - 1 SrcAddr
> - 2 Sport
> - 3 DstAddr
> - 4 Dport
> - 5 Proto
> - 6 State
> - 7 dur
> - 8 SrcBytes
> - 9 DstBytes
> - 10 sTtl
> - 11 dTtl
> - 12 SrcLoss
> - 13 DstLoss
> - 14
> - 15 SrcLoad
> - 16 DstLoad
> - 17 SrcPkts
> - 18 DstPkts
> - 19 SrcWin (swin)
> - 20 DstWin (dwin)
> - 21 SrcTCPBase
> - 22 DstTCPBase
> - 23 sPktSz (smeansz/sMeanPktSz)
> - 24 dPktSz (dmeansz/dMeanPktSz)
> - 25
> - 26
> - 27 SrcJitter
> - 28 DstJitter
> - 29 StartTime
> - 30 LastTime
> - 31 SIntPkt
> - 32 DIntPkt
> - 33 TcpRtt
> - 34 SynAck
> - 35 AckDat
> - @ Rate @
> - @ TotAppByte @
> - @ PCRatio @
> - Additionally: Trans, Min, Max, Sum.

> ### conn.log
> - 14 service
> - 7 duration
> - conn_state

> ### http.log
> - 25 trans_depth
> - 26 response_body_len
> - 38! method

> ### ftp.log
> - user 
> - password
> - command 
> - Additionally: arg, mime_type, file_size, reply_code, reply_msg, data_channel.passive, data_channel.orig_h, data_channel.resp_h, data_channel.resp_p

# Formating and Merging Data

### Format Argus.csv data

In [3]:
## convert to int
def portsAsInt(x):
    if(isinstance(x,str)):   #if is string
        if x.isnumeric():
            return int(x)    #if contains only decimals
        else:
            return int(x,16) #if contains hex number
    return 0

df[['Sport','Dport']].fillna(0, inplace=True)
df = df.astype({'SrcAddr':'string', 'Sport':'string', 'DstAddr':'string', 'Dport':'string', 'Proto':'string', 'State':'string'})
df['Dport'] = df['Dport'].apply(lambda x: portsAsInt(x))
df['Sport'] = df['Sport'].apply(lambda x: portsAsInt(x))

if (df['Dport'].notna().all() and df['Sport'].notna().all()):
    if (df['Dport'].apply(lambda x: isinstance(x,int)).all() and df['Sport'].apply(lambda x: isinstance(x,int)).all()):
        print("all ports are properly parsed")
    else:
        print("not all port properly parsed")
else:
    print("some ports are NA")
    
df = df.astype({'SrcAddr':'string', 'Sport':'int32', 'DstAddr':'string', 'Dport':'int32', 'Proto':'int32', 'State':'string'})
if isinstance(df['StartTime'][0],str):
    df['StartTime'] = df['StartTime'].apply(lambda x: float(x))
    df['LastTime'] = df['LastTime'].apply(lambda x: float(x))
#    df['StartTime'] = df['StartTime'].apply(lambda x: datetime.strptime(x, '%H:%M:%S.%f').time())
#    df['LastTime'] = df['LastTime'].apply(lambda x: datetime.strptime(x, '%H:%M:%S.%f').time())
df.head(5)

all ports are properly parsed


Unnamed: 0,SrcAddr,Sport,DstAddr,Dport,Proto,State,Dur,SrcBytes,DstBytes,sTtl,dTtl,SrcLoss,DstLoss,SrcLoad,DstLoad,SrcPkts,DstPkts,SrcWin,DstWin,SrcTCPBase,DstTCPBase,sMeanPktSz,dMeanPktSz,SrcJitter,DstJitter,StartTime,LastTime,SIntPkt,DIntPkt,TcpRtt,SynAck,AckDat,Trans,Min,Max,Sum
0,175.45.176.3,22592,149.171.126.16,143,6,FIN,0.649902,734,42014,62.0,252.0,2,17,8395.112305,503571.3125,14,38,16383.0,16383.0,1417884000.0,3077388000.0,52.42857,1105.631592,61.426934,1387.778333,1424219000.0,1424219000.0,49.915,15.432865,0.0,0.0,0.0,1,0.649902,0.649902,0.649902
1,175.45.176.0,62762,149.171.126.16,56430,6,FIN,1.623129,364,13186,62.0,252.0,1,6,1572.271851,60929.230469,8,16,16383.0,16383.0,2116151000.0,2963115000.0,45.5,824.125,17179.586864,11420.926187,1424219000.0,1424219000.0,231.875578,102.737203,0.111897,0.061458,0.050439,1,1.623129,1.623129,1.623129
2,175.45.176.0,45235,149.171.126.16,21,6,FIN,1.681642,628,770,62.0,252.0,1,3,2740.178955,3358.62207,12,12,16383.0,16383.0,1107119000.0,1047443000.0,52.333332,64.166664,259.080172,4991.784691,1424219000.0,1424219000.0,152.876547,90.235727,0.0,0.0,0.0,1,1.681642,1.681642,1.681642
3,149.171.126.14,179,175.45.176.3,33159,6,FIN,0.121478,258,172,252.0,254.0,0,0,14158.942383,8495.365234,6,4,16383.0,16383.0,621772700.0,2202534000.0,43.0,43.0,30.177549,11.830604,1424219000.0,1424219000.0,24.2956,8.375,0.0,0.0,0.0,1,0.121478,0.121478,0.121478
4,175.45.176.3,43722,149.171.126.14,179,6,FIN,0.380537,534,268,254.0,252.0,2,1,10112.025391,4709.134766,10,6,16383.0,16383.0,3984156000.0,1796040000.0,53.400002,44.666668,2223.730342,82.5505,1424219000.0,1424219000.0,39.928777,52.241,0.172934,0.119331,0.053603,1,0.380537,0.380537,0.380537


## conn.log

### Format conn.log data

In [4]:
if zconn.columns.isin(['id.orig_h','id.orig_p','id.resp_h','id.resp_p']).any():
    badIndex = zconn[['id.orig_p','id.resp_p']].isna().all(axis=1)
    badIndex = badIndex[badIndex].index
    zconn.drop(badIndex, axis=0, inplace=True)
    zconn.reset_index(drop=True, inplace=True)
    zconn = zconn.astype({'id.orig_h':'string', 'id.orig_p':'int32', 'id.resp_h':'string', 'id.resp_p':'int32', 'proto':'string','service':'string'})
    
zconn.columns = ['StartTime', 'uid', 'SrcAddr', 'Sport', 'DstAddr','Dport','Proto', 'service', 'duration', 'orig_bytes', 'resp_bytes','conn_state', 'local_orig', 'local_resp', 'missed_bytes', 'history','orig_pkts', 'orig_ip_bytes', 'resp_pkts', 'resp_ip_bytes','tunnel_parents']

test = zconn['Proto']
for loc in test.index:
    if not(str(test.iloc[loc]).isnumeric()):
        if test.iloc[loc] == "tcp":
            zconn['Proto'].iloc[loc] = '6'
        if test.iloc[loc] == "udp":
            zconn['Proto'].iloc[loc] = '17'
        if test.iloc[loc] == "ipv4":
            zconn['Proto'].iloc[loc] = '4'
        if test.iloc[loc] == "icmp":
            zconn['Proto'].iloc[loc] = '1'
        if test.iloc[loc] == "igmp":
            zconn['Proto'].iloc[loc] = '2'

zconn = zconn.astype({'Proto':'int32'})

zconn['StartTime'] = zconn['StartTime'].apply(lambda x: float(x))
print("Unique protocol list :", zconn['Proto'].unique())
zconn.head(5)

Unique protocol list : [ 6 17  1]


Unnamed: 0,StartTime,uid,SrcAddr,Sport,DstAddr,Dport,Proto,service,duration,orig_bytes,resp_bytes,conn_state,local_orig,local_resp,missed_bytes,history,orig_pkts,orig_ip_bytes,resp_pkts,resp_ip_bytes,tunnel_parents
0,1424219000.0,CgC5eY1bMIRGTskljg,149.171.126.14,179,175.45.176.3,33159,6,-,0.077402,0.0,0.0,SF,-,-,0.0,AFfa,6.0,240.0,4.0,160.0,-
1,1424219000.0,CDla584K7VWGT5A5G4,175.45.176.3,22592,149.171.126.16,143,6,-,0.530764,75.0,20241.0,SF,-,-,0.0,DTdttAFfa,14.0,710.0,38.0,42002.0,-
2,1424219000.0,CVLPPyE38J0ROGxz8,175.45.176.3,43722,149.171.126.14,179,6,-,0.340929,45.0,0.0,SF,-,-,0.0,ShADTfFa,10.0,522.0,6.0,256.0,-
3,1424219000.0,CT5Kzj3cNEBjLauWV6,175.45.176.3,62994,149.171.126.14,179,6,-,0.39147,45.0,0.0,SF,-,-,0.0,ShADTfFa,10.0,522.0,6.0,256.0,-
4,1424219000.0,C7oed52eMrAeyNZdzf,175.45.176.0,62762,149.171.126.16,56430,6,-,1.543715,0.0,6259.0,SF,-,-,0.0,ShAdtFfa,8.0,352.0,16.0,13174.0,-


### Merging data from conn.log

In [5]:
DS = df.merge(zconn[['SrcAddr','Sport','DstAddr','Dport', 'Proto','StartTime','service','duration','conn_state']], how='left', left_on=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto','StartTime'], right_on=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto','StartTime'])
print("Flows: ",DS.shape[0], "\nFlows not in conn.log: ", df.shape[0] - df.merge(zconn[['SrcAddr','Sport','DstAddr','Dport', 'Proto','StartTime','service','duration','conn_state']], how='inner', left_on=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto','StartTime'], right_on=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto','StartTime']).shape[0], "\nFlows only in conn.log: ", zconn.shape[0] - df.merge(zconn[['SrcAddr','Sport','DstAddr','Dport', 'Proto','StartTime','service','duration','conn_state']], how='inner', left_on=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto','StartTime'], right_on=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto','StartTime']).shape[0])
DS.fillna(value={'service': '-','duration': 0,'conn_state': '-'}, inplace=True)

Flows:  46260 
Flows not in conn.log:  2872 
Flows only in conn.log:  22


## http.log

### Formating http.log

In [6]:
if zhttp.columns.isin(['ts','id.orig_h','id.orig_p','id.resp_h','id.resp_p']).any():
    zhttp.columns = ['StartTime', 'uid', 'SrcAddr', 'Sport', 'DstAddr','Dport','trans_depth', 'method', 'host', 'uri', 'referrer', 'version','user_agent', 'origin', 'request_body_len', 'response_body_len','status_code', 'status_msg', 'info_code', 'info_msg', 'tags','username', 'password', 'proxied', 'orig_fuids', 'orig_filenames','orig_mime_types', 'resp_fuids', 'resp_filenames', 'resp_mime_types']
badIndex = zhttp[['Sport','Dport']].isna().all(axis=1)
badIndex = badIndex[badIndex].index
zhttp.drop(badIndex, axis=0, inplace=True)
zhttp.reset_index(drop=True, inplace=True)
zhttp['service'] = 'http'
zhttp['Proto'] = 6
zhttp = zhttp.astype({'StartTime':'float','SrcAddr':'string', 'Sport':'int32', 'DstAddr':'string','Dport':'int32','Proto':'int32','service':'string','trans_depth':'int32','response_body_len':'int32','method':'string'})

### Merging data from http.log (port 80)

In [7]:
DS2 = DS.merge(zhttp[['SrcAddr','Sport','DstAddr','Dport','Proto','service','trans_depth','response_body_len','method']], how='left', left_on=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto','service'], right_on=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto','service'])
print("Flows: ", DS2.shape[0], "\nFlows not in http.log: ", DS2.shape[0] - DS.merge(zhttp[['SrcAddr','Sport','DstAddr','Dport','Proto','service','trans_depth','response_body_len','method']], how='inner', left_on=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto','service'], right_on=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto','service']).shape[0], "\nFlows only in http.log: ", zhttp.shape[0] - DS.merge(zhttp[['SrcAddr','Sport','DstAddr','Dport','Proto','service','trans_depth','response_body_len','method']], how='inner', left_on=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto','service'], right_on=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto','service']).shape[0], "\nFlows in http.log: ", zhttp.shape[0])
print("HTTP Flows in DS: ", DS[DS['service']=='http'].shape[0])
DS2.fillna(value={'trans_depth': 0,'response_body_len': 0,'method': '-'}, inplace=True)
DS2.head(5)

Flows:  49247 
Flows not in http.log:  41477 
Flows only in http.log:  49 
Flows in http.log:  7819
HTTP Flows in DS:  4783


Unnamed: 0,SrcAddr,Sport,DstAddr,Dport,Proto,State,Dur,SrcBytes,DstBytes,sTtl,dTtl,SrcLoss,DstLoss,SrcLoad,DstLoad,SrcPkts,DstPkts,SrcWin,DstWin,SrcTCPBase,DstTCPBase,sMeanPktSz,dMeanPktSz,SrcJitter,DstJitter,StartTime,LastTime,SIntPkt,DIntPkt,TcpRtt,SynAck,AckDat,Trans,Min,Max,Sum,service,duration,conn_state,trans_depth,response_body_len,method
0,175.45.176.3,22592,149.171.126.16,143,6,FIN,0.649902,734,42014,62.0,252.0,2,17,8395.112305,503571.3125,14,38,16383.0,16383.0,1417884000.0,3077388000.0,52.42857,1105.631592,61.426934,1387.778333,1424219000.0,1424219000.0,49.915,15.432865,0.0,0.0,0.0,1,0.649902,0.649902,0.649902,-,0.530764,SF,0.0,0.0,-
1,175.45.176.0,62762,149.171.126.16,56430,6,FIN,1.623129,364,13186,62.0,252.0,1,6,1572.271851,60929.230469,8,16,16383.0,16383.0,2116151000.0,2963115000.0,45.5,824.125,17179.586864,11420.926187,1424219000.0,1424219000.0,231.875578,102.737203,0.111897,0.061458,0.050439,1,1.623129,1.623129,1.623129,-,1.543715,SF,0.0,0.0,-
2,175.45.176.0,45235,149.171.126.16,21,6,FIN,1.681642,628,770,62.0,252.0,1,3,2740.178955,3358.62207,12,12,16383.0,16383.0,1107119000.0,1047443000.0,52.333332,64.166664,259.080172,4991.784691,1424219000.0,1424219000.0,152.876547,90.235727,0.0,0.0,0.0,1,1.681642,1.681642,1.681642,ftp,1.61751,SF,0.0,0.0,-
3,149.171.126.14,179,175.45.176.3,33159,6,FIN,0.121478,258,172,252.0,254.0,0,0,14158.942383,8495.365234,6,4,16383.0,16383.0,621772700.0,2202534000.0,43.0,43.0,30.177549,11.830604,1424219000.0,1424219000.0,24.2956,8.375,0.0,0.0,0.0,1,0.121478,0.121478,0.121478,-,0.077402,SF,0.0,0.0,-
4,175.45.176.3,43722,149.171.126.14,179,6,FIN,0.380537,534,268,254.0,252.0,2,1,10112.025391,4709.134766,10,6,16383.0,16383.0,3984156000.0,1796040000.0,53.400002,44.666668,2223.730342,82.5505,1424219000.0,1424219000.0,39.928777,52.241,0.172934,0.119331,0.053603,1,0.380537,0.380537,0.380537,-,0.340929,SF,0.0,0.0,-


## ftp.log

### Formating ftp.log data

In [8]:
if zftp.columns.isin(['id.orig_h','id.orig_p','id.resp_h','id.resp_p']).any():
    zftp.columns = ['StartTime', 'uid', 'SrcAddr', 'Sport', 'DstAddr','Dport','user','password','command','arg','mime_type','file_size','reply_code','reply_msg','data_channel.passive','data_channel.orig_h','data_channel.resp_h','data_channel.resp_p','fuid']
badIndex = zftp[['Sport','Dport']].isna().all(axis=1)
badIndex = badIndex[badIndex].index
zftp.drop(badIndex, axis=0, inplace=True)
zftp.reset_index(drop=True, inplace=True)
zftp['service'] = 'ftp'
zftp['Proto'] = 6
zftp = zftp.astype({'StartTime':'float','SrcAddr':'string', 'Sport':'int32', 'DstAddr':'string','Dport':'int32','Proto':'int32','service':'string','user':'string','password':'string', 'command':'string'})

### Merging data from ftp.log (port 21)

In [9]:
DS3 = DS2.merge(zftp[['SrcAddr','Sport','DstAddr','Dport','Proto','service','user','password','command']], how='left', left_on=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto','service'], right_on=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto','service'])
print("Flows in DS3: ", DS3.shape[0], "\nFlows in ftp.log: ", zftp.shape[0], "\nFlow in both: ", DS2.merge(zftp[['SrcAddr','Sport','DstAddr','Dport','Proto','service','user','password','command']], how='inner', left_on=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto','service'], right_on=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto','service']).shape[0], "\nNew Flows: ", DS3.shape[0]-DS2.shape[0])
print("Non repeated in zftp", zftp[zftp.duplicated(subset=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto','service'], keep='first')].shape[0])
DS3.fillna(value={'user': '-','password': '-','command': '-'}, inplace=True)
DS3[DS3['service']=='ftp'].head(5)

Flows in DS3:  52592 
Flows in ftp.log:  4214 
Flow in both:  4214 
New Flows:  3345
Non repeated in zftp 3345


Unnamed: 0,SrcAddr,Sport,DstAddr,Dport,Proto,State,Dur,SrcBytes,DstBytes,sTtl,dTtl,SrcLoss,DstLoss,SrcLoad,DstLoad,SrcPkts,DstPkts,SrcWin,DstWin,SrcTCPBase,DstTCPBase,sMeanPktSz,dMeanPktSz,SrcJitter,DstJitter,StartTime,LastTime,SIntPkt,DIntPkt,TcpRtt,SynAck,AckDat,Trans,Min,Max,Sum,service,duration,conn_state,trans_depth,response_body_len,method,user,password,command
2,175.45.176.0,45235,149.171.126.16,21,6,FIN,1.681642,628,770,62.0,252.0,1,3,2740.178955,3358.62207,12,12,16383.0,16383.0,1107119000.0,1047443000.0,52.333332,64.166664,259.080172,4991.784691,1424219000.0,1424219000.0,152.876547,90.235727,0.0,0.0,0.0,1,1.681642,1.681642,1.681642,ftp,1.61751,SF,0.0,0.0,-,<unknown>,-,RETR
23,175.45.176.3,1025,149.171.126.11,21,6,CON,0.964656,690,950,62.0,252.0,5,6,5315.884766,7223.300293,14,12,16383.0,16383.0,996917600.0,59266790.0,49.285713,79.166664,3869.615403,65.75518,1424219000.0,1424219000.0,74.204305,59.700727,0.112429,0.052589,0.05984,1,0.964656,0.964656,0.964656,ftp,0.964656,S1,0.0,0.0,-,anonymous,IEUser@,PASV
186,175.45.176.1,28136,149.171.126.11,21,6,FIN,2.562474,1166,1792,62.0,252.0,7,9,3474.766846,5382.29834,22,26,16383.0,16383.0,1491364000.0,1967567000.0,53.0,68.92308,8291.316651,6151.716475,1424219000.0,1424219000.0,122.02257,101.706,0.046345,0.019188,0.027157,1,2.562474,2.562474,2.562474,ftp,2.494518,SF,0.0,0.0,-,anonymous,IEUser@,PASV
187,175.45.176.1,28136,149.171.126.11,21,6,FIN,2.562474,1166,1792,62.0,252.0,7,9,3474.766846,5382.29834,22,26,16383.0,16383.0,1491364000.0,1967567000.0,53.0,68.92308,8291.316651,6151.716475,1424219000.0,1424219000.0,122.02257,101.706,0.046345,0.019188,0.027157,1,2.562474,2.562474,2.562474,ftp,2.494518,SF,0.0,0.0,-,anonymous,IEUser@,RETR
225,175.45.176.2,43663,149.171.126.15,21,6,FIN,1.434179,1194,1622,62.0,252.0,7,9,6359.039062,8640.483398,22,22,16383.0,16383.0,3121205000.0,1699268000.0,54.272728,73.727272,3674.251123,3695.707862,1424219000.0,1424219000.0,68.294234,64.34043,0.106371,0.059237,0.047134,1,1.434179,1.434179,1.434179,ftp,1.391192,SF,0.0,0.0,-,anonymous,IEUser@,PASV


## Fitting into UNSW-NB15 format

In [10]:
DS = DS3[['SrcAddr', 'Sport', 'DstAddr', 'Dport', 'Proto', 'State', 'Dur','SrcBytes', 'DstBytes', 'sTtl', 'dTtl',
           'SrcLoss', 'DstLoss','service', 'SrcLoad', 'DstLoad', 'SrcPkts', 'DstPkts', 'SrcWin', 'DstWin', 'SrcTCPBase',
           'DstTCPBase', 'sMeanPktSz', 'dMeanPktSz', 'trans_depth','response_body_len', 'SrcJitter', 'DstJitter','StartTime',
           'LastTime', 'SIntPkt', 'DIntPkt', 'TcpRtt', 'SynAck', 'AckDat', 'Trans', 'Min',
           'Max', 'Sum', 'duration', 'conn_state', 'method', 'user', 'password', 'command']]
DS.columns = ['srcip', 'sport', 'dstip', 'dport', 'proto', 'state', 'dur', 'sbytes', 'dbytes', 'sttl', 'dttl',
               'sloss', 'dloss', 'service', 'sload', 'dload', 'spkts', 'dpkts', 'swin', 'dwin', 'stcpb',
               'dtcpb', 'smeansz', 'dmeansz', 'trans_depth', 'res_bdy_len', 'sjit', 'djit', 'stime',
               'ltime', 'sintpkt', 'dintpkt', 'tcprtt', 'synack', 'ackdat', 'Trans', 'Min',
               'Max', 'Sum', 'duration', 'conn_state', 'method', 'user', 'password', 'command']
print(DS.shape)
DS.head(5)

(52592, 45)


Unnamed: 0,srcip,sport,dstip,dport,proto,state,dur,sbytes,dbytes,sttl,dttl,sloss,dloss,service,sload,dload,spkts,dpkts,swin,dwin,stcpb,dtcpb,smeansz,dmeansz,trans_depth,res_bdy_len,sjit,djit,stime,ltime,sintpkt,dintpkt,tcprtt,synack,ackdat,Trans,Min,Max,Sum,duration,conn_state,method,user,password,command
0,175.45.176.3,22592,149.171.126.16,143,6,FIN,0.649902,734,42014,62.0,252.0,2,17,-,8395.112305,503571.3125,14,38,16383.0,16383.0,1417884000.0,3077388000.0,52.42857,1105.631592,0.0,0.0,61.426934,1387.778333,1424219000.0,1424219000.0,49.915,15.432865,0.0,0.0,0.0,1,0.649902,0.649902,0.649902,0.530764,SF,-,-,-,-
1,175.45.176.0,62762,149.171.126.16,56430,6,FIN,1.623129,364,13186,62.0,252.0,1,6,-,1572.271851,60929.230469,8,16,16383.0,16383.0,2116151000.0,2963115000.0,45.5,824.125,0.0,0.0,17179.586864,11420.926187,1424219000.0,1424219000.0,231.875578,102.737203,0.111897,0.061458,0.050439,1,1.623129,1.623129,1.623129,1.543715,SF,-,-,-,-
2,175.45.176.0,45235,149.171.126.16,21,6,FIN,1.681642,628,770,62.0,252.0,1,3,ftp,2740.178955,3358.62207,12,12,16383.0,16383.0,1107119000.0,1047443000.0,52.333332,64.166664,0.0,0.0,259.080172,4991.784691,1424219000.0,1424219000.0,152.876547,90.235727,0.0,0.0,0.0,1,1.681642,1.681642,1.681642,1.61751,SF,-,<unknown>,-,RETR
3,149.171.126.14,179,175.45.176.3,33159,6,FIN,0.121478,258,172,252.0,254.0,0,0,-,14158.942383,8495.365234,6,4,16383.0,16383.0,621772700.0,2202534000.0,43.0,43.0,0.0,0.0,30.177549,11.830604,1424219000.0,1424219000.0,24.2956,8.375,0.0,0.0,0.0,1,0.121478,0.121478,0.121478,0.077402,SF,-,-,-,-
4,175.45.176.3,43722,149.171.126.14,179,6,FIN,0.380537,534,268,254.0,252.0,2,1,-,10112.025391,4709.134766,10,6,16383.0,16383.0,3984156000.0,1796040000.0,53.400002,44.666668,0.0,0.0,2223.730342,82.5505,1424219000.0,1424219000.0,39.928777,52.241,0.172934,0.119331,0.053603,1,0.380537,0.380537,0.380537,0.340929,SF,-,-,-,-


In [11]:
#--------------------------#
# General Purpose Features #
#--------------------------#

#'is_sm_ips_ports'
DS['is_sm_ips_ports'] = DS['srcip'] == DS['dstip']
DS['is_sm_ips_ports'].replace(to_replace={True: 1, False: 0}, inplace=True)
# DS.fillna(value={'is_sm_ips_ports': 0}, inplace=True)

# 'ct_state_ttl'
teste = DS.groupby(['state','sttl','dttl'], as_index=False).size()
for line in teste.index:
    DS.loc[ (DS['state'] == teste.iloc[line,0]) & (DS['sttl'] == teste.iloc[line,1]) & (DS['dttl'] == teste.iloc[line,2]),
           'ct_state_ttl'] = teste.iloc[line,3]

DS.fillna(value={'ct_state_ttl': 0}, inplace=True)
DS['ct_state_ttl'] = DS['ct_state_ttl'].apply(int)
#DS['ct_state_ttl']

# 'ct_flw_http_mthd' 
teste = DS.groupby(['method'], as_index=False).size()
teste.loc[ teste['method'] == '-', 'size'] = 0
for line in teste.index:
    DS.loc[ DS['method'] == teste.iloc[line,0] , 'ct_flw_http_mthd'] = teste.iloc[line,1]
    
DS.fillna(value={'ct_flw_http_mthd': 0}, inplace=True)
DS['ct_flw_http_mthd'] = DS['ct_flw_http_mthd'].apply(int)
DS['ct_flw_http_mthd'].unique()

# is_ftp_login
DS['is_ftp_login'] = '-'
DS.loc[ (DS['user'] == '-') | (DS['user'] == '<unknown>') | (DS['user'] == 'anonymous') | (DS['password'] == '-'), 'is_ftp_login'] = 0
DS.loc[ (DS['is_ftp_login'] != 0) & (DS['service'] == 'ftp'), 'is_ftp_login'] = 1
DS[(DS['is_ftp_login'] == 1)]

# ct_ftp_cmd
teste = DS[DS['service']=='ftp'].groupby(['srcip','dstip','sport','dport','command'], as_index=False).size()
teste.drop(index=teste[teste['command']=='-'].index, inplace=True)
teste = teste.groupby(['srcip','dstip','sport','dport'], as_index=False).size()
teste['service'] = 'ftp'
teste.rename(columns={"size":"ct_ftp_cmd"}, inplace=True)
if not(DS.columns.str.contains('ct_ftp_cmd', regex=False).any()):
    DS = DS.merge(teste, how='left', left_on=['srcip','dstip','sport','dport','service'],
                                right_on=['srcip','dstip','sport','dport','service'])
    DS.fillna(value={'ct_ftp_cmd': 0}, inplace=True)
    DS['ct_ftp_cmd'] = DS['ct_ftp_cmd'].apply(int)
teste.columns

#---------------------#
# Connection Features #
#---------------------#
DS.sort_values('ltime', inplace=True, kind='mergesort', ignore_index=True)

for indice in range(len(DS.index)):
    if indice == 0:
        DS[['ct_srv_src', 'ct_srv_dst', 'ct_dst_ltm', 'ct_src_ltm', 'ct_src_dport_ltm', 'ct_dst_sport_ltm', 'ct_dst_src_ltm']] = np.zeros(7,dtype=int)
        continue
    temp = min(101,indice)
    priors = DS.iloc[range(indice-temp,indice)]
    
    # ct_srv_src
    teste = priors.groupby(['srcip','service'], as_index=False).size()
    teste = teste[(teste[['srcip','service']] == DS[['srcip','service']].iloc[indice]).all(axis=1)]['size']
    if not teste.empty:
        DS.at[indice,'ct_srv_src'] = teste.iloc[0]
    
    # ct_srv_dst
    teste = priors.groupby(['dstip','service'], as_index=False).size()
    teste = teste[(teste[['dstip','service']] == DS[['dstip','service']].iloc[indice]).all(axis=1)]['size']
    if not teste.empty:
        DS.at[indice,'ct_srv_dst'] = teste.iloc[0]
    
    # ct_dst_ltm 
    teste = priors.groupby(['dstip'], as_index=False).size()
    teste = teste[(teste['dstip'] == DS['dstip'].iloc[indice])]['size']
    if not teste.empty:
        DS.at[indice,'ct_dst_ltm'] = teste.iloc[0]
    
    # ct_src_ltm
    teste = priors.groupby(['srcip'], as_index=False).size()
    teste = teste[(teste['srcip'] == DS['srcip'].iloc[indice])]['size']
    if not teste.empty:
        DS.at[indice,'ct_src_ltm'] = teste.iloc[0]
    
    # ct_src_dport_ltm
    teste = priors.groupby(['srcip','dport'], as_index=False).size()
    teste = teste[(teste[['srcip','dport']] == DS[['srcip','dport']].iloc[indice]).all(axis=1)]['size']
    if not teste.empty:
        DS.at[indice,'ct_src_dport_ltm'] = teste.iloc[0]
    
    # ct_dst_sport_ltm
    teste = priors.groupby(['dstip','sport'], as_index=False).size()
    teste = teste[(teste[['dstip','sport']] == DS[['dstip','sport']].iloc[indice]).all(axis=1)]['size']
    if not teste.empty:
        DS.at[indice,'ct_dst_sport_ltm'] = teste.iloc[0]
    
    # ct_dst_src_ltm
    teste = priors.groupby(['srcip','dstip'], as_index=False).size()
    teste = teste[(teste[['srcip','dstip']] == DS[['srcip','dstip']].iloc[indice]).all(axis=1)]['size']
    if not teste.empty:
        DS.at[indice,'ct_dst_src_ltm'] = teste.iloc[0]
    
DS.head(5)

Unnamed: 0,srcip,sport,dstip,dport,proto,state,dur,sbytes,dbytes,sttl,dttl,sloss,dloss,service,sload,dload,spkts,dpkts,swin,dwin,stcpb,dtcpb,smeansz,dmeansz,trans_depth,res_bdy_len,sjit,djit,stime,ltime,sintpkt,dintpkt,tcprtt,synack,ackdat,Trans,Min,Max,Sum,duration,conn_state,method,user,password,command,is_sm_ips_ports,ct_state_ttl,ct_flw_http_mthd,is_ftp_login,ct_ftp_cmd,ct_srv_src,ct_srv_dst,ct_dst_ltm,ct_src_ltm,ct_src_dport_ltm,ct_dst_sport_ltm,ct_dst_src_ltm
0,149.171.126.14,179,175.45.176.3,33159,6,FIN,0.121478,258,172,252.0,254.0,0,0,-,14158.942383,8495.365234,6,4,16383.0,16383.0,621772700.0,2202534000.0,43.0,43.0,0.0,0.0,30.177549,11.830604,1424219000.0,1424219000.0,24.2956,8.375,0.0,0.0,0.0,1,0.121478,0.121478,0.121478,0.077402,SF,-,-,-,-,0,1,0,0,0,0,0,0,0,0,0,0
1,175.45.176.3,22592,149.171.126.16,143,6,FIN,0.649902,734,42014,62.0,252.0,2,17,-,8395.112305,503571.3125,14,38,16383.0,16383.0,1417884000.0,3077388000.0,52.42857,1105.631592,0.0,0.0,61.426934,1387.778333,1424219000.0,1424219000.0,49.915,15.432865,0.0,0.0,0.0,1,0.649902,0.649902,0.649902,0.530764,SF,-,-,-,-,0,834,0,0,0,0,0,0,0,0,0,0
2,175.45.176.3,43722,149.171.126.14,179,6,FIN,0.380537,534,268,254.0,252.0,2,1,-,10112.025391,4709.134766,10,6,16383.0,16383.0,3984156000.0,1796040000.0,53.400002,44.666668,0.0,0.0,2223.730342,82.5505,1424219000.0,1424219000.0,39.928777,52.241,0.172934,0.119331,0.053603,1,0.380537,0.380537,0.380537,0.340929,SF,-,-,-,-,0,2645,0,0,0,1,0,0,1,0,0,0
3,175.45.176.3,62994,149.171.126.14,179,6,FIN,0.449454,534,268,254.0,252.0,2,1,-,8561.499023,3987.059814,10,6,16383.0,16383.0,2436138000.0,1977154000.0,53.400002,44.666668,0.0,0.0,2415.837634,115.806992,1424219000.0,1424219000.0,47.750332,75.659602,0.128381,0.071147,0.057234,1,0.449454,0.449454,0.449454,0.39147,SF,-,-,-,-,0,2645,0,0,0,2,1,1,2,1,0,1
4,175.45.176.0,62762,149.171.126.16,56430,6,FIN,1.623129,364,13186,62.0,252.0,1,6,-,1572.271851,60929.230469,8,16,16383.0,16383.0,2116151000.0,2963115000.0,45.5,824.125,0.0,0.0,17179.586864,11420.926187,1424219000.0,1424219000.0,231.875578,102.737203,0.111897,0.061458,0.050439,1,1.623129,1.623129,1.623129,1.543715,SF,-,-,-,-,0,834,0,0,0,0,1,1,0,0,0,0


# Labels

In [12]:
if DS.columns.isin(['Trans', 'Min', 'Max', 'Sum', 'duration', 'conn_state', 'method', 'user', 'password', 'command']).any():
    DS.drop(['Trans', 'Min', 'Max', 'Sum', 'duration', 'conn_state', 'method', 'user', 'password', 'command'], axis = 1, inplace = True)
labels = pd.read_csv("./labels/NUSW-NB15_GT.csv")
labels.head(5)

Unnamed: 0,Start time,Last time,Attack category,Attack subcategory,Protocol,Source IP,Source Port,Destination IP,Destination Port,Attack Name,Attack Reference,.
0,1421927414,1421927416,Reconnaissance,HTTP,tcp,175.45.176.0,13284,149.171.126.16,80,Domino Web Server Database Access: /doladmin.n...,-,.
1,1421927415,1421927415,Exploits,Unix 'r' Service,udp,175.45.176.3,21223,149.171.126.18,32780,Solaris rwalld Format String Vulnerability (ht...,CVE 2002-0573 (http://cve.mitre.org/cgi-bin/cv...,.
2,1421927416,1421927416,Exploits,Browser,tcp,175.45.176.2,23357,149.171.126.16,80,Windows Metafile (WMF) SetAbortProc() Code Exe...,CVE 2005-4560 (http://cve.mitre.org/cgi-bin/cv...,.
3,1421927417,1421927417,Exploits,Miscellaneous Batch,tcp,175.45.176.2,13792,149.171.126.16,5555,HP Data Protector Backup (https://strikecenter...,CVE 2011-1729 (http://cve.mitre.org/cgi-bin/cv...,.
4,1421927418,1421927418,Exploits,Cisco IOS,tcp,175.45.176.2,26939,149.171.126.10,80,Cisco IOS HTTP Authentication Bypass Level 64 ...,CVE 2001-0537 (http://cve.mitre.org/cgi-bin/cv...,.


### insert attack category and label

In [13]:
labels.rename(columns={'Source IP':'srcip', 'Destination IP':'dstip', 'Source Port':'sport',
                             'Destination Port':'dport', 'Attack category':'attack_cat'}, inplace=True)
labels['attack_cat'] = labels['attack_cat'].str.strip()
if DS.columns.isin(['Label','attack_cat']).any():
    DS.drop(['Label','attack_cat'], axis = 1, inplace = True)
DS = DS.merge(labels[['srcip','dstip','sport','dport','attack_cat']], how='left',
         left_on=['srcip','dstip','sport','dport'],
         right_on=['srcip','dstip','sport','dport'])
DS.fillna(value={'attack_cat': '-'}, inplace=True)
DS['Label'] = 1
DS.loc[DS['attack_cat'] == '-','Label'] = 0
DS.groupby(['attack_cat','Label'], as_index=False).size()

Unnamed: 0,attack_cat,Label,size
0,-,0,51418
1,Analysis,1,243
2,Backdoor,1,1704
3,Backdoors,1,88
4,DoS,1,15277
5,Exploits,1,37873
6,Fuzzers,1,12530
7,Generic,1,3162
8,Reconnaissance,1,5104
9,Shellcode,1,20


In [14]:
DS['attack_cat'].unique()

array(['-', 'Exploits', 'Fuzzers', 'Reconnaissance', 'Backdoor', 'DoS',
       'Generic', 'Analysis', 'Shellcode', 'Worms', 'Backdoors'],
      dtype=object)

In [15]:
DS.to_csv('./output/myNB15.csv', index=None, header=True)

# END!

# CIC Flow Meter

### source: https://pypi.org/project/cicflowmeter/


| Feature Name | Description |
| :--- | :---: |
| Feduration | Duration of the flow in Microsecond |
| Flow Feduration | Duration of the flow in Microsecond |
| total FWwd Packet | Total packets in the forward direction |
| total Bwd packets | Total packets in the backward direction |
| total Length of Fwd Packet | Total size of packet in forward direction |
| total Length of Bwd Packet | Total size of packet in backward direction |
| Fwd Packet Length Min | Minimum size of packet in forward direction |
| Fwd Packet Length Max | Maximum size of packet in forward direction |
| Fwd Packet Length Mean | Mean size of packet in forward direction |
| Fwd Packet Length Std | Standard deviation size of packet in forward direction |
| Bwd Packet Length Min | Minimum size of packet in backward direction |
| Bwd Packet Length Max | Maximum size of packet in backward direction |
| Bwd Packet Length Mean | Mean size of packet in backward direction |
| Bwd Packet Length Std | Standard deviation size of packet in backward direction |
| Flow Byte/s | Number of flow packets per second |
| Flow Packets/s | Number of flow bytes per second |
| Flow IAT Mean | Mean time between two packets sent in the flow |
| Flow IAT Std | Standard deviation time between two packets sent in the flow |
| Flow IAT Max | Maximum time between two packets sent in the flow |
| Flow IAT Min | Minimum time between two packets sent in the flow |
| Fwd IAT Min | Minimum time between two packets sent in the forward direction |
| Fwd IAT Max | Maximum time between two packets sent in the forward direction |
| Fwd IAT Mean | Mean time between two packets sent in the forward direction |
| Fwd IAT Std | Standard deviation time between two packets sent in the forward direction |
| Fwd IAT Total | Total time between two packets sent in the forward direction |
| Bwd IAT Min | Minimum time between two packets sent in the backward direction|
| Bwd IAT Max | Maximum time between two packets sent in the backward direction |
| Bwd IAT Mean | Mean time between two packets sent in the backward direction |
| Bwd IAT Std | Standard deviation time between two packets sent in the backward direction |
| Bwd IAT Total | Total time between two packets sent in the backward direction |
| Fwd PSH flag | Number of times the PSH flag was set in packets travelling in the forward direction (0 for UDP) |
| Bwd PSH Flag | Number of times the PSH flag was set in packets travelling in the backward direction (0 for UDP) |
| Fwd URG Flag | Number of times the URG flag was set in packets travelling in the forward direction (0 for UDP) |
| Bwd URG Flag | Number of times the URG flag was set in packets travelling in the backward direction (0 for UDP) |
| Fwd Header Length | Total bytes used for headers in the forward direction |
| Bwd Header Length | Total bytes used for headers in the backward direction |
| FWD Packets/s | Number of forward packets per second |
| Bwd Packets/s | Number of backward packets per second |
| Min Packet Length | Minimum length of a packet |
| Max Packet Length | Maximum length of a packet |
| Packet Length Mean | Mean length of a packet |
| Packet Length Std | Standard deviation length of a packet |
| Packet Length Variance | Variance length of a packet |
| FIN Flag Count | Number of packets with FIN |
| SYN Flag Count | Number of packets with SYN |
| RST Flag Count | Number of packets with RST |
| PSH Flag Count | Number of packets with PUSH |
| ACK Flag Count | Number of packets with ACK |
| URG Flag Count | Number of packets with URG |
| CWR Flag Count | Number of packets with CWE |
| ECE Flag Count | Number of packets with ECE |
| down/Up Ratio | Download and upload ratio |
| Average Packet Size | Average size of packet |
| Avg Fwd Segment Size | Average size observed in the forward direction |
| AVG Bwd Segment Size | Average number of bytes bulk rate in the forward direction |
| Fwd Header Length | Length of header for forward packet |
| Fwd Avg Bytes/Bulk | Average number of bytes bulk rate in the forward direction |
| Fwd AVG Packet/Bulk | Average number of packets bulk rate in the forward direction |
| Fwd AVG Bulk Rate | Average number of bulk rate in the forward direction |
| Bwd Avg Bytes/Bulk | Average number of bytes bulk rate in the backward direction |
| Bwd AVG Packet/Bulk | Average number of packets bulk rate in the backward direction |
| Bwd AVG Bulk Rate | Average number of bulk rate in the backward direction |
| Subflow Fwd Packets | The average number of packets in a sub flow in the forward direction |
| Subflow Fwd Bytes | The average number of bytes in a sub flow in the forward direction |
| Subflow Bwd Packet | The average number of packets in a sub flow in the backward direction |
| Subflow Bwd Bytes | The average number of bytes in a sub flow in the backward direction |
| Init_Win_bytes_forward | The total number of bytes sent in initial window in the forward direction |
| Init_Win_bytes_backward | The total number of bytes sent in initial window in the backward direction |
| Act_data_pkt_forward | Count of packets with at least 1 byte of TCP data payload in the forward direction |
| min_seg_size_forward | Minimum segment size observed in the forward direction |
| Active Min | Minimum time a flow was active before becoming idle |
| Active Mean | Mean time a flow was active before becoming idle |
| Active Max | Maximum time a flow was active before becoming idle |
| Active Std | Standard deviation time a flow was active before becoming idle |
| Idle Min | Minimum time a flow was idle before becoming active |
| Idle Mean | Mean time a flow was idle before becoming active |
| Idle Max | Maximum time a flow was idle before becoming active |
| Idle Std | Standard deviation time a flow was idle before becoming active |
| total_fpackets | Total packets in the forward direction |
| total_bpackets | Total packets in the backward direction |
| total_fpktl | Total size of packet in forward direction |
| total_bpktl | Total size of packet in backward direction |
| min_fpktl | Minimum size of packet in forward direction |
| min_bpktl | Minimum size of packet in backward direction |
| max_fpktl | Maximum size of packet in forward direction |
| max_bpktl | Maximum size of packet in backward direction |
| mean_fpktl | Mean size of packet in forward direction |
| mean_bpktl | Mean size of packet in backward direction |
| std_fpktl | Standard deviation size of packet in forward direction |
| std_bpktl | Standard deviation size of packet in backward direction |
| total_fiat | Total time between two packets sent in the forward direction |
| total_biat | Total time between two packets sent in the backward direction |
| min_fiat | Minimum time between two packets sent in the forward direction |
| min_biat | Minimum time between two packets sent in the backward direction |
| max_fiat | Maximum time between two packets sent in the forward direction |
| max_biat | Maximum time between two packets sent in the backward direction |
| mean_fiat | Mean time between two packets sent in the forward direction |
| mean_biat | Mean time between two packets sent in the backward direction |
| std_fiat | Standard deviation time between two packets sent in the forward direction |
| std_biat | Standard deviation time between two packets sent in the backward direction |
| fpsh_cnt | Number of times the PSH flag was set in packets travelling in the forward direction (0 for UDP) |
| bpsh_cnt | Number of times the PSH flag was set in packets travelling in the backward direction (0 for UDP) |
| furg_cnt | Number of times the URG flag was set in packets travelling in the forward direction (0 for UDP) |
| burg_cnt | Number of times the URG flag was set in packets travelling in the backward direction (0 for UDP) |
| total_fhlen | Total bytes used for headers in the forward direction |
| total_bhlen | Total bytes used for headers in the backward direction |
| fPktsPerSecond | Number of forward packets per second |
| bPktsPerSecond | Number of backward packets per second |
| flowPktsPerSecond | Number of flow packets per second |
| flowBytesPerSecond | Number of flow bytes per second |
| min_flowpktl | Minimum length of a flow |
| max_flowpktl | Maximum length of a flow |
| mean_flowpktl | Mean length of a flow |
| std_flowpktl | Standard deviation length of a flow |
| min_flowiat | Minimum inter-arrival time of packet |
| max_flowiat | Maximum inter-arrival time of packet |
| mean_flowiat | Mean inter-arrival time of packet |
| std_flowiat | Standard deviation inter-arrival time of packet |
| flow_fin | Number of packets with FIN |
| flow_syn | Number of packets with SYN |
| flow_rst | Number of packets with RST |
| flow_psh | Number of packets with PUSH |
| flow_ack | Number of packets with ACK |
| flow_urg | Number of packets with URG |
| flow_cwr | Number of packets with CWE |
| flow_ece | Number of packets with ECE |
| downUpRatio | Download and upload ratio |
| avgPacketSize | Average size of packet |
| fAvgSegmentSize | Average size observed in the forward direction |
| fAvgBytesPerBulk | Average number of bytes bulk rate in the forward direction |
| fAvgPacketsPerBulk | Average number of packets bulk rate in the forward direction |
| fAvgBulkRate | Average number of bulk rate in the forward direction |
| bAvgSegmentSize | Average size observed in the backward direction |
| bAvgBytesPerBulk | Average number of bytes bulk rate in the backward direction |
| bAvgPacketsPerBulk | Average number of packets bulk rate in the backward direction |
| bAvgBulkRate | Average number of bulk rate in the backward direction |
| sflow_fpacket | The average number of packets in a sub flow in the forward direction |
| sflow_fbytes | The average number of bytes in a sub flow in the forward direction |
| sflow_bpacket | The average number of packets in a sub flow in the backward direction |
| sflow_bbytes | The average number of bytes in a sub flow in the backward direction |
| min_active | Minimum time a flow was active before becoming idle |
| mean_active | Mean time a flow was active before becoming idle |
| max_active | Maximum time a flow was active before becoming idle |
| std_active | Standard deviation time a flow was active before becoming idle |
| min_idle | Minimum time a flow was idle before becoming active |
| mean_idle | Mean time a flow was idle before becoming active |
| max_idle | Maximum time a flow was idle before becoming active |
| std_idle | Standard deviation time a flow was idle before becoming active |
| Init_Win_bytes_forward | The total number of bytes sent in initial window in the forward direction |
| Init_Win_bytes_backward | The total number of bytes sent in initial window in the backward direction |
| Act_data_pkt_forward | Count of packets with at least 1 byte of TCP data payload in the forward direction |
| min_seg_size_forward | Minimum segment size observed in the forward direction |

In [16]:
cicfm = pd.read_csv("./csv/1/cic.csv", sep=',') # dataset CICFlow Meter
cicfm.head(2)

Unnamed: 0,src_ip,dst_ip,src_port,dst_port,protocol,timestamp,flow_duration,flow_byts_s,flow_pkts_s,fwd_pkts_s,bwd_pkts_s,tot_fwd_pkts,tot_bwd_pkts,totlen_fwd_pkts,totlen_bwd_pkts,fwd_pkt_len_max,fwd_pkt_len_min,fwd_pkt_len_mean,fwd_pkt_len_std,bwd_pkt_len_max,bwd_pkt_len_min,bwd_pkt_len_mean,bwd_pkt_len_std,pkt_len_max,pkt_len_min,pkt_len_mean,pkt_len_std,pkt_len_var,fwd_header_len,bwd_header_len,fwd_seg_size_min,fwd_act_data_pkts,flow_iat_mean,flow_iat_max,flow_iat_min,flow_iat_std,fwd_iat_tot,fwd_iat_max,fwd_iat_min,fwd_iat_mean,fwd_iat_std,bwd_iat_tot,bwd_iat_max,bwd_iat_min,bwd_iat_mean,bwd_iat_std,fwd_psh_flags,bwd_psh_flags,fwd_urg_flags,bwd_urg_flags,fin_flag_cnt,syn_flag_cnt,rst_flag_cnt,psh_flag_cnt,ack_flag_cnt,urg_flag_cnt,ece_flag_cnt,down_up_ratio,pkt_size_avg,init_fwd_win_byts,init_bwd_win_byts,active_max,active_min,active_mean,active_std,idle_max,idle_min,idle_mean,idle_std,fwd_byts_b_avg,fwd_pkts_b_avg,bwd_byts_b_avg,bwd_pkts_b_avg,fwd_blk_rate_avg,bwd_blk_rate_avg,fwd_seg_size_avg,bwd_seg_size_avg,cwe_flag_count,subflow_fwd_pkts,subflow_bwd_pkts,subflow_fwd_byts,subflow_bwd_byts
0,175.45.176.3,149.171.126.16,22592,143,2048,2015-02-17 21:23:27,649902.0,67056.263867,80.012063,21.541709,58.470354,14,38,958,42622,84.0,56.0,68.428571,11.286618,1516.0,56.0,1121.631579,607.775427,1516,56,838.076923,698.721135,488211.224852,280,760,20,10,12743.176471,118119.0,1.0,28237.265879,648895.0,154326.0,9.0,49915.0,61426.932029,571016.0,160432.0,1.0,15432.864865,41085.199596,0,0,0,0,0,0,0,0,0,0,0,2.714286,838.076923,16383,16383,81542.0,41.0,52662.5,26924.705088,118116.0,22841.0,64466.2,27145.743113,0.0,0.0,20153.0,15.0,0.0,10382790.0,68.428571,1121.631579,0,14,38,958,42622
1,175.45.176.0,149.171.126.16,62762,56430,2048,2015-02-17 21:23:27,1623129.0,8584.653469,14.786255,4.928752,9.857504,8,16,492,13442,64.0,56.0,61.5,3.278719,1516.0,56.0,840.125,687.681328,1516,56,580.583333,670.818711,449997.743056,160,320,20,2,70570.826087,1344549.0,1.0,272712.659267,1623129.0,1430863.0,9.0,231875.571429,491350.886918,1541058.0,1394997.0,1.0,102737.2,346255.936674,0,0,0,0,0,0,0,0,0,0,0,2.0,580.583333,16383,16383,1344548.0,9.0,266569.0,482755.645605,1344540.0,20589.0,269998.5,480915.856596,0.0,0.0,12518.0,10.0,0.0,5899152.0,61.5,840.125,0,8,16,492,13442


In [32]:

# flow ID, Source IP, Source Port, Destination IP, Destination Port, Protocol, Timestamp,
# Flow Duration, Total Fwd Packets, Total Backward Packets,vTotal Length of Fwd Packets,
# Total Length of Bwd Packets, Fwd Packet Length Max, Fwd Packet Length Min,
# Fwd Packet Length Mean, Fwd Packet Length Std, Bwd Packet Length Max,
# Bwd Packet Length Min, Bwd Packet Length Mean, Bwd Packet Length Std, 
# Flow Bytes/s, Flow Packets/s,
# Flow IAT Mean, Flow IAT Std, Flow IAT Max, Flow IAT Min,Fwd IAT Total, Fwd IAT Mean, Fwd IAT Std, Fwd IAT Max, Fwd IAT Min,
# Bwd IAT Total, Bwd IAT Mean, Bwd IAT Std, Bwd IAT Max, Bwd IAT Min,Fwd PSH Flags, Bwd PSH Flags, Fwd URG Flags, Bwd URG Flags,
# Fwd Header Length, Bwd Header Length, Fwd Packets/s, Bwd Packets/s,
# Min Packet Length, Max Packet Length, Packet Length Mean, Packet Length Std, Packet Length Variance,
# FIN Flag Count, SYN Flag Count, RST Flag Count, PSH Flag Count, ACK Flag Count, URG Flag Count, 
# CWE Flag Count,ECE Flag Count, Down/Up Ratio, Average Packet Size,
# Avg Fwd Segment Size, Avg Bwd Segment Size, Fwd Header Length,
# Fwd Avg Bytes/Bulk, Fwd Avg Packets/Bulk, Fwd Avg Bulk Rate, Bwd Avg Bytes/Bulk, Bwd Avg Packets/Bulk,Bwd Avg Bulk Rate,
# Subflow Fwd Packets, Subflow Fwd Bytes, Subflow Bwd Packets, Subflow Bwd Bytes,
# Init_Win_bytes_forward, Init_Win_bytes_backward, act_data_pkt_fwd, min_seg_size_forward,Active Mean, Active Std, Active Max, Active Min,
# Idle Mean, Idle Std, Idle Max, Idle Min, Label

cicfm['flow_ID'] = cicfm['dst_ip'] + '-' + cicfm['dst_port'].apply(str) + '-' + cicfm['src_ip'] + '-' + cicfm['src_port'].apply(str) + '-' + cicfm['protocol'].apply(str)
cicfm.columns
cicfm = cicfm.merge(labels[['srcip','dstip','sport','dport','attack_cat']], how='left',
         left_on=['src_ip','dst_ip','src_port','dst_port'],
         right_on=['srcip','dstip','sport','dport'])
cicfm.fillna(value={'attack_cat': 'benign'}, inplace=True)
cicfm.drop(['srcip','dstip','sport','dport'], axis = 1, inplace = True)
cicfm.head(5)

In [36]:
cicfm.to_csv('./dataset/1_CIC.csv', index=None, header=True)
cicfm['attack_cat'].unique()

array(['benign', 'Exploits', 'Fuzzers', 'Reconnaissance', 'DoS',
       'Generic', 'Shellcode', 'Backdoor', 'Worms'], dtype=object)

# OTHER !

In [142]:
NB[NB['service']=='ftp']['ct_ftp_cmd'].unique()
DS[DS['service']=='ftp']['command'].unique()
lero = DS[DS['service']=='ftp'].groupby(['srcip','dstip','sport','dport','command'], as_index=False).size()
lero.drop(index=lero[lero['command']=='-'].index, inplace=True)
lero.groupby(['srcip','dstip','sport','dport'], as_index=False).size()

Unnamed: 0,srcip,dstip,sport,dport,size
0,175.45.176.0,149.171.126.10,33348,21,2
1,175.45.176.0,149.171.126.11,1730,21,2
2,175.45.176.0,149.171.126.14,41600,21,1
3,175.45.176.0,149.171.126.16,45235,21,1
4,175.45.176.0,149.171.126.17,9886,21,2
...,...,...,...,...,...
864,59.166.0.9,149.171.126.9,48969,21,3
865,59.166.0.9,149.171.126.9,51100,21,3
866,59.166.0.9,149.171.126.9,57025,21,3
867,59.166.0.9,149.171.126.9,59659,21,3


In [118]:
#NB[['Sport','Dport']].iloc[(NB[['Sport','Dport']] == '-')]
[0, np.ones((1,70))[:]]

[0,
 array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1.]])]

In [125]:
col_NB = ['SrcAddr', 'Sport', 'DstAddr', 'Dport', 'Proto', 'State', 'Dur','SrcBytes', 'DstBytes', 'sTtl', 'dTtl', 'SrcLoss', 'DstLoss', 'service', 'SrcLoad','DstLoad', 'SrcPkts', 'DstPkts', 'SrcWin', 'DstWin', 'SrcTCPBase','DstTCPBase', 'sMeanPktSz', 'dMeanPktSz', 'trans_depth','response_body_len','SrcJitter', 'DstJitter','StartTime', 'LastTime', 'SIntPkt', 'DIntPkt', 'TcpRtt', 'SynAck','AckDat', 'is_sm_ips_ports', 'ct_state_ttl', 'ct_flw_http_mthd', 'is_ftp_login', 'ct_ftp_cmd', 'ct_srv_src', 'ct_srv_dst', 'ct_dst_ltm', 'ct_src_ltm', 'ct_src_dport_ltm', 'ct_dst_sport_ltm', 'ct_dst_src_ltm', 'attack_cat', 'Label']
NB = pd.read_csv("./output/UNSW-NB15_1.csv",header=None)
print(NB.shape)
NB.columns = col_NB
NB[NB['service']=="ftp"]
#NB[['Sport','Dport']].loc[[NB[['Sport','Dport']] == '-']] = 0
#NB['Dport'] = NB['Dport'].apply(lambda x: portsAsInt(x))
#NB['Sport'] = NB['Sport'].apply(lambda x: portsAsInt(x))
#NB.astype({'SrcAddr':'string', 'Sport':'int32', 'DstAddr':'string', 'Dport':'int32', 'Proto':'int32', 'State':'string', 'service':'string', 'trans_depth':'int32','response_body_len':'int32','StartTime':'float', 'LastTime':'float', 'SIntPkt':'int32', 'DIntPkt':'int32', 'TcpRtt':'int32', 'SynAck':'int32','AckDat':'int32', 'is_sm_ips_ports':'int32', 'ct_state_ttl':'int32', 'ct_flw_http_mthd':'int32', 'is_ftp_login':'int32', 'ct_ftp_cmd':'int32', 'ct_srv_src':'int32', 'ct_srv_dst':'int32', 'ct_dst_ltm':'int32', 'ct_src_ltm':'int32', 'ct_src_dport_ltm':'int32', 'ct_dst_sport_ltm':'int32', 'ct_dst_src_ltm':'int32', 'attack_cat':'string', 'Label':'int32'})
#NB.head(5)

(700001, 49)


Unnamed: 0,SrcAddr,Sport,DstAddr,Dport,Proto,State,Dur,SrcBytes,DstBytes,sTtl,dTtl,SrcLoss,DstLoss,service,SrcLoad,DstLoad,SrcPkts,DstPkts,SrcWin,DstWin,SrcTCPBase,DstTCPBase,sMeanPktSz,dMeanPktSz,trans_depth,response_body_len,SrcJitter,DstJitter,StartTime,LastTime,SIntPkt,DIntPkt,TcpRtt,SynAck,AckDat,is_sm_ips_ports,ct_state_ttl,ct_flw_http_mthd,is_ftp_login,ct_ftp_cmd,ct_srv_src,ct_srv_dst,ct_dst_ltm,ct_src_ltm,ct_src_dport_ltm,ct_dst_sport_ltm,ct_dst_src_ltm,attack_cat,Label
667,59.166.0.3,23415,149.171.126.1,21,tcp,FIN,12.405860,2934,3742,31,29,11,15,ftp,1855.897095,2.368558e+03,52,54,255,255,3046397311,902068474,56,69,0,0,41542.549870,1258.217000,1421927434,1421927446,243.245533,234.062437,0.000687,0.000546,0.000141,0,0,0,0,0,1,1,10,10,1,1,2,,0
725,59.166.0.0,36184,149.171.126.3,21,tcp,FIN,17.583885,2934,3742,31,29,11,15,ftp,1309.380737,1.671076e+03,52,54,255,255,2793880380,2794976958,56,69,0,0,59192.876870,1804.117250,1421927430,1421927447,344.773995,331.761281,0.000669,0.000532,0.000137,0,0,0,0,0,1,1,11,2,1,1,1,,0
1102,59.166.0.6,54406,149.171.126.8,21,tcp,FIN,32.390953,2934,3742,31,29,11,15,ftp,710.815735,9.071669e+02,52,54,255,255,4008010357,4009160932,56,69,0,0,114036.804200,3472.006000,1421927418,1421927450,635.109980,611.139000,0.000716,0.000575,0.000141,0,0,0,0,0,1,1,5,2,1,1,1,,0
1202,59.166.0.4,60847,149.171.126.3,21,tcp,FIN,26.507467,2934,3742,31,29,11,15,ftp,868.585449,1.108518e+03,52,54,255,255,1883462564,1884537373,56,69,0,0,81978.888050,2493.330250,1421927425,1421927451,519.748152,500.130031,0.000707,0.000559,0.000148,0,0,0,0,0,1,1,10,5,1,1,1,,0
1220,59.166.0.3,40867,149.171.126.7,21,tcp,FIN,25.361938,2934,3742,31,29,11,15,ftp,907.817017,1.158587e+03,52,54,255,255,2601364904,456991140,56,69,0,0,77232.793740,2345.908500,1421927426,1421927451,497.285362,478.516406,0.000704,0.000564,0.000140,0,0,0,0,0,1,1,7,7,1,1,1,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
699765,59.166.0.5,4178,149.171.126.4,21,tcp,FIN,0.026040,2934,3742,31,29,11,15,ftp,884178.187500,1.128418e+06,52,54,255,255,2875329070,726712674,56,69,0,0,38.267225,0.980866,1421955831,1421955831,0.504412,0.482302,0.000603,0.000472,0.000131,0,0,0,1,1,1,1,5,4,1,1,4,,0
699834,59.166.0.7,9475,149.171.126.6,21,tcp,FIN,0.900632,2934,3742,31,29,11,15,ftp,25564.269530,3.262598e+04,52,54,255,255,1985748710,4132169817,56,69,0,0,1454.415151,54.886449,1421955833,1421955834,17.653293,16.982867,0.000667,0.000536,0.000131,0,0,0,1,1,1,1,3,2,1,1,2,,0
699840,59.166.0.1,22235,149.171.126.8,21,tcp,FIN,1.163812,2934,3742,31,29,11,15,ftp,19783.263670,2.524806e+04,52,54,255,255,2403651776,2424383189,56,69,0,0,1564.292296,54.898602,1421955834,1421955835,22.813353,21.948754,0.000663,0.000522,0.000141,0,0,0,1,1,1,1,4,5,1,1,3,,0
699865,59.166.0.5,51051,149.171.126.9,21,tcp,FIN,1.419832,2934,3742,31,29,11,15,ftp,16216.002930,2.069541e+04,52,54,255,255,558265705,2842579507,56,69,0,0,2105.035328,73.649758,1421955834,1421955836,27.833157,26.779490,0.000644,0.000512,0.000132,0,0,0,1,1,1,1,2,6,1,1,2,,0


### Trash

In [18]:
## convert to int
#def portsAsInt(x):
#    if(not np.isnan(x)):
#        return int(x)
#    return np.nan

#zconn['id.orig_p'] = zconn['id.orig_p'].apply(lambda x: portsAsInt(x))
#zconn['id.resp_p'] = zconn['id.resp_p'].apply(lambda x: portsAsInt(x))
##

## read csv skipping
#zconnf = pd.read_csv("./conn.log", sep='\t', nrows=7, skiprows = [0, 1, 2, 3, 4, 5]) # dataset Zeek Conn
#zconnf.drop(zconnf.columns[[0]], axis = 1, inplace = True)
##

## counting flow intersections
#df[['Sport','Dport']].isin(zconn[['id.orig_p','id.resp_p']]).value_counts()
#zconn.drop([len(zconn)-1])[['id.orig_p','id.resp_p']].astype(int).isin(zconn[['id.orig_p','id.resp_p']]).value_counts()
##

## others...
# range(0,8)) 
#test = pd.isna(df[['Dport']])
#test = test[test].index
#pd.isna(df['Dport'][1])
#[test[1]]
#print(df['Dport'][1], getServNum(df['Dport'][1]))

## get list of protocols not available in gerServNum()
#listaProt = list(())
#test = df['Dport']
#for loc in test.index:
#    if not(str(test[loc]).isnumeric()):
#        try:
#            test[loc] = getServNum(test[loc])
#        except:
#            listaProt.append(test[loc])
#listaProt = set(listaProt)
#listaProt

# df['Dport'][1].isnumeric()
# df['Dport'].index

#for loc in df['Dport'].index:
#    if !df['Dport'][loc].isnumeric()
#        df['Dport'][loc] = df['Dport'][loc].apply(lambda x: getServName(x))
        
#df = df.drop(teste[teste].index)
#pd.isna(df['Dport']).value_counts()
#df[['Sport','Dport']].dtypes#.astype(int)
#zconn[['id.orig_p','id.resp_p']].astype(int).isin(df[['Sport','Dport']].astype(int)).value_counts()
#zconn[zconn['ts'] == df[df.index == 1]['StartTime']]
#df[df.index == 1]['StartTime']
#zconn['ts'].astype('float64')
#df.head(2)

#print(df[df[['Sport','Dport','SrcAddr','DstAddr']].isin(zconn[['Sport','Dport','SrcAddr','DstAddr']]).all(axis=1)][['Sport','Dport','SrcAddr','DstAddr']])
#print(zconn[zconn[['Sport','Dport','SrcAddr','DstAddr']].isin(df[['Sport','Dport','SrcAddr','DstAddr']]).all(axis=1)][['Sport','Dport','SrcAddr','DstAddr']])
#zconn[zconn[['Sport','Dport']].isin(df[['Sport','Dport']]).all(axis=1)]
#df[df[['Sport','Dport']].isin(zconn[['Sport','Dport']]).all(axis=1)]
#for x in df.itertuples(index=False):
#    print(x)


#zconnf = pd.read_csv("./conn.log", sep='\t', nrows=7, skiprows = [0, 1, 2, 3, 4, 5]) # dataset Zeek Conn
#zconnf.drop(zconnf.columns[[0]], axis = 1, inplace = True)

#df[['Sport','Dport']].isin(zconn[['id.orig_p','id.resp_p']]).value_counts()
#zconn.drop([len(zconn)-1])[['id.orig_p','id.resp_p']].astype(int).isin(zconn[['id.orig_p','id.resp_p']]).value_counts()

# range(0,8)) 
#teste = pd.isna(df[['Sport', 'Dport']]).any(axis=1)
#df = df.drop(teste[teste].index)
#pd.isna(df['Dport']).value_counts()
#df[['Sport','Dport']].dtypes#.astype(int)
#zconn[['id.orig_p','id.resp_p']].astype(int).isin(df[['Sport','Dport']].astype(int)).value_counts()
#print(zconn['id.orig_p'].astype(int), df['Sport'])

#zconn['id.orig_p'][zconn['id.orig_p'].notna()].apply(lambda x: int(x))
#for i in zconn[zconn['id.orig_p'].notna()].index :
#    zconn[i, 'id.orig_p'] = int(zconn[i, 'id.orig_p'])
#
#zconn['id.orig_p']

#zconn[zconn['ts'] == df[df.index == 1]['StartTime']]
#df[df.index == 1]['StartTime']
#zconn['ts'].astype('float64')
#df.head(2)

#for x in zconn.itertuples():
#    row = x[3:8]
#    match = DS[['SrcAddr','Sport','DstAddr','Dport','Proto']].isin(row).all(axis=1)
#    if (match.to_list().count(True) == 1):
#        print(DS[match].service, x.service)
#        DS[match].service = x.service
#        DS[match].duration = x.conn_state
#        DS[match].conn_state = x.conn_state
#DS.head(5)

#DS.loc[ (DS['SrcAddr'] == '149.171.126.14' and DS['DstAddr'] == '175.45.176.3' and DS['Proto'] == 6), ['conn_state']]
#DS.loc[DS[['SrcAddr','DstAddr','Proto']].isin(next(DS.iterrows())[1][[0,2,4]]).all(axis=1)]
#DS[['SrcAddr','DstAddr','Proto']].isin(next(DS.iterrows())[1][[0,2,4]])#.any(axis=0)
#DS.iloc[DS.index.size-1][['Dport','Sport']] = [1,1]
#(DS['duration'] != 0).any()
#DS[DS[['SrcAddr','Sport','DstAddr','Dport','Proto']].isin(next(zconn.itertuples())).all(axis=1)]
#df[['SrcAddr','Sport','DstAddr','Dport','Proto']].isin(next(zconn.itertuples())).all(axis=1).to_list().count(True)
#next(zconn.iterrows())[1][2:7]
#next(zconn.itertuples())
#DS[['SrcAddr','Sport','DstAddr','Dport','Proto']].isin(['149.171.126.14',179,'175.45.176.3',33159,6])
#DS.loc[DS[(DS['SrcAddr'] == '149.171.126.14')].index]['SrcAddr'] == next(zconn.iterrows())[1][2]
#DS[['SrcAddr','Sport','DstAddr','Dport','Proto']].isin(['149.171.126.14'])
#print(df.shape, zconn.shape, df.merge(zconn[['SrcAddr','Sport','DstAddr','Dport', 'Proto','service','duration','conn_state']], how='inner', left_on=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto'], right_on=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto']).shape, df.merge(zconn[['SrcAddr','Sport','DstAddr','Dport', 'Proto','service','duration','conn_state']], how='inner', left_on=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto'], right_on=['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto']).duplicated(['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto']).to_list().count(True),df.duplicated(['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto']).to_list().count(True),zconn.duplicated(['SrcAddr', 'Sport', 'DstAddr', 'Dport','Proto']).to_list().count(True),df.duplicated().to_list().count(True),zconn.duplicated().to_list().count(True))
#datetime.fromtimestamp(zconn['ts'][1]).time()
#df['StartTime'].apply(lambda x: (x.hour == zconn['ts'][3].hour)).any()
#df[df['StartTime'].apply(lambda x: (x == zconn['ts'][3]))]
#df['StartTime'][3]

#for x in teste2.itertuples():
#    print(teste[teste.isin(x[0:5]).all(axis=1)]['indiceOrig'])
#    if (match.to_list().count(True) == 1):
#        print(x[match]['indiceOrig'])
#    row = x[3:8]
#    match = DS[['SrcAddr','Sport','DstAddr','Dport','Proto']].isin(row).all(axis=1)
#    if (match.to_list().count(True) == 1):
#        print(DS[match].service, x.service)
#        DS[match].service = x.service
#        DS[match].duration = x.conn_state
#        DS[match].conn_state = x.conn_state
#DS.head(5)

#DS = df
#DS[['service','duration','conn_state']] = ['-',0,'-']
# next(teste.itertuples())[:]

#(DS[DS['service']=='ftp']['Proto'] != 6).any()