In [2]:
import pandas as pd
import glob
import warnings
# Suppress FutureWarning messages
warnings.simplefilter(action='ignore', category=FutureWarning)

# Packet Cleaning
In this notebook, we will access the various network captures for processing and cleaning.

## Clean Packet Capture

In [3]:
# List of all csv in a dir
csv_files = glob.glob('captures1_v2/clean/*.csv')

clean_df = pd.DataFrame()

# append to combined dataframe
for csv_file in csv_files:
    df = pd.read_csv(csv_file)
    capture = csv_file.split('/')[-1].replace('.csv', '')
    df['CaptureName'] = capture
    clean_df = pd.concat([clean_df, df])

clean_df['AttackName'] = 'Clean'

clean_df.head()


Unnamed: 0,No.,Time,SrcIP,DstIP,Protocol,Length,Info,SrcMAC,SrcMACResolved,SrcOUIResolved,...,DstPort,DstMACResolved.1,SYNFlag,ACKFlag,ProtocolType,EpochTime,RelativeTime,TimeDelta,CaptureName,AttackName
0,1,0.0,172.27.224.70,172.27.224.250,TCP,60,49499 > 502 [ACK] Seq=1 Ack=1 Win=65051 Len=0,00:0c:29:9d:9e:9e,VMware_9d:9e:9e,"VMware, Inc.",...,502.0,Telemech_09:51:3b,Not set,Set,TCP,1535046000.0,0.0,0.0,"clean\eth2dump-clean-0,5h_1",Clean
1,2,0.02094,HewlettP_8e:40:b3,Spanning-tree-(for-bridges)_00,STP,64,RST. Root = 32768/0/00:18:6e:d7:8a:c0 Cost = ...,d0:7e:28:8e:40:b3,HewlettP_8e:40:b3,Hewlett Packard,...,,Spanning-tree-(for-bridges)_00,,,STP,1535046000.0,0.02094,0.02094,"clean\eth2dump-clean-0,5h_1",Clean
2,3,0.094309,172.27.224.70,172.27.224.250,Modbus/TCP,66,"Query: Trans: 0; Unit: 1, Func: 3: ...",00:0c:29:9d:9e:9e,VMware_9d:9e:9e,"VMware, Inc.",...,502.0,Telemech_09:51:3b,Not set,Set,Modbus/TCP,1535046000.0,0.094309,0.073369,"clean\eth2dump-clean-0,5h_1",Clean
3,4,0.097427,172.27.224.250,172.27.224.70,Modbus/TCP,85,"Response: Trans: 0; Unit: 1, Func: 3: ...",00:80:f4:09:51:3b,Telemech_09:51:3b,Telemechanique Electrique,...,49499.0,VMware_9d:9e:9e,Not set,Set,Modbus/TCP,1535046000.0,0.097427,0.003118,"clean\eth2dump-clean-0,5h_1",Clean
4,5,0.311972,172.27.224.70,172.27.224.250,TCP,60,49499 > 502 [ACK] Seq=13 Ack=32 Win=65020 Len=0,00:0c:29:9d:9e:9e,VMware_9d:9e:9e,"VMware, Inc.",...,502.0,Telemech_09:51:3b,Not set,Set,TCP,1535046000.0,0.311972,0.214545,"clean\eth2dump-clean-0,5h_1",Clean


In [4]:
# Check for missing values
missing_values = clean_df.isnull().sum()
print(missing_values)

No.                     0
Time                    0
SrcIP                   0
DstIP                   0
Protocol                0
Length                  0
Info                    0
SrcMAC                  0
SrcMACResolved          0
SrcOUIResolved          0
SrcPort             40771
SequenceNumber      46224
SrcOUI                  0
DstMAC                  0
DstMACResolved          0
DstOUI                  0
DstOUResolved       35895
DstPort             40771
DstMACResolved.1        0
SYNFlag             46224
ACKFlag             46224
ProtocolType            0
EpochTime               0
RelativeTime            0
TimeDelta               0
CaptureName             0
AttackName              0
dtype: int64


In [5]:
# Count the number of packets where SrcPort is not applicable (null)
srcport_not_applicable = clean_df['SrcPort'].isnull().sum()
print(f"Number of packets where SrcPort is not applicable (null): {srcport_not_applicable}")

# Count the number of packets where DstPort is not applicable (null)
dstport_not_applicable = clean_df['DstPort'].isnull().sum()
print(f"Number of packets where DstPort is not applicable (null): {dstport_not_applicable}")

# Count the number of packets where SrcPort should be applicable but is null
srcport_missing = clean_df[(clean_df['Protocol'] == 'TCP') | (clean_df['Protocol'] == 'UDP')]['SrcPort'].isnull().sum()
print(f"Number of packets where SrcPort should be applicable but is null: {srcport_missing}")

# Count the number of packets where DstPort should be applicable but is null
dstport_missing = clean_df[(clean_df['Protocol'] == 'TCP') | (clean_df['Protocol'] == 'UDP')]['DstPort'].isnull().sum()
print(f"Number of packets where DstPort should be applicable but is null: {dstport_missing}")


Number of packets where SrcPort is not applicable (null): 40771
Number of packets where DstPort is not applicable (null): 40771
Number of packets where SrcPort should be applicable but is null: 0
Number of packets where DstPort should be applicable but is null: 0


In [6]:
# Count the number of packets where SequenceNumber is not applicable (null)
seqnum_not_applicable = clean_df['SequenceNumber'].isnull().sum()
print(f"Number of packets where SequenceNumber is not applicable (null): {seqnum_not_applicable}")

# Count the number of packets where SequenceNumber should be applicable but is null
seqnum_missing = clean_df[(clean_df['Protocol'] == 'TCP')]['SequenceNumber'].isnull().sum()
print(f"Number of packets where SequenceNumber should be applicable but is null: {seqnum_missing}")


Number of packets where SequenceNumber is not applicable (null): 46224
Number of packets where SequenceNumber should be applicable but is null: 0


In [7]:
# Count the number of packets where SYNFlag is not applicable (null)
synflag_not_applicable = clean_df['SYNFlag'].isnull().sum()
print(f"Number of packets where SYNFlag is not applicable (null): {synflag_not_applicable}")

# Count the number of packets where ACKFlag is not applicable (null)
ackflag_not_applicable = clean_df['ACKFlag'].isnull().sum()
print(f"Number of packets where ACKFlag is not applicable (null): {ackflag_not_applicable}")

# Count the number of packets where SYNFlag should be applicable but is null
synflag_missing = clean_df[(clean_df['Protocol'] == 'TCP')]['SYNFlag'].isnull().sum()
print(f"Number of packets where SYNFlag should be applicable but is null: {synflag_missing}")

# Count the number of packets where ACKFlag should be applicable but is null
ackflag_missing = clean_df[(clean_df['Protocol'] == 'TCP')]['ACKFlag'].isnull().sum()
print(f"Number of packets where ACKFlag should be applicable but is null: {ackflag_missing}")


Number of packets where SYNFlag is not applicable (null): 46224
Number of packets where ACKFlag is not applicable (null): 46224
Number of packets where SYNFlag should be applicable but is null: 0
Number of packets where ACKFlag should be applicable but is null: 0


In [8]:
# Count the number of packets where DstOUResolved is not applicable (null)
dstou_not_applicable = clean_df['DstOUResolved'].isnull().sum()
print(f"Number of packets where DstOUResolved is not applicable (null): {dstou_not_applicable}")

# Count the number of packets where DstOUResolved should be applicable but is null
dstou_missing = clean_df['DstOUResolved'].isnull().sum()  # No specific protocol condition since DstOUResolved can be applicable to various protocols
print(f"Number of packets where DstOUResolved should be applicable but is null: {dstou_missing}")


Number of packets where DstOUResolved is not applicable (null): 35895
Number of packets where DstOUResolved should be applicable but is null: 35895


In [9]:
# Count the number of packets where DstOUResolved is not applicable (null)
dstou_not_applicable = clean_df['DstOUResolved'].isnull().sum()
print(f"Number of packets where DstOUResolved is not applicable (null): {dstou_not_applicable}")

# Count the number of packets where DstOUResolved should be applicable but is null
dstou_missing = clean_df[((clean_df['Protocol'] == 'TCP') | (clean_df['Protocol'] == 'UDP')) & clean_df['DstOUResolved'].isnull()].shape[0]
print(f"Number of packets where DstOUResolved should be applicable but is null: {dstou_missing}")


Number of packets where DstOUResolved is not applicable (null): 35895
Number of packets where DstOUResolved should be applicable but is null: 1286


In [10]:

# 1. Handling missing values in SrcPort and DstPort
clean_df['SrcPort'].fillna(-1, inplace=True)  # Filling missing SrcPort with -1
clean_df['DstPort'].fillna(-1, inplace=True)  # Filling missing DstPort with -1

# 2. Handling missing values in SequenceNumber
# Assuming TCP packets have SequenceNumber and using median imputation
median_seqnum = clean_df[clean_df['Protocol'] == 'TCP']['SequenceNumber'].median()
clean_df['SequenceNumber'].fillna(median_seqnum, inplace=True)

# 3. Handling missing values in DstOUResolved
clean_df['DstOUResolved'].fillna('Unknown', inplace=True)  # Filling missing DstOUResolved with 'Unknown'

# 4. Handling missing values in SYNFlag and ACKFlag
clean_df['SYNFlag'].fillna(False, inplace=True)  # Filling missing SYNFlag with False
clean_df['ACKFlag'].fillna(False, inplace=True)  # Filling missing ACKFlag with False

In [11]:
# Check for missing values
missing_values = clean_df.isnull().sum()
print(missing_values)

No.                 0
Time                0
SrcIP               0
DstIP               0
Protocol            0
Length              0
Info                0
SrcMAC              0
SrcMACResolved      0
SrcOUIResolved      0
SrcPort             0
SequenceNumber      0
SrcOUI              0
DstMAC              0
DstMACResolved      0
DstOUI              0
DstOUResolved       0
DstPort             0
DstMACResolved.1    0
SYNFlag             0
ACKFlag             0
ProtocolType        0
EpochTime           0
RelativeTime        0
TimeDelta           0
CaptureName         0
AttackName          0
dtype: int64


In [12]:
# Convert EpochTime to datetime format and create 'date_time' column
clean_df['date_time'] = pd.to_datetime(df['EpochTime'], unit='s')

## Man in the Middle Attack

In [13]:
csv_files = glob.glob('captures1_v2/mitm/*.csv')

mitm_df = pd.DataFrame()

for csv_file in csv_files:
    df = pd.read_csv(csv_file)
    capture = csv_file.split('/')[-1].replace('.csv', '')
    df['CaptureName'] = capture
    mitm_df = pd.concat([mitm_df, df])

mitm_df['AttackName'] = 'mitm'

mitm_df.head()

Unnamed: 0,No.,Time,SrcIP,DstIP,Protocol,Length,Info,SrcMAC,SrcMACResolved,SrcOUIResolved,...,DstPort,DstMACResolved.1,SYNFlag,ACKFlag,ProtocolType,EpochTime,RelativeTime,TimeDelta,CaptureName,AttackName
0,1,0.0,172.27.224.70,172.27.224.250,TCP,60,49499 > 502 [ACK] Seq=1 Ack=1 Win=65237 Len=0,00:0c:29:9d:9e:9e,VMware_9d:9e:9e,"VMware, Inc.",...,502.0,Telemech_09:51:3b,Not set,Set,TCP,1535061000.0,0.0,0.0,"mitm\eth2dump-mitm-change-15m-0,5h_1",mitm
1,2,0.094173,172.27.224.70,172.27.224.250,Modbus/TCP,66,"Query: Trans: 0; Unit: 1, Func: 3: ...",00:0c:29:9d:9e:9e,VMware_9d:9e:9e,"VMware, Inc.",...,502.0,Telemech_09:51:3b,Not set,Set,Modbus/TCP,1535061000.0,0.094173,0.094173,"mitm\eth2dump-mitm-change-15m-0,5h_1",mitm
2,3,0.102293,172.27.224.250,172.27.224.70,Modbus/TCP,85,"Response: Trans: 0; Unit: 1, Func: 3: ...",00:80:f4:09:51:3b,Telemech_09:51:3b,Telemechanique Electrique,...,49499.0,VMware_9d:9e:9e,Not set,Set,Modbus/TCP,1535061000.0,0.102293,0.00812,"mitm\eth2dump-mitm-change-15m-0,5h_1",mitm
3,4,0.312027,172.27.224.70,172.27.224.250,TCP,60,49499 > 502 [ACK] Seq=13 Ack=32 Win=65206 Len=0,00:0c:29:9d:9e:9e,VMware_9d:9e:9e,"VMware, Inc.",...,502.0,Telemech_09:51:3b,Not set,Set,TCP,1535061000.0,0.312027,0.209734,"mitm\eth2dump-mitm-change-15m-0,5h_1",mitm
4,5,0.406373,172.27.224.70,172.27.224.250,Modbus/TCP,66,"Query: Trans: 0; Unit: 1, Func: 3: ...",00:0c:29:9d:9e:9e,VMware_9d:9e:9e,"VMware, Inc.",...,502.0,Telemech_09:51:3b,Not set,Set,Modbus/TCP,1535061000.0,0.406373,0.094346,"mitm\eth2dump-mitm-change-15m-0,5h_1",mitm


In [14]:
# Check for missing values
missing_values = mitm_df.isnull().sum()
print(missing_values)

No.                      0
Time                     0
SrcIP                    0
DstIP                    0
Protocol                 0
Length                   0
Info                     0
SrcMAC                   0
SrcMACResolved           0
SrcOUIResolved           0
SrcPort             142616
SequenceNumber      155786
SrcOUI                   0
DstMAC                   0
DstMACResolved           0
DstOUI                   0
DstOUResolved       112079
DstPort             142616
DstMACResolved.1         0
SYNFlag             155786
ACKFlag             155786
ProtocolType             0
EpochTime                0
RelativeTime             0
TimeDelta                0
CaptureName              0
AttackName               0
dtype: int64


In [15]:
# 1. Handling missing values in SrcPort and DstPort
mitm_df['SrcPort'].fillna(-1, inplace=True)  # Filling missing SrcPort with -1
mitm_df['DstPort'].fillna(-1, inplace=True)  # Filling missing DstPort with -1

# 2. Handling missing values in SequenceNumber
# Assuming TCP packets have SequenceNumber and using median imputation
median_seqnum = mitm_df[mitm_df['Protocol'] == 'TCP']['SequenceNumber'].median()
mitm_df['SequenceNumber'].fillna(median_seqnum, inplace=True)

# 3. Handling missing values in DstOUResolved
mitm_df['DstOUResolved'].fillna('Unknown', inplace=True)  # Filling missing DstOUResolved with 'Unknown'

# 4. Handling missing values in SYNFlag and ACKFlag
mitm_df['SYNFlag'].fillna(False, inplace=True)  # Filling missing SYNFlag with False
mitm_df['ACKFlag'].fillna(False, inplace=True)  # Filling missing ACKFlag with False

In [16]:
# Check for missing values
missing_values = mitm_df.isnull().sum()
print(missing_values)

No.                 0
Time                0
SrcIP               0
DstIP               0
Protocol            0
Length              0
Info                0
SrcMAC              0
SrcMACResolved      0
SrcOUIResolved      0
SrcPort             0
SequenceNumber      0
SrcOUI              0
DstMAC              0
DstMACResolved      0
DstOUI              0
DstOUResolved       0
DstPort             0
DstMACResolved.1    0
SYNFlag             0
ACKFlag             0
ProtocolType        0
EpochTime           0
RelativeTime        0
TimeDelta           0
CaptureName         0
AttackName          0
dtype: int64


In [17]:
# Convert EpochTime to datetime format and create 'date_time' column
mitm_df['date_time'] = pd.to_datetime(df['EpochTime'], unit='s')

## modbusQuery2Flooding

In [18]:
csv_files = glob.glob('captures1_v2/modbusQuery2Flooding/*.csv')

modbusQuery2Flooding_df = pd.DataFrame()

for csv_file in csv_files:
    try:
        df = pd.read_csv(csv_file)
        capture = csv_file.split('/')[-1].replace('.csv', '')
        df['CaptureName'] = capture
        modbusQuery2Flooding_df = pd.concat([modbusQuery2Flooding_df, df])
    except Exception as e:
        print(f"Failed to read {csv_file}: {e}")

modbusQuery2Flooding_df['AttackName'] = 'modbusQuery2Flooding'

modbusQuery2Flooding_df.head()

Failed to read captures1_v2/modbusQuery2Flooding\eth2dump-modbusQuery2Flooding30m-1h_1.csv: 'utf-8' codec can't decode byte 0x86 in position 246153: invalid start byte
Failed to read captures1_v2/modbusQuery2Flooding\eth2dump-modbusQuery2Flooding30m-6h_1.csv: Error tokenizing data. C error: Expected 25 fields in line 1258221, saw 38



Unnamed: 0,No.,Time,SrcIP,DstIP,Protocol,Length,Info,SrcMAC,SrcMACResolved,SrcOUIResolved,...,DstPort,DstMACResolved.1,SYNFlag,ACKFlag,ProtocolType,EpochTime,RelativeTime,TimeDelta,CaptureName,AttackName
0,1,0.0,172.27.224.70,172.27.224.250,TCP,60,49179 > 502 [ACK] Seq=1 Ack=1 Win=64927 Len=0,00:0c:29:9d:9e:9e,VMware_9d:9e:9e,"VMware, Inc.",...,502.0,Telemech_09:51:3b,Not set,Set,TCP,1529551000.0,0.0,0.0,modbusQuery2Flooding\eth2dump-modbusQuery2Floo...,modbusQuery2Flooding
1,2,0.057897,172.27.224.251,172.27.224.250,TCP,60,"51111 > 502 [FIN, ACK] Seq=1 Ack=1 Win=2036 ...",48:5b:39:64:40:79,ASUSTekC_64:40:79,ASUSTek COMPUTER INC.,...,502.0,Telemech_09:51:3b,Not set,Set,TCP,1529551000.0,0.057897,0.057897,modbusQuery2Flooding\eth2dump-modbusQuery2Floo...,modbusQuery2Flooding
2,3,0.067569,172.27.224.250,172.27.224.251,TCP,60,502 > 51111 [ACK] Seq=1 Ack=2 Win=8712 Len=0,00:80:f4:09:51:3b,Telemech_09:51:3b,Telemechanique Electrique,...,51111.0,ASUSTekC_64:40:79,Not set,Set,TCP,1529551000.0,0.067569,0.009672,modbusQuery2Flooding\eth2dump-modbusQuery2Floo...,modbusQuery2Flooding
3,4,0.068851,172.27.224.250,172.27.224.251,TCP,60,"502 > 51111 [FIN, ACK] Seq=1 Ack=2 Win=8712 ...",00:80:f4:09:51:3b,Telemech_09:51:3b,Telemechanique Electrique,...,51111.0,ASUSTekC_64:40:79,Not set,Set,TCP,1529551000.0,0.068851,0.001282,modbusQuery2Flooding\eth2dump-modbusQuery2Floo...,modbusQuery2Flooding
4,5,0.068872,172.27.224.251,172.27.224.250,TCP,60,51111 > 502 [ACK] Seq=2 Ack=2 Win=2036 Len=0,48:5b:39:64:40:79,ASUSTekC_64:40:79,ASUSTek COMPUTER INC.,...,502.0,Telemech_09:51:3b,Not set,Set,TCP,1529551000.0,0.068872,2.1e-05,modbusQuery2Flooding\eth2dump-modbusQuery2Floo...,modbusQuery2Flooding


In [19]:
# Check for missing values
missing_values = modbusQuery2Flooding_df.isnull().sum()
print(missing_values)

No.                      0
Time                     0
SrcIP                    0
DstIP                    0
Protocol                 0
Length                   0
Info                     0
SrcMAC                   0
SrcMACResolved           0
SrcOUIResolved           0
SrcPort             342203
SequenceNumber      378862
SrcOUI                   0
DstMAC                   0
DstMACResolved           0
DstOUI                   0
DstOUResolved       283669
DstPort             342203
DstMACResolved.1         0
SYNFlag             378862
ACKFlag             378862
ProtocolType             0
EpochTime                0
RelativeTime             0
TimeDelta                0
CaptureName              0
AttackName               0
dtype: int64


In [20]:
# 1. Handling missing values in SrcPort and DstPort
modbusQuery2Flooding_df['SrcPort'].fillna(-1, inplace=True)  # Filling missing SrcPort with -1
modbusQuery2Flooding_df['DstPort'].fillna(-1, inplace=True)  # Filling missing DstPort with -1

# 2. Handling missing values in SequenceNumber
# Assuming TCP packets have SequenceNumber and using median imputation
median_seqnum = modbusQuery2Flooding_df[modbusQuery2Flooding_df['Protocol'] == 'TCP']['SequenceNumber'].median()
modbusQuery2Flooding_df['SequenceNumber'].fillna(median_seqnum, inplace=True)

# 3. Handling missing values in DstOUResolved
modbusQuery2Flooding_df['DstOUResolved'].fillna('Unknown', inplace=True)  # Filling missing DstOUResolved with 'Unknown'

# 4. Handling missing values in SYNFlag and ACKFlag
modbusQuery2Flooding_df['SYNFlag'].fillna(False, inplace=True)  # Filling missing SYNFlag with False
modbusQuery2Flooding_df['ACKFlag'].fillna(False, inplace=True)  # Filling missing ACKFlag with False

In [21]:
# Check for missing values
missing_values = modbusQuery2Flooding_df.isnull().sum()
print(missing_values)

No.                 0
Time                0
SrcIP               0
DstIP               0
Protocol            0
Length              0
Info                0
SrcMAC              0
SrcMACResolved      0
SrcOUIResolved      0
SrcPort             0
SequenceNumber      0
SrcOUI              0
DstMAC              0
DstMACResolved      0
DstOUI              0
DstOUResolved       0
DstPort             0
DstMACResolved.1    0
SYNFlag             0
ACKFlag             0
ProtocolType        0
EpochTime           0
RelativeTime        0
TimeDelta           0
CaptureName         0
AttackName          0
dtype: int64


In [22]:
# Convert EpochTime to datetime format and create 'date_time' column
modbusQuery2Flooding_df['date_time'] = pd.to_datetime(df['EpochTime'], unit='s')

## modbusQueryFlooding

In [23]:
csv_files = glob.glob('captures1_v2/modbusQueryFlooding/*.csv')

modbusQueryFlooding_df = pd.DataFrame()

for csv_file in csv_files:
    try:
        df = pd.read_csv(csv_file)
        capture = csv_file.split('/')[-1].replace('.csv', '')
        df['CaptureName'] = capture
        modbusQueryFlooding_df = pd.concat([modbusQueryFlooding_df, df])
    except Exception as e:
        print(f"Failed to read {csv_file}: {e}")

modbusQueryFlooding_df['AttackName'] = 'modbusQueryFlooding'

modbusQueryFlooding_df.head()

Unnamed: 0,No.,Time,SrcIP,DstIP,Protocol,Length,Info,SrcMAC,SrcMACResolved,SrcOUIResolved,...,DstPort,DstMACResolved.1,SYNFlag,ACKFlag,ProtocolType,EpochTime,TimeDelta,RelativeTime,CaptureName,AttackName
0,1,0.0,172.27.224.70,172.27.224.250,TCP,60,49179 > 502 [ACK] Seq=1 Ack=1 Win=65175 Len=0,VMware_9d:9e:9e,VMware_9d:9e:9e,"VMware, Inc.",...,502.0,Telemecaniqu_09:51:3b,Not set,Set,TCP,1529547000.0,Not set,0.0,modbusQueryFlooding\eth2dump-modbusQueryFloodi...,modbusQueryFlooding
1,2,0.095448,172.27.224.70,172.27.224.250,Modbus/TCP,66,"Query: Trans: 0; Unit: 1, Func: 3: ...",VMware_9d:9e:9e,VMware_9d:9e:9e,"VMware, Inc.",...,502.0,Telemecaniqu_09:51:3b,Not set,Set,Modbus/TCP,1529547000.0,Not set,0.095448,modbusQueryFlooding\eth2dump-modbusQueryFloodi...,modbusQueryFlooding
2,3,0.102847,172.27.224.250,172.27.224.70,Modbus/TCP,85,"Response: Trans: 0; Unit: 1, Func: 3: ...",Telemecaniqu_09:51:3b,Telemecaniqu_09:51:3b,Telemecanique Electrique,...,49179.0,VMware_9d:9e:9e,Not set,Set,Modbus/TCP,1529547000.0,Not set,0.102847,modbusQueryFlooding\eth2dump-modbusQueryFloodi...,modbusQueryFlooding
3,4,0.273109,HewlettPacka_8e:40:b3,Spanning-tree-(for-bridges)_00,STP,64,RST. Root = 32768/0/00:18:6e:d7:8a:c0 Cost = ...,HewlettPacka_8e:40:b3,HewlettPacka_8e:40:b3,Hewlett Packard,...,,Spanning-tree-(for-bridges)_00,,,STP,1529547000.0,,0.273109,modbusQueryFlooding\eth2dump-modbusQueryFloodi...,modbusQueryFlooding
4,5,0.311994,172.27.224.70,172.27.224.250,TCP,60,49179 > 502 [ACK] Seq=13 Ack=32 Win=65144 Len=0,VMware_9d:9e:9e,VMware_9d:9e:9e,"VMware, Inc.",...,502.0,Telemecaniqu_09:51:3b,Not set,Set,TCP,1529547000.0,Not set,0.311994,modbusQueryFlooding\eth2dump-modbusQueryFloodi...,modbusQueryFlooding


In [24]:
# Check for missing values
missing_values = modbusQueryFlooding_df.isnull().sum()
print(missing_values)

No.                      0
Time                     0
SrcIP                    0
DstIP                    0
Protocol                 0
Length                   0
Info                     0
SrcMAC                   0
SrcMACResolved           0
SrcOUIResolved           0
SrcPort             376013
SequenceNumber      417962
SrcOUI                   0
DstMAC                   0
DstMACResolved           0
DstOUI                   0
DstOUResolved       168917
DstPort             376013
DstMACResolved.1         0
SYNFlag             417962
ACKFlag             417962
ProtocolType             0
EpochTime                0
TimeDelta           417962
RelativeTime             0
CaptureName              0
AttackName               0
dtype: int64


In [25]:
# Count the number of packets where TimeDelta should be present but is null
timedelta_missing = modbusQueryFlooding_df[modbusQueryFlooding_df['TimeDelta'].isnull() & ((modbusQueryFlooding_df['Time'].notnull()) & (modbusQueryFlooding_df['EpochTime'].notnull()))].shape[0]
print(f"Number of packets where TimeDelta should be present but is null: {timedelta_missing}")


Number of packets where TimeDelta should be present but is null: 417962


In [26]:
# Sort the DataFrame by EpochTime to ensure the data is in chronological order
modbusQueryFlooding_df.sort_values(by='EpochTime', inplace=True)

# Interpolate missing TimeDelta values based on the EpochTime column
modbusQueryFlooding_df['TimeDelta'] = modbusQueryFlooding_df['EpochTime'].diff().fillna(method='backfill')

# Check if there are any remaining null values in TimeDelta
null_timedelta_count = modbusQueryFlooding_df['TimeDelta'].isnull().sum()
print(f"Number of remaining null values in TimeDelta after interpolation: {null_timedelta_count}")

Number of remaining null values in TimeDelta after interpolation: 0


In [27]:
# 1. Handling missing values in SrcPort and DstPort
modbusQueryFlooding_df['SrcPort'].fillna(-1, inplace=True)  # Filling missing SrcPort with -1
modbusQueryFlooding_df['DstPort'].fillna(-1, inplace=True)  # Filling missing DstPort with -1

# 2. Handling missing values in SequenceNumber
# Assuming TCP packets have SequenceNumber and using median imputation
median_seqnum = modbusQueryFlooding_df[modbusQueryFlooding_df['Protocol'] == 'TCP']['SequenceNumber'].median()
modbusQueryFlooding_df['SequenceNumber'].fillna(median_seqnum, inplace=True)

# 3. Handling missing values in DstOUResolved
modbusQueryFlooding_df['DstOUResolved'].fillna('Unknown', inplace=True)  # Filling missing DstOUResolved with 'Unknown'

# 4. Handling missing values in SYNFlag and ACKFlag
modbusQueryFlooding_df['SYNFlag'].fillna(False, inplace=True)  # Filling missing SYNFlag with False
modbusQueryFlooding_df['ACKFlag'].fillna(False, inplace=True)  # Filling missing ACKFlag with False

In [28]:
# Check for missing values
missing_values = modbusQueryFlooding_df.isnull().sum()
print(missing_values)

No.                 0
Time                0
SrcIP               0
DstIP               0
Protocol            0
Length              0
Info                0
SrcMAC              0
SrcMACResolved      0
SrcOUIResolved      0
SrcPort             0
SequenceNumber      0
SrcOUI              0
DstMAC              0
DstMACResolved      0
DstOUI              0
DstOUResolved       0
DstPort             0
DstMACResolved.1    0
SYNFlag             0
ACKFlag             0
ProtocolType        0
EpochTime           0
TimeDelta           0
RelativeTime        0
CaptureName         0
AttackName          0
dtype: int64


In [29]:
# Convert EpochTime to datetime format and create 'date_time' column
modbusQueryFlooding_df['date_time'] = pd.to_datetime(df['EpochTime'], unit='s')

## TcpSYNFlood

In [30]:
csv_files = glob.glob('captures1_v2/tcpSYNFloodDDoS/*.csv')

# Create an empty dataframe to store the combined data
tcpSYNFlood_df = pd.DataFrame()

# Loop through each CSV file and append its contents to the combined dataframe
for csv_file in csv_files:
    try:
        df = pd.read_csv(csv_file)
        capture = csv_file.split('/')[-1].replace('.csv', '')
        df['CaptureName'] = capture
        tcpSYNFlood_df = pd.concat([tcpSYNFlood_df, df])
    except Exception as e:
        print(f"Failed to read {csv_file}: {e}")

tcpSYNFlood_df['AttackName'] = 'tcpSYNFloodDDoS'

tcpSYNFlood_df.head()

Failed to read captures1_v2/tcpSYNFloodDDoS\eth2dump-tcpSYNFloodDDoS30m-6h_1.csv: 'utf-8' codec can't decode byte 0x86 in position 261916: invalid start byte


Unnamed: 0,No.,Time,SrcIP,DstIP,Protocol,Length,Info,SrcMAC,SrcMACResolved,SrcOUIResolved,...,DstPort,DstMACResolved.1,SYNFlag,ACKFlag,ProtocolType,EpochTime,RelativeTime,TimeDelta,CaptureName,AttackName
0,1,0.0,172.27.224.70,172.27.224.250,TCP,60,49179 > 502 [ACK] Seq=1 Ack=1 Win=64865 Len=0,00:0c:29:9d:9e:9e,VMware_9d:9e:9e,"VMware, Inc.",...,502.0,Telemech_09:51:3b,Not set,Set,TCP,1529543000.0,0.0,0.0,tcpSYNFloodDDoS\eth2dump-tcpSYNFloodDDoS-15m-1...,tcpSYNFloodDDoS
1,2,0.111408,172.27.224.70,172.27.224.250,Modbus/TCP,66,"Query: Trans: 0; Unit: 1, Func: 3: ...",00:0c:29:9d:9e:9e,VMware_9d:9e:9e,"VMware, Inc.",...,502.0,Telemech_09:51:3b,Not set,Set,Modbus/TCP,1529543000.0,0.111408,0.111408,tcpSYNFloodDDoS\eth2dump-tcpSYNFloodDDoS-15m-1...,tcpSYNFloodDDoS
2,3,0.120144,172.27.224.250,172.27.224.70,Modbus/TCP,85,"Response: Trans: 0; Unit: 1, Func: 3: ...",00:80:f4:09:51:3b,Telemech_09:51:3b,Telemechanique Electrique,...,49179.0,VMware_9d:9e:9e,Not set,Set,Modbus/TCP,1529543000.0,0.120144,0.008736,tcpSYNFloodDDoS\eth2dump-tcpSYNFloodDDoS-15m-1...,tcpSYNFloodDDoS
3,4,0.238885,HewlettP_8e:40:b3,Spanning-tree-(for-bridges)_00,STP,64,RST. Root = 32768/0/00:18:6e:d7:8a:c0 Cost = ...,d0:7e:28:8e:40:b3,HewlettP_8e:40:b3,Hewlett Packard,...,,Spanning-tree-(for-bridges)_00,,,STP,1529543000.0,0.238885,0.118741,tcpSYNFloodDDoS\eth2dump-tcpSYNFloodDDoS-15m-1...,tcpSYNFloodDDoS
4,5,0.327575,172.27.224.70,172.27.224.250,TCP,60,49179 > 502 [ACK] Seq=13 Ack=32 Win=65392 Len=0,00:0c:29:9d:9e:9e,VMware_9d:9e:9e,"VMware, Inc.",...,502.0,Telemech_09:51:3b,Not set,Set,TCP,1529543000.0,0.327575,0.08869,tcpSYNFloodDDoS\eth2dump-tcpSYNFloodDDoS-15m-1...,tcpSYNFloodDDoS


In [31]:
# Check for missing values
missing_values = tcpSYNFlood_df.isnull().sum()
print(missing_values)

No.                      0
Time                     0
SrcIP                    0
DstIP                    0
Protocol                 0
Length                   0
Info                   391
SrcMAC                   0
SrcMACResolved           0
SrcOUIResolved           0
SrcPort             348148
SequenceNumber      389327
SrcOUI                   0
DstMAC                   0
DstMACResolved           0
DstOUI                   0
DstOUResolved       291851
DstPort             348148
DstMACResolved.1         0
SYNFlag             389327
ACKFlag             389327
ProtocolType             0
EpochTime                0
RelativeTime             0
TimeDelta                0
CaptureName              0
AttackName               0
dtype: int64


In [32]:
# 1. Handling missing values in SrcPort and DstPort
tcpSYNFlood_df['SrcPort'].fillna(-1, inplace=True)  # Filling missing SrcPort with -1
tcpSYNFlood_df['DstPort'].fillna(-1, inplace=True)  # Filling missing DstPort with -1

# 2. Handling missing values in SequenceNumber
# Assuming TCP packets have SequenceNumber and using median imputation
median_seqnum = tcpSYNFlood_df[tcpSYNFlood_df['Protocol'] == 'TCP']['SequenceNumber'].median()
tcpSYNFlood_df['SequenceNumber'].fillna(median_seqnum, inplace=True)

# 3. Handling missing values in DstOUResolved
tcpSYNFlood_df['DstOUResolved'].fillna('Unknown', inplace=True)  # Filling missing DstOUResolved with 'Unknown'

# 4. Handling missing values in SYNFlag and ACKFlag
tcpSYNFlood_df['SYNFlag'].fillna(False, inplace=True)  # Filling missing SYNFlag with False
tcpSYNFlood_df['ACKFlag'].fillna(False, inplace=True)  # Filling missing ACKFlag with False

# Fill missing Info values with a placeholder like "Unknown"
df['Info'].fillna('Unknown', inplace=True)

In [33]:
# Check for missing values
missing_values = tcpSYNFlood_df.isnull().sum()
print(missing_values)

No.                   0
Time                  0
SrcIP                 0
DstIP                 0
Protocol              0
Length                0
Info                391
SrcMAC                0
SrcMACResolved        0
SrcOUIResolved        0
SrcPort               0
SequenceNumber        0
SrcOUI                0
DstMAC                0
DstMACResolved        0
DstOUI                0
DstOUResolved         0
DstPort               0
DstMACResolved.1      0
SYNFlag               0
ACKFlag               0
ProtocolType          0
EpochTime             0
RelativeTime          0
TimeDelta             0
CaptureName           0
AttackName            0
dtype: int64


In [34]:
# Convert EpochTime to datetime format and create 'date_time' column
tcpSYNFlood_df['date_time'] = pd.to_datetime(df['EpochTime'], unit='s')

## pingFloodDDoS

In [35]:
csv_files = glob.glob('captures1_v2/pingFloodDDoS/*.csv')

pingFloodDDos_df = pd.DataFrame()

for csv_file in csv_files:
    try:
        df = pd.read_csv(csv_file)
        capture = csv_file.split('/')[-1].replace('.csv', '')
        df['CaptureName'] = capture
        pingFloodDDos_df = pd.concat([pingFloodDDos_df, df])
    except Exception as e:
        print(f"Failed to read {csv_file}: {e}")

pingFloodDDos_df['AttackName'] = 'pingFloodDDoS'

pingFloodDDos_df.head()

Unnamed: 0,No.,Time,SrcIP,DstIP,Protocol,Length,Info,SrcMAC,SrcMACResolved,SrcOUIResolved,...,DstPort,DstMACResolved.1,SYNFlag,ACKFlag,ProtocolType,EpochTime,RelativeTime,TimeDelta,CaptureName,AttackName
0,1,0.0,172.27.224.70,172.27.224.250,TCP,60,49179 > 502 [ACK] Seq=1 Ack=1 Win=65299 Len=0,00:0c:29:9d:9e:9e,VMware_9d:9e:9e,"VMware, Inc.",...,502.0,Telemech_09:51:3b,Not set,Set,TCP,1529539000.0,0.0,0.0,pingFloodDDoS\eth2dump-pingFloodDDoS-15m-12h_1,pingFloodDDoS
1,2,0.142182,172.27.224.70,172.27.224.250,Modbus/TCP,66,"Query: Trans: 0; Unit: 1, Func: 3: ...",00:0c:29:9d:9e:9e,VMware_9d:9e:9e,"VMware, Inc.",...,502.0,Telemech_09:51:3b,Not set,Set,Modbus/TCP,1529539000.0,0.142182,0.142182,pingFloodDDoS\eth2dump-pingFloodDDoS-15m-12h_1,pingFloodDDoS
2,3,0.145178,172.27.224.250,172.27.224.70,Modbus/TCP,85,"Response: Trans: 0; Unit: 1, Func: 3: ...",00:80:f4:09:51:3b,Telemech_09:51:3b,Telemechanique Electrique,...,49179.0,VMware_9d:9e:9e,Not set,Set,Modbus/TCP,1529539000.0,0.145178,0.002996,pingFloodDDoS\eth2dump-pingFloodDDoS-15m-12h_1,pingFloodDDoS
3,4,0.358785,172.27.224.70,172.27.224.250,TCP,60,49179 > 502 [ACK] Seq=13 Ack=32 Win=65268 Len=0,00:0c:29:9d:9e:9e,VMware_9d:9e:9e,"VMware, Inc.",...,502.0,Telemech_09:51:3b,Not set,Set,TCP,1529539000.0,0.358785,0.213607,pingFloodDDoS\eth2dump-pingFloodDDoS-15m-12h_1,pingFloodDDoS
4,5,0.380756,HewlettP_8e:40:b3,Spanning-tree-(for-bridges)_00,STP,64,RST. Root = 32768/0/00:18:6e:d7:8a:c0 Cost = ...,d0:7e:28:8e:40:b3,HewlettP_8e:40:b3,Hewlett Packard,...,,Spanning-tree-(for-bridges)_00,,,STP,1529539000.0,0.380756,0.021971,pingFloodDDoS\eth2dump-pingFloodDDoS-15m-12h_1,pingFloodDDoS


In [36]:
# Check for missing values
missing_values = pingFloodDDos_df.isnull().sum()
print(missing_values)

No.                       0
Time                      0
SrcIP                     0
DstIP                     0
Protocol                  0
Length                    0
Info                      0
SrcMAC                    0
SrcMACResolved            0
SrcOUIResolved            0
SrcPort             1803759
SequenceNumber      1845741
SrcOUI                    0
DstMAC                    0
DstMACResolved            0
DstOUI                    0
DstOUResolved        315586
DstPort             1803759
DstMACResolved.1          0
SYNFlag             1845741
ACKFlag             1845741
ProtocolType              0
EpochTime                 0
RelativeTime              0
TimeDelta                 0
CaptureName               0
AttackName                0
dtype: int64


In [37]:
# 1. Handling missing values in SrcPort and DstPort
pingFloodDDos_df['SrcPort'].fillna(-1, inplace=True)  # Filling missing SrcPort with -1
pingFloodDDos_df['DstPort'].fillna(-1, inplace=True)  # Filling missing DstPort with -1

# 2. Handling missing values in SequenceNumber
# Assuming TCP packets have SequenceNumber and using median imputation
median_seqnum = pingFloodDDos_df[pingFloodDDos_df['Protocol'] == 'TCP']['SequenceNumber'].median()
pingFloodDDos_df['SequenceNumber'].fillna(median_seqnum, inplace=True)

# 3. Handling missing values in DstOUResolved
pingFloodDDos_df['DstOUResolved'].fillna('Unknown', inplace=True)  # Filling missing DstOUResolved with 'Unknown'

# 4. Handling missing values in SYNFlag and ACKFlag
pingFloodDDos_df['SYNFlag'].fillna(False, inplace=True)  # Filling missing SYNFlag with False
pingFloodDDos_df['ACKFlag'].fillna(False, inplace=True)  # Filling missing ACKFlag with False


In [38]:
# Check for missing values
missing_values = pingFloodDDos_df.isnull().sum()
print(missing_values)

No.                 0
Time                0
SrcIP               0
DstIP               0
Protocol            0
Length              0
Info                0
SrcMAC              0
SrcMACResolved      0
SrcOUIResolved      0
SrcPort             0
SequenceNumber      0
SrcOUI              0
DstMAC              0
DstMACResolved      0
DstOUI              0
DstOUResolved       0
DstPort             0
DstMACResolved.1    0
SYNFlag             0
ACKFlag             0
ProtocolType        0
EpochTime           0
RelativeTime        0
TimeDelta           0
CaptureName         0
AttackName          0
dtype: int64


In [39]:
# Convert EpochTime to datetime format and create 'date_time' column
pingFloodDDos_df['date_time'] = pd.to_datetime(df['EpochTime'], unit='s')
pingFloodDDos_df.head()

Unnamed: 0,No.,Time,SrcIP,DstIP,Protocol,Length,Info,SrcMAC,SrcMACResolved,SrcOUIResolved,...,DstMACResolved.1,SYNFlag,ACKFlag,ProtocolType,EpochTime,RelativeTime,TimeDelta,CaptureName,AttackName,date_time
0,1,0.0,172.27.224.70,172.27.224.250,TCP,60,49179 > 502 [ACK] Seq=1 Ack=1 Win=65299 Len=0,00:0c:29:9d:9e:9e,VMware_9d:9e:9e,"VMware, Inc.",...,Telemech_09:51:3b,Not set,Set,TCP,1529539000.0,0.0,0.0,pingFloodDDoS\eth2dump-pingFloodDDoS-15m-12h_1,pingFloodDDoS,2018-05-28 15:38:47.982399940
1,2,0.142182,172.27.224.70,172.27.224.250,Modbus/TCP,66,"Query: Trans: 0; Unit: 1, Func: 3: ...",00:0c:29:9d:9e:9e,VMware_9d:9e:9e,"VMware, Inc.",...,Telemech_09:51:3b,Not set,Set,Modbus/TCP,1529539000.0,0.142182,0.142182,pingFloodDDoS\eth2dump-pingFloodDDoS-15m-12h_1,pingFloodDDoS,2018-05-28 15:38:48.077550888
2,3,0.145178,172.27.224.250,172.27.224.70,Modbus/TCP,85,"Response: Trans: 0; Unit: 1, Func: 3: ...",00:80:f4:09:51:3b,Telemech_09:51:3b,Telemechanique Electrique,...,VMware_9d:9e:9e,Not set,Set,Modbus/TCP,1529539000.0,0.145178,0.002996,pingFloodDDoS\eth2dump-pingFloodDDoS-15m-12h_1,pingFloodDDoS,2018-05-28 15:38:48.084661007
3,4,0.358785,172.27.224.70,172.27.224.250,TCP,60,49179 > 502 [ACK] Seq=13 Ack=32 Win=65268 Len=0,00:0c:29:9d:9e:9e,VMware_9d:9e:9e,"VMware, Inc.",...,Telemech_09:51:3b,Not set,Set,TCP,1529539000.0,0.358785,0.213607,pingFloodDDoS\eth2dump-pingFloodDDoS-15m-12h_1,pingFloodDDoS,2018-05-28 15:38:48.294389009
4,5,0.380756,HewlettP_8e:40:b3,Spanning-tree-(for-bridges)_00,STP,64,RST. Root = 32768/0/00:18:6e:d7:8a:c0 Cost = ...,d0:7e:28:8e:40:b3,HewlettP_8e:40:b3,Hewlett Packard,...,Spanning-tree-(for-bridges)_00,False,False,STP,1529539000.0,0.380756,0.021971,pingFloodDDoS\eth2dump-pingFloodDDoS-15m-12h_1,pingFloodDDoS,2018-05-28 15:38:48.321327925


In [40]:
#MITM
print(len(mitm_df))

#TCP_SYN
tcpSYNFlood_df = tcpSYNFlood_df.head(2069563)
print(len(tcpSYNFlood_df))
tcpSYNFlood_df

#PING
pingFloodDDos_df = pingFloodDDos_df.head(2069563)
pingFloodDDos_df

#modbusQueryFlooding
modbusQueryFlooding_df = modbusQueryFlooding_df.head(2069563)

#modbusQuery2Flooding_df
modbusQuery2Flooding_df = modbusQuery2Flooding_df.head(2069563)

mitm_df.to_csv('mitm.csv', index=False)
clean_df.to_csv('clean.csv', index=False)
tcpSYNFlood_df.to_csv('tcpSYNFlood.csv', index=False)
pingFloodDDos_df.to_csv('pingFloodDDos.csv', index=False)
modbusQueryFlooding_df.to_csv('modbusQueryFlooding.csv', index=False)
modbusQuery2Flooding_df.to_csv('modbusQuery2Flooding.csv', index=False)

2069563
2069563


In [41]:
modbusQueryFlooding_df

Unnamed: 0,No.,Time,SrcIP,DstIP,Protocol,Length,Info,SrcMAC,SrcMACResolved,SrcOUIResolved,...,DstMACResolved.1,SYNFlag,ACKFlag,ProtocolType,EpochTime,TimeDelta,RelativeTime,CaptureName,AttackName,date_time
0,1,0.000000,HewlettPacka_8e:40:b3,Spanning-tree-(for-bridges)_00,STP,64,RST. Root = 32768/0/00:18:6e:d7:8a:c0 Cost = ...,HewlettPacka_8e:40:b3,HewlettPacka_8e:40:b3,Hewlett Packard,...,Spanning-tree-(for-bridges)_00,False,False,STP,1.526983e+09,0.036320,0.000000,modbusQueryFlooding\eth2dump-modbusQueryFloodi...,modbusQueryFlooding,2018-05-26 00:39:34.756860018
1,2,0.036320,172.27.224.251,172.27.224.250,TCP,60,"50272 > 502 [FIN, ACK] Seq=1 Ack=1 Win=2036 ...",ASUSTekCOMPU_64:40:79,ASUSTekCOMPU_64:40:79,ASUSTek COMPUTER INC.,...,Telemecaniqu_09:51:3b,Not set,Set,TCP,1.526983e+09,0.036320,0.036320,modbusQueryFlooding\eth2dump-modbusQueryFloodi...,modbusQueryFlooding,2018-05-26 00:39:34.763644934
2,3,0.046578,172.27.224.250,172.27.224.251,TCP,60,502 > 50272 [ACK] Seq=1 Ack=2 Win=8712 Len=0,Telemecaniqu_09:51:3b,Telemecaniqu_09:51:3b,Telemecanique Electrique,...,ASUSTekCOMPU_64:40:79,Not set,Set,TCP,1.526983e+09,0.010258,0.046578,modbusQueryFlooding\eth2dump-modbusQueryFloodi...,modbusQueryFlooding,2018-05-26 00:39:34.858760118
3,4,0.048935,172.27.224.250,172.27.224.251,TCP,60,"502 > 50272 [FIN, ACK] Seq=1 Ack=2 Win=8712 ...",Telemecaniqu_09:51:3b,Telemecaniqu_09:51:3b,Telemecanique Electrique,...,ASUSTekCOMPU_64:40:79,Not set,Set,TCP,1.526983e+09,0.002357,0.048935,modbusQueryFlooding\eth2dump-modbusQueryFloodi...,modbusQueryFlooding,2018-05-26 00:39:34.869422913
4,5,0.048946,172.27.224.251,172.27.224.250,TCP,60,50272 > 502 [ACK] Seq=2 Ack=2 Win=2036 Len=0,ASUSTekCOMPU_64:40:79,ASUSTekCOMPU_64:40:79,ASUSTek COMPUTER INC.,...,Telemecaniqu_09:51:3b,Not set,Set,TCP,1.526983e+09,0.000011,0.048946,modbusQueryFlooding\eth2dump-modbusQueryFloodi...,modbusQueryFlooding,2018-05-26 00:39:35.075664043
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
398368,398369,19267.505692,172.27.224.251,172.27.224.250,TCP,60,"57909 > 502 [FIN, ACK] Seq=13 Ack=13 Win=203...",ASUSTekCOMPU_64:40:79,ASUSTekCOMPU_64:40:79,ASUSTek COMPUTER INC.,...,Telemecaniqu_09:51:3b,Not set,Set,TCP,1.527293e+09,0.355063,19267.505692,modbusQueryFlooding\eth2dump-modbusQueryFloodi...,modbusQueryFlooding,2018-05-26 03:25:26.499504089
398369,398370,19267.515908,172.27.224.250,172.27.224.251,TCP,60,502 > 57909 [ACK] Seq=13 Ack=14 Win=8712 Len=0,Telemecaniqu_09:51:3b,Telemecaniqu_09:51:3b,Telemecanique Electrique,...,ASUSTekCOMPU_64:40:79,Not set,Set,TCP,1.527293e+09,0.010216,19267.515908,modbusQueryFlooding\eth2dump-modbusQueryFloodi...,modbusQueryFlooding,2018-05-26 03:25:26.499751091
398370,398371,19267.516764,172.27.224.250,172.27.224.251,TCP,60,"502 > 57909 [FIN, ACK] Seq=13 Ack=14 Win=871...",Telemecaniqu_09:51:3b,Telemecaniqu_09:51:3b,Telemecanique Electrique,...,ASUSTekCOMPU_64:40:79,Not set,Set,TCP,1.527293e+09,0.000856,19267.516764,modbusQueryFlooding\eth2dump-modbusQueryFloodi...,modbusQueryFlooding,2018-05-26 03:25:26.499994993
398371,398372,19267.516784,172.27.224.251,172.27.224.250,TCP,60,57909 > 502 [ACK] Seq=14 Ack=14 Win=2036 Len=0,ASUSTekCOMPU_64:40:79,ASUSTekCOMPU_64:40:79,ASUSTek COMPUTER INC.,...,Telemecaniqu_09:51:3b,Not set,Set,TCP,1.527293e+09,0.000020,19267.516784,modbusQueryFlooding\eth2dump-modbusQueryFloodi...,modbusQueryFlooding,2018-05-26 03:25:26.500250101


In [42]:
print(tcpSYNFlood_df.dtypes)

No.                          int64
Time                       float64
SrcIP                       object
DstIP                       object
Protocol                    object
Length                       int64
Info                        object
SrcMAC                      object
SrcMACResolved              object
SrcOUIResolved              object
SrcPort                    float64
SequenceNumber             float64
SrcOUI                      object
DstMAC                      object
DstMACResolved              object
DstOUI                      object
DstOUResolved               object
DstPort                    float64
DstMACResolved.1            object
SYNFlag                     object
ACKFlag                     object
ProtocolType                object
EpochTime                  float64
RelativeTime               float64
TimeDelta                  float64
CaptureName                 object
AttackName                  object
date_time           datetime64[ns]
dtype: object
