In [1]:
# Import libraries and dependencies
import pandas as pd
import zed as zed
import networkx as nx
import matplotlib.pyplot as plt
import ipaddress
import math
from collections import Counter
import networkx.algorithms.community as nxcom

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# `file_name` is substituted into the `from` clause of both queries below.
file_name = 'infected.pcap'
# The remaining settings are not referenced in this cell.
# NOTE(review): presumably consumed by later cells -- confirm.
Verbose = False
PrintImage = True
viz_threshold = 500
sample_size = 250

# ---------------------------------------------------------------------------
# Query text
# ---------------------------------------------------------------------------
# Zeek "conn" records: count per (originator, responder, responder port).
ZQL_query = '_path == "conn" | count() by id.orig_h, id.resp_h, id.resp_p | sort id.orig_h, id.resp_h, id.resp_p'
# Suricata alert events: count per (src, dest, dest port, severity, signature).
ZQL_query_suricata = 'event_type == "alert" | count() by src_ip, dest_ip, dest_port, alert.severity, alert.signature | sort src_ip, dest_ip, dest_port, alert.severity, alert.signature'

# ---------------------------------------------------------------------------
# Run both queries and materialise the results
# ---------------------------------------------------------------------------
client = zed.Client()
values = client.query('from {} | {}'.format(file_name, ZQL_query))
values_suricata = client.query('from {} | {}'.format(file_name, ZQL_query_suricata))

# The query results are turned into lists because they are used more than once
# below (printed record by record, then passed to pd.json_normalize).
values_list = list(values)
values_list_suricata = list(values_suricata)

# Show every raw connection record, one per line.
for value in values_list:
    print(value)

# ---------------------------------------------------------------------------
# Flatten the nested records into tables
# ---------------------------------------------------------------------------
# json_normalize turns nested keys into dotted column names.
data_frame = pd.json_normalize(values_list)                    # Zeek connection counts
data_frame_suricata = pd.json_normalize(values_list_suricata)  # Suricata alert counts

# Row counts of the two tables.
total_zeek = data_frame.shape[0]
total_suricata = data_frame_suricata.shape[0]

print(data_frame)
print(data_frame_suricata)

{'id': {'orig_h': IPv4Address('10.2.17.2'), 'resp_h': IPv4Address('10.2.17.101'), 'resp_p': 49787}, 'count': 1}
{'id': {'orig_h': IPv4Address('10.2.17.101'), 'resp_h': IPv4Address('3.222.126.94'), 'resp_p': 80}, 'count': 1}
{'id': {'orig_h': IPv4Address('10.2.17.101'), 'resp_h': IPv4Address('10.2.17.1'), 'resp_p': 445}, 'count': 1}
{'id': {'orig_h': IPv4Address('10.2.17.101'), 'resp_h': IPv4Address('10.2.17.2'), 'resp_p': 53}, 'count': 97}
{'id': {'orig_h': IPv4Address('10.2.17.101'), 'resp_h': IPv4Address('10.2.17.2'), 'resp_p': 88}, 'count': 27}
{'id': {'orig_h': IPv4Address('10.2.17.101'), 'resp_h': IPv4Address('10.2.17.2'), 'resp_p': 123}, 'count': 5}
{'id': {'orig_h': IPv4Address('10.2.17.101'), 'resp_h': IPv4Address('10.2.17.2'), 'resp_p': 135}, 'count': 8}
{'id': {'orig_h': IPv4Address('10.2.17.101'), 'resp_h': IPv4Address('10.2.17.2'), 'resp_p': 137}, 'count': 2}
{'id': {'orig_h': IPv4Address('10.2.17.101'), 'resp_h': IPv4Address('10.2.17.2'), 'resp_p': 138}, 'count': 2}
{'id':

In [2]:
# Build two edge-list frames (Zeek connections and Suricata alerts) that share
# the column names source / target / target_port.
#
# The columns are selected with reindex(columns=...) rather than direct
# data_frame["..."] lookups: when a query returns no rows (for example a capture
# that raised no Suricata alerts) pd.json_normalize([]) yields a frame with no
# columns, and a direct lookup would raise KeyError. reindex instead produces an
# empty frame that still carries the expected column names.
zeek_column_map = {
    "id.orig_h": "source",
    "id.resp_h": "target",
    "id.resp_p": "target_port",
    "count": "conncount",
}
suri_column_map = {
    "src_ip": "source",
    "dest_ip": "target",
    "dest_port": "target_port",
    "alert.severity": "severity",
    "count": "count",
}

df_zeek = data_frame.reindex(columns=list(zeek_column_map)).rename(columns=zeek_column_map)
df_suri = data_frame_suricata.reindex(columns=list(suri_column_map)).rename(columns=suri_column_map)

# Preview the first rows of each frame.
print(df_zeek.head(5))
print(df_suri.head(5))

        source        target  target_port  conncount
0    10.2.17.2   10.2.17.101        49787          1
1  10.2.17.101  3.222.126.94           80          1
2  10.2.17.101     10.2.17.1          445          1
3  10.2.17.101     10.2.17.2           53         97
4  10.2.17.101     10.2.17.2           88         27
      source       target  target_port  severity  count
0  10.2.17.2  10.2.17.101        49674         3      1
1  10.2.17.2  10.2.17.101        49680         3      1
2  10.2.17.2  10.2.17.101        49687         3      1
3  10.2.17.2  10.2.17.101        49704         3      1
4  10.2.17.2  10.2.17.101        49709         3      1


In [3]:
# Stack the Zeek rows on top of the Suricata rows and renumber the index from 0.
# Columns present in only one of the two frames are filled with NaN for the
# rows that came from the other frame.
df_merge = pd.concat(
    (df_zeek, df_suri),
    axis=0,
    ignore_index=True,
)
print(df_merge)

              source        target  target_port  conncount  severity  count
0          10.2.17.2   10.2.17.101        49787        1.0       NaN    NaN
1        10.2.17.101  3.222.126.94           80        1.0       NaN    NaN
2        10.2.17.101     10.2.17.1          445        1.0       NaN    NaN
3        10.2.17.101     10.2.17.2           53       97.0       NaN    NaN
4        10.2.17.101     10.2.17.2           88       27.0       NaN    NaN
..               ...           ...          ...        ...       ...    ...
114   179.191.108.58   10.2.17.101        49863        NaN       3.0    1.0
115   179.191.108.58   10.2.17.101        50130        NaN       3.0    1.0
116   179.191.108.58   10.2.17.101        50141        NaN       3.0    1.0
117  195.123.208.170   10.2.17.101        49864        NaN       1.0    1.0
118  195.123.208.170   10.2.17.101        49864        NaN       2.0    2.0

[119 rows x 6 columns]


In [None]:
# Replace every missing value (NaN) in the merged frame with 0.
df_merge = df_merge.fillna(value=0)
print(df_merge)