In [1]:
import numpy as np
import matplotlib.pyplot as plt

import json # original json library

import pandas as pd

In [2]:
# format EVE json en sortie de Suricata :

# {
#---- common structure :
# "timestamp":"2009-11-24T21:27:09.534255",
# "event_type":"TYPE",
# ...tuple... ,
# "TYPE":{ ... type specific content ... }

#---- when processing a pcap file : 
# "pcap_cnt" : 123,

# }

#---- EVENT types :
# Alert
# "alert": {
#   "action": "allowed",
#   "gid": 1,
#   "signature_id": 1,
#   "rev": 1,
#   "app_proto": "http",
#   "signature": "HTTP body talking about corruption",
#   "severity": 3,
#   "source": {
#     "ip": "192.168.43.32",
#     "port": 36292
#   },
#   "target": {
#     "ip": "179.60.192.3",
#     "port": 80
#   },


# Anomaly

#     "type": Either "decode", "stream" or "applayer". In rare cases, type will be "unknown". When this occurs, an additional field named "code" will be present. Events with type "applayer" are detected by the application layer parsers.
#     "event" The name of the anomalous event. Events of type "decode" are prefixed with "decoder"; events of type "stream" are prefixed with "stream".
#     "code" If "type" is "unknown", than "code" contains the unrecognized event code. Otherwise, this field is not present.

# The following field is included when "type" has the value "applayer":

#     "layer" Indicates the handling layer that detected the event. This will be "proto_parser" (protocol parser), "proto_detect" (protocol detection) or "parser."

# EVENT TYPE FLOW

# 16.1.2.12.1. Fields

#     "pkts_toserver": total number of packets to server, include bypassed packets
#     "pkts_toclient": total number of packets to client
#     "bytes_toserver": total bytes count to server
#     "bytes_toclient": total bytes count to client
#     "bypassed.pkts_toserver": number of bypassed packets to server
#     "bypassed.pkts_toclient": number of bypassed packets to client
#     "bypassed.bytes_toserver": bypassed bytes count to server
#     "bypassed.bytes_toclient": bypassed bytes count to client
#     "start": date of start of the flow
#     "end": date of end of flow (last seen packet)
#     "age": duration of the flow
#     "bypass": if the flow has been bypassed, it is set to "local" (internal bypass) or "capture"
#     "state": display state of the flow (include "new", "established", "closed", "bypassed")
#     "reason": mechanism that did trigger the end of the flow (include "timeout", "forced" and "shutdown")
#     "alerted": "true" or "false" depending if an alert has been seen on flow


# EVENT TYPE HTTP

# EVENT TYPE DNS

In [3]:
# https://www.stamus-networks.com/blog/jupyter-playbooks-for-suricata-part-1

# https://malware-traffic-analysis.net/

In [4]:
!ls -al   # ! to launch a shell command

# % to invoke built-in functions

total 200
drwxr-xr-x 2 benjamin benjamin   4096 Jun 18 17:09 .
drwxr-xr-x 6 benjamin benjamin   4096 Jun 18 16:06 ..
-rw-r--r-- 1 benjamin benjamin  49475 Jun 18 15:57 kdd_toy.ipynb
-rw-r--r-- 1 benjamin benjamin  16197 Jun 18 18:12 SandBox.ipynb
-rw-r--r-- 1 benjamin benjamin 126823 Jun 18 16:15 toy_scapy.ipynb


In [5]:
with open ("../data/eve.json") as f:
    f.readline()  # reads a single line from the file (here, one packet)
    for i, line in enumerate(f):
        eve = json.loads(line)  # deserialize a string s into an object
        if i%100==0:
            print(json.dumps(eve, indent=2))   # dumps : serializes an object into a string
            print(f"------------------------------------------------------\n")

{
  "timestamp": "2023-06-17T10:46:07.301751+0200",
  "flow_id": 913732596112055,
  "pcap_cnt": 51,
  "event_type": "alert",
  "src_ip": "2a01:cb19:872e:3000:0e4f:3187:540c:d66c",
  "src_port": 54494,
  "dest_ip": "2a04:4e42:006a:0000:0000:0000:0000:0760",
  "dest_port": 443,
  "proto": "TCP",
  "community_id": "1:oOD614dpphn//UZsI8zItjDl5i4=",
  "alert": {
    "action": "allowed",
    "gid": 1,
    "signature_id": 2200077,
    "rev": 2,
    "signature": "SURICATA TCPv6 invalid checksum",
    "category": "Generic Protocol Command Decode",
    "severity": 3
  },
  "flow": {
    "pkts_toserver": 1,
    "pkts_toclient": 0,
    "bytes_toserver": 86,
    "bytes_toclient": 0,
    "start": "2023-06-17T10:46:07.301751+0200"
  }
}
------------------------------------------------------

{
  "timestamp": "2023-06-17T10:46:05.765754+0200",
  "flow_id": 146718008913722,
  "pcap_cnt": 3,
  "event_type": "alert",
  "src_ip": "2a01:cb19:872e:3000:0e4f:3187:540c:d66c",
  "src_port": 36106,
  "dest_ip":

In [6]:
# Pandas provides a useful method – json_normalize – for normalizing nested JSON fields into dataframe. Resulting columns use dot notation to signify nested objects, similar to how Elasticsearch does it

with open ("../data/eve.json") as packets:
    df = pd.json_normalize(
        [json.loads(packet) for packet in packets]
    )

In [13]:
df

Unnamed: 0,timestamp,flow_id,pcap_cnt,event_type,src_ip,src_port,dest_ip,dest_port,proto,community_id,...,stats.app_layer.tx.rdp,stats.app_layer.tx.dcerpc_udp,stats.app_layer.tx.dns_udp,stats.app_layer.tx.nfs_udp,stats.app_layer.tx.krb5_udp,stats.app_layer.expectations,stats.http.memuse,stats.http.memcap,stats.ftp.memuse,stats.ftp.memcap
0,2023-06-17T10:46:05.765756+0200,2.007446e+15,4.0,alert,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,36120.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:HhORRMa8pU37MFiMESZo7eeh7K0=,...,,,,,,,,,,
1,2023-06-17T10:46:07.301751+0200,9.137326e+14,51.0,alert,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,54494.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:oOD614dpphn//UZsI8zItjDl5i4=,...,,,,,,,,,,
2,2023-06-17T10:46:09.089755+0200,1.315192e+15,55.0,alert,192.168.1.10,57578.0,192.229.221.95,80.0,TCP,1:nAfGnlZMYrDt5CdOeI1UDx4XW6k=,...,,,,,,,,,,
3,2023-06-17T10:46:11.905771+0200,6.695723e+14,71.0,alert,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,36124.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:qS6b8DUCNL2QP3gHebXsXvfYWtM=,...,,,,,,,,,,
4,2023-06-17T10:46:14.465756+0200,9.175379e+14,78.0,alert,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,36152.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:X+PE30OIsulrPxRnEn/EoDaS3Zs=,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
711,2023-06-17T10:46:05.765744+0200,8.309170e+14,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,36184.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:8Ln1Mp0CeUsMen6xJ13diukJLWQ=,...,,,,,,,,,,
712,2023-06-17T10:46:05.765744+0200,2.680272e+14,,flow,fe80:0000:0000:0000:5efa:25ff:fe41:fc90,,ff02:0000:0000:0000:0000:0001:ff0c:d66c,,IPv6-ICMP,1:3WV+iOybOLpMJwWxLpxaISaKQ+U=,...,,,,,,,,,,
713,2023-06-17T10:46:05.765744+0200,2.753845e+14,,flow,192.168.1.10,43644.0,192.168.1.1,53.0,UDP,1:6Nd9Q5wFvHw7lOxhPDKWCUQ4zds=,...,,,,,,,,,,
714,2023-06-17T10:46:05.765744+0200,1.264715e+15,,flow,fe80:0000:0000:0000:5efa:25ff:fe41:fc90,,ff02:0000:0000:0000:0000:0001:ff8e:ee30,,IPv6-ICMP,1:IPRF5HxIYTVaPKmLajo1TmQLrR8=,...,,,,,,,,,,


In [9]:
df.shape

(716, 324)

In [12]:
print(df.iloc[0,:])

timestamp                               2023-06-17T10:46:05.765756+0200
flow_id                                              2007446017912636.0
pcap_cnt                                                            4.0
event_type                                                        alert
src_ip                          2a01:cb19:872e:3000:0e4f:3187:540c:d66c
                                                 ...                   
stats.app_layer.expectations                                        NaN
stats.http.memuse                                                   NaN
stats.http.memcap                                                   NaN
stats.ftp.memuse                                                    NaN
stats.ftp.memcap                                                    NaN
Name: 0, Length: 324, dtype: object
