# Imports

In [1]:
import numpy as np
import matplotlib.pyplot as plt

import json # original json library

import pandas as pd
import logging

## Log set-up

In [2]:
# Destination file for the records emitted through this notebook's logger.
LOG_FILENAME = '/home/benjamin/Folders_Python/Cyber/logs/logfile.log'
# Record layout: timestamp -- logger name -- level -- message.
# Every field needs its conversion type ("s"); "%(asctime)%" without it does not render the timestamp.
LOG_FORMAT = '%(asctime)s -- %(name)s -- %(levelname)s -- %(message)s'
# LOG_LEVEL = logging.INFO

In [3]:
# Dedicated logger for this notebook/module.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)    # logger-level threshold; each handler filters further below
LOG_FORMAT = '%(asctime)s -- %(name)s -- %(levelname)s -- %(message)s'
formatter = logging.Formatter(LOG_FORMAT)

# logging.getLogger returns the same logger object on every call, so re-running
# this cell would stack a second file/console handler pair and emit every record
# twice. Drop (and close) whatever a previous execution attached first.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

# File handler: INFO and above are written to LOG_FILENAME.
# mode='w' truncates the log file each time this cell runs.
# NOTE(review): DEBUG records (e.g. those emitted by Packet) are filtered out
# here; lower this level to logging.DEBUG if they are wanted in the file.
file_handler = logging.FileHandler(LOG_FILENAME, mode='w')
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)

# Stream handler: WARNING and above are shown on the console.
console = logging.StreamHandler()
console.setLevel(logging.WARNING)
console.setFormatter(formatter)
logger.addHandler(console)

# pcap file

In [4]:
# NB : tshark -r <file>.pcap -T json > <file_pcap>.json -t r
# This is the shell command that takes a pcap file and converts it to JSON.
# The leading "!" hands the line to the shell; the output is redirected to exemple_pcap.json.

!tshark -r /home/benjamin/Folders_Python/Cyber/data/exemple.pcap -T json -t r > /home/benjamin/Folders_Python/Cyber/data/exemple_pcap.json

In [5]:
class Packet():
    """Parse one tshark-JSON packet record and expose a flat dict of selected features.

    ``raw_packet`` is expected to be shaped like
    ``{'_source': {'layers': {'frame': {...}, 'eth': {...}, 'ip': {...}, ...}}}``.
    Every layer is optional: a feature whose layer or field is absent is
    reported as ``None`` instead of raising.
    """

    def __init__(self, raw_packet:dict) -> None:
        """Keep the raw record; the feature dict is built lazily by ``packet_data``."""
        self.raw_packet = raw_packet
        self._packet_data = None  # cache, filled on the first read of ``packet_data``
        logger.debug('constructor of Packet instance has finished')

    @staticmethod
    def _layer(container, key):
        """Return ``container[key]`` when both are dicts, otherwise an empty dict.

        Returning ``{}`` lets callers chain ``.get`` without checking for a
        missing or malformed layer first.
        """
        if not isinstance(container, dict):
            return {}
        value = container.get(key)
        return value if isinstance(value, dict) else {}

    @property
    def packet_data(self):
        """Return the flat feature dict, building and caching it on first access.

        Values are passed through exactly as found in the record; missing
        fields map to ``None``.
        """
        if self._packet_data is not None:
            return self._packet_data

        layers = self._layer(self._layer(self.raw_packet, '_source'), 'layers')
        frame = self._layer(layers, 'frame')
        eth = self._layer(layers, 'eth')
        ip = self._layer(layers, 'ip')
        udp = self._layer(layers, 'udp')
        tcp = self._layer(layers, 'tcp')

        self._packet_data = {
            'frame_time': frame.get('frame.time'),
            'frame_time_relative': frame.get('frame.time_relative'),
            'frame_length': frame.get("frame.len"),
            'frame_protocols': frame.get("frame.protocols"),
            'eth_source': eth.get("eth.src"),
            'eth_dest': eth.get("eth.dst"),
            'ip_version': ip.get("ip.version"),
            'ip_header_length': ip.get("ip.hdr_len"),
            'ip_length': ip.get("ip.len"),
            'ip_id': ip.get("ip.id"),
            'ip_flags': ip.get("ip.flags"),
            'ip_ttl': ip.get("ip.ttl"),
            'ip_proto': ip.get("ip.proto"),
            'ip_source': ip.get("ip.src"),
            'ip_dest': ip.get("ip.dst"),
            'udp_source_port': udp.get("udp.srcport"),
            # Read the dedicated destination field (as done for tcp.dstport);
            # the generic "udp.port" key does not identify which endpoint it holds.
            'udp_dest_port': udp.get("udp.dstport"),
            'udp_length': udp.get("udp.length"),
            'tcp_source_port': tcp.get("tcp.srcport"),
            'tcp_dest_port': tcp.get("tcp.dstport"),
            'tcp_length': tcp.get("tcp.len"),
            'tcp_flags': tcp.get("tcp.flags"),
        }
        logger.debug('packet_data @property method has finished')
        return self._packet_data

    @packet_data.setter
    def packet_data(self, value):
        """Reject writes: the assignment is ignored and a warning is logged."""
        logger.warning('Illegal attempt to write a data_packet in a packet object')

In [6]:
# Location of the JSON export of the capture (the file the tshark command writes to).
PCAP_FILENAME = '/home/benjamin/Folders_Python/Cyber/data/exemple_pcap.json'

In [7]:
# Load the whole JSON export into a Python structure.
# The encoding is stated explicitly so the result does not depend on the platform's locale default.
with open(PCAP_FILENAME, encoding='utf-8') as raw_packets:
    json_object = json.load(raw_packets)

In [8]:
# Show the first parsed record to inspect its nested '_source' -> 'layers' layout.
json_object[0]

{'_index': 'packets-2023-06-13',
 '_type': 'doc',
 '_score': None,
 '_source': {'layers': {'frame': {'frame.encap_type': '1',
    'frame.time': 'Jun 13, 2023 18:58:42.019861000 CEST',
    'frame.offset_shift': '0.000000000',
    'frame.time_epoch': '1686675522.019861000',
    'frame.time_delta': '0.000000000',
    'frame.time_delta_displayed': '0.000000000',
    'frame.time_relative': '0.000000000',
    'frame.number': '1',
    'frame.len': '84',
    'frame.cap_len': '84',
    'frame.marked': '0',
    'frame.ignored': '0',
    'frame.protocols': 'eth:ethertype:ip:udp:dns'},
   'eth': {'eth.dst': '9c:9d:7e:91:92:4b',
    'eth.dst_tree': {'eth.dst_resolved': 'BeijingX_91:92:4b',
     'eth.dst.oui': '10263934',
     'eth.dst.oui_resolved': 'Beijing Xiaomi Mobile Software Co., Ltd',
     'eth.addr': '9c:9d:7e:91:92:4b',
     'eth.addr_resolved': 'BeijingX_91:92:4b',
     'eth.addr.oui': '10263934',
     'eth.addr.oui_resolved': 'Beijing Xiaomi Mobile Software Co., Ltd',
     'eth.dst.lg': 

In [9]:
# Wrap the first record in a Packet to try out the feature extraction.
p = Packet(json_object[0])

In [10]:
# The first read of the property builds the flat feature dict and caches it on the instance.
p.packet_data

{'frame_time': 'Jun 13, 2023 18:58:42.019861000 CEST',
 'frame_time_relative': '0.000000000',
 'frame_length': '84',
 'frame_protocols': 'eth:ethertype:ip:udp:dns',
 'eth_source': '6c:88:14:eb:a4:5c',
 'eth_dest': '9c:9d:7e:91:92:4b',
 'ip_version': '4',
 'ip_header_length': '20',
 'ip_length': '70',
 'ip_id': '0x0000f987',
 'ip_flags': '0x00000040',
 'ip_ttl': '64',
 'ip_proto': '17',
 'ip_source': '192.168.31.236',
 'ip_dest': '192.168.31.1',
 'udp_source_port': '40271',
 'udp_dest_port': '53',
 'udp_length': '50',
 'tcp_source_port': None,
 'tcp_dest_port': None,
 'tcp_length': None,
 'tcp_flags': None}

In [11]:

# One flat feature dict per record of the capture.
packets = [Packet(record).packet_data for record in json_object]

In [12]:
# One row per packet, one column per extracted feature (values are kept as produced by Packet).
df_packets = pd.DataFrame(packets)

In [13]:
# Display the packet feature table.
df_packets

Unnamed: 0,frame_time,frame_time_relative,frame_length,frame_protocols,eth_source,eth_dest,ip_version,ip_header_length,ip_length,ip_id,...,ip_proto,ip_source,ip_dest,udp_source_port,udp_dest_port,udp_length,tcp_source_port,tcp_dest_port,tcp_length,tcp_flags
0,"Jun 13, 2023 18:58:42.019861000 CEST",0.000000000,84,eth:ethertype:ip:udp:dns,6c:88:14:eb:a4:5c,9c:9d:7e:91:92:4b,4,20,70,0x0000f987,...,17,192.168.31.236,192.168.31.1,40271,53,50,,,,
1,"Jun 13, 2023 18:58:42.019888000 CEST",0.000027000,84,eth:ethertype:ip:udp:dns,6c:88:14:eb:a4:5c,9c:9d:7e:91:92:4b,4,20,70,0x0000f988,...,17,192.168.31.236,192.168.31.1,40271,53,50,,,,
2,"Jun 13, 2023 18:58:42.041559000 CEST",0.021698000,198,eth:ethertype:ip:udp:dns,9c:9d:7e:91:92:4b,6c:88:14:eb:a4:5c,4,20,184,0x0000ee53,...,17,192.168.31.1,192.168.31.236,53,40271,164,,,,
3,"Jun 13, 2023 18:58:42.041588000 CEST",0.021727000,210,eth:ethertype:ip:udp:dns,9c:9d:7e:91:92:4b,6c:88:14:eb:a4:5c,4,20,196,0x0000ee54,...,17,192.168.31.1,192.168.31.236,53,40271,176,,,,
4,"Jun 13, 2023 18:58:42.068363000 CEST",0.048502000,74,eth:ethertype:ip:tcp,6c:88:14:eb:a4:5c,9c:9d:7e:91:92:4b,4,20,60,0x00007804,...,6,192.168.31.236,34.107.221.82,,,,38988,80,0,0x00000002
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2288,"Jun 13, 2023 18:59:29.790467000 CEST",47.770606000,66,eth:ethertype:ip:tcp,9c:9d:7e:91:92:4b,6c:88:14:eb:a4:5c,4,20,52,0x00008e91,...,6,34.250.170.156,192.168.31.236,,,,443,54220,0,0x00000011
2289,"Jun 13, 2023 18:59:29.790487000 CEST",47.770626000,66,eth:ethertype:ip:tcp,6c:88:14:eb:a4:5c,9c:9d:7e:91:92:4b,4,20,52,0x00000000,...,6,192.168.31.236,34.250.170.156,,,,54220,443,0,0x00000010
2290,"Jun 13, 2023 18:59:29.843419000 CEST",47.823558000,66,eth:ethertype:ip:tcp,9c:9d:7e:91:92:4b,6c:88:14:eb:a4:5c,4,20,52,0x000050d9,...,6,35.174.110.81,192.168.31.236,,,,443,43722,0,0x00000010
2291,"Jun 13, 2023 18:59:29.848744000 CEST",47.828883000,66,eth:ethertype:ip:tcp,9c:9d:7e:91:92:4b,6c:88:14:eb:a4:5c,4,20,52,0x000050da,...,6,35.174.110.81,192.168.31.236,,,,443,43722,0,0x00000011


# EVE JSON Output by Suricata

In [14]:
# pandas.json_normalize flattens nested JSON fields into DataFrame columns; the
# resulting column names use dot notation for nested objects, similar to how
# Elasticsearch does it.
# eve.json is read line by line, each non-blank line being parsed as one JSON document.
# The file handle has its own name so it does not overwrite the `packets`
# feature list built from the pcap export with a (closed) file object.
with open("../data/eve.json", encoding="utf-8") as eve_file:
    df = pd.json_normalize(
        [json.loads(line) for line in eve_file if line.strip()]
    )

In [15]:
# EVE JSON format as output by Suricata:

# {
#---- common structure :
# "timestamp":"2009-11-24T21:27:09.534255",
# "event_type":"TYPE",
# ...tuple... ,
# "TYPE":{ ... type specific content ... }

#---- when processing a pcap file : 
# "pcap_cnt" : 123,

# }

#---- EVENT types :
# Alert
# "alert": {
#   "action": "allowed",
#   "gid": 1,
#   "signature_id": 1,
#   "rev": 1,
#   "app_proto": "http",
#   "signature": "HTTP body talking about corruption",
#   "severity": 3,
#   "source": {
#     "ip": "192.168.43.32",
#     "port": 36292
#   },
#   "target": {
#     "ip": "179.60.192.3",
#     "port": 80
#   },


# Anomaly

#     "type": Either "decode", "stream" or "applayer". In rare cases, type will be "unknown". When this occurs, an additional field named "code" will be present. Events with type "applayer" are detected by the application layer parsers.
#     "event" The name of the anomalous event. Events of type "decode" are prefixed with "decoder"; events of type "stream" are prefixed with "stream".
#     "code" If "type" is "unknown", then "code" contains the unrecognized event code. Otherwise, this field is not present.

# The following field is included when "type" has the value "applayer":

#     "layer" Indicates the handling layer that detected the event. This will be "proto_parser" (protocol parser), "proto_detect" (protocol detection) or "parser."

# EVENT TYPE FLOW

# 16.1.2.12.1. Fields

#     "pkts_toserver": total number of packets to server, include bypassed packets
#     "pkts_toclient": total number of packets to client
#     "bytes_toserver": total bytes count to server
#     "bytes_toclient": total bytes count to client
#     "bypassed.pkts_toserver": number of bypassed packets to server
#     "bypassed.pkts_toclient": number of bypassed packets to client
#     "bypassed.bytes_toserver": bypassed bytes count to server
#     "bypassed.bytes_toclient": bypassed bytes count to client
#     "start": date of start of the flow
#     "end": date of end of flow (last seen packet)
#     "age": duration of the flow
#     "bypass": if the flow has been bypassed, it is set to "local" (internal bypass) or "capture"
#     "state": display state of the flow (include "new", "established", "closed", "bypassed")
#     "reason": mechanism that did trigger the end of the flow (include "timeout", "forced" and "shutdown")
#     "alerted": "true" or "false" depending if an alert has been seen on flow


# EVENT TYPE HTTP

# EVENT TYPE DNS

In [16]:
# https://www.stamus-networks.com/blog/jupyter-playbooks-for-suricata-part-1

# https://malware-traffic-analysis.net/

In [17]:
!ls -al   # ! to launch a shell command

# "%" invokes an IPython line magic ("%%" invokes a cell magic)

total 204
drwxr-xr-x 2 benjamin benjamin   4096 Jun 18 17:09 .
drwxr-xr-x 7 benjamin benjamin   4096 Jun 21 16:50 ..
-rw-r--r-- 1 benjamin benjamin  49475 Jun 18 15:57 kdd_toy.ipynb
-rw-r--r-- 1 benjamin benjamin  19577 Jun 21 17:09 SandBox.ipynb
-rw-r--r-- 1 benjamin benjamin 126823 Jun 18 16:15 toy_scapy.ipynb


In [18]:
# Pretty-print every 100th event of eve.json to get a feel for the record layout.
with open("../data/eve.json", encoding="utf-8") as f:
    # readline() consumes one line (here, one event) and the result is discarded,
    # so the loop below starts at the second event of the file.
    # NOTE(review): drop this call if the first event should be included.
    f.readline()
    for i, line in enumerate(f):
        eve = json.loads(line)  # loads: deserialize a string into a Python object
        if i % 100 == 0:
            print(json.dumps(eve, indent=2))   # dumps: serialize an object into an indented string
            print("------------------------------------------------------\n")

{
  "timestamp": "2023-06-17T10:46:07.301751+0200",
  "flow_id": 913732596112055,
  "pcap_cnt": 51,
  "event_type": "alert",
  "src_ip": "2a01:cb19:872e:3000:0e4f:3187:540c:d66c",
  "src_port": 54494,
  "dest_ip": "2a04:4e42:006a:0000:0000:0000:0000:0760",
  "dest_port": 443,
  "proto": "TCP",
  "community_id": "1:oOD614dpphn//UZsI8zItjDl5i4=",
  "alert": {
    "action": "allowed",
    "gid": 1,
    "signature_id": 2200077,
    "rev": 2,
    "signature": "SURICATA TCPv6 invalid checksum",
    "category": "Generic Protocol Command Decode",
    "severity": 3
  },
  "flow": {
    "pkts_toserver": 1,
    "pkts_toclient": 0,
    "bytes_toserver": 86,
    "bytes_toclient": 0,
    "start": "2023-06-17T10:46:07.301751+0200"
  }
}
------------------------------------------------------

{
  "timestamp": "2023-06-17T10:46:05.765754+0200",
  "flow_id": 146718008913722,
  "pcap_cnt": 3,
  "event_type": "alert",
  "src_ip": "2a01:cb19:872e:3000:0e4f:3187:540c:d66c",
  "src_port": 36106,
  "dest_ip":

In [19]:
# pandas.json_normalize flattens nested JSON fields into DataFrame columns; the
# resulting column names use dot notation for nested objects, similar to how
# Elasticsearch does it.
# This repeats the eve.json load done in the earlier EVE cell and rebinds `df`.
# The file handle has its own name so it does not overwrite the `packets`
# feature list built from the pcap export with a (closed) file object.
with open("../data/eve.json", encoding="utf-8") as eve_file:
    df = pd.json_normalize(
        [json.loads(line) for line in eve_file if line.strip()]
    )

In [20]:
# Display the normalized EVE events (one row per line of eve.json).
df

Unnamed: 0,timestamp,flow_id,pcap_cnt,event_type,src_ip,src_port,dest_ip,dest_port,proto,community_id,...,stats.app_layer.tx.rdp,stats.app_layer.tx.dcerpc_udp,stats.app_layer.tx.dns_udp,stats.app_layer.tx.nfs_udp,stats.app_layer.tx.krb5_udp,stats.app_layer.expectations,stats.http.memuse,stats.http.memcap,stats.ftp.memuse,stats.ftp.memcap
0,2023-06-17T10:46:05.765756+0200,2.007446e+15,4.0,alert,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,36120.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:HhORRMa8pU37MFiMESZo7eeh7K0=,...,,,,,,,,,,
1,2023-06-17T10:46:07.301751+0200,9.137326e+14,51.0,alert,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,54494.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:oOD614dpphn//UZsI8zItjDl5i4=,...,,,,,,,,,,
2,2023-06-17T10:46:09.089755+0200,1.315192e+15,55.0,alert,192.168.1.10,57578.0,192.229.221.95,80.0,TCP,1:nAfGnlZMYrDt5CdOeI1UDx4XW6k=,...,,,,,,,,,,
3,2023-06-17T10:46:11.905771+0200,6.695723e+14,71.0,alert,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,36124.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:qS6b8DUCNL2QP3gHebXsXvfYWtM=,...,,,,,,,,,,
4,2023-06-17T10:46:14.465756+0200,9.175379e+14,78.0,alert,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,36152.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:X+PE30OIsulrPxRnEn/EoDaS3Zs=,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
711,2023-06-17T10:46:05.765744+0200,8.309170e+14,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,36184.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:8Ln1Mp0CeUsMen6xJ13diukJLWQ=,...,,,,,,,,,,
712,2023-06-17T10:46:05.765744+0200,2.680272e+14,,flow,fe80:0000:0000:0000:5efa:25ff:fe41:fc90,,ff02:0000:0000:0000:0000:0001:ff0c:d66c,,IPv6-ICMP,1:3WV+iOybOLpMJwWxLpxaISaKQ+U=,...,,,,,,,,,,
713,2023-06-17T10:46:05.765744+0200,2.753845e+14,,flow,192.168.1.10,43644.0,192.168.1.1,53.0,UDP,1:6Nd9Q5wFvHw7lOxhPDKWCUQ4zds=,...,,,,,,,,,,
714,2023-06-17T10:46:05.765744+0200,1.264715e+15,,flow,fe80:0000:0000:0000:5efa:25ff:fe41:fc90,,ff02:0000:0000:0000:0000:0001:ff8e:ee30,,IPv6-ICMP,1:IPRF5HxIYTVaPKmLajo1TmQLrR8=,...,,,,,,,,,,


In [21]:
# (rows, columns) of the normalized frame.
df.shape

(716, 324)