# Imports

In [14]:
import numpy as np
import matplotlib.pyplot as plt

import json # original json library

import pandas as pd
import logging

## Log set-up

In [15]:
# Destination of the log file (absolute, machine-specific path).
LOG_FILENAME = '/home/benjamin/Folders_Python/Cyber/logs/logfile.log'
# Record layout: timestamp -- logger name -- level -- message.
# Every field needs its trailing "s" conversion; "%(asctime)%" is not a valid
# timestamp placeholder.
LOG_FORMAT = '%(asctime)s -- %(name)s -- %(levelname)s -- %(message)s'
# LOG_LEVEL = logging.INFO

In [16]:
# Module-specific logger with two outputs: a log file and the console.
logger = logging.getLogger(__name__)   # creates specific logger for the module
logger.setLevel(logging.DEBUG)    # entry level of messages from all handlers
LOG_FORMAT = '%(asctime)s -- %(name)s -- %(levelname)s -- %(message)s'
formatter = logging.Formatter(LOG_FORMAT)

# logging.getLogger returns the same logger object on every call, so re-running
# this cell would stack a second pair of handlers and emit every record twice.
# Detach and close whatever a previous run attached before adding fresh ones.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

# file handler: INFO messages and up get written to LOG_FILENAME
# (DEBUG records pass the logger but are filtered out by this handler level)
file_handler = logging.FileHandler(LOG_FILENAME, mode='w')
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)

# stream handler to show messages to the console
console = logging.StreamHandler()
console.setLevel(logging.WARNING)  # Warning messages and up get displayed to the console
console.setFormatter(formatter)
logger.addHandler(console)

# Import pcap file

In [17]:
# NB : tshark -r <file>.pcap -T json > <file_pcap>.json -t r
# Shell command that reads a pcap file and converts it to JSON.
# The redirected output file is the JSON document loaded a few cells below.

!tshark -r /home/benjamin/Folders_Python/Cyber/data/input_pcaps/input.pcap -T json -t r > /home/benjamin/Folders_Python/Cyber/data/outputs/exemple_pcap.json

logger.info("run tshark from input.pcap to creat json")

In [18]:
class Packet():
    """Parse one packet dict from the tshark JSON export into a flat feature dict.

    The raw dict is expected to carry the protocol layers under
    ``raw_packet['_source']['layers']`` (keys such as ``frame``, ``eth``, ``ip``,
    ``udp``, ``tcp``). Any layer or field that is absent yields ``None`` for the
    corresponding feature instead of raising.
    """

    def __init__(self, raw_packet:dict) -> None:
        """Store the raw packet dict; features are extracted lazily on first access."""
        self.raw_packet = raw_packet
        self._packet_data = None   # cache, filled by the first read of packet_data
        logger.debug('constructor of Packet instance has finished')

    @property
    def packet_data(self):
        """Return the full dictionary of features (built once, then cached).

        Only the ``ip`` layer is read for the ip_* features, so a packet whose
        layers do not include ``ip`` (e.g. the IPv6 sample shown in this
        notebook) reports ``None`` for all of them.
        """
        if self._packet_data is not None:
            return self._packet_data

        # Every level falls back to an empty dict so that a missing or null
        # layer produces None-valued features rather than an AttributeError.
        source = self.raw_packet.get('_source') or {}
        layers = source.get('layers') or {}
        frame = layers.get('frame') or {}
        eth = layers.get('eth') or {}
        ip = layers.get('ip') or {}
        udp = layers.get('udp') or {}
        tcp = layers.get('tcp') or {}

        self._packet_data = {
            'frame_time' : frame.get('frame.time'),
            'frame_time_relative' : frame.get('frame.time_relative'),
            'frame_length' : frame.get("frame.len"),
            'frame_protocols' : frame.get("frame.protocols"),
            'eth_source': eth.get("eth.src"),
            'eth_dest': eth.get("eth.dst"),
            'ip_version': ip.get("ip.version"),
            'ip_header_length': ip.get("ip.hdr_len"),
            'ip_length': ip.get("ip.len"),
            'ip_id': ip.get("ip.id"),
            'ip_flags': ip.get("ip.flags"),
            'ip_ttl': ip.get("ip.ttl"),
            'ip_proto': ip.get("ip.proto"),
            'ip_source': ip.get("ip.src"),
            'ip_dest': ip.get("ip.dst"),
            'udp_source_port': udp.get("udp.srcport"),
            # "udp.dstport" is the destination-port field (mirrors "tcp.dstport");
            # "udp.port" matches either port and is not specific to the destination.
            'udp_dest_port': udp.get("udp.dstport"),
            'udp_length': udp.get("udp.length"),
            'tcp_source_port': tcp.get("tcp.srcport"),
            'tcp_dest_port': tcp.get("tcp.dstport"),
            'tcp_length': tcp.get("tcp.len"),
            'tcp_flags': tcp.get("tcp.flags"),
        }
        logger.debug('packet_data @property method has finished')
        return self._packet_data

    @packet_data.setter
    def packet_data(self, value):
        """Reject writes: packet_data is derived data, so the value is ignored and a warning is logged."""
        logger.warning('Illegal attempt to write a data_packet in a packet object')

In [19]:
# JSON file produced by the tshark export cell above.
PCAP_FILENAME = "/home/benjamin/Folders_Python/Cyber/data/outputs/exemple_pcap.json"

# JSON text is UTF-8 by specification, so request that encoding explicitly
# instead of depending on the platform's locale default.
with open(PCAP_FILENAME, encoding="utf-8") as raw_packets:
    json_object = json.load(raw_packets)    # loads the JSON file into a Python structure (list of dicts)

In [20]:
# Example: the first dict of the list is one packet (= one Ethernet frame)

json_object[0]

{'_index': 'packets-2023-06-17',
 '_type': 'doc',
 '_score': None,
 '_source': {'layers': {'frame': {'frame.encap_type': '1',
    'frame.time': 'Jun 17, 2023 10:46:05.765744000 CEST',
    'frame.offset_shift': '0.000000000',
    'frame.time_epoch': '1686991565.765744000',
    'frame.time_delta': '0.000000000',
    'frame.time_delta_displayed': '0.000000000',
    'frame.time_relative': '0.000000000',
    'frame.number': '1',
    'frame.len': '86',
    'frame.cap_len': '86',
    'frame.marked': '0',
    'frame.ignored': '0',
    'frame.protocols': 'eth:ethertype:ipv6:tcp'},
   'eth': {'eth.dst': '5c:fa:25:41:fc:90',
    'eth.dst_tree': {'eth.dst_resolved': '5c:fa:25:41:fc:90',
     'eth.dst.oui': '6093349',
     'eth.addr': '5c:fa:25:41:fc:90',
     'eth.addr_resolved': '5c:fa:25:41:fc:90',
     'eth.addr.oui': '6093349',
     'eth.dst.lg': '0',
     'eth.lg': '0',
     'eth.dst.ig': '0',
     'eth.ig': '0'},
    'eth.src': 'a4:5d:36:5a:fe:7c',
    'eth.src_tree': {'eth.src_resolved': 'H

In [21]:
# Example: instantiate a Packet object from the first raw packet dict
p = Packet(json_object[0])

# Reading the property builds (and caches) the flat feature dict shown below
p.packet_data

{'frame_time': 'Jun 17, 2023 10:46:05.765744000 CEST',
 'frame_time_relative': '0.000000000',
 'frame_length': '86',
 'frame_protocols': 'eth:ethertype:ipv6:tcp',
 'eth_source': 'a4:5d:36:5a:fe:7c',
 'eth_dest': '5c:fa:25:41:fc:90',
 'ip_version': None,
 'ip_header_length': None,
 'ip_length': None,
 'ip_id': None,
 'ip_flags': None,
 'ip_ttl': None,
 'ip_proto': None,
 'ip_source': None,
 'ip_dest': None,
 'udp_source_port': None,
 'udp_dest_port': None,
 'udp_length': None,
 'tcp_source_port': '36100',
 'tcp_dest_port': '443',
 'tcp_length': '0',
 'tcp_flags': '0x00000010'}

# Produce DataFrame for Raw Packets analysis

In [22]:
# Build the list of feature dicts: one packet_data dict per raw packet in json_object
packets = [Packet(raw_packet).packet_data for raw_packet in json_object]

In [23]:
# One row per packet, one column per extracted feature
df_packets = pd.DataFrame(data=packets)

In [24]:
# Summary statistics for every column, transposed so each feature is a row
df_packets.describe(include='all').T

Unnamed: 0,count,unique,top,freq
frame_time,939,904,"Jun 17, 2023 10:47:05.680639000 CEST",5
frame_time_relative,939,904,59.914895000,5
frame_length,939,71,86,500
frame_protocols,939,15,eth:ethertype:ipv6:tcp,668
eth_source,939,6,5c:fa:25:41:fc:90,472
eth_dest,939,12,5c:fa:25:41:fc:90,455
ip_version,59,1,4,59
ip_header_length,59,2,20,50
ip_length,59,15,52,29
ip_id,59,49,0x00000000,11


# EVE JSON Output by Suricata

In [25]:
# run Suricata to produce an eve.json file with alerts
# -r points at the same input pcap used for the tshark export; -l is the output
# directory, which is where the next cells look for eve.json.

!suricata -r /home/benjamin/Folders_Python/Cyber/data/input_pcaps/input.pcap -l /home/benjamin/Folders_Python/Cyber/data/outputs

logger.info("run Suricata to reassemble flows and create alert logs")

21/6/2023 -- 19:00:28 - <Info> - Configuration node 'af-packet' redefined.
21/6/2023 -- 19:00:28 - <Notice> - This is Suricata version 6.0.1 RELEASE running in USER mode
21/6/2023 -- 19:01:30 - <Notice> - all 5 packet processing threads, 4 management threads initialized, engine started.
21/6/2023 -- 19:01:30 - <Notice> - Signal Received.  Stopping engine.
21/6/2023 -- 19:01:30 - <Notice> - Pcap-file module read 1 files, 939 packets, 1252107 bytes


In [29]:
# pandas provides json_normalize to flatten nested JSON fields into DataFrame
# columns. Resulting columns use dot notation for nested objects (e.g. "stats.uptime"),
# similar to how Elasticsearch does it. max_level=1 flattens one level of nesting only.

SURICATA_EVE_LOG = "/home/benjamin/Folders_Python/Cyber/data/outputs/eve.json"

# Each line of the file is parsed as its own JSON document (one event per line).
# The handle is deliberately NOT called `packets`: that name holds the list of
# packet feature dicts built earlier, and rebinding it here would break a re-run
# of the DataFrame cell that consumes it. Blank lines are skipped because
# json.loads would raise on them.
with open(SURICATA_EVE_LOG, encoding="utf-8") as eve_log:
    df = pd.json_normalize(
        [json.loads(event_line) for event_line in eve_log if event_line.strip()],
        max_level=1
    )

In [30]:
# Display the normalized EVE events (all event types mixed in one frame)
df

Unnamed: 0,timestamp,flow_id,pcap_cnt,event_type,src_ip,src_port,dest_ip,dest_port,proto,community_id,...,stats.uptime,stats.decoder,stats.flow,stats.defrag,stats.flow_bypassed,stats.tcp,stats.detect,stats.app_layer,stats.http,stats.ftp
0,2023-06-17T10:46:05.765752+0200,2.081350e+15,2.0,alert,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,36104.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:wKigxIZskc7GM48zFtMEXRK7VpU=,...,,,,,,,,,,
1,2023-06-17T10:46:06.277747+0200,3.959185e+14,12.0,alert,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,54490.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:PJrhFNGVjSn+J2xjS7lzgaAbS0s=,...,,,,,,,,,,
2,2023-06-17T10:46:06.333430+0200,3.959185e+14,15.0,alert,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,54490.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:PJrhFNGVjSn+J2xjS7lzgaAbS0s=,...,,,,,,,,,,
3,2023-06-17T10:46:06.345251+0200,3.959185e+14,17.0,alert,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,54490.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:PJrhFNGVjSn+J2xjS7lzgaAbS0s=,...,,,,,,,,,,
4,2023-06-17T10:46:06.345317+0200,3.959185e+14,18.0,alert,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,54490.0,TCP,1:PJrhFNGVjSn+J2xjS7lzgaAbS0s=,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
711,2023-06-17T10:46:05.765744+0200,8.311576e+14,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,53142.0,2600:1901:0001:0a98:0000:0000:0000:0000,443.0,TCP,1:KfKgFx7575OpkfiZcjjotVEaf8g=,...,,,,,,,,,,
712,2023-06-17T10:46:05.765744+0200,8.316923e+14,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,54474.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:eSOdfbZqvqeAJWgAmGZLtw2N5Ds=,...,,,,,,,,,,
713,2023-06-17T10:46:05.765744+0200,1.282625e+14,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,50300.0,2a00:1450:4007:0813:0000:0000:0000:200e,443.0,TCP,1:pRtG1NVWqXkKvuik3pQwKwGYDG8=,...,,,,,,,,,,
714,2023-06-17T10:46:05.765744+0200,4.163195e+14,,flow,192.168.1.10,57578.0,192.229.221.95,80.0,TCP,1:nAfGnlZMYrDt5CdOeI1UDx4XW6k=,...,,,,,,,,,,


In [31]:
# Keep only the rows whose event_type is 'flow'
df.loc[df['event_type'] == 'flow']

Unnamed: 0,timestamp,flow_id,pcap_cnt,event_type,src_ip,src_port,dest_ip,dest_port,proto,community_id,...,stats.uptime,stats.decoder,stats.flow,stats.defrag,stats.flow_bypassed,stats.tcp,stats.detect,stats.app_layer,stats.http,stats.ftp
672,2023-06-17T10:46:05.765744+0200,2114889000000000.0,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,36106.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:V6WvwDuyWk1zZ9zNwBTxWqs1YLM=,...,,,,,,,,,,
673,2023-06-17T10:46:05.765744+0200,851094800000000.0,,flow,192.168.1.10,57302.0,104.18.26.218,443.0,TCP,1:jq5x8Qwxg6TQo5uUDqOE5Ce7hQI=,...,,,,,,,,,,
674,2023-06-17T10:46:05.765744+0200,2118340000000000.0,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,,fe80:0000:0000:0000:5efa:25ff:fe41:fc90,,IPv6-ICMP,1:nuR//V8bRzubzwTItWB22gL5jmk=,...,,,,,,,,,,
675,2023-06-17T10:46:05.765744+0200,10825240000000.0,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,47864.0,2a00:1450:4007:081a:0000:0000:0000:2003,80.0,TCP,1:uRhWV544zvWeIohZCmryZHXZ5EA=,...,,,,,,,,,,
676,2023-06-17T10:46:05.765744+0200,1988338000000000.0,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,51918.0,2a00:1450:4007:0810:0000:0000:0000:200a,443.0,UDP,1:NUJlT/WhsiSsBYovvqiRwMcIytk=,...,,,,,,,,,,
677,2023-06-17T10:46:05.765744+0200,1005389000000000.0,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,36200.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:/4VyTG/nf2LqCg8bWrTg+FH/a0A=,...,,,,,,,,,,
678,2023-06-17T10:46:05.765744+0200,162001700000000.0,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,36130.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:YpqUSzY2MPuTwpcgiRsSLK1RoUU=,...,,,,,,,,,,
679,2023-06-17T10:46:05.765744+0200,1852086000000000.0,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,36124.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:qS6b8DUCNL2QP3gHebXsXvfYWtM=,...,,,,,,,,,,
680,2023-06-17T10:46:05.765744+0200,1430847000000000.0,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,36120.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:HhORRMa8pU37MFiMESZo7eeh7K0=,...,,,,,,,,,,
681,2023-06-17T10:46:05.765744+0200,1579642000000000.0,,flow,fe80:0000:0000:0000:a65d:36ff:fe5a:fe7c,,fe80:0000:0000:0000:5efa:25ff:fe41:fc90,,IPv6-ICMP,1:ruh4djloEPuhT1uM2K1odU3ezN4=,...,,,,,,,,,,


In [None]:
# from Suricata doc :

# 15.1.2.12. Event type: Flow
# 15.1.2.12.1. Fields

#     “pkts_toserver”: total number of packets to server, include bypassed packets
#     “pkts_toclient”: total number of packets to client
#     “bytes_toserver”: total bytes count to server
#     “bytes_toclient”: total bytes count to client
#     “bypassed.pkts_toserver”: number of bypassed packets to server
#     “bypassed.pkts_toclient”: number of bypassed packets to client
#     “bypassed.bytes_toserver”: bypassed bytes count to server
#     “bypassed.bytes_toclient”: bypassed bytes count to client
#     “start”: date of start of the flow
#     “end”: date of end of flow (last seen packet)
#     “age”: duration of the flow
#     “bypass”: if the flow has been bypassed, it is set to “local” (internal bypass) or “capture”
#     “state”: display state of the flow (include “new”, “established”, “closed”, “bypassed”)
#     “reason”: mechanism that did trigger the end of the flow (include “timeout”, “forced” and “shutdown”)
#     “alerted”: “true” or “false” depending if an alert has been seen on flow

In [None]:
# https://www.stamus-networks.com/blog/jupyter-playbooks-for-suricata-part-1

# https://malware-traffic-analysis.net/