# Imports

In [1]:
import numpy as np
import matplotlib.pyplot as plt

import json # original json library

import pandas as pd
import logging

## Log set-up

In [2]:
# Destination of the log file written by the file handler.
# NOTE(review): absolute, user-specific path - consider deriving it from a configurable base directory.
LOG_FILENAME = '/home/benjamin/Folders_Python/Cyber/logs/logfile.log'
# Every placeholder needs its conversion type: '%(asctime)%' (without the 's')
# renders a literal '%' instead of the timestamp.
LOG_FORMAT = '%(asctime)s -- %(name)s -- %(levelname)s -- %(message)s'
# LOG_LEVEL = logging.INFO

In [3]:
# Logger dedicated to this notebook/module. DEBUG is the entry threshold of the
# logger itself; each handler below then applies its own, stricter, level.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
LOG_FORMAT = '%(asctime)s -- %(name)s -- %(levelname)s -- %(message)s'
formatter = logging.Formatter(LOG_FORMAT)

# logging.getLogger returns the same object on every call, so re-running this
# cell would stack a new pair of handlers and duplicate every message.
# Drop (and close) whatever a previous run attached before adding fresh ones.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

# File handler: INFO and above are written to LOG_FILENAME (truncated on each set-up, mode='w').
file_handler = logging.FileHandler(LOG_FILENAME, mode='w')
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)

# Stream handler: WARNING and above are echoed to the console.
console = logging.StreamHandler()
console.setLevel(logging.WARNING)
console.setFormatter(formatter)
logger.addHandler(console)

# Import pcap file

In [4]:
# NB : tshark -r <file>.pcap -T json > <file_pcap>.json -t r
# commande shell qui prend un pcap et le passe en json

!rm /home/benjamin/Folders_Python/Cyber/data/outputs/exemple_pcap.json
!tshark -r /home/benjamin/Folders_Python/Cyber/data/input_pcaps/input.pcap -T json -t r > /home/benjamin/Folders_Python/Cyber/data/outputs/exemple_pcap.json

logger.info("run tshark from input.pcap to creat json")

In [5]:
class Packet():
    """Flatten one packet of a ``tshark -T json`` export into a one-level feature dict.

    The raw packet is expected to look like ``{'_source': {'layers': {...}}}``
    with one sub-dict per protocol layer ('frame', 'eth', 'ip', 'udp', 'tcp').
    A layer or field that is missing from the packet yields ``None`` for the
    corresponding feature, so every instance exposes the same set of keys.

    NOTE(review): only the 'ip' layer is read for the ip_* features; packets
    carried over IPv6 therefore get ``None`` for all of them.
    """

    # feature name -> (tshark layer, tshark field). The insertion order is the
    # key order of packet_data, hence the column order of a derived DataFrame.
    _FIELD_MAP = {
        'frame_time': ('frame', 'frame.time'),
        'frame_time_relative': ('frame', 'frame.time_relative'),
        'frame_length': ('frame', 'frame.len'),
        'frame_protocols': ('frame', 'frame.protocols'),
        'eth_source': ('eth', 'eth.src'),
        'eth_dest': ('eth', 'eth.dst'),
        'ip_version': ('ip', 'ip.version'),
        'ip_header_length': ('ip', 'ip.hdr_len'),
        'ip_length': ('ip', 'ip.len'),
        'ip_id': ('ip', 'ip.id'),
        'ip_flags': ('ip', 'ip.flags'),
        'ip_ttl': ('ip', 'ip.ttl'),
        'ip_proto': ('ip', 'ip.proto'),
        'ip_source': ('ip', 'ip.src'),
        'ip_dest': ('ip', 'ip.dst'),
        'udp_source_port': ('udp', 'udp.srcport'),
        # 'udp.dstport' is the destination port; 'udp.port' matches either port.
        'udp_dest_port': ('udp', 'udp.dstport'),
        'udp_length': ('udp', 'udp.length'),
        'tcp_source_port': ('tcp', 'tcp.srcport'),
        'tcp_dest_port': ('tcp', 'tcp.dstport'),
        'tcp_length': ('tcp', 'tcp.len'),
        'tcp_flags': ('tcp', 'tcp.flags'),
    }

    def __init__(self, raw_packet:dict) -> None:
        """Store the raw packet; the feature dict is built lazily on first access."""
        self.raw_packet = raw_packet
        self._packet_data = None
        logger.debug('constructor of Packet instance has finished')

    @staticmethod
    def _layer(layers, name):
        """Return the sub-dict of layer `name`, or an empty dict when absent or not a dict."""
        section = layers.get(name)
        return section if isinstance(section, dict) else {}

    @property
    def packet_data(self):
        """Full dictionary of features, computed once and then cached.

        Returns:
            dict: one entry per key of ``_FIELD_MAP``; ``None`` for every field
            the packet does not carry.
        """
        if self._packet_data is None:
            # Tolerate packets without '_source' / 'layers' instead of raising AttributeError.
            source = self.raw_packet.get('_source')
            layers = self._layer(source if isinstance(source, dict) else {}, 'layers')
            self._packet_data = {
                feature: self._layer(layers, layer_name).get(field_name)
                for feature, (layer_name, field_name) in self._FIELD_MAP.items()
            }
            logger.debug('packet_data @property method has finished')
        return self._packet_data

    @packet_data.setter
    def packet_data(self, input):
        """Reject writes: packet_data is derived from raw_packet.

        The assignment is ignored (no exception is raised); a warning is logged.
        """
        logger.warning('Illegal attempt to write a data_packet in a packet object')

In [6]:
# JSON export produced by the tshark cell above.
PCAP_FILENAME = "/home/benjamin/Folders_Python/Cyber/data/outputs/exemple_pcap.json"

# Decode explicitly as UTF-8 so the result does not depend on the machine's locale.
with open(PCAP_FILENAME, encoding='utf-8') as raw_packets:
    json_object = json.load(raw_packets)    # parse the JSON file into a Python structure (list of dicts)

In [7]:
# Example: the first dict of the list is one packet (= one Ethernet frame)

json_object[0]

{'_index': 'packets-2023-06-17',
 '_type': 'doc',
 '_score': None,
 '_source': {'layers': {'frame': {'frame.encap_type': '1',
    'frame.time': 'Jun 17, 2023 10:46:05.765744000 CEST',
    'frame.offset_shift': '0.000000000',
    'frame.time_epoch': '1686991565.765744000',
    'frame.time_delta': '0.000000000',
    'frame.time_delta_displayed': '0.000000000',
    'frame.time_relative': '0.000000000',
    'frame.number': '1',
    'frame.len': '86',
    'frame.cap_len': '86',
    'frame.marked': '0',
    'frame.ignored': '0',
    'frame.protocols': 'eth:ethertype:ipv6:tcp'},
   'eth': {'eth.dst': '5c:fa:25:41:fc:90',
    'eth.dst_tree': {'eth.dst_resolved': '5c:fa:25:41:fc:90',
     'eth.dst.oui': '6093349',
     'eth.addr': '5c:fa:25:41:fc:90',
     'eth.addr_resolved': '5c:fa:25:41:fc:90',
     'eth.addr.oui': '6093349',
     'eth.dst.lg': '0',
     'eth.lg': '0',
     'eth.dst.ig': '0',
     'eth.ig': '0'},
    'eth.src': 'a4:5d:36:5a:fe:7c',
    'eth.src_tree': {'eth.src_resolved': 'H

In [8]:
# Example of instantiating a Packet object from the first raw packet
p = Packet(json_object[0])

# Flattened feature dict of that packet
p.packet_data

{'frame_time': 'Jun 17, 2023 10:46:05.765744000 CEST',
 'frame_time_relative': '0.000000000',
 'frame_length': '86',
 'frame_protocols': 'eth:ethertype:ipv6:tcp',
 'eth_source': 'a4:5d:36:5a:fe:7c',
 'eth_dest': '5c:fa:25:41:fc:90',
 'ip_version': None,
 'ip_header_length': None,
 'ip_length': None,
 'ip_id': None,
 'ip_flags': None,
 'ip_ttl': None,
 'ip_proto': None,
 'ip_source': None,
 'ip_dest': None,
 'udp_source_port': None,
 'udp_dest_port': None,
 'udp_length': None,
 'tcp_source_port': '36100',
 'tcp_dest_port': '443',
 'tcp_length': '0',
 'tcp_flags': '0x00000010'}

# Produce DataFrame for Raw Packets analysis

In [9]:
# Build the list of feature dicts, one per raw packet, via the Packet objects.
packets = [
    Packet(raw_packet_dict).packet_data
    for raw_packet_dict in json_object
]

In [10]:
# One row per packet, one column per extracted feature.
df_packets = pd.DataFrame(data=packets)

In [11]:
# Summary statistics for every column, shown with one feature per row.
df_packets.describe(include='all').T

Unnamed: 0,count,unique,top,freq
frame_time,939,904,"Jun 17, 2023 10:47:05.680639000 CEST",5
frame_time_relative,939,904,59.914895000,5
frame_length,939,71,86,500
frame_protocols,939,15,eth:ethertype:ipv6:tcp,668
eth_source,939,6,5c:fa:25:41:fc:90,472
eth_dest,939,12,5c:fa:25:41:fc:90,455
ip_version,59,1,4,59
ip_header_length,59,2,20,50
ip_length,59,15,52,29
ip_id,59,49,0x00000000,11


# EVE JSON Output by Suricata

In [12]:
# run Suricata to produce an eve.json file with alerts

!rm /home/benjamin/Folders_Python/Cyber/data/outputs/eve.json
!suricata -r /home/benjamin/Folders_Python/Cyber/data/input_pcaps/input.pcap -l /home/benjamin/Folders_Python/Cyber/data/outputs

logger.info("run Suricata to reassemble flows and create alert logs")

[32m22/6/2023 -- 15:10:26[0m - <[33mInfo[0m> - Configuration node 'af-packet' redefined.[0m
[32m22/6/2023 -- 15:10:26[0m - <[1;33mNotice[0m> - [33mThis is Suricata version 6.0.1 RELEASE running in USER mode[0m
[32m22/6/2023 -- 15:11:33[0m - <[1;33mNotice[0m> - [33mall 5 packet processing threads, 4 management threads initialized, engine started.[0m
[32m22/6/2023 -- 15:11:33[0m - <[1;33mNotice[0m> - [33mSignal Received.  Stopping engine.[0m
[32m22/6/2023 -- 15:11:33[0m - <[1;33mNotice[0m> - [33mPcap-file module read 1 files, 939 packets, 1252107 bytes[0m


In [13]:
# pandas' json_normalize flattens nested JSON fields into DataFrame columns.
# The resulting column names use dot notation for nested objects (e.g. 'stats.uptime'),
# similar to how Elasticsearch does it. max_level=1 flattens one level of nesting only.

SURICATA_EVE_LOG = "/home/benjamin/Folders_Python/Cyber/data/outputs/eve.json"

# eve.json holds one JSON document per line. The handle is deliberately NOT
# called `packets`: that name is the list of packet dicts built earlier, and
# rebinding it to a (closed) file object breaks any re-run of the cells using it.
with open(SURICATA_EVE_LOG, encoding='utf-8') as eve_file:
    df = pd.json_normalize(
        [json.loads(line) for line in eve_file if line.strip()],  # skip blank lines
        max_level=1
    )

In [14]:
# Display the normalized eve.json events (all event types mixed in one frame).
df

Unnamed: 0,timestamp,flow_id,pcap_cnt,event_type,src_ip,src_port,dest_ip,dest_port,proto,community_id,...,stats.uptime,stats.decoder,stats.flow,stats.defrag,stats.flow_bypassed,stats.tcp,stats.detect,stats.app_layer,stats.http,stats.ftp
0,2023-06-17T10:46:05.765752+0200,2.081350e+15,2.0,alert,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,36104.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:wKigxIZskc7GM48zFtMEXRK7VpU=,...,,,,,,,,,,
1,2023-06-17T10:46:06.277747+0200,3.959185e+14,12.0,alert,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,54490.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:PJrhFNGVjSn+J2xjS7lzgaAbS0s=,...,,,,,,,,,,
2,2023-06-17T10:46:06.333430+0200,3.959185e+14,15.0,alert,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,54490.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:PJrhFNGVjSn+J2xjS7lzgaAbS0s=,...,,,,,,,,,,
3,2023-06-17T10:46:06.345251+0200,3.959185e+14,17.0,alert,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,54490.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:PJrhFNGVjSn+J2xjS7lzgaAbS0s=,...,,,,,,,,,,
4,2023-06-17T10:46:06.345317+0200,3.959185e+14,18.0,alert,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,54490.0,TCP,1:PJrhFNGVjSn+J2xjS7lzgaAbS0s=,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1427,2023-06-17T10:46:05.765744+0200,1.387364e+15,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,36130.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:YpqUSzY2MPuTwpcgiRsSLK1RoUU=,...,,,,,,,,,,
1428,2023-06-17T10:46:05.765744+0200,2.617759e+14,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,51018.0,2a00:1450:4007:0819:0000:0000:0000:2003,443.0,TCP,1:emfKP8YPiJc5gn/X2n73NZ3vSQs=,...,,,,,,,,,,
1429,2023-06-17T10:46:05.765744+0200,8.312606e+14,,flow,192.168.1.10,57578.0,192.229.221.95,80.0,TCP,1:nAfGnlZMYrDt5CdOeI1UDx4XW6k=,...,,,,,,,,,,
1430,2023-06-17T10:46:05.765744+0200,1.539831e+15,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,50648.0,2a03:2880:f27b:02cc:face:b00c:0000:0167,443.0,TCP,1:6p4KEIGLfcaWMFbvx5WibHpIo2c=,...,,,,,,,,,,


In [15]:
# Keep only the rows describing flow events.
is_flow_event = df['event_type'] == 'flow'
df.loc[is_flow_event]

Unnamed: 0,timestamp,flow_id,pcap_cnt,event_type,src_ip,src_port,dest_ip,dest_port,proto,community_id,...,stats.uptime,stats.decoder,stats.flow,stats.defrag,stats.flow_bypassed,stats.tcp,stats.detect,stats.app_layer,stats.http,stats.ftp
672,2023-06-17T10:46:05.765744+0200,2.114889e+15,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,36106.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:V6WvwDuyWk1zZ9zNwBTxWqs1YLM=,...,,,,,,,,,,
673,2023-06-17T10:46:05.765744+0200,8.510948e+14,,flow,192.168.1.10,57302.0,104.18.26.218,443.0,TCP,1:jq5x8Qwxg6TQo5uUDqOE5Ce7hQI=,...,,,,,,,,,,
674,2023-06-17T10:46:05.765744+0200,2.118340e+15,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,,fe80:0000:0000:0000:5efa:25ff:fe41:fc90,,IPv6-ICMP,1:nuR//V8bRzubzwTItWB22gL5jmk=,...,,,,,,,,,,
675,2023-06-17T10:46:05.765744+0200,1.082524e+13,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,47864.0,2a00:1450:4007:081a:0000:0000:0000:2003,80.0,TCP,1:uRhWV544zvWeIohZCmryZHXZ5EA=,...,,,,,,,,,,
676,2023-06-17T10:46:05.765744+0200,1.988338e+15,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,51918.0,2a00:1450:4007:0810:0000:0000:0000:200a,443.0,UDP,1:NUJlT/WhsiSsBYovvqiRwMcIytk=,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1426,2023-06-17T10:46:05.765744+0200,1.206562e+14,,flow,fe80:0000:0000:0000:a65d:36ff:fe5a:fe7c,,ff02:0000:0000:0000:0000:0000:0000:0016,,IPv6-ICMP,1:3ub42ko6I1PqiK9KYQPcORsX2h4=,...,,,,,,,,,,
1427,2023-06-17T10:46:05.765744+0200,1.387364e+15,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,36130.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,1:YpqUSzY2MPuTwpcgiRsSLK1RoUU=,...,,,,,,,,,,
1428,2023-06-17T10:46:05.765744+0200,2.617759e+14,,flow,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,51018.0,2a00:1450:4007:0819:0000:0000:0000:2003,443.0,TCP,1:emfKP8YPiJc5gn/X2n73NZ3vSQs=,...,,,,,,,,,,
1429,2023-06-17T10:46:05.765744+0200,8.312606e+14,,flow,192.168.1.10,57578.0,192.229.221.95,80.0,TCP,1:nAfGnlZMYrDt5CdOeI1UDx4XW6k=,...,,,,,,,,,,


## Produce DataFrame for Flow Analysis

In [69]:
# from Suricata doc :

# 15.1.2.12. Event type: Flow
# 15.1.2.12.1. Fields

#     “pkts_toserver”: total number of packets to server, include bypassed packets
#     “pkts_toclient”: total number of packets to client
#     “bytes_toserver”: total bytes count to server
#     “bytes_toclient”: total bytes count to client
#     “bypassed.pkts_toserver”: number of bypassed packets to server
#     “bypassed.pkts_toclient”: number of bypassed packets to client
#     “bypassed.bytes_toserver”: bypassed bytes count to server
#     “bypassed.bytes_toclient”: bypassed bytes count to client
#     “start”: date of start of the flow
#     “end”: date of end of flow (last seen packet)
#     “age”: duration of the flow
#     “bypass”: if the flow has been bypassed, it is set to “local” (internal bypass) or “capture”
#     “state”: display state of the flow (include “new”, “established”, “closed”, “bypassed”)
#     “reason”: mechanism that did trigger the end of the flow (include “timeout”, “forced” and “shutdown”)
#     “alerted”: “true” or “false” depending if an alert has been seen on flow

In [70]:
# https://www.stamus-networks.com/blog/jupyter-playbooks-for-suricata-part-1

# https://malware-traffic-analysis.net/

In [71]:
class Flow():
    """Utility class - takes one flow event of eve.json, already parsed into a dict,
       and creates a one-level dict structure, suitable for dataframe creation.

       Raises:
           ValueError: when the event is not a dict whose 'event_type' is 'flow'.
    """

    # Keys read directly from the event.
    _FIRST_LEVEL_KEYS = (
        'timestamp',
        'flow_id',
        'src_ip',
        'src_port',
        'dest_ip',
        'dest_port',
        'proto',
    )
    # Keys read from the nested 'flow' sub-dict of the event.
    _SECOND_LEVEL_KEYS = (
        'pkts_toserver',
        'pkts_toclient',
        'bytes_toserver',
        'bytes_toclient',
        'start',
        'end',
        'age',
        'state',
        'reason',
        'alerted',
    )

    def __init__(self, flow_event:dict):
        """Keep a reference to the raw event; features are built lazily."""
        if not isinstance(flow_event, dict) or flow_event.get('event_type') != 'flow':
            logger.critical("Attempt to build a Flow instance with a non-flow event")
            # Same exception type as before, now with an explanatory message.
            raise ValueError("Flow expects an eve.json event dict whose 'event_type' is 'flow'")
        self._raw_flow_event = flow_event
        self._features = None

    @property
    def features(self):
        """Flat feature dict (first-level keys, then 'flow' keys), computed once and cached.

        Any key absent from the event maps to ``None``.
        """
        if self._features is None:
            # A flow event without its 'flow' sub-dict yields None features
            # instead of an AttributeError on None.
            flow_section = self._raw_flow_event.get('flow')
            if not isinstance(flow_section, dict):
                flow_section = {}
            top_level = { k: self._raw_flow_event.get(k) for k in self._FIRST_LEVEL_KEYS }
            nested = { k: flow_section.get(k) for k in self._SECOND_LEVEL_KEYS }
            self._features = { **top_level, **nested }
            logger.info("built a Flow features object")
        return self._features

    @features.setter
    def features(self, input):
        """Reject writes: the assignment is ignored and a critical message is logged."""
        logger.critical("illegal attempt to hard write features in a Flow object")

    def __str__(self) -> str:
        """Features rendered as indented JSON."""
        return json.dumps(self.features, indent=4)

    def __repr__(self) -> str:
        """Same rendering as __str__."""
        return json.dumps(self.features, indent=4)


In [72]:
# Example of a flow event as found in eve.json:
# {
# "timestamp":"2023-06-17T10:46:05.765744+0200",
# "flow_id":860724109937755,
# "event_type":"flow",
# "src_ip":"2a01:cb19:872e:3000:0e4f:3187:540c:d66c",
# "src_port":47864,
# "dest_ip":"2a00:1450:4007:081a:0000:0000:0000:2003",
# "dest_port":80,
# "proto":"TCP",
# "flow":
#     {"pkts_toserver":6,
#     "pkts_toclient":5,
#     "bytes_toserver":516,
#     "bytes_toclient":430,
#     "start":"2023-06-17T10:46:10.625755+0200",
#     "end":"2023-06-17T10:46:44.150502+0200",
#     "age":34,
#     "state":"new",
#     "reason":"shutdown",
#     "alerted":true},
# "community_id":"1:uRhWV544zvWeIohZCmryZHXZ5EA=",
# "tcp":
#     {"tcp_flags":"00",
#     "tcp_flags_ts":"00",
#     "tcp_flags_tc":"00"
#     }
# }

In [73]:
SURICATA_EVE_LOG = "/home/benjamin/Folders_Python/Cyber/data/outputs/eve.json"

# Column order of the flow DataFrame: first-level event keys, then the keys
# taken from the nested 'flow' sub-dict.
columns_names = [
    'timestamp',
    'flow_id',
    'src_ip',
    'src_port',
    'dest_ip',
    'dest_port',
    'proto',
    'pkts_toserver',
    'pkts_toclient',
    'bytes_toserver',
    'bytes_toclient',
    'start',
    'end',
    'age',
    'state',
    'reason',
    'alerted',
]
# Column-oriented accumulator: one list of values per column.
dict_for_dataframe = { k: [] for k in columns_names }

# eve.json holds one JSON document per line; only 'flow' events are kept.
with open (SURICATA_EVE_LOG) as f:
    for event_string in f:
        if not event_string.strip():
            continue    # a blank line is not a JSON document
        python_object = json.loads(event_string)
        if python_object.get('event_type') != 'flow':
            continue
        flow = Flow(python_object)
        for k in columns_names:
            # Appending to the (possibly empty) list covers the first value too.
            dict_for_dataframe[k].append(flow.features.get(k))

In [74]:
# One row per flow event, columns in the order of the accumulator's keys.
df_flow = pd.DataFrame(dict_for_dataframe)


In [75]:
# Display the per-flow DataFrame built from the eve.json flow events.
df_flow

Unnamed: 0,timestamp,flow_id,src_ip,src_port,dest_ip,dest_port,proto,pkts_toserver,pkts_toclient,bytes_toserver,bytes_toclient,start,end,age,state,reason,alerted
0,2023-06-17T10:46:05.765744+0200,2114888919789370,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,36106.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,9,9,781,805,2023-06-17T10:46:05.765754+0200,2023-06-17T10:46:50.158510+0200,45,new,shutdown,True
1,2023-06-17T10:46:05.765744+0200,851094794666390,192.168.1.10,57302.0,104.18.26.218,443.0,TCP,4,3,327,198,2023-06-17T10:46:32.131478+0200,2023-06-17T10:46:32.147962+0200,0,new,shutdown,True
2,2023-06-17T10:46:05.765744+0200,2118339927025636,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,,fe80:0000:0000:0000:5efa:25ff:fe41:fc90,,IPv6-ICMP,2,0,172,0,2023-06-17T10:46:21.600036+0200,2023-06-17T10:46:44.149057+0200,23,new,shutdown,False
3,2023-06-17T10:46:05.765744+0200,10825243987035,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,47864.0,2a00:1450:4007:081a:0000:0000:0000:2003,80.0,TCP,6,5,516,430,2023-06-17T10:46:10.625755+0200,2023-06-17T10:46:44.150502+0200,34,new,shutdown,True
4,2023-06-17T10:46:05.765744+0200,1988337709250378,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,51918.0,2a00:1450:4007:0810:0000:0000:0000:200a,443.0,UDP,14,14,3058,1473,2023-06-17T10:46:18.882506+0200,2023-06-17T10:47:04.090961+0200,46,established,shutdown,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,2023-06-17T10:46:05.765744+0200,120656150391598,fe80:0000:0000:0000:a65d:36ff:fe5a:fe7c,,ff02:0000:0000:0000:0000:0000:0000:0016,,IPv6-ICMP,2,0,260,0,2023-06-17T10:46:51.125742+0200,2023-06-17T10:46:51.777762+0200,0,new,shutdown,False
82,2023-06-17T10:46:05.765744+0200,1387364409922097,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,36130.0,2a04:4e42:006a:0000:0000:0000:0000:0760,443.0,TCP,9,9,781,805,2023-06-17T10:46:11.905777+0200,2023-06-17T10:46:56.158234+0200,45,new,shutdown,True
83,2023-06-17T10:46:05.765744+0200,261775891017438,2a01:cb19:872e:3000:0e4f:3187:540c:d66c,51018.0,2a00:1450:4007:0819:0000:0000:0000:2003,443.0,TCP,2,1,211,125,2023-06-17T10:46:54.110302+0200,2023-06-17T10:46:54.123891+0200,0,new,shutdown,True
84,2023-06-17T10:46:05.765744+0200,831260634209947,192.168.1.10,57578.0,192.229.221.95,80.0,TCP,7,6,462,396,2023-06-17T10:46:09.089755+0200,2023-06-17T10:46:53.146263+0200,44,new,shutdown,True
