In [1]:
import numpy as np
import pandas as pd
import pyshark
import matplotlib.pyplot as plt
import json # original json library
import logging

In [2]:
LOG_FILENAME = '/home/benjamin/Folders_Python/Cyber/logs/logfile.log'
LOG_FORMAT = '%(asctime)% -- %(name)s -- %(levelname)s -- %(message)s'
# LOG_LEVEL = logging.INFO

# specific logger for the module
logger = logging.getLogger(__name__)   # creates specific logger for the module
logger.setLevel(logging.DEBUG)    # entry level of messages from all handlers
LOG_FORMAT = '%(asctime)s -- %(name)s -- %(levelname)s -- %(message)s'
formatter = logging.Formatter(LOG_FORMAT)

# file handler to log everything
file_handler = logging.FileHandler(LOG_FILENAME, mode='w')
file_handler.setLevel(logging.INFO)  # all messages (DEBUG and up) get logged in the file
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)

# stream handler to show messages to the console
console = logging.StreamHandler()
console.setLevel(logging.WARNING)  # Warning messages and up get displayed to the console
console.setFormatter(formatter)
logger.addHandler(console)

# start your engine
logger.info("-------- new run --------")

In [3]:
PCAPFILE = '/home/benjamin/Folders_Python/Cyber/data/input_pcaps/test.pcap'

In [4]:
capture = pyshark.FileCapture(
    input_file=PCAPFILE,
    use_ek=True
)

print(capture)

pkt = capture[0]

<FileCapture /home/benjamin/Folders_Python/Cyber/data/input_pcaps/test.pcap>


In [5]:
dir(pkt)

['__bool__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_packet_string',
 'captured_length',
 'eth',
 'frame_info',
 'get_multiple_layers',
 'get_raw_packet',
 'highest_layer',
 'interface_captured',
 'ip',
 'layers',
 'length',
 'number',
 'pretty_print',
 'show',
 'sniff_time',
 'sniff_timestamp',
 'tcp',
 'tls',
 'transport_layer']

In [6]:
list_layers = pkt.layers

print(list_layers)

[<ETH Layer>, <IP Layer>, <TCP Layer>, <TLS Layer>]


In [7]:
for i,layer in enumerate(list_layers):
    fields_names = layer.field_names
    print(f'layer {i} = {fields_names}')
    for field in fields_names:
        print(f"layer {i} -- {field} = {layer.get(field)}")
    print(f'\n')

layer 0 = ['src', 'type', 'ig', 'lg', 'addr', 'dst']
layer 0 -- src = <EkMultiField src: f8:1e:df:e5:84:3a>
layer 0 -- type = 2048
layer 0 -- ig = False
layer 0 -- lg = False
layer 0 -- addr = <EkMultiField addr: f8:1e:df:e5:84:3a>
layer 0 -- dst = <EkMultiField dst: 00:1f:f3:3c:e1:13>


layer 1 = ['src', 'id', 'hdr', 'proto', 'host', 'ttl', 'dsfield', 'version', 'addr', 'dst', 'len', 'frag', 'flags', 'checksum']
layer 1 -- src = <EkMultiField src: 172.16.11.12>
layer 1 -- id = 56915
layer 1 -- hdr = <EkMultiField hdr>
layer 1 -- proto = 6
layer 1 -- host = ['172.16.11.12', '74.125.19.17']
layer 1 -- ttl = 64
layer 1 -- dsfield = <EkMultiField dsfield>
layer 1 -- version = 4
layer 1 -- addr = ['172.16.11.12', '74.125.19.17']
layer 1 -- dst = <EkMultiField dst: 74.125.19.17>
layer 1 -- len = 79
layer 1 -- frag = <EkMultiField frag>
layer 1 -- flags = <EkMultiField flags: 64>
layer 1 -- checksum = <EkMultiField checksum: 18347>


layer 2 = ['payload', 'window', 'hdr', 'dstport', 'analysi

In [8]:
capture = pyshark.FileCapture(
    input_file=PCAPFILE,
    # use_ek=True
)

In [9]:
for i, pkt in enumerate(capture):
    pkt.pretty_print()
    print(f'-- {i} ----------------------------------------------------------\n')

Layer ETH
:	Destination: 00:1f:f3:3c:e1:13
	Address: 00:1f:f3:3c:e1:13
	.... ..0. .... .... .... .... = LG bit: Globally unique address (factory default)
	.... ...0 .... .... .... .... = IG bit: Individual address (unicast)
	Source: f8:1e:df:e5:84:3a
	.... ..0. .... .... .... .... = LG bit: Globally unique address (factory default)
	.... ...0 .... .... .... .... = IG bit: Individual address (unicast)
	Type: IPv4 (0x0800)
	Address: f8:1e:df:e5:84:3a
Layer IP
:	0100 .... = Version: 4
	.... 0101 = Header Length: 20 bytes (5)
	Differentiated Services Field: 0x00 (DSCP: CS0, ECN: Not-ECT)
	0000 00.. = Differentiated Services Codepoint: Default (0)
	.... ..00 = Explicit Congestion Notification: Not ECN-Capable Transport (0)
	Total Length: 79
	Identification: 0xde53 (56915)
	Flags: 0x40, Don't fragment
	0... .... = Reserved bit: Not set
	.1.. .... = Don't fragment: Set
	..0. .... = More fragments: Not set
	Fragment Offset: 0
	Time to Live: 64
	Protocol: TCP (6)
	Header Checksum: 0x47ab [valid

In [10]:
class PyPacket():
    """Wrapper for PyShark packet. Creates a dictionnary with ETH, IP, TCP, UDP data if/when present.
    NB : use_ek = False.
    """
    
    MAP = {
        'ETH' : ['dst', 'src', 'type'], # 'dst_resolved', 'dst_oui', 'dst_oui_resolved', 'addr', 'addr_resolved', 'addr_oui', 'addr_oui_resolved', 'dst_lg', 'lg', 'dst_ig', 'ig', 'src_resolved', 'src_oui', 'src_oui_resolved', 'src_lg', 'src_ig', 
        'IP' : ['version', 'hdr_len', 'len', 'id', 'flags', 'ttl', 'proto',  'src', 'dst' ], 
        # 'dsfield', 'dsfield_dscp', 'dsfield_ecn',  'flags_rb', 'flags_df', 'flags_mf', 'frag_offset','checksum', 'checksum_status','addr', 'src_host', 'host', 'dst_host'
        'TCP' : ['srcport', 'dstport',  'stream', 'len', 'seq',  'ack',  'hdr_len', 'flags', 'time_relative', 'time_delta', 'payload'],
        # 'port','seq_raw', 'nxtseq','ack_raw','flags_res', 'flags_ns', 'flags_cwr', 'flags_ecn', 'flags_urg', 'flags_ack', 'flags_push', 'flags_reset', 'flags_syn', 'flags_fin', 'flags_str', 
        # 'window_size_value', 'window_size', 'window_size_scalefactor', 'checksum', 'checksum_status', 'urgent_pointer', 'options', 'options_nop', 'option_kind', 
        # 'options_timestamp', 'option_len', 'options_timestamp_tsval', 'options_timestamp_tsecr', 'analysis', 'analysis_bytes_in_flight', 'analysis_push_bytes_sent', 
        'UDP' : ['srcport', 'dstport', 'length',  'stream', 'time_relative', 'time_delta', 'payload']
        # 'port', 'checksum', 'checksum_status',
    }
    
    def __init__(self, packet) -> None:
        self._packet = packet
        self._data = None
        self._dataframe = None
        logger.debug('Instantiated PyPacket object')
        
    @property
    def data(self):
        if self._data is not None:
            return self._data
        else:
            self._data = {}
            for k, list_fields in self.MAP.items():
                if k in self._packet:
                    dict_fields = {}
                    for field in list_fields:
                        dict_fields[field] = self._packet[k].get(field)
                    self._data[k] = dict_fields
            self._data['TIMESTAMP'] = { 'ts' : self._packet.sniff_time }
            return self._data
        
    @data.setter
    def data(self, input):
        logger.critical("attempt to write data in a PyShark object")
        
    @property
    def dataframe(self):
        if self._dataframe is not None:
            return self._dataframe
        else:
            data = self.data
            dict_for_data = {}
            for layer, layer_dict in data.items():
                for field, value in layer_dict.items():
                    key = layer + '_' + field
                    dict_for_data[key] = value
            # dict_for_data['TIMESTAMP'] = data['TIMESTAMP']['ts']
            self._dataframe = pd.DataFrame(data=dict_for_data, index=[0])
            logger.debug("created a dataframe out of a PyPacket object")
            return self._dataframe

In [11]:
pkt = capture[0]

In [12]:
pkt.sniff_time

datetime.datetime(2010, 7, 7, 5, 16, 19, 466743)

In [13]:
paquet = PyPacket(pkt)

In [14]:
paquet.data

{'ETH': {'dst': '00:1f:f3:3c:e1:13',
  'src': 'f8:1e:df:e5:84:3a',
  'type': '0x00000800'},
 'IP': {'version': '4',
  'hdr_len': '20',
  'len': '79',
  'id': '0x0000de53',
  'flags': '0x00000040',
  'ttl': '64',
  'proto': '6',
  'src': '172.16.11.12',
  'dst': '74.125.19.17'},
 'TCP': {'srcport': '64565',
  'dstport': '443',
  'stream': '0',
  'len': '27',
  'seq': '1',
  'ack': '1',
  'hdr_len': '32',
  'flags': '0x00000018',
  'time_relative': '0.000000000',
  'time_delta': '0.000000000',
  'payload': '15:03:01:00:16:43:1a:88:1e:fa:7a:bc:22:6e:e6:32:7a:53:47:00:a7:5d:cc:64:ea:8e:92'},
 'TIMESTAMP': {'ts': datetime.datetime(2010, 7, 7, 5, 16, 19, 466743)}}

In [15]:
df = paquet.dataframe

In [16]:
df

Unnamed: 0,ETH_dst,ETH_src,ETH_type,IP_version,IP_hdr_len,IP_len,IP_id,IP_flags,IP_ttl,IP_proto,...,TCP_stream,TCP_len,TCP_seq,TCP_ack,TCP_hdr_len,TCP_flags,TCP_time_relative,TCP_time_delta,TCP_payload,TIMESTAMP_ts
0,00:1f:f3:3c:e1:13,f8:1e:df:e5:84:3a,0x00000800,4,20,79,0x0000de53,0x00000040,64,6,...,0,27,1,1,32,0x00000018,0.0,0.0,15:03:01:00:16:43:1a:88:1e:fa:7a:bc:22:6e:e6:3...,2010-07-07 05:16:19.466743


In [17]:
df_full = pd.DataFrame()
i=0

while True:
    try:
        pkt = capture.next()
        paquet = PyPacket(pkt)
        df = paquet.dataframe
        df_full = pd.concat([df_full, df], axis=0)
        i += 1
    except StopIteration as e:
        logger.info(f"reached end of capture after reading {i} packets")
        break
    
df_full = df_full.reset_index(drop=True)

In [18]:
df_full

Unnamed: 0,ETH_dst,ETH_src,ETH_type,IP_version,IP_hdr_len,IP_len,IP_id,IP_flags,IP_ttl,IP_proto,...,TCP_time_delta,TCP_payload,TIMESTAMP_ts,UDP_srcport,UDP_dstport,UDP_length,UDP_stream,UDP_time_relative,UDP_time_delta,UDP_payload
0,00:1f:f3:3c:e1:13,f8:1e:df:e5:84:3a,0x00000800,4,20,52,0x00004c05,0x00000040,64,6,...,0.000722000,,2010-07-07 05:16:19.467465,,,,,,,
1,f8:1e:df:e5:84:3a,00:1f:f3:3c:e1:13,0x00000800,4,20,52,0x0000ab4f,0x00000000,54,6,...,0.020855000,,2010-07-07 05:16:19.488320,,,,,,,
2,00:1f:f3:3c:e1:13,f8:1e:df:e5:84:3a,0x00000800,4,20,52,0x0000fc17,0x00000040,64,6,...,0.000049000,,2010-07-07 05:16:19.488369,,,,,,,
3,f8:1e:df:e5:84:3a,00:1f:f3:3c:e1:13,0x00000800,4,20,52,0x0000ab50,0x00000000,54,6,...,0.000958000,,2010-07-07 05:16:19.489327,,,,,,,
4,00:1f:f3:3c:e1:13,f8:1e:df:e5:84:3a,0x00000800,4,20,52,0x0000170d,0x00000040,64,6,...,0.000027000,,2010-07-07 05:16:19.489354,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,00:1f:f3:3c:e1:13,f8:1e:df:e5:84:3a,0x00000800,4,20,64,0x00002235,0x00000000,255,17,...,,,2010-07-07 05:16:22.196084,51145,53,44,11,0.000000000,0.000000000,d6:23:01:00:00:01:00:00:00:00:00:00:05:67:61:6...
136,f8:1e:df:e5:84:3a,00:1f:f3:3c:e1:13,0x00000800,4,20,80,0x0000c195,0x00000000,64,17,...,,,2010-07-07 05:16:22.202223,53,56758,60,10,0.016440000,0.016440000,e1:b6:81:80:00:01:00:01:00:00:00:00:05:67:61:6...
137,f8:1e:df:e5:84:3a,00:1f:f3:3c:e1:13,0x00000800,4,20,64,0x0000c196,0x00000000,64,17,...,,,2010-07-07 05:16:22.214655,53,51145,44,11,0.018571000,0.018571000,d6:23:81:80:00:01:00:00:00:00:00:00:05:67:61:6...
138,f8:1e:df:e5:84:3a,00:1f:f3:3c:e1:13,0x00000800,4,20,83,0x0000c197,0x00000000,64,17,...,,,2010-07-07 05:16:22.246264,53,57360,63,9,0.070082000,0.070082000,02:05:81:80:00:01:00:01:00:00:00:00:08:66:65:6...
