In [1]:
import os 
from binascii import hexlify 
from scapy.all import rdpcap
import yaml
import numpy as np
from utils.dataframe_tools import generate_vocabulary 



In [2]:
# Load the per-field embedding settings. The file is opened as UTF-8
# explicitly so the result does not depend on the platform's default
# text encoding.
config_path = os.path.join('.', 'utils', 'f2v.yaml')
with open(config_path, 'r', encoding='utf-8') as f:
    yaml_config = yaml.safe_load(f)['field_embedding_config']
# Names of every configured field, in the order they appear in the mapping.
fields = list(yaml_config.keys())

In [3]:
# Keep only the fields whose config entry declares type 'categorical'.
categorical_fields = [
    name
    for name in fields
    if yaml_config[name]['type'] == 'categorical'
]

In [3]:
# Address fields are listed explicitly; every other configured field is
# collected separately, preserving the order of `fields`.
addr_fields = ['eth.dst', 'eth.src', 'ip.src', 'ip.dst']
fields_except_addr = list(filter(lambda name: name not in addr_fields, fields))

In [5]:
# Location of the merged TLS test CSV, relative to the working directory.
csv_path = os.path.join(os.curdir, 'Data', 'Test', 'merge_tls_test_01.csv')

In [6]:
# Build the vocabulary for the categorical fields from the CSV. The third
# argument is the YAML path that the recorded output reports saving to.
vocab_reflect = generate_vocabulary(
    csv_path,
    categorical_fields,
    os.path.join('.', 'Data', 'Test', 'categorical_vocabs.yaml'),
)

Reading data from: .\Data\Test\merge_tls_test_01.csv


Processing fields...: 100%|██████████| 29/29 [00:00<00:00, 7249.66it/s]



Saving master vocabulary to: .\Data\Test\categorical_vocabs.yaml
Vocabulary generation complete!


In [None]:
# Show the value returned by generate_vocabulary.
print(vocab_reflect)

In [3]:
# Read the TLS test capture with scapy and keep its first packet for the
# parsing experiments in the cells that follow.
path_tls_pcap = os.path.join('./', 'Data', 'Test', 'tls_test_01.pcapng') 
pcap_test = rdpcap(path_tls_pcap) 
packet_0 = pcap_test[0] 
# Printing the packet shows scapy's one-line summary of its layers.
print(packet_0)

Ether / IP / TCP 192.168.5.3:49767 > 40.99.10.66:https FA


In [8]:
def load_from_yaml(yaml_file):
    """Load the protocol definitions from a YAML file.

    The file is opened as UTF-8 explicitly so that non-ASCII content
    (e.g. comments) is read the same way on every platform instead of
    depending on the locale's default encoding.

    Args:
        yaml_file: Path of the YAML file to read.

    Returns:
        The value stored under the top-level ``protocols`` key.

    Raises:
        KeyError: If the document has no ``protocols`` key.
    """
    with open(yaml_file, 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)['protocols']
    
def _as_mask(mask):
    """Return a bitmask as an int; string masks are read as hexadecimal."""
    return int(mask, 16) if isinstance(mask, str) else mask


def _decode_field(field_data, field):
    """Convert the raw bytes of one field according to its declared ``type``."""
    field_type = field['type']
    if field_type == 'hex':
        return hexlify(field_data).decode('utf-8')
    if field_type == 'binary':
        number = int.from_bytes(field_data, 'big')
        if 'bitmask' in field:
            number &= _as_mask(field['bitmask'])
            if 'shift' in field:
                # Shifted values are returned without left padding.
                return bin(number >> field['shift'])[2:]
        # bin() drops the leading zeros (and [2:] the '0b' prefix), so pad
        # the digits back to the full bit width of the field.
        return bin(number)[2:].zfill(len(field_data) * 8)
    # Any other type is passed through as raw bytes.
    return field_data


def _layer_length(raw_data, base_offset, proto_def, fields):
    """Return how many bytes to advance past the layer starting at ``base_offset``."""
    # Extent of the declared fields; dynamic-length fields only contribute
    # their starting offset. This is the fallback when the protocol
    # definition does not describe its header length.
    declared_extent = max(
        (
            field['offset'] + (0 if field['length'] == 'dynamic' else int(field['length']))
            for field in fields
        ),
        default=0,
    )

    spec = proto_def.get('header_length')
    if spec is None:
        return declared_extent
    if not isinstance(spec, dict):
        # Fixed-size header given directly in bytes.
        return int(spec)

    # Variable-size header: read the length from the packet itself.
    start = base_offset + spec['offset']
    size = int(spec.get('length', 1))
    if start + size > len(raw_data):
        return declared_extent
    number = int.from_bytes(raw_data[start:start + size], 'big')
    if 'bitmask' in spec:
        number &= _as_mask(spec['bitmask'])
    number >>= spec.get('shift', 0)
    # A header can never be shorter than the fields declared inside it.
    return max(number * spec.get('multiplier', 1), declared_extent)


def parse_packet(packet, protocols):
    """Parse a packet layer by layer using declarative protocol definitions.

    Parsing starts at the ``'ETH'`` entry of ``protocols`` and walks the
    layers until no next protocol is known, the protocol is not defined,
    or the packet bytes are exhausted.

    Each protocol definition is a mapping with:

    * ``fields``: list of field mappings with ``name``, ``offset`` (bytes
      from the start of the layer), ``length`` (bytes, or ``'dynamic'`` for
      "everything up to the end of the packet"), ``type`` (``'hex'``,
      ``'binary'``, anything else yields raw bytes) and, for binary fields,
      optional ``bitmask`` (int or hex string) and ``shift``.
    * ``next_layer_map`` (optional): maps the decoded value of the selector
      field to the next protocol name. The selector field is named by the
      optional ``next_layer_field`` key and defaults to ``'Protocol'``.
    * ``next_layer`` (optional): fixed next protocol, used when there is no
      ``next_layer_map``.
    * ``header_length`` (optional): how far to advance to reach the next
      layer. Either a number of bytes, or a mapping with ``offset``,
      ``length`` (default 1), optional ``bitmask``/``shift`` and
      ``multiplier`` (default 1) describing a length value stored in the
      packet. Without this key the extent of the declared fields is used,
      which under-counts any header whose trailing bytes are not declared
      as fields.

    Args:
        packet: Any object for which ``bytes(packet)`` yields the raw frame.
        protocols: Mapping of protocol name to protocol definition.

    Returns:
        dict mapping each parsed protocol name to a dict of
        ``field name -> decoded value``. Fields that do not fit inside the
        packet are omitted. A protocol that occurs more than once keeps
        only its last occurrence.
    """
    raw_data = bytes(packet)  # raw bytes of the packet
    result = {}
    current_offset = 0
    current_proto = 'ETH'  # parsing starts at the Ethernet layer
    visited = set()

    while current_proto and current_offset < len(raw_data):
        if current_proto not in protocols:
            break
        # Stop if a zero-length layer sends us back to a state already
        # handled; otherwise the walk would never terminate.
        state = (current_proto, current_offset)
        if state in visited:
            break
        visited.add(state)

        proto_def = protocols[current_proto]
        layer_fields = proto_def['fields']
        layer = result[current_proto] = {}

        # Decode the fields of the current layer.
        for field in layer_fields:
            offset = current_offset + field['offset']
            if field['length'] == 'dynamic':
                length = len(raw_data) - offset
            else:
                length = int(field['length'])

            # Skip fields that start or end beyond the captured bytes.
            if length < 0 or offset + length > len(raw_data):
                continue
            layer[field['name']] = _decode_field(raw_data[offset:offset + length], field)

        # Advance past the current layer.
        current_offset += _layer_length(raw_data, current_offset, proto_def, layer_fields)

        # Decide which protocol comes next.
        if 'next_layer_map' in proto_def:
            selector = proto_def.get('next_layer_field', 'Protocol')
            current_proto = proto_def['next_layer_map'].get(layer.get(selector))
        elif 'next_layer' in proto_def:
            current_proto = proto_def['next_layer']
        else:
            current_proto = None

    return result



In [9]:
# Load the protocol layout rules and parse the first packet of the capture.
# NOTE(review): the recorded output of this cell shows TCP Source_Port '0000'
# and Destination_Port 'c0a8', which does not match the "49767 > https"
# summary printed for the same packet earlier; the TCP layer appears to be
# read at the wrong offset. Verify how the IP header length is handled.
protocol_rules =  load_from_yaml('./utils/fields.yaml') 
parsed_data = parse_packet(packet_0, protocol_rules) 
print(parsed_data)

{'ETH': {'Destination_MAC': 'f42d06784ee9', 'Source_MAC': '6c2f804a964c', 'EtherType': '0800'}, 'IP': {'Version': '100', 'IHL': '45', 'Total_Length': '0034', 'Protocol': '06'}, 'TCP': {'Source_Port': '0000', 'Destination_Port': 'c0a8', 'Flags': '10111011'}, 'TLS': {'Content_Type': 'b2', 'Version': '915f', 'Length': 'c4a8'}}
