In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from scipy.stats import entropy 
import os

In [2]:
tshark_directory = os.path.join('.', 'traffictracer')
statistic_directory = ['WLAN_statistics', 'Meta_statistics'] 
ori_directory = ['WLAN', 'Meta'] 
port_directory = 'tshark_port' 
conn_directory = 'conn_in_out' 
evaluation_directory = 'evaluation'
data_date = '24-11-07'

meta_prefix = 'Meta-' 
wlan_prefix = 'WLAN-' 
conn_prefix = 'conn-in-out-'

In [3]:
def getTCPStreamNumberSet(traffic_set: pd.DataFrame): 
    """ 
    Input the conn_sni.csv files and

    Return the order set: ([M Stream ID, W Stream ID]) of traffic_set. 
    """
    
    return set(zip(traffic_set['M Stream ID'], traffic_set['W Stream ID'])) 
    

In [None]:
def getFlowReversals(packets: pd.DataFrame, fr_dict: dict): 
    if not packets.empty: 
        ori_ip = packets.iloc[0]['Source'] 
        src_ip_temp = packets.iloc[0]['Source'] 
        direction = 0 
        is_empty = True # 上一个反向的包空不空，其实仅用于初始化 
        for packet_no in range(packets.shape[0]): 
            current_packet = packets.iloc[packet_no]
            if current_packet['TCP Segment Len'] > 0: 
                if current_packet['Source'] != src_ip_temp: 
                    if not is_empty: 
                        fr_dict['Time'].append(current_packet['Time']) 
                        fr_dict['StreamID'].append(current_packet['TCP Stream index']) 
                        if current_packet['Source'] == ori_ip: # 翻转，但是包从B->A，即A非空包
                            direction = 0
                            fr_dict['Direction'].append(direction) 
                        else: # 产生翻转，且是A->B，即B非空包 
                            direction = 1 
                            fr_dict['Direction'].append(direction)
                        src_ip_temp = current_packet['Source'] 
                        is_empty = False 
                    else: 
                        is_empty = False 
                else: 
                    is_empty = False 

In [11]:
def recordPacketInfo(fr_dict: dict, packet: pd.Series, ori_ip, direction: int): 
    fr_dict['Time'].append(packet['Time']) 
    fr_dict['StreamID'].append(packet['TCP Stream index']) 
    if packet['Source'] == ori_ip: # 从alice发出的包
        direction = 0
        fr_dict['Direction'].append(direction) 
    else: # 从bob发出的包
        direction = 1 
        fr_dict['Direction'].append(direction) 

def getFlowReversalsReal(packets: pd.DataFrame, fr_dict: dict): 
    # 方向定义: 针对当前捕获到的包。
    # 0: alice -> bob; 1: bob -> alice
    if not packets.empty: 
        ori_ip = packets.iloc[0]['Source'] 
        src_ip_temp = packets.iloc[0]['Source'] 
        direction = 0 
        is_First = True # 上一个反向的包空不空，其实仅用于初始化 
        for packet_no in range(packets.shape[0]): 
            current_packet = packets.iloc[packet_no]
            if current_packet['TCP Segment Len'] > 0: # 非空包 
                if is_First: # 第一个包 
                    recordPacketInfo(fr_dict, current_packet, ori_ip, direction)
                    is_First = False
                if current_packet['Source'] != src_ip_temp: # 与上次IP不同 
                    recordPacketInfo(fr_dict, current_packet, ori_ip, direction)
                src_ip_temp = current_packet['Source'] 

In [10]:
def getBAFR(b_set: pd.DataFrame, a_set: pd.DataFrame, streams_pairs: set): 
    # fr_dict_temp = { # template of fr dictionary
    #     'Time': [], 
    #     'Direction': [], # A->B: 0, B-A: 1
    #     'StreamID': []
    # }
    # bp_fr_dict = fr_dict_temp.copy() 
    # ap_fr_dict = fr_dict_temp.copy() 
    bp_fr_dict = { # template of fr dictionary
        'Time': [], 
        'Direction': [], # A->B: 0, B-A: 1
        'StreamID': []
    }
    ap_fr_dict = { # template of fr dictionary
        'Time': [], 
        'Direction': [], # A->B: 0, B-A: 1
        'StreamID': []
    } 
    for b_id, a_id in streams_pairs: 
        bp_packets = b_set[b_set['TCP Stream index'] == b_id] 
        ap_packets = a_set[a_set['TCP Stream index'] == a_id] 
        # getFlowReversals(bp_packets, bp_fr_dict) 
        getFlowReversalsReal(bp_packets, bp_fr_dict) 
        # getFlowReversals(ap_packets, ap_fr_dict) 
        getFlowReversalsReal(ap_packets, ap_fr_dict) 
    return bp_fr_dict, ap_fr_dict

In [12]:
for conn_info in os.listdir(os.path.join(tshark_directory, evaluation_directory, 'tt_stream_distribution', 'conn_sni')): 
    if conn_info.startswith(conn_prefix + data_date): 
        df_conn = pd.read_csv(os.path.join(tshark_directory, evaluation_directory, 'tt_stream_distribution', 'conn_sni', conn_info)) 
        time = conn_info[12:-4] 
        df_meta = pd.read_csv(os.path.join(tshark_directory, ori_directory[1], meta_prefix + time + '.csv')) 
        df_wlan = pd.read_csv(os.path.join(tshark_directory, ori_directory[0], wlan_prefix + time + '.csv')) 
        stream_set = getTCPStreamNumberSet(df_conn) 
        print(conn_info, stream_set) 
        bp_dict, ap_dict = getBAFR(df_meta, df_wlan, stream_set) 
        bp_csv = pd.DataFrame(bp_dict) 
        ap_csv = pd.DataFrame(ap_dict) 
        bp_csv.to_csv(os.path.join(tshark_directory, evaluation_directory, 'tt_fr', 'time_distribution', 'Meta', meta_prefix + time + '.csv')) 
        ap_csv.to_csv(os.path.join(tshark_directory, evaluation_directory, 'tt_fr', 'time_distribution', 'WLAN', wlan_prefix + time + '.csv')) 

conn-in-out-24-11-07--10-19-16.csv {(42, 39), (43, 40), (78, 68), (118, 116), (66, 57), (79, 69), (67, 58), (61, 54), (44, 41), (106, 104), (58, 51), (24, 22), (41, 38), (77, 67), (52, 47), (156, 153), (46, 43), (157, 154), (53, 48), (47, 44), (59, 52)}
conn-in-out-24-11-07--10-25-29.csv {(52, 46), (86, 66), (76, 57), (85, 65), (63, 54), (164, 136), (69, 58), (59, 51), (72, 59), (48, 42), (50, 44), (93, 71), (56, 50), (47, 41), (49, 43), (53, 47), (64, 53), (24, 24)}
conn-in-out-24-11-07--10-32-20.csv {(86, 72), (26, 24), (46, 42), (75, 63), (76, 64), (71, 59), (42, 38), (41, 36), (43, 39), (77, 65), (52, 48), (40, 37), (83, 69), (53, 49), (84, 70), (142, 133), (80, 66), (49, 45), (85, 71), (63, 53), (45, 41), (81, 67), (64, 54)}
conn-in-out-24-11-07--11-08-34.csv {(51, 38), (60, 47), (69, 56), (78, 65), (86, 72), (122, 92), (48, 36), (74, 61), (47, 37), (79, 66), (56, 43), (99, 84), (52, 39), (113, 89), (61, 48), (70, 57), (57, 44), (66, 53), (75, 62), (100, 85), (76, 63), (111, 88), 

  df_meta = pd.read_csv(os.path.join(tshark_directory, ori_directory[1], meta_prefix + time + '.csv'))


conn-in-out-24-11-07--15-46-14.csv {(44, 36), (61, 52), (60, 50), (69, 59), (77, 66), (86, 75), (38, 32), (31, 26), (49, 41), (57, 48), (34, 28), (65, 55), (91, 80), (45, 37), (17, 18), (54, 46), (62, 53), (32, 27), (50, 42), (58, 49), (59, 51), (35, 29), (51, 43), (70, 60), (78, 67), (87, 76), (66, 56), (83, 72), (36, 30), (93, 82), (71, 61), (47, 39), (79, 68), (72, 62), (88, 77), (42, 34), (67, 57), (43, 35), (52, 44), (84, 73), (29, 24), (80, 69), (48, 40), (74, 65), (23, 20), (89, 78), (8, 7), (68, 58), (2, 3), (11, 12), (30, 25), (37, 31), (53, 45), (85, 74), (73, 63), (90, 79), (16, 17)}
conn-in-out-24-11-07--15-52-05.csv {(65, 53), (75, 61), (48, 40), (60, 50), (78, 62), (47, 39), (24, 26), (51, 43), (49, 41), (155, 131), (46, 38), (52, 44), (59, 49)}
conn-in-out-24-11-07--15-58-43.csv {(42, 35), (31, 27), (36, 33), (89, 71), (35, 32), (81, 62), (86, 68), (53, 42), (64, 51), (76, 61), (32, 29), (30, 28), (80, 63), (61, 50), (33, 30), (90, 72)}
conn-in-out-24-11-07--16-04-47.csv

  df_meta = pd.read_csv(os.path.join(tshark_directory, ori_directory[1], meta_prefix + time + '.csv'))


conn-in-out-24-11-07--16-55-07.csv {(127, 116), (89, 77), (19, 18), (85, 73), (63, 52), (128, 117), (81, 69), (59, 48), (124, 113), (86, 74), (65, 54), (2, 4), (87, 75), (49, 39), (83, 71), (131, 120), (93, 81), (54, 44), (79, 67), (55, 45), (24, 21), (84, 72), (35, 30), (94, 82), (56, 46), (90, 78), (44, 35), (133, 122), (20, 19), (39, 32), (57, 47), (129, 118), (91, 79), (9, 7), (68, 56), (33, 28), (134, 123), (40, 33), (97, 85), (130, 119), (92, 80), (88, 76), (98, 86), (118, 107), (104, 93), (72, 60), (105, 94), (99, 87), (51, 40), (38, 31), (95, 83), (45, 36), (96, 84), (73, 61), (69, 57), (112, 101), (34, 29), (53, 42), (52, 43), (70, 58), (80, 68), (75, 63), (123, 112), (71, 59), (47, 37), (60, 49), (77, 65), (100, 88), (82, 70), (7, 6), (50, 41), (61, 50), (78, 66), (126, 115), (74, 62), (62, 51), (117, 106)}
conn-in-out-24-11-07--17-01-16.csv {(35, 30), (43, 37), (51, 44), (58, 50), (70, 61), (46, 39), (50, 43), (20, 20), (31, 26), (40, 35), (36, 31), (11, 11), (71, 62), (47, 

In [5]:
test_df = pd.read_csv(os.path.join(tshark_directory, evaluation_directory, 'tt_stream_distribution/conn_sni/conn-in-out-24-11-07--10-19-16.csv')) 
test_meta = pd.read_csv(os.path.join(tshark_directory, ori_directory[1], 'Meta-24-11-07--10-19-16.csv')) 
test_wlan = pd.read_csv(os.path.join(tshark_directory, ori_directory[0], 'WLAN-24-11-07--10-19-16.csv')) 
test_set = getTCPStreamNumberSet(test_df) 
print(test_set)
test_bp_dict, test_ap_dict = getBAFR(test_meta, test_wlan, test_set)

{(42, 39), (43, 40), (78, 68), (118, 116), (66, 57), (79, 69), (67, 58), (61, 54), (44, 41), (106, 104), (58, 51), (24, 22), (41, 38), (77, 67), (52, 47), (156, 153), (46, 43), (157, 154), (53, 48), (47, 44), (59, 52)}
